% \iffalse meta-comment % %% File: l3token.dtx % % Copyright (C) 2005-2024 The LaTeX Project % % It may be distributed and/or modified under the conditions of the % LaTeX Project Public License (LPPL), either version 1.3c of this % license or (at your option) any later version. The latest version % of this license is in the file % % https://www.latex-project.org/lppl.txt % % This file is part of the "l3kernel bundle" (The Work in LPPL) % and all files in that bundle must be distributed together. % % ----------------------------------------------------------------------- % % The development version of the bundle can be found at % % https://github.com/latex3/latex3 % % for those people who are interested. % %<*driver> \documentclass[full,kernel]{l3doc} \begin{document} \DocInput{\jobname.dtx} \end{document} % % \fi % % \title{^^A % The \pkg{l3token} module\\ Token manipulation^^A % } % % \author{^^A % The \LaTeX{} Project\thanks % {^^A % E-mail: % \href{mailto:latex-team@latex-project.org} % {latex-team@latex-project.org}^^A % }^^A % } % % \date{Released 2024-03-14} % % \maketitle % % \begin{documentation} % % This module deals with tokens. Now this is perhaps not the most % precise description so let's try with a better description: When % programming in \TeX{}, it is often desirable to know just what a % certain token is: is it a control sequence or something % else. Similarly one often needs to know if a control sequence is % expandable or not, a macro or a primitive, how many arguments it % takes etc. Another thing of great importance (especially when it % comes to document commands) is looking ahead in the token stream to % see if a certain character is present and maybe even remove it or % disregard other tokens while scanning. This module provides % functions for both and as such has two primary function % categories: |\token_| for anything that deals with tokens and % |\peek_| for looking ahead in the token stream. 
% % Most functions we describe here can be used on control sequences, % as those are tokens as well. % % It is important to distinguish two aspects of a token: its % \enquote{shape} (for lack of a better word), which affects the % matching of delimited arguments and the comparison of token lists % containing this token, and its \enquote{meaning}, which affects % whether the token expands or what operation it performs. One can have % tokens of different shapes with the same meaning, but not the % converse. % % For instance, \cs{if:w}, \cs{if_charcode:w}, and \cs[no-index]{tex_if:D} are % three names for the same internal operation of \TeX{}, namely the % primitive testing the next two characters for equality of their % character code. They have the same meaning hence behave identically % in many situations. However, \TeX{} distinguishes them when searching % for a delimited argument. Namely, the example function % |\show_until_if:w| defined below takes everything until \cs{if:w} % as an argument, despite the presence of other copies of \cs{if:w} % under different names. % \begin{verbatim} % \cs_new:Npn \show_until_if:w #1 \if:w { \tl_show:n {#1} } % \show_until_if:w \tex_if:D \if_charcode:w \if:w % \end{verbatim} % A list of all possible shapes and a list of all possible meanings are % given in section~\ref{sec:l3token:all-tokens}. % % \section{Creating character tokens} % % \begin{function}[updated = 2015-11-12] % { % \char_set_active_eq:NN, \char_set_active_eq:Nc, % \char_gset_active_eq:NN, \char_gset_active_eq:Nc % } % \begin{syntax} % \cs{char_set_active_eq:NN} \meta{char} \meta{function} % \end{syntax} % Sets the behaviour of the \meta{char} in situations where it is % active (category code $13$) to be equivalent to that of the % definition of the \meta{function} at the time \cs{char_set_active_eq:NN} % is used. The category code of the \meta{char} is % \emph{unchanged} by this process. The \meta{function} may itself % be an active character. 
% \end{function} % % \begin{function}[added = 2015-11-12] % { % \char_set_active_eq:nN, \char_set_active_eq:nc, % \char_gset_active_eq:nN, \char_gset_active_eq:nc % } % \begin{syntax} % \cs{char_set_active_eq:nN} \Arg{integer expression} \meta{function} % \end{syntax} % Sets the behaviour of the \meta{char} which has character % code as given by the \meta{integer expression} in situations % where it is active (category code $13$) to be equivalent to that of the % \meta{function} at the time \cs{char_set_active_eq:nN} % is used. The category code of the \meta{char} is % \emph{unchanged} by this process. The \meta{function} may itself % be an active character. % \end{function} % % \begin{function}[EXP, added = 2015-09-09, updated = 2019-01-16] % {\char_generate:nn} % \begin{syntax} % \cs{char_generate:nn} \Arg{charcode} \Arg{catcode} % \end{syntax} % Generates a character token of the given \meta{charcode} and \meta{catcode} % (both of which may be integer expressions). The \meta{catcode} may be % one of % \begin{itemize} % \item $1$ (begin group) % \item $2$ (end group) % \item $3$ (math toggle) % \item $4$ (alignment) % \item $6$ (parameter) % \item $7$ (math superscript) % \item $8$ (math subscript) % \item $10$ (space) % \item $11$ (letter) % \item $12$ (other) % \item $13$ (active) % \end{itemize} % and other values raise an error. The \meta{charcode} may be any one valid % for the engine in use, except that for \meta{catcode} $10$, \meta{charcode} % $0$ is not allowed. % Active characters cannot be generated in older versions of \XeTeX{}. % Another way to build token lists with unusual category codes is % \cs{regex_replace:nnN} |{.*}| \Arg{replacement} \meta{tl~var}. % \begin{texnote} % Exactly two expansions are needed to produce the character. % \end{texnote} % \end{function} % % \begin{variable}[added = 2017-08-07]{\c_catcode_active_space_tl} % Token list containing one character with category code $13$, % (\enquote{active}), and character code $32$ (space). 
% \end{variable} % % \begin{variable}[added = 2011-09-05]{\c_catcode_other_space_tl} % Token list containing one character with category code $12$, % (\enquote{other}), and character code $32$ (space). % \end{variable} % % \section{Manipulating and interrogating character tokens} % % \begin{function}[updated = 2015-11-11] % { % \char_set_catcode_escape:N , % \char_set_catcode_group_begin:N , % \char_set_catcode_group_end:N , % \char_set_catcode_math_toggle:N , % \char_set_catcode_alignment:N , % \char_set_catcode_end_line:N , % \char_set_catcode_parameter:N , % \char_set_catcode_math_superscript:N , % \char_set_catcode_math_subscript:N , % \char_set_catcode_ignore:N , % \char_set_catcode_space:N , % \char_set_catcode_letter:N , % \char_set_catcode_other:N , % \char_set_catcode_active:N , % \char_set_catcode_comment:N , % \char_set_catcode_invalid:N % } % \begin{syntax} % \cs{char_set_catcode_letter:N} \meta{character} % \end{syntax} % Sets the category code of the \meta{character} to that indicated in % the function name. Depending on the current category code of the % \meta{character} the escape token may also be needed: % \begin{verbatim} % \char_set_catcode_other:N \% % \end{verbatim} % The assignment is local.
% \end{function} % % \begin{function}[updated = 2015-11-11] % { % \char_set_catcode_escape:n , % \char_set_catcode_group_begin:n , % \char_set_catcode_group_end:n , % \char_set_catcode_math_toggle:n , % \char_set_catcode_alignment:n , % \char_set_catcode_end_line:n , % \char_set_catcode_parameter:n , % \char_set_catcode_math_superscript:n , % \char_set_catcode_math_subscript:n , % \char_set_catcode_ignore:n , % \char_set_catcode_space:n , % \char_set_catcode_letter:n , % \char_set_catcode_other:n , % \char_set_catcode_active:n , % \char_set_catcode_comment:n , % \char_set_catcode_invalid:n % } % \begin{syntax} % \cs{char_set_catcode_letter:n} \Arg{integer expression} % \end{syntax} % Sets the category code of the \meta{character} which has character % code as given by the \meta{integer expression}. This version can be % used to set up characters which cannot otherwise be given % (\emph{cf.}~the \texttt{N}-type variants). The assignment is local. % \end{function} % % \begin{function}[updated = 2015-11-11]{\char_set_catcode:nn} % \begin{syntax} % \cs{char_set_catcode:nn} \Arg{int expr_1} \Arg{int expr_2} % \end{syntax} % These functions set the category code of the \meta{character} which % has character code as given by the \meta{integer expression}. % The first \meta{integer expression} % is the character code and the second is the category code to apply. % The setting applies within the current \TeX{} group. In general, the % symbolic functions \cs[no-index]{char_set_catcode_\meta{type}} should be preferred, % but there are cases where these lower-level functions may be useful. % \end{function} % % \begin{function}[EXP]{\char_value_catcode:n} % \begin{syntax} % \cs{char_value_catcode:n} \Arg{integer expression} % \end{syntax} % Expands to the current category code of the \meta{character} with % character code given by the % \meta{integer expression}. 
% \end{function} % % \begin{function}{\char_show_value_catcode:n} % \begin{syntax} % \cs{char_show_value_catcode:n} \Arg{integer expression} % \end{syntax} % Displays the current category code of the \meta{character} with % character code given by the \meta{integer expression} on the % terminal. % \end{function} % % \begin{function}[updated = 2015-08-06]{\char_set_lccode:nn} % \begin{syntax} % \cs{char_set_lccode:nn} \Arg{int expr_1} \Arg{int expr_2} % \end{syntax} % Sets up the behaviour of the \meta{character} when % found inside \cs{text_lowercase:n}, such that \meta{character_1} % will be converted into \meta{character_2}. The two \meta{characters} % may be specified using an \meta{integer expression} for the character code % concerned. This may include the \TeX{} |`|\meta{character} % method for converting a single character into its character % code: % \begin{verbatim} % \char_set_lccode:nn { `\A } { `\a } % Standard behaviour % \char_set_lccode:nn { `\A } { `\A + 32 } % \char_set_lccode:nn { 50 } { 60 } % \end{verbatim} % The setting applies within the current \TeX{} group. % \end{function} % % \begin{function}[EXP]{\char_value_lccode:n} % \begin{syntax} % \cs{char_value_lccode:n} \Arg{integer expression} % \end{syntax} % Expands to the current lower case code of the \meta{character} with % character code given by the % \meta{integer expression}. % \end{function} % % \begin{function}{\char_show_value_lccode:n} % \begin{syntax} % \cs{char_show_value_lccode:n} \Arg{integer expression} % \end{syntax} % Displays the current lower case code of the \meta{character} with % character code given by the \meta{integer expression} on the % terminal. 
% \end{function} % % \begin{function}[updated = 2015-08-06]{\char_set_uccode:nn} % \begin{syntax} % \cs{char_set_uccode:nn} \Arg{int expr_1} \Arg{int expr_2} % \end{syntax} % Sets up the behaviour of the \meta{character} when % found inside \cs{text_uppercase:n}, such that \meta{character_1} % will be converted into \meta{character_2}. The two \meta{characters} % may be specified using an \meta{integer expression} for the character code % concerned. This may include the \TeX{} |`|\meta{character} % method for converting a single character into its character % code: % \begin{verbatim} % \char_set_uccode:nn { `\a } { `\A } % Standard behaviour % \char_set_uccode:nn { `\A } { `\A - 32 } % \char_set_uccode:nn { 60 } { 50 } % \end{verbatim} % The setting applies within the current \TeX{} group. % \end{function} % % \begin{function}[EXP]{\char_value_uccode:n} % \begin{syntax} % \cs{char_value_uccode:n} \Arg{integer expression} % \end{syntax} % Expands to the current upper case code of the \meta{character} with % character code given by the % \meta{integer expression}. % \end{function} % % \begin{function}{\char_show_value_uccode:n} % \begin{syntax} % \cs{char_show_value_uccode:n} \Arg{integer expression} % \end{syntax} % Displays the current upper case code of the \meta{character} with % character code given by the \meta{integer expression} on the % terminal. % \end{function} % % \begin{function}[updated = 2015-08-06]{\char_set_mathcode:nn} % \begin{syntax} % \cs{char_set_mathcode:nn} \Arg{int expr_1} \Arg{int expr_2} % \end{syntax} % This function sets up the math code of \meta{character}. % The \meta{character} is specified as % an \meta{integer expression} which will be used as the character % code of the relevant character. The setting applies within the % current \TeX{} group. 
% \end{function} % % \begin{function}[EXP]{\char_value_mathcode:n} % \begin{syntax} % \cs{char_value_mathcode:n} \Arg{integer expression} % \end{syntax} % Expands to the current math code of the \meta{character} with % character code given by the % \meta{integer expression}. % \end{function} % % \begin{function}{\char_show_value_mathcode:n} % \begin{syntax} % \cs{char_show_value_mathcode:n} \Arg{integer expression} % \end{syntax} % Displays the current math code of the \meta{character} with % character code given by the \meta{integer expression} on the % terminal. % \end{function} % % \begin{function}[updated = 2015-08-06]{\char_set_sfcode:nn} % \begin{syntax} % \cs{char_set_sfcode:nn} \Arg{int expr_1} \Arg{int expr_2} % \end{syntax} % This function sets up the space factor for the \meta{character}. % The \meta{character} is specified as % an \meta{integer expression} which will be used as the character % code of the relevant character. The setting applies within the % current \TeX{} group. % \end{function} % % \begin{function}[EXP]{\char_value_sfcode:n} % \begin{syntax} % \cs{char_value_sfcode:n} \Arg{integer expression} % \end{syntax} % Expands to the current space factor for the \meta{character} with % character code given by the % \meta{integer expression}. % \end{function} % % \begin{function}{\char_show_value_sfcode:n} % \begin{syntax} % \cs{char_show_value_sfcode:n} \Arg{integer expression} % \end{syntax} % Displays the current space factor for the \meta{character} with % character code given by the \meta{integer expression} on the % terminal. % \end{function} % % \begin{variable}[added = 2012-01-23, updated = 2015-11-11]{\l_char_active_seq} % Used to track which tokens may require special handling at the document % level as they are (or have been at some point) % of category \meta{active} (catcode~$13$). Each entry in % the sequence consists of a single escaped token, for example |\~|. 
% Active tokens should be added to the sequence when they are defined for % general document use. % \end{variable} % % \begin{variable}[added = 2012-01-23, updated = 2015-11-11]{\l_char_special_seq} % Used to track which tokens will require special handling when working with % verbatim-like material at the document level as they are not of categories % \meta{letter} (catcode~$11$) or \meta{other} (catcode~$12$). Each entry in % the sequence consists of a single escaped token, for example |\\| for the % backslash or |\{| for an opening brace. ^^A \} % Escaped tokens should be added to the sequence when they are defined for % general document use. % \end{variable} % % \section{Generic tokens} % % \begin{variable}[module = token] % { % \c_group_begin_token, % \c_group_end_token, % \c_math_toggle_token, % \c_alignment_token, % \c_parameter_token, % \c_math_superscript_token, % \c_math_subscript_token, % \c_space_token % } % These are implicit tokens which have the category code described % by their name. They are used internally for test purposes but % are also available to the programmer for other uses. % \begin{texnote} % The tokens \cs{c_group_begin_token}, \cs{c_group_end_token}, and % \cs{c_space_token} are \pkg{expl3} counterparts of \LaTeXe{}'s % \tn{bgroup}, \tn{egroup}, and \cs{@sptoken}. % \end{texnote} % \end{variable} % % \begin{variable} % { % \c_catcode_letter_token, % \c_catcode_other_token % } % These are implicit tokens which have the category code described % by their name. They are used internally for test purposes and should % not be used other than for category code tests. % \end{variable} % % \begin{variable}{\c_catcode_active_tl} % A token list containing an active token. This is used internally % for test purposes and should not be used other than in % appropriately-constructed category code tests. 
% \end{variable} % % \section{Converting tokens} % % \begin{function}[EXP]{\token_to_meaning:N, \token_to_meaning:c} % \begin{syntax} % \cs{token_to_meaning:N} \meta{token} % \end{syntax} % Inserts the current meaning of the \meta{token} into the input % stream as a series of characters of category code $12$ (other). % This is the primitive \TeX{} description of the \meta{token}, % thus for example both functions defined by \cs{cs_set_nopar:Npn} % and token list variables defined using \cs{tl_new:N} are described % as |macro|s. % \begin{texnote} % This is the \TeX{} primitive \tn{meaning}. % The \meta{token} can thus be an explicit space token or an % explicit begin-group or end-group character token (|{|~or~|}| when % normal \TeX{} category codes apply) even though these are not % valid \texttt{N}-type arguments. % \end{texnote} % \end{function} % % \begin{function}[EXP]{\token_to_str:N, \token_to_str:c} % \begin{syntax} % \cs{token_to_str:N} \meta{token} % \end{syntax} % Converts the given \meta{token} into a series of characters with % category code $12$ (other). If the \meta{token} is a control % sequence, this will start with the current escape character with % category code $12$ (the escape character is part of the % \meta{token}). This function requires only a single expansion. % \begin{texnote} % \cs{token_to_str:N} is the \TeX{} primitive \tn{string}. % The \meta{token} can thus be an explicit space token or an % explicit begin-group or end-group character token (|{|~or~|}| when % normal \TeX{} category codes apply) even though these are not % valid \texttt{N}-type arguments. % \end{texnote} % \end{function} % % \begin{function}[EXP,added = 2023-10-15]{\token_to_catcode:N} % \begin{syntax} % \cs{token_to_catcode:N} \meta{token} % \end{syntax} % Converts the given \meta{token} into a number describing its category code. % If \meta{token} is a control sequence this expands to $16$.
This can't % detect the categories $0$ (escape character), $5$ (end of line), $9$ % (ignored character), $14$ (comment character), or $15$ (invalid character). % Control sequences or active characters let to a token of one of the % detectable category codes will yield that category. % \end{function} % % \section{Token conditionals} % % \begin{function}[EXP,pTF]{\token_if_group_begin:N} % \begin{syntax} % \cs{token_if_group_begin_p:N} \meta{token} \\ % \cs{token_if_group_begin:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of a begin group token % (|{| when normal \TeX{} category codes are in ^^A } % force). % Note that an explicit begin group token cannot be tested in this way, % as it is not a valid \texttt{N}-type argument. % \end{function} % % \begin{function}[EXP,pTF]{\token_if_group_end:N} % \begin{syntax} % \cs{token_if_group_end_p:N} \meta{token} \\ % \cs{token_if_group_end:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of an end group token % (^^A { % |}| when normal \TeX{} category codes are in force). % Note that an explicit end group token cannot be tested in this way, % as it is not a valid \texttt{N}-type argument. % \end{function} % % \begin{function}[EXP,pTF]{\token_if_math_toggle:N} % \begin{syntax} % \cs{token_if_math_toggle_p:N} \meta{token} \\ % \cs{token_if_math_toggle:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of a math shift token % (|$| when normal \TeX{} category codes are in force). % \end{function} % % \begin{function}[EXP,pTF]{\token_if_alignment:N} % \begin{syntax} % \cs{token_if_alignment_p:N} \meta{token} \\ % \cs{token_if_alignment:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of an alignment token % (|&| when normal \TeX{} category codes are in force). 
% \end{function} % % \begin{function}[EXP,pTF]{\token_if_parameter:N} % \begin{syntax} % \cs{token_if_parameter_p:N} \meta{token} \\ % \cs{token_if_parameter:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of a macro parameter token % (|#| when normal \TeX{} category codes are in force). % \end{function} % % \begin{function}[EXP,pTF]{\token_if_math_superscript:N} % \begin{syntax} % \cs{token_if_math_superscript_p:N} \meta{token} \\ % \cs{token_if_math_superscript:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of a superscript token % (|^| when normal \TeX{} category codes are in force). % \end{function} % % \begin{function}[EXP,pTF]{\token_if_math_subscript:N} % \begin{syntax} % \cs{token_if_math_subscript_p:N} \meta{token} \\ % \cs{token_if_math_subscript:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of a subscript token % (|_| when normal \TeX{} category codes are in force). % \end{function} % % \begin{function}[EXP,pTF]{\token_if_space:N} % \begin{syntax} % \cs{token_if_space_p:N} \meta{token} \\ % \cs{token_if_space:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of a space token. % Note that an explicit space token with character code $32$ cannot % be tested in this way, as it is not a valid \texttt{N}-type argument. % \end{function} % % \begin{function}[EXP,pTF]{\token_if_letter:N} % \begin{syntax} % \cs{token_if_letter_p:N} \meta{token} \\ % \cs{token_if_letter:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of a letter token. 
% \end{function} % % \begin{function}[EXP,pTF]{\token_if_other:N} % \begin{syntax} % \cs{token_if_other_p:N} \meta{token} \\ % \cs{token_if_other:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of an \enquote{other} % token. % \end{function} % % \begin{function}[EXP,pTF]{\token_if_active:N} % \begin{syntax} % \cs{token_if_active_p:N} \meta{token} \\ % \cs{token_if_active:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if \meta{token} has the category code of an active character. % \end{function} % % \begin{function}[EXP,pTF]{\token_if_eq_catcode:NN} % \begin{syntax} % \cs{token_if_eq_catcode_p:NN} \meta{token_1} \meta{token_2} \\ % \cs{token_if_eq_catcode:NNTF} \meta{token_1} \meta{token_2} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the two \meta{tokens} have the same category code. % \end{function} % % \begin{function}[EXP,pTF]{\token_if_eq_charcode:NN} % \begin{syntax} % \cs{token_if_eq_charcode_p:NN} \meta{token_1} \meta{token_2} \\ % \cs{token_if_eq_charcode:NNTF} \meta{token_1} \meta{token_2} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the two \meta{tokens} have the same character code. % \end{function} % % \begin{function}[EXP,pTF]{\token_if_eq_meaning:NN} % \begin{syntax} % \cs{token_if_eq_meaning_p:NN} \meta{token_1} \meta{token_2} \\ % \cs{token_if_eq_meaning:NNTF} \meta{token_1} \meta{token_2} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the two \meta{tokens} have the same meaning when expanded. % \end{function} % % \begin{function}[updated = 2011-05-23, EXP,pTF]{\token_if_macro:N} % \begin{syntax} % \cs{token_if_macro_p:N} \meta{token} \\ % \cs{token_if_macro:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is a \TeX{} macro. 
% \end{function} % % \begin{function}[EXP,pTF]{\token_if_cs:N} % \begin{syntax} % \cs{token_if_cs_p:N} \meta{token} \\ % \cs{token_if_cs:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is a control sequence. % \end{function} % % \begin{function}[EXP,pTF]{\token_if_expandable:N} % \begin{syntax} % \cs{token_if_expandable_p:N} \meta{token} \\ % \cs{token_if_expandable:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is expandable. This test returns \meta{false} % for an undefined token. % \end{function} % % \begin{function}[EXP,pTF, updated=2012-01-20]{\token_if_long_macro:N} % \begin{syntax} % \cs{token_if_long_macro_p:N} \meta{token} \\ % \cs{token_if_long_macro:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is a long macro. % \end{function} % % \begin{function}[EXP,pTF, updated=2012-01-20]{\token_if_protected_macro:N} % \begin{syntax} % \cs{token_if_protected_macro_p:N} \meta{token} \\ % \cs{token_if_protected_macro:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is a protected macro: for a macro which % is both protected and long this returns \texttt{false}. % \end{function} % % \begin{function}[EXP,pTF, updated=2012-01-20]{\token_if_protected_long_macro:N} % \begin{syntax} % \cs{token_if_protected_long_macro_p:N} \meta{token} \\ % \cs{token_if_protected_long_macro:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is a protected long macro. % \end{function} % % \begin{function}[EXP,pTF, updated=2012-01-20]{\token_if_chardef:N} % \begin{syntax} % \cs{token_if_chardef_p:N} \meta{token} \\ % \cs{token_if_chardef:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is defined to be a chardef. % \begin{texnote} % Booleans, boxes and small integer constants are implemented as % \tn{chardef}s. 
% \end{texnote} % \end{function} % % \begin{function}[EXP,pTF, updated=2012-01-20]{\token_if_mathchardef:N} % \begin{syntax} % \cs{token_if_mathchardef_p:N} \meta{token} \\ % \cs{token_if_mathchardef:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is defined to be a mathchardef. % \end{function} % % \begin{function}[EXP,pTF, added=2020-10-27]{\token_if_font_selection:N} % \begin{syntax} % \cs{token_if_font_selection_p:N} \meta{token} \\ % \cs{token_if_font_selection:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is defined to be a font selection command. % \end{function} % % \begin{function}[EXP,pTF, updated=2012-01-20]{\token_if_dim_register:N} % \begin{syntax} % \cs{token_if_dim_register_p:N} \meta{token} \\ % \cs{token_if_dim_register:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is defined to be a dimension register. % \end{function} % % \begin{function}[EXP,pTF, updated=2012-01-20]{\token_if_int_register:N} % \begin{syntax} % \cs{token_if_int_register_p:N} \meta{token} \\ % \cs{token_if_int_register:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is defined to be an integer register. % \begin{texnote} % Constant integers may be implemented as integer registers, % \tn{chardef}s, or \tn{mathchardef}s depending on their value. % \end{texnote} % \end{function} % % \begin{function}[EXP,pTF, added=2012-02-15]{\token_if_muskip_register:N} % \begin{syntax} % \cs{token_if_muskip_register_p:N} \meta{token} \\ % \cs{token_if_muskip_register:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is defined to be a muskip register.
% \end{function} % % \begin{function}[EXP,pTF, updated=2012-01-20]{\token_if_skip_register:N} % \begin{syntax} % \cs{token_if_skip_register_p:N} \meta{token} \\ % \cs{token_if_skip_register:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is defined to be a skip register. % \end{function} % % \begin{function}[EXP,pTF, updated=2012-01-20]{\token_if_toks_register:N} % \begin{syntax} % \cs{token_if_toks_register_p:N} \meta{token} \\ % \cs{token_if_toks_register:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is defined to be a toks register % (not used by \LaTeX3). % \end{function} % % \begin{function}[updated = 2020-09-11, EXP,pTF]{\token_if_primitive:N} % \begin{syntax} % \cs{token_if_primitive_p:N} \meta{token} \\ % \cs{token_if_primitive:NTF} \meta{token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{token} is an engine primitive. In \LuaTeX{} % this includes primitive-like commands defined using |token.set_lua|. % \end{function} % % \begin{function}[added = 2020-12-03, EXP, noTF] % {\token_case_catcode:Nn, \token_case_charcode:Nn, \token_case_meaning:Nn} % \begin{syntax} % \cs{token_case_meaning:NnTF} \meta{test token} \\ % ~~"{" \\ % ~~~~\meta{token case_1} \Arg{code case_1} \\ % ~~~~\meta{token case_2} \Arg{code case_2} \\ % ~~~~\ldots \\ % ~~~~\meta{token case_n} \Arg{code case_n} \\ % ~~"}" \\ % ~~\Arg{true code} % ~~\Arg{false code} % \end{syntax} % This function compares the \meta{test token} in turn with each of % the \meta{token cases}. If the two are equal (as described for % \cs{token_if_eq_catcode:NNTF}, \cs{token_if_eq_charcode:NNTF} and % \cs{token_if_eq_meaning:NNTF}, respectively) then the associated % \meta{code} is left in the input stream and other cases are % discarded. 
If any of the cases are matched, the \meta{true code} is % also inserted into the input stream (after the code for the % appropriate case), while if none match then the \meta{false code} is % inserted. The functions \cs{token_case_catcode:Nn}, % \cs{token_case_charcode:Nn}, and \cs{token_case_meaning:Nn}, which % do nothing if there is no match, are also available. % \end{function} % % \section{Peeking ahead at the next token} % % There is often a need to look ahead at the next token in the input % stream while leaving it in place. This is handled using the % \enquote{peek} functions. The generic \cs{peek_after:Nw} is provided % along with a family of predefined tests for common cases. Peeking % ahead does \emph{not} skip spaces: rather, \cs{peek_remove_spaces:n} % should be used. In addition, using \cs{peek_analysis_map_inline:n}, % one can map through the following tokens in the input stream and % repeatedly perform some tests. % % \begin{function}{\peek_after:Nw} % \begin{syntax} % \cs{peek_after:Nw} \meta{function} \meta{token} % \end{syntax} % Locally sets the test variable \cs{l_peek_token} equal to \meta{token} % (as an implicit token, \emph{not} as a token list), and then % expands the \meta{function}. The \meta{token} remains in % the input stream as the next item after the \meta{function}. % The \meta{token} here may be \verb*| |, |{| or |}| (assuming % normal \TeX{} category codes), \emph{i.e.}~it is not necessarily the % next argument which would be grabbed by a normal function. % \end{function} % % \begin{function}{\peek_gafter:Nw} % \begin{syntax} % \cs{peek_gafter:Nw} \meta{function} \meta{token} % \end{syntax} % Globally sets the test variable \cs{g_peek_token} equal to \meta{token} % (as an implicit token, \emph{not} as a token list), and then % expands the \meta{function}. The \meta{token} remains in % the input stream as the next item after the \meta{function}.
% The \meta{token} here may be \verb*| |, |{| or |}| (assuming % normal \TeX{} category codes), \emph{i.e.}~it is not necessarily the % next argument which would be grabbed by a normal function. % \end{function} % % \begin{variable}{\l_peek_token} % Token set by \cs{peek_after:Nw} and available for testing % as described above. % \end{variable} % % \begin{variable}{\g_peek_token} % Token set by \cs{peek_gafter:Nw} and available for testing % as described above. % \end{variable} % % \begin{function}[updated = 2012-12-20, TF]{\peek_catcode:N} % \begin{syntax} % \cs{peek_catcode:NTF} \meta{test token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the next \meta{token} in the input stream has the same % category code as the \meta{test token} (as defined by the test % \cs{token_if_eq_catcode:NNTF}). Spaces are respected by the test % and the \meta{token} is left in the input stream after % the \meta{true code} or \meta{false code} (as appropriate to the % result of the test). % \end{function} % % \begin{function}[updated = 2012-12-20, TF]{\peek_catcode_remove:N} % \begin{syntax} % \cs{peek_catcode_remove:NTF} \meta{test token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the next \meta{token} in the input stream has the same % category code as the \meta{test token} (as defined by the test % \cs{token_if_eq_catcode:NNTF}). Spaces are respected by the test % and the \meta{token} is removed from the input stream if the % test is true. The function then places either the % \meta{true code} or \meta{false code} in the input stream (as % appropriate to the result of the test). % \end{function} % % \begin{function}[updated = 2012-12-20, TF]{\peek_charcode:N} % \begin{syntax} % \cs{peek_charcode:NTF} \meta{test token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the next \meta{token} in the input stream has the same % character code as the \meta{test token} (as defined by the test % \cs{token_if_eq_charcode:NNTF}). 
Spaces are respected by the test % and the \meta{token} is left in the input stream after % the \meta{true code} or \meta{false code} (as appropriate to the % result of the test). % \end{function} % % \begin{function}[updated = 2012-12-20, TF]{\peek_charcode_remove:N} % \begin{syntax} % \cs{peek_charcode_remove:NTF} \meta{test token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the next \meta{token} in the input stream has the same % character code as the \meta{test token} (as defined by the test % \cs{token_if_eq_charcode:NNTF}). Spaces are respected by the test % and the \meta{token} is removed from the input stream if the % test is true. The function then places either the % \meta{true code} or \meta{false code} in the input stream (as % appropriate to the result of the test). % \end{function} % % \begin{function}[updated = 2011-07-02, TF]{\peek_meaning:N} % \begin{syntax} % \cs{peek_meaning:NTF} \meta{test token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the next \meta{token} in the input stream has the same % meaning as the \meta{test token} (as defined by the test % \cs{token_if_eq_meaning:NNTF}). Spaces are respected by the test % and the \meta{token} is left in the input stream after % the \meta{true code} or \meta{false code} (as appropriate to the % result of the test). % \end{function} % % \begin{function}[updated = 2011-07-02, TF]{\peek_meaning_remove:N} % \begin{syntax} % \cs{peek_meaning_remove:NTF} \meta{test token} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the next \meta{token} in the input stream has the same % meaning as the \meta{test token} (as defined by the test % \cs{token_if_eq_meaning:NNTF}). Spaces are respected by the test % and the \meta{token} is removed from the input stream if the % test is true. The function then places either the % \meta{true code} or \meta{false code} in the input stream (as % appropriate to the result of the test). 
% \end{function} % % \begin{function}[added = 2018-10-01]{\peek_remove_spaces:n} % \begin{syntax} % \cs{peek_remove_spaces:n} \Arg{code} % \end{syntax} % Peeks ahead and detects if the following token is a space (category code % $10$ and character code $32$). If so, removes the token and checks the % next token. Once a non-space token is found, the \meta{code} will be % inserted into the input stream. Typically this will contain a \texttt{peek} % operation, but this is not required. % \end{function} % % \begin{function}[added = 2022-01-10]{\peek_remove_filler:n} % \begin{syntax} % \cs{peek_remove_filler:n} \Arg{code} % \end{syntax} % Peeks ahead and detects if the following token is a space (category code % $10$) or has meaning equal to \cs{scan_stop:}. If so, % removes the token and checks the next token. If neither of these cases % applies, expands the next token using \texttt{f}-type expansion, then checks % the resulting leading token in the same way. If after expansion the next % token is neither of the two test cases, the \meta{code} will be inserted % into the input stream. Typically this will contain a \texttt{peek} % operation, but this is not required. % \begin{texnote} % This is essentially a macro-based implementation of how \TeX{} handles % the search for a left brace after for example \tn{everypar}, except that % any non-expandable token cleanly ends the \meta{filler} (i.e.~it does not % lead to a \TeX{} error). % % In contrast to \TeX{}'s filler removal, a construct % \verb|\exp_not:N \foo| will be treated in the same way as \verb|\foo|. % \end{texnote} % \end{function} % % \begin{function}[TF, updated = 2012-12-20]{\peek_N_type:} % \begin{syntax} % \cs{peek_N_type:TF} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the next \meta{token} in the input stream can be safely % grabbed as an \texttt{N}-type argument.
The test is \meta{false} % if the next \meta{token} is either an explicit or implicit % begin-group or end-group token (with any character code), or % an explicit or implicit space character (with character code $32$ % and category code $10$), or an outer token (never used in \LaTeX3) % and \meta{true} in all other cases. % Note that a \meta{true} result ensures that the next \meta{token} is % a valid \texttt{N}-type argument. However, if the next \meta{token} % is for instance \cs{c_space_token}, the test takes the % \meta{false} branch, even though the next \meta{token} is in fact % a valid \texttt{N}-type argument. The \meta{token} is left % in the input stream after the \meta{true code} or \meta{false code} % (as appropriate to the result of the test). % \end{function} % % \begin{function}[added = 2020-12-03, updated = 2024-02-07] % {\peek_analysis_map_inline:n} % \begin{syntax} % \cs{peek_analysis_map_inline:n} \Arg{inline function} % \end{syntax} % Repeatedly removes one \meta{token} from the input stream and % applies the \meta{inline function} to it, until % \cs{peek_analysis_map_break:} is called. The \meta{inline function} % receives three arguments for each \meta{token} in the input stream: % \begin{itemize} % \item \meta{tokens}, which both \texttt{o}-expand and % \texttt{e}/\texttt{x}-expand to the \meta{token}. The detailed form of % \meta{tokens} may change in later releases. % \item \meta{char code}, a decimal representation of the character % code of the \meta{token}, $-1$ if it is a control sequence. % \item \meta{catcode}, a capital hexadecimal digit which denotes the % category code of the \meta{token} (0:~control sequence, % 1:~begin-group, 2:~end-group, 3:~math shift, 4:~alignment tab, % 6:~parameter, 7:~superscript, 8:~subscript, A:~space, B:~letter, % C:~other, D:~active). This can be converted to an integer by % writing |"|\meta{catcode}. 
% \end{itemize} % These arguments are the same as for \cs{tl_analysis_map_inline:nn} % defined in \pkg{l3tl-analysis}. The \meta{char code} and % \meta{catcode} do not take the meaning of a control sequence or % active character into account: for instance, upon encountering the % token \cs{c_group_begin_token} in the input stream, % \cs{peek_analysis_map_inline:n} calls the \meta{inline function} % with |#1| being \cs{exp_not:n} |{| \cs{c_group_begin_token} |}| % (with the current implementation), % |#2|~being~$-1$, and % |#3|~being~$0$, as for any other control sequence. In contrast, % upon encountering an explicit begin-group token~|{|, % ^^A |}| % the \meta{inline function} is called with arguments % \cs{exp_after:wN} |{| \cs{if_false:} |}| \cs{fi:}, $123$ and~$1$. % % The mapping is done at the current group level, \emph{i.e.}~any % local assignments made by the \meta{inline function} remain in % effect after the loop. Within the code, \cs{l_peek_token} is set % equal (as a token, not a token list) to the token under % consideration. % % Peek functions cannot be used within this mapping function (nor % other mapping functions) since the input stream contains trailing % material necessary for the functioning of the loop. % \begin{texnote} % In case the input stream has not yet been tokenized (converted % from characters to tokens), characters are tokenized one by one as % needed by \cs{peek_analysis_map_inline:n} using the current % category code regime. % \end{texnote} % \end{function} % % \begin{function}[added = 2020-12-03] % {\peek_analysis_map_break:, \peek_analysis_map_break:n} % \begin{syntax} % \cs{peek_analysis_map_inline:n} % |{| \dots{} \cs{peek_analysis_map_break:n} \Arg{code} |}| % \end{syntax} % Stops the \cs{peek_analysis_map_inline:n} loop from seeking more % tokens, and inserts \meta{code} in the input stream (empty for % \cs{peek_analysis_map_break:}). 
% \end{function} % % \begin{function}[added = 2020-12-03, TF]{\peek_regex:n, \peek_regex:N} % \begin{syntax} % \cs{peek_regex:nTF} \Arg{regex} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{tokens} that follow in the input stream match the % \meta{regular expression}. Any \meta{tokens} that have been read % are left in the input stream after the \meta{true code} or % \meta{false code} (as appropriate to the result of the test). See % \pkg{l3regex} for documentation of the syntax of regular % expressions. The \meta{regular expression} is implicitly anchored % at the start, so for instance \cs{peek_regex:nTF}~|{|~|a|~|}| is % essentially equivalent to \cs{peek_charcode:NTF}~|a|. % \begin{texnote} % Implicit character tokens are correctly considered by % \cs{peek_regex:nTF} as control sequences, while functions that % inspect individual tokens (for instance \cs{peek_charcode:NTF}) % only take into account their meaning. % % The \cs{peek_regex:nTF} function only inspects as few tokens as % necessary to determine whether the regular expression matches. % For instance \cs{peek_regex:nTF} \verb"{ abc | [a-z] }" |{ } { }| % |abc| will only inspect the first token~|a| even though the first % branch |abc| of the alternative is preferred in functions such as % \cs{peek_regex_remove_once:nTF}. This may have an effect on % tokenization if the input stream has not yet been tokenized and % category codes are changed. % \end{texnote} % \end{function} % % \begin{function}[added = 2020-12-03, TF] % {\peek_regex_remove_once:n, \peek_regex_remove_once:N} % \begin{syntax} % \cs{peek_regex_remove_once:nTF} \Arg{regex} \Arg{true code} \Arg{false code} % \end{syntax} % Tests if the \meta{tokens} that follow in the input stream match the % \meta{regex}. 
If the test is true, the \meta{tokens} are removed % from the input stream and the \meta{true code} is inserted, while if % the test is false, the \meta{false code} is inserted followed by the % \meta{tokens} that were originally in the input stream. % See \pkg{l3regex} for documentation of the syntax of % regular expressions. The \meta{regular expression} is implicitly % anchored at the start, so for instance % \cs{peek_regex_remove_once:nTF}~|{|~|a|~|}| is essentially equivalent to % \cs{peek_charcode_remove:NTF}~|a|. % \begin{texnote} % Implicit character tokens are correctly considered by % \cs{peek_regex_remove_once:nTF} as control sequences, while functions % that inspect individual tokens (for instance % \cs{peek_charcode:NTF}) only take into account their meaning. % \end{texnote} % \end{function} % % \begin{function}[added = 2020-12-03, noTF] % {\peek_regex_replace_once:nn, \peek_regex_replace_once:Nn} % \begin{syntax} % \cs{peek_regex_replace_once:nnTF} \Arg{regex} \Arg{replacement} \Arg{true code} \Arg{false code} % \end{syntax} % If the \meta{tokens} that follow in the input stream match the % \meta{regex}, replaces them according to the \meta{replacement} as % for \cs{regex_replace_once:nnN}, and leaves the result in the input % stream, after the \meta{true code}. Otherwise, leaves \meta{false % code} followed by the \meta{tokens} that were originally in the % input stream, with no modifications. See \pkg{l3regex} for % documentation of the syntax of regular expressions and of the % \meta{replacement}: for instance |\0| in the \meta{replacement} is % replaced by the tokens that were matched in the input stream. The % \meta{regular expression} is implicitly anchored at the start. In % contrast to \cs{regex_replace_once:nnN}, no error arises if the % \meta{replacement} leads to an unbalanced token list: the tokens are % inserted into the input stream without issue. 
% \begin{texnote} % Implicit character tokens are correctly considered by % \cs{peek_regex_replace_once:nnTF} as control sequences, while % functions that inspect individual tokens (for instance % \cs{peek_charcode:NTF}) only take into account their meaning. % \end{texnote} % \end{function} % % \section{Description of all possible tokens} % \label{sec:l3token:all-tokens} % % Let us end by reviewing every case that a given token can fall into. % This section is quite technical and some details are only meant for % completeness. We distinguish the meaning of the token, which controls % the expansion of the token and its effect on \TeX{}'s state, and its % shape, which is used when comparing token lists such as for delimited % arguments. Two tokens of the same shape must have the same meaning, % but the converse does not hold. % % A token has one of the following shapes. % \begin{itemize} % \item A control sequence, characterized by the sequence of % characters that constitute its name: for instance, \cs{use:n} is a % five-letter control sequence. % \item An active character token, characterized by its character code % (between $0$ and $1114111$ for \LuaTeX{} and \XeTeX{} and less for % other engines) and category code~$13$. % \item A character token, characterized by its character code and % category code (one of $1$, $2$, $3$, $4$, $6$, $7$, $8$, $10$, % $11$ or~$12$ whose meaning is described below). % \end{itemize} % There are also a few internal tokens. The following list may be % incomplete in some engines. % \begin{itemize} % \item Expanding \tn{the}\tn{font} results in a token that looks % identical to the command that was used to select the current font % (such as \tn{tenrm}) but it differs from it in shape. % \item A \enquote{frozen} |\relax|, which differs from the primitive in % shape (but has the same meaning), is inserted when the closing \tn{fi} of a % conditional is encountered before the conditional is evaluated. 
% \item Expanding \tn{noexpand} \meta{token} (when the \meta{token} is % expandable) results in an internal token, displayed (temporarily) % as \cs[module = {}]{notexpanded: \meta{token}}, whose shape coincides with the % \meta{token} and whose meaning differs from \tn{relax}. % \item An |\outer endtemplate:| can be encountered when peeking ahead % at the next token; this expands to another internal token, % |end of alignment template|. % \item Tricky programming might access a frozen |\endwrite|. % \item Some frozen tokens can only be accessed in interactive % sessions: |\cr|, |\right|, |\endgroup|, |\fi|, |\inaccessible|. % \item In \LuaTeX{}, there is also the strange case of \enquote{bytes} % |^^^^^^1100|$xy$ where $x,y$ are any two lowercase hexadecimal % digits, so that the hexadecimal number ranges from % |"|$11\,0000=1\,114\,112$ to~|"|$110\,0\mathrm{ff}=1\,114\,367$. These are % used to output individual bytes to files, rather than UTF-8. For % the purposes of token comparisons they behave like non-expandable % primitive control sequences (\emph{not characters}) whose % \tn{meaning} is \verb*|the character | followed by the given byte. % If this byte is in the range |80|--|ff| this gives an ``invalid % utf-8 sequence'' error: applying \cs{token_to_str:N} or % \cs{token_to_meaning:N} to these tokens is unsafe. Unfortunately, % they don't seem to be detectable safely by any means except perhaps % Lua code. % \end{itemize} % % The meaning of a (non-active) character token is fixed by its category % code (and character code) and cannot be changed. We call these % tokens \emph{explicit} character tokens. 
Category codes that a % character token can have are listed below by giving a sample output of % the \TeX{} primitive \tn{meaning}, together with their \LaTeX3 names % and most common example: % \begin{itemize} % \item[1] begin-group character (|group_begin|, often |{|), % \item[2] end-group character (|group_end|, often |}|), % \item[3] math shift character (|math_toggle|, often |$|), %^^A $ % \item[4] alignment tab character (|alignment|, often |&|), % \item[6] macro parameter character (|parameter|, often |#|), % \item[7] superscript character (|math_superscript|, often |^|), % \item[8] subscript character (|math_subscript|, often |_|), % \item[10] blank space (|space|, often character code~$32$), % \item[11] the letter (|letter|, such as |A|), % \item[12] the character (|other|, such as |0|). % \end{itemize} % Category code~$13$ (|active|) is discussed below. Input characters % can also have several other category codes which do not lead to % character tokens for later processing: $0$~(|escape|), % $5$~(|end_line|), $9$~(|ignore|), $14$~(|comment|), and % $15$~(|invalid|). % % The meaning of a control sequence or active character can be identical % to that of any character token listed above (with any character code), % and we call such tokens \emph{implicit} character tokens. The % meaning is otherwise in the following list: % \begin{itemize} % \item a macro, used in \LaTeX3 for most functions and some variables % (|tl|, |fp|, |seq|, \ldots{}), % \item a primitive such as \tn{def} or \tn{topmark}, used in \LaTeX3 % for some functions, % \item a register such as \tn{count}|123|, used in \LaTeX3{} for the % implementation of some variables (|int|, |dim|, \ldots{}), % \item a constant integer such as \tn{char}|"56| or \tn{mathchar}|"121|, % \item a font selection command, % \item undefined. % \end{itemize} % Macros can be \tn{protected} or not, \tn{long} or not (the opposite of % what \LaTeX3 calls |nopar|), and \tn{outer} or not (unused in % \LaTeX3). 
Their \tn{meaning} takes the form % \begin{quote} % \meta{prefix} |macro:|\meta{argument}|->|\meta{replacement} % \end{quote} % where \meta{prefix} is among \tn{protected}\tn{long}\tn{outer}, % \meta{argument} describes parameters that the macro expects, such as % |#1#2#3|, and \meta{replacement} describes how the parameters are % manipulated, such as~|\int_eval:n{#2+#1*#3}|. % % Now is perhaps a good time to mention some subtleties relating to % tokens with category code $10$ (space). Any input character with this % category code (normally, space and tab characters) becomes a normal % space, with character code~$32$ and category code~$10$. % % When a macro takes an undelimited argument, explicit space characters % (with character code $32$ and category code $10$) are ignored. If the % following token is an explicit character token with category code $1$ % (begin-group) and an arbitrary character code, then \TeX{} scans ahead % to obtain an equal number of explicit character tokens with category % code $1$ (begin-group) and $2$ (end-group), and the resulting list of % tokens (with outer braces removed) becomes the argument. Otherwise, a % single token is taken as the argument for the macro: we call such % single tokens \enquote{N-type}, as they are suitable to be used as an % argument for a function with the signature~\texttt{:N}. % % When a macro takes a delimited argument \TeX{} scans ahead until % finding the delimiter (outside any pairs of begin-group/end-group % explicit characters), and the resulting list of tokens (with outer % braces removed) becomes the argument. Note that explicit space % characters at the start of the argument are \emph{not} ignored in this % case (and they prevent brace-stripping). 
% % \end{documentation} % % \begin{implementation} % % \section{\pkg{l3token} implementation} % % \begin{macrocode} %<*package> % \end{macrocode} % % \begin{macrocode} %<*tex> % \end{macrocode} % % \begin{macrocode} %<@@=char> % \end{macrocode} % % \subsection{Internal auxiliaries} % % \begin{variable}{\s_@@_stop} % Internal scan mark. % \begin{macrocode} \scan_new:N \s_@@_stop % \end{macrocode} % \end{variable} % % \begin{variable}{\q_@@_no_value} % Internal recursion quarks. % \begin{macrocode} \quark_new:N \q_@@_no_value % \end{macrocode} % \end{variable} % % \begin{macro}[pTF]{\@@_quark_if_no_value:N} % Functions to query recursion quarks. % \begin{macrocode} \__kernel_quark_new_conditional:Nn \@@_quark_if_no_value:N { TF } % \end{macrocode} % \end{macro} % % \subsection{Manipulating and interrogating character tokens} % % \begin{macro}{\char_set_catcode:nn} % \begin{macro}{\char_value_catcode:n} % \begin{macro}{\char_show_value_catcode:n} % Simple wrappers around the \tn{catcode} primitive. The setter passes % both of its arguments through \cs{int_eval:n} and ends the second % number with \cs{exp_stop_f:}. The \texttt{value} function applies % \cs{tex_the:D} to the table entry, and the \texttt{show} function % hands the \texttt{f}-expanded result of the \texttt{value} function % to \cs{tl_show:n}. % \begin{macrocode} \cs_new_protected:Npn \char_set_catcode:nn #1#2 { \tex_catcode:D \int_eval:n {#1} = \int_eval:n {#2} \exp_stop_f: } \cs_new:Npn \char_value_catcode:n #1 { \tex_the:D \tex_catcode:D \int_eval:n {#1} \exp_stop_f: } \cs_new_protected:Npn \char_show_value_catcode:n #1 { \exp_args:Nf \tl_show:n { \char_value_catcode:n {#1} } } % \end{macrocode} % \end{macro} % \end{macro} % \end{macro} % % \begin{macro} % { % \char_set_catcode_escape:N , % \char_set_catcode_group_begin:N , % \char_set_catcode_group_end:N , % \char_set_catcode_math_toggle:N , % \char_set_catcode_alignment:N , % \char_set_catcode_end_line:N , % \char_set_catcode_parameter:N , % \char_set_catcode_math_superscript:N , % \char_set_catcode_math_subscript:N , % \char_set_catcode_ignore:N , % \char_set_catcode_space:N , % \char_set_catcode_letter:N , % \char_set_catcode_other:N , % \char_set_catcode_active:N , % \char_set_catcode_comment:N , % \char_set_catcode_invalid:N % } % One function per category code: each converts its \texttt{N}-type % argument to a character code with |`#1| and passes it to % \cs{char_set_catcode:nn} together with the fixed category code, % from $0$ (escape) to $15$ (invalid). % \begin{macrocode}
\cs_new_protected:Npn \char_set_catcode_escape:N #1 { \char_set_catcode:nn { `#1 } { 0 } } \cs_new_protected:Npn \char_set_catcode_group_begin:N #1 { \char_set_catcode:nn { `#1 } { 1 } } \cs_new_protected:Npn \char_set_catcode_group_end:N #1 { \char_set_catcode:nn { `#1 } { 2 } } \cs_new_protected:Npn \char_set_catcode_math_toggle:N #1 { \char_set_catcode:nn { `#1 } { 3 } } \cs_new_protected:Npn \char_set_catcode_alignment:N #1 { \char_set_catcode:nn { `#1 } { 4 } } \cs_new_protected:Npn \char_set_catcode_end_line:N #1 { \char_set_catcode:nn { `#1 } { 5 } } \cs_new_protected:Npn \char_set_catcode_parameter:N #1 { \char_set_catcode:nn { `#1 } { 6 } } \cs_new_protected:Npn \char_set_catcode_math_superscript:N #1 { \char_set_catcode:nn { `#1 } { 7 } } \cs_new_protected:Npn \char_set_catcode_math_subscript:N #1 { \char_set_catcode:nn { `#1 } { 8 } } \cs_new_protected:Npn \char_set_catcode_ignore:N #1 { \char_set_catcode:nn { `#1 } { 9 } } \cs_new_protected:Npn \char_set_catcode_space:N #1 { \char_set_catcode:nn { `#1 } { 10 } } \cs_new_protected:Npn \char_set_catcode_letter:N #1 { \char_set_catcode:nn { `#1 } { 11 } } \cs_new_protected:Npn \char_set_catcode_other:N #1 { \char_set_catcode:nn { `#1 } { 12 } } \cs_new_protected:Npn \char_set_catcode_active:N #1 { \char_set_catcode:nn { `#1 } { 13 } } \cs_new_protected:Npn \char_set_catcode_comment:N #1 { \char_set_catcode:nn { `#1 } { 14 } } \cs_new_protected:Npn \char_set_catcode_invalid:N #1 { \char_set_catcode:nn { `#1 } { 15 } } % \end{macrocode} % \end{macro} % % The \texttt{n}-type versions of these setters take an integer % expression giving the character code, rather than a single character. % % \begin{macro} % { % \char_set_catcode_escape:n , % \char_set_catcode_group_begin:n , % \char_set_catcode_group_end:n , % \char_set_catcode_math_toggle:n , % \char_set_catcode_alignment:n , % \char_set_catcode_end_line:n , % \char_set_catcode_parameter:n , % \char_set_catcode_math_superscript:n , % \char_set_catcode_math_subscript:n , % \char_set_catcode_ignore:n , % \char_set_catcode_space:n , % \char_set_catcode_letter:n , % \char_set_catcode_other:n , %
\char_set_catcode_active:n , % \char_set_catcode_comment:n , % \char_set_catcode_invalid:n % } % Each of these passes its argument |#1| unchanged to % \cs{char_set_catcode:nn}, together with the fixed category code, % from $0$ (escape) to $15$ (invalid). % \begin{macrocode} \cs_new_protected:Npn \char_set_catcode_escape:n #1 { \char_set_catcode:nn {#1} { 0 } } \cs_new_protected:Npn \char_set_catcode_group_begin:n #1 { \char_set_catcode:nn {#1} { 1 } } \cs_new_protected:Npn \char_set_catcode_group_end:n #1 { \char_set_catcode:nn {#1} { 2 } } \cs_new_protected:Npn \char_set_catcode_math_toggle:n #1 { \char_set_catcode:nn {#1} { 3 } } \cs_new_protected:Npn \char_set_catcode_alignment:n #1 { \char_set_catcode:nn {#1} { 4 } } \cs_new_protected:Npn \char_set_catcode_end_line:n #1 { \char_set_catcode:nn {#1} { 5 } } \cs_new_protected:Npn \char_set_catcode_parameter:n #1 { \char_set_catcode:nn {#1} { 6 } } \cs_new_protected:Npn \char_set_catcode_math_superscript:n #1 { \char_set_catcode:nn {#1} { 7 } } \cs_new_protected:Npn \char_set_catcode_math_subscript:n #1 { \char_set_catcode:nn {#1} { 8 } } \cs_new_protected:Npn \char_set_catcode_ignore:n #1 { \char_set_catcode:nn {#1} { 9 } } \cs_new_protected:Npn \char_set_catcode_space:n #1 { \char_set_catcode:nn {#1} { 10 } } \cs_new_protected:Npn \char_set_catcode_letter:n #1 { \char_set_catcode:nn {#1} { 11 } } \cs_new_protected:Npn \char_set_catcode_other:n #1 { \char_set_catcode:nn {#1} { 12 } } \cs_new_protected:Npn \char_set_catcode_active:n #1 { \char_set_catcode:nn {#1} { 13 } } \cs_new_protected:Npn \char_set_catcode_comment:n #1 { \char_set_catcode:nn {#1} { 14 } } \cs_new_protected:Npn \char_set_catcode_invalid:n #1 { \char_set_catcode:nn {#1} { 15 } } % \end{macrocode} % \end{macro} % % \begin{macro}{\char_set_mathcode:nn} % \begin{macro}{\char_value_mathcode:n} % \begin{macro}{\char_show_value_mathcode:n} % \begin{macro}{\char_set_lccode:nn} % \begin{macro}{\char_value_lccode:n} % \begin{macro}{\char_show_value_lccode:n} % \begin{macro}{\char_set_uccode:nn} % \begin{macro}{\char_value_uccode:n} % \begin{macro}{\char_show_value_uccode:n} % \begin{macro}{\char_set_sfcode:nn}
% \begin{macro}{\char_value_sfcode:n} % \begin{macro}{\char_show_value_sfcode:n} % Pretty repetitive, but necessary! The same three wrappers are set up % for each of \cs{tex_mathcode:D}, \cs{tex_lccode:D}, \cs{tex_uccode:D} % and \cs{tex_sfcode:D}: a protected setter which evaluates both % arguments with \cs{int_eval:n}, a \texttt{value} function which % applies \cs{tex_the:D} to the table entry, and a protected % \texttt{show} function which passes the \texttt{f}-expanded value to % \cs{tl_show:n}. % \begin{macrocode} \cs_new_protected:Npn \char_set_mathcode:nn #1#2 { \tex_mathcode:D \int_eval:n {#1} = \int_eval:n {#2} \exp_stop_f: } \cs_new:Npn \char_value_mathcode:n #1 { \tex_the:D \tex_mathcode:D \int_eval:n {#1} \exp_stop_f: } \cs_new_protected:Npn \char_show_value_mathcode:n #1 { \exp_args:Nf \tl_show:n { \char_value_mathcode:n {#1} } } \cs_new_protected:Npn \char_set_lccode:nn #1#2 { \tex_lccode:D \int_eval:n {#1} = \int_eval:n {#2} \exp_stop_f: } \cs_new:Npn \char_value_lccode:n #1 { \tex_the:D \tex_lccode:D \int_eval:n {#1} \exp_stop_f: } \cs_new_protected:Npn \char_show_value_lccode:n #1 { \exp_args:Nf \tl_show:n { \char_value_lccode:n {#1} } } \cs_new_protected:Npn \char_set_uccode:nn #1#2 { \tex_uccode:D \int_eval:n {#1} = \int_eval:n {#2} \exp_stop_f: } \cs_new:Npn \char_value_uccode:n #1 { \tex_the:D \tex_uccode:D \int_eval:n {#1} \exp_stop_f: } \cs_new_protected:Npn \char_show_value_uccode:n #1 { \exp_args:Nf \tl_show:n { \char_value_uccode:n {#1} } } \cs_new_protected:Npn \char_set_sfcode:nn #1#2 { \tex_sfcode:D \int_eval:n {#1} = \int_eval:n {#2} \exp_stop_f: } \cs_new:Npn \char_value_sfcode:n #1 { \tex_the:D \tex_sfcode:D \int_eval:n {#1} \exp_stop_f: } \cs_new_protected:Npn \char_show_value_sfcode:n #1 { \exp_args:Nf \tl_show:n { \char_value_sfcode:n {#1} } } % \end{macrocode} % \end{macro} % \end{macro} % \end{macro} % \end{macro} % \end{macro} % \end{macro} % \end{macro} % \end{macro} % \end{macro} % \end{macro} % \end{macro} % \end{macro} % % \begin{variable}{\l_char_active_seq, \l_char_special_seq} % Two sequences for dealing with special characters. The first is characters % which may be active, the second longer list is for \enquote{special} % characters more generally. Both lists are escaped so that for example % bulk code assignments can be carried out.
In both cases, the order is % by \textsc{ascii} character code (as is done in for example % \cs{ExplSyntaxOn}). % \begin{macrocode} \seq_new:N \l_char_special_seq \seq_set_split:Nnn \l_char_special_seq { } { \ \" \# \$ \% \& \\ \^ \_ \{ \} \~ } \seq_new:N \l_char_active_seq \seq_set_split:Nnn \l_char_active_seq { } { \" \$ \& \^ \_ \~ } % \end{macrocode} % \end{variable} % % \subsection{Creating character tokens} % % \begin{macro} % { % \char_set_active_eq:NN, \char_gset_active_eq:NN, % \char_set_active_eq:Nc, \char_gset_active_eq:Nc, % \char_set_active_eq:nN, \char_gset_active_eq:nN, % \char_set_active_eq:nc, \char_gset_active_eq:nc % } % Four simple functions with very similar definitions, so set up using % an auxiliary; the \texttt{c}-type variants are generated afterwards. % The auxiliary receives the base name of the function and the % assignment function to use (\cs{cs_set_eq:NN} or \cs{cs_gset_eq:NN}). % The \texttt{nN} version sets, inside a group, the lower-case code of % the null character to the requested character code; the null % character has been made active for the duration of the definitions, % so \cs{tex_lowercase:D} turns the stored active |^^@| into the target % active character, and the lowercased material closes the group before % performing the assignment. The \texttt{NN} version converts its first % argument to a character code with |`| and calls the \texttt{nN} % version. % These are similar to \LuaTeX{}'s \tn{letcharcode} primitive. % \begin{macrocode} \group_begin: \char_set_catcode_active:N \^^@ \cs_set_protected:Npn \@@_tmp:nN #1#2 { \cs_new_protected:cpn { #1 :nN } ##1 { \group_begin: \char_set_lccode:nn { `\^^@ } { ##1 } \tex_lowercase:D { \group_end: #2 ^^@ } } \cs_new_protected:cpe { #1 :NN } ##1 { \exp_not:c { #1 : nN } { `##1 } } } \@@_tmp:nN { char_set_active_eq } \cs_set_eq:NN \@@_tmp:nN { char_gset_active_eq } \cs_gset_eq:NN \group_end: \cs_generate_variant:Nn \char_set_active_eq:NN { Nc } \cs_generate_variant:Nn \char_gset_active_eq:NN { Nc } \cs_generate_variant:Nn \char_set_active_eq:nN { nc } \cs_generate_variant:Nn \char_gset_active_eq:nN { nc } % \end{macrocode} % \end{macro} % % \begin{macro}{\@@_int_to_roman:w} % For efficiency in 8-bit engines, we use the faster primitive approach % to making roman numerals.
% \begin{macrocode} \cs_new_eq:NN \@@_int_to_roman:w \tex_romannumeral:D % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\char_generate:nn} % \begin{macro}[EXP]{\@@_generate_aux:w} % \begin{macro}[EXP]{\@@_generate_aux:nnw, \@@_generate_auxii:nnw} % \begin{variable}{\l_@@_tmp_tl} % \begin{macro}[EXP]{\@@_generate_invalid_catcode:} % The aim here is to generate characters of (broadly) arbitrary category % code. Where possible, that is done using engine support (\XeTeX{}, % \LuaTeX{}). There are though various issues which are covered below. At % the interface layer, turn the two arguments into integers up-front so % this is only done once. The two integers are passed on delimited by % semicolons, and the \cs{exp:w} started here is ended by the % \cs{exp_end:} which closes the auxiliary. % \begin{macrocode} \cs_new:Npn \char_generate:nn #1#2 { \exp:w \exp_after:wN \@@_generate_aux:w \int_value:w \int_eval:n {#1} \exp_after:wN ; \int_value:w \int_eval:n {#2} ; } % \end{macrocode} % Before doing any actual conversion, first some special case filtering. % Spaces are out here as \LuaTeX{} emulation only makes normal (charcode % $32$) spaces. However, |^^@| is filtered out separately as that can't be % done with macro emulation either, so is treated separately. That % done, hand off to the engine-dependent part. % In the code, the first test raises the \texttt{invalid-catcode} error % if the category code |#2| is less than $1$, equal to $5$ or $9$, or % greater than $13$. The second raises \texttt{out-of-range} if the % character code |#1| is negative or exceeds \cs{c_max_char_int}. The % third raises \texttt{null-space} for category code $10$ combined with % character code $0$, in which case |#2#1| reads $100$. % \begin{macrocode} \cs_new:Npn \@@_generate_aux:w #1 ; #2 ; { \if_int_odd:w 0 \if_int_compare:w #2 < 1 \exp_stop_f: 1 \fi: \if_int_compare:w #2 = 5 \exp_stop_f: 1 \fi: \if_int_compare:w #2 = 9 \exp_stop_f: 1 \fi: \if_int_compare:w #2 > 13 \exp_stop_f: 1 \fi: \exp_stop_f: \msg_expandable_error:nn { char } { invalid-catcode } \else: \if_int_odd:w 0 \if_int_compare:w #1 < \c_zero_int 1 \fi: \if_int_compare:w #1 > \c_max_char_int 1 \fi: \exp_stop_f: \msg_expandable_error:nn { char } { out-of-range } \else: \if_int_compare:w #2#1 = 100 \exp_stop_f: \msg_expandable_error:nn { char } { null-space } \else: \@@_generate_aux:nnw {#1} {#2} \fi: \fi: \fi: \exp_end: } \tl_new:N \l_@@_tmp_tl % \end{macrocode} % Engine-dependent definitions are now needed for the implementation.
Recent % (u)p\TeX{} and the Unicode engines \LuaTeX{} and \XeTeX{} have engine-level % support for expandable character creation. \pdfTeX{} and older (u)p\TeX{} % releases do not. The branching here is low-level to avoid fixing % the category code of the null character used in the false branch. % The final level is the basic definition at the engine level: the arguments % here are integers so there is no need to worry about them too much. % Older versions of \XeTeX{} cannot generate active characters so we filter % that: % at some future stage that may change: the slightly odd ordering of % auxiliaries reflects that. % In the definition, |#3| collects whatever lies between the two integer % arguments and \cs{exp_end:}; it is put back first, and \cs{exp_after:wN} % then expands \cs{tex_Ucharcat:D} before \cs{exp_end:} is reached. % \begin{macrocode} \group_begin: \char_set_catcode_active:N \^^L \cs_set:Npn ^^L { } \if_cs_exist:N \tex_Ucharcat:D \cs_new:Npn \@@_generate_aux:nnw #1#2#3 \exp_end: { #3 \exp_after:wN \exp_end: \tex_Ucharcat:D #1 \exp_stop_f: #2 \exp_stop_f: } \else: % \end{macrocode} % For engines where \tn{Ucharcat} isn't available or emulated, we have % to work in macros, and cover only the $8$-bit range. The first stage is % to build up a |tl| containing |^^@| with each category code that can % be accessed in this way, with an error set up for the other cases. This % is all done such that it can be quickly accessed using a |\if_case:w| % low-level conditional. The list is done in reverse as this puts the case % of an active token \emph{first}: that's needed to cover the possibility % that it is \tn{outer}. Getting the braces into the list is done using % some standard \cs{if_false:} manipulation, while all of the \cs{exp_not:N} % are required as there is an expansion in the setup.
% \begin{macrocode} \char_set_catcode_active:n { 0 } \tl_set:Nn \l_@@_tmp_tl { \exp_not:N ^^@ \exp_not:N \or: } \char_set_catcode_other:n { 0 } \tl_put_right:Nn \l_@@_tmp_tl { ^^@ \exp_not:N \or: } \char_set_catcode_letter:n { 0 } \tl_put_right:Nn \l_@@_tmp_tl { ^^@ \exp_not:N \or: } % \end{macrocode} % For making spaces, there needs to be an |o|-type expansion of a |\use:n| % (or some other tokenization) to avoid dropping the space. % Two entries of the list store no null character: the one directly % after the space and the one directly after the parameter character. % Counting the active character as case~$0$, these are cases $4$ % and~$8$, which correspond to category codes $9$ and~$5$ when the case % number is $13$ minus the category code. % \begin{macrocode} \tl_put_right:Nn \l_@@_tmp_tl { \use:n { ~ } \exp_not:N \or: } \tl_put_right:Nn \l_@@_tmp_tl { \exp_not:N \or: } \char_set_catcode_math_subscript:n { 0 } \tl_put_right:Nn \l_@@_tmp_tl { ^^@ \exp_not:N \or: } \char_set_catcode_math_superscript:n { 0 } \tl_put_right:Nn \l_@@_tmp_tl { ^^@ \exp_not:N \or: } \char_set_catcode_parameter:n { 0 } \tl_put_right:Nn \l_@@_tmp_tl { ^^@ \exp_not:N \or: } \tl_put_right:Nn \l_@@_tmp_tl { { \if_false: } \fi: \exp_not:N \or: } \char_set_catcode_alignment:n { 0 } \tl_put_right:Nn \l_@@_tmp_tl { ^^@ \exp_not:N \or: } \char_set_catcode_math_toggle:n { 0 } \tl_put_right:Nn \l_@@_tmp_tl { ^^@ \exp_not:N \or: } \char_set_catcode_group_end:n { 0 } \tl_put_right:Nn \l_@@_tmp_tl { \if_false: { \fi: ^^@ \exp_not:N \or: } % } \char_set_catcode_group_begin:n { 0 } % { \tl_put_right:Nn \l_@@_tmp_tl { ^^@ \exp_not:N \or: } } % \end{macrocode} % Convert the above temporary list into a series of constant token % lists, one for each character code, using \cs{tex_lowercase:D} to % convert |^^@| in each case. The lower-case codes of both the null % character and the space are set to the target character code, so that % every entry of the list is converted. The \texttt{e}-type expansion ensures % that \cs{tex_lowercase:D} receives the contents of the token list.
% \begin{macrocode} \cs_set_protected:Npn \@@_tmp:n #1 { \char_set_lccode:nn { 0 } {#1} \char_set_lccode:nn { 32 } {#1} \exp_args:Ne \tex_lowercase:D { \tl_const:Ne \exp_not:c { c_@@_ \@@_int_to_roman:w #1 _tl } { \exp_not:o \l_@@_tmp_tl } } } \int_step_function:nnN { 0 } { 255 } \@@_tmp:n % \end{macrocode} % As \TeX{} is very unhappy if it finds an alignment character inside % a primitive \tn{halign} even when skipping false branches, some % precautions are required. \TeX{} is happy if the token is hidden % between braces within \cs{if_false:} \dots{} \cs{fi:}. The % rather low-level approach here expands in one step to the % \meta{target token} (\cs{or:} \dots{}), then \cs{exp_after:wN} % \meta{target token} (\cs{or:} \dots{}) expands in one step to % \meta{target token}. This means that \cs{exp_not:N} is applied to % a potentially-problematic active token. % \begin{macrocode} \cs_new:Npn \@@_generate_aux:nnw #1#2#3 \exp_end: { #3 \if_false: { \fi: \exp_after:wN \exp_after:wN \exp_after:wN \exp_end: \exp_after:wN \exp_after:wN \if_case:w \tex_numexpr:D 13 - #2 \exp_after:wN \exp_after:wN \exp_after:wN \exp_after:wN \exp_after:wN \exp_after:wN \exp_after:wN \scan_stop: \exp_after:wN \exp_after:wN \exp_after:wN \exp_not:N \cs:w c_@@_ \@@_int_to_roman:w #1 _tl \cs_end: } \fi: } \fi: \group_end: % \end{macrocode} % \end{macro} % \end{variable} % \end{macro} % \end{macro} % \end{macro} % % \begin{variable}{\c_catcode_active_space_tl} % While \cs{char_generate:nn} can produce active characters in some % engines it cannot in general. It would be possible to simply change % the catcode of space but then the code would need to avoid all % spaces, making it quite unreadable. Instead we use the primitive % \cs{tex_lowercase:D} trick. 
%   In the code below the asterisk is made active and given the
%   lowercase code of a space, so \cs{tex_lowercase:D} turns it into an
%   active space inside the constant.
%    \begin{macrocode}
\group_begin:
  \char_set_catcode_active:N *
  \char_set_lccode:nn { `* } { `\  }
  \tex_lowercase:D { \tl_const:Nn \c_catcode_active_space_tl { * } }
\group_end:
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}{\c_catcode_other_space_tl}
%   A space character with category code~$12$, \emph{i.e.}~an
%   \enquote{other} space, built using \cs{char_generate:nn}.
%    \begin{macrocode}
\tl_const:Ne \c_catcode_other_space_tl { \char_generate:nn { `\  } { 12 } }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Generic tokens}
%
%    \begin{macrocode}
%<@@=token>
%    \end{macrocode}
%
% \begin{variable}{\s_@@_mark, \s_@@_stop}
%   Internal scan marks.
%    \begin{macrocode}
\scan_new:N \s_@@_mark
\scan_new:N \s_@@_stop
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}{\token_to_meaning:N, \token_to_meaning:c}
% \begin{macro}{\token_to_str:N, \token_to_str:c}
%   A reminder only: these are needed \enquote{early} and are therefore
%   defined in \pkg{l3basics}.
% \end{macro}
% \end{macro}
%
% \begin{macro}{\token_to_catcode:N}
% \begin{macro}{\@@_to_catcode:N}
%   The input token is compared using \cs{if_catcode:w} against each
%   valid category code in turn.  Tokens of category $11$ or~$12$ are
%   the most common in an average argument list, so those categories are
%   tested first.  A space or a brace is not an ordinary |N|-type
%   argument, so these categories can only be matched by a control
%   sequence which has been let to a token of such a category: they are
%   therefore tested last.
%    \begin{macrocode}
\cs_new:Npn \token_to_catcode:N
  {
    \int_value:w
      \group_align_safe_begin:
      \@@_to_catcode:N
  }
\cs_new:Npn \@@_to_catcode:N #1
  {
    \if_catcode:w \exp_not:N #1 \c_catcode_letter_token
      11
    \else:
      \if_catcode:w \exp_not:N #1 \c_catcode_other_token
        12
      \else:
        \if_catcode:w \exp_not:N #1 \c_math_toggle_token
          3
        \else:
          \if_catcode:w \exp_not:N #1 \c_alignment_token
            4
          \else:
            \if_catcode:w \exp_not:N #1 ##
              6
            \else:
              \if_catcode:w \exp_not:N #1 \c_math_superscript_token
                7
              \else:
                \if_catcode:w \exp_not:N #1 \c_math_subscript_token
                  8
                \else:
                  \if_catcode:w \exp_not:N #1 \c_group_begin_token
                    1
                  \else:
                    \if_catcode:w \exp_not:N #1 \c_group_end_token
                      2
                    \else:
                      \if_catcode:w \exp_not:N #1 \c_space_token
                        10
                      \else:
                        \token_if_cs:NTF #1 { 16 } { 13 }
                      \fi:
                    \fi:
                  \fi:
                \fi:
              \fi:
            \fi:
          \fi:
        \fi:
      \fi:
    \fi:
    \group_align_safe_end:
    \exp_stop_f:
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}
%   {
%     \c_group_begin_token,
%     \c_group_end_token,
%     \c_math_toggle_token,
%     \c_alignment_token,
%     \c_parameter_token,
%     \c_math_superscript_token,
%     \c_math_subscript_token,
%     \c_space_token,
%     \c_catcode_letter_token,
%     \c_catcode_other_token
%   }
%   We define these useful tokens.  The formal argument
%   specification of \cs{cs_new_eq:NN} does not cover the brace and
%   space tokens, so for those the definitions are done by hand using
%   the primitives.  (As currently coded it would \emph{work} with
%   \cs{cs_new_eq:NN} but that's not really a great idea to show off: we
%   want people to stick to the defined interfaces and that includes
%   us.)  So that these few odd names go into the log when appropriate
%   there is a need to hand-apply the \cs{__kernel_chk_if_free_cs:N}
%   check.
%    \begin{macrocode}
\group_begin:
  \__kernel_chk_if_free_cs:N \c_group_begin_token
  \tex_global:D \tex_let:D \c_group_begin_token {
  \__kernel_chk_if_free_cs:N \c_group_end_token
  \tex_global:D \tex_let:D \c_group_end_token }
  \char_set_catcode_math_toggle:N \*
  \cs_new_eq:NN \c_math_toggle_token *
  \char_set_catcode_alignment:N \*
  \cs_new_eq:NN \c_alignment_token *
  \cs_new_eq:NN \c_parameter_token #
  \cs_new_eq:NN \c_math_superscript_token ^
  \char_set_catcode_math_subscript:N \*
  \cs_new_eq:NN \c_math_subscript_token *
  \__kernel_chk_if_free_cs:N \c_space_token
  \use:n { \tex_global:D \tex_let:D \c_space_token = ~ } ~
  \cs_new_eq:NN \c_catcode_letter_token a
  \cs_new_eq:NN \c_catcode_other_token 1
\group_end:
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}{\c_catcode_active_tl}
%   Not an implicit token!  This is a token list holding
%   \cs{exp_not:N} followed by an active~|*|.
%    \begin{macrocode}
\group_begin:
  \char_set_catcode_active:N \*
  \tl_const:Nn \c_catcode_active_tl { \exp_not:N * }
\group_end:
%    \end{macrocode}
% \end{variable}
%
% \subsection{Token conditionals}
%
% \begin{macro}[pTF]{\token_if_group_begin:N}
%   Check if token is a begin-group token, by comparing its category
%   code with that of the constant \cs{c_group_begin_token}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_group_begin:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_group_begin_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_group_end:N}
%   Check if token is an end-group token, by comparing its category
%   code with that of the constant \cs{c_group_end_token}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_group_end:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_group_end_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_math_toggle:N}
%   Check if token is a math shift token.  We use the constant
%   \cs{c_math_toggle_token} for this.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_math_toggle:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_math_toggle_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_alignment:N}
%   Check if token is an alignment tab token, by comparing its category
%   code with that of the constant \cs{c_alignment_token}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_alignment:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_alignment_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_parameter:N}
%   Check if token is a parameter token, using the constant
%   \cs{c_parameter_token}.  A small trick is needed to avoid an error
%   message from \TeX{}: the definition is made inside a group in which
%   \cs{c_parameter_token} is equal to \cs{scan_stop:}, so that it does
%   not behave like a macro parameter character while the conditional
%   is being defined.  The definitions made by
%   \cs{prg_new_conditional:Npnn} are global, so they remain after the
%   group.
%    \begin{macrocode}
\group_begin:
\cs_set_eq:NN \c_parameter_token \scan_stop:
\prg_new_conditional:Npnn \token_if_parameter:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_parameter_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
\group_end:
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_math_superscript:N}
%   Check if token is a math superscript token, by comparing its
%   category code with that of the constant
%   \cs{c_math_superscript_token}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_math_superscript:N #1
  { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_math_superscript_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_math_subscript:N}
%   Check if token is a math subscript token.  We use the constant
%   \cs{c_math_subscript_token} for this.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_math_subscript:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_math_subscript_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_space:N}
%   Check if token is a space token, by comparing its category code
%   with that of the constant \cs{c_space_token}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_space:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_space_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_letter:N}
%   Check if token is a letter token, by comparing its category code
%   with that of the constant \cs{c_catcode_letter_token}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_letter:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_catcode_letter_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_other:N}
%   Check if token is an other char token, by comparing its category
%   code with that of the constant \cs{c_catcode_other_token}.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_other:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_catcode_other_token
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_active:N}
%   Check if token is an active char token, using the constant
%   \cs{c_catcode_active_tl}.  A technical point is that
%   \cs{c_catcode_active_tl} is in fact a macro expanding to
%   |\exp_not:N *|, where |*| is active.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_active:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \c_catcode_active_tl
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_eq_meaning:NN}
%   Check if the tokens |#1| and |#2| have the same meaning.
%    \begin{macrocode}
\prg_new_eq_conditional:NNn \token_if_eq_meaning:NN \cs_if_eq:NN
  { p , T , F , TF }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_eq_catcode:NN}
%   Check if the tokens |#1| and |#2| have the same category code.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_eq_catcode:NN #1#2 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \exp_not:N #2
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_eq_charcode:NN}
%   Check if the tokens |#1| and |#2| have the same character code.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_eq_charcode:NN #1#2 { p , T , F , TF }
  {
    \if_charcode:w \exp_not:N #1 \exp_not:N #2
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_macro:N}
% \begin{macro}{\@@_if_macro_p:w}
%   When a token is a macro, \cs{token_to_meaning:N} always outputs
%   something like |\long macro:#1->#1| so we could naively check to
%   see if the meaning contains |->|.  However, this can fail for the
%   five \tn[no-index]{...mark} primitives, whose meaning has the form
%   |...mark:|\meta{user material}.  The problem is that the
%   \meta{user material} can contain |->|.
%
%   However, only characters, macros, and marks can contain the colon
%   character.  The idea is thus to grab until the first |:|, and
%   analyse what is left.  However, macros can have any combination of
%   |\long|, |\protected| or |\outer| (not used in \LaTeX3) before the
%   string |macro:|.  We thus only select the part of the meaning
%   between the first |ma| and the first following |:|.  If this string
%   is |cro|, then we have a macro.  If the string is |rk|, then we have
%   a mark.  The string can also be |cro parameter character | for a
%   colon with a weird category code (namely the usual category code
%   of |#|).  Otherwise, it is empty.
%
%   This relies on the fact that |\long|, |\protected|, |\outer|
%   cannot contain |ma|, regardless of the escape character, even if
%   the escape character is |m|\ldots{}
%
%   Both |ma| and |:| must be of category code $12$ (other), so are
%   detokenized.
%
%    \begin{macrocode}
\use:e
  {
    \prg_new_conditional:Npnn \exp_not:N \token_if_macro:N #1
      { p , T , F , TF }
      {
        \exp_not:N \exp_after:wN \exp_not:N \@@_if_macro_p:w
        \exp_not:N \token_to_meaning:N #1 \tl_to_str:n { ma : }
        \s_@@_stop
      }
    \cs_new:Npn \exp_not:N \@@_if_macro_p:w
      #1 \tl_to_str:n { ma } #2 \c_colon_str #3 \s_@@_stop
  }
  {
    \str_if_eq:nnTF { #2 } { cro }
      { \prg_return_true: }
      { \prg_return_false: }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_cs:N}
%   Check if token has the same catcode as a control sequence.  This
%   follows the same pattern as for \cs{token_if_letter:N} \emph{etc.}
%   We use \cs{scan_stop:} for this.
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_cs:N #1 { p , T , F , TF }
  {
    \if_catcode:w \exp_not:N #1 \scan_stop:
      \prg_return_true: \else: \prg_return_false: \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]{\token_if_expandable:N}
%   Check if token is expandable.  We use the fact that \TeX{}
%   temporarily converts \cs{exp_not:N} \meta{token} into \cs{scan_stop:}
%   if \meta{token} is expandable.  An \texttt{undefined} token is not
%   considered as expandable.  No problem nesting the conditionals,
%   since the third |#1| is only skipped if it is non-expandable (hence
%   not part of \TeX{}'s conditional apparatus).
%    \begin{macrocode}
\prg_new_conditional:Npnn \token_if_expandable:N #1 { p , T , F , TF }
  {
    \exp_after:wN \if_meaning:w \exp_not:N #1 #1
      \prg_return_false:
    \else:
      \if_cs_exist:N #1
        \prg_return_true:
      \else:
        \prg_return_false:
      \fi:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}
%   {
%     \@@_delimit_by_char":w,
%     \@@_delimit_by_count:w,
%     \@@_delimit_by_dimen:w,
%     \@@_delimit_by_ font:w,
%     \@@_delimit_by_macro:w,
%     \@@_delimit_by_muskip:w,
%     \@@_delimit_by_skip:w,
%     \@@_delimit_by_toks:w,
%   }
%   These auxiliary functions are used below to define some
%   conditionals which detect whether the \tn{meaning} of their
%   argument begins with a particular string.  Each auxiliary takes an
%   argument delimited by a string, a second one delimited by
%   \cs{s_@@_stop}, and returns the first one and its delimiter.
%   This result is eventually compared to another string.
%   Note that the \enquote{font} auxiliary is delimited by a space
%   followed by \enquote{\texttt{font}}.  This avoids an unnecessary
%   check for the \tn{font} primitive below.
%    \begin{macrocode}
\group_begin:
\cs_set_protected:Npn \@@_tmp:w #1
  {
    \use:e
      {
        \cs_new:Npn \exp_not:c { @@_delimit_by_ #1 :w }
            ##1 \tl_to_str:n {#1} ##2 \s_@@_stop
          { ##1 \tl_to_str:n {#1} }
      }
  }
\@@_tmp:w { char" }
\@@_tmp:w { count }
\@@_tmp:w { dimen }
\@@_tmp:w { ~ font }
\@@_tmp:w { macro }
\@@_tmp:w { muskip }
\@@_tmp:w { skip }
\@@_tmp:w { toks }
\group_end:
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[pTF]
%   {
%     \token_if_chardef:N, \token_if_mathchardef:N,
%     \token_if_long_macro:N,
%     \token_if_protected_macro:N, \token_if_protected_long_macro:N,
%     \token_if_font_selection:N,
%     \token_if_dim_register:N, \token_if_int_register:N,
%     \token_if_muskip_register:N,
%     \token_if_skip_register:N, \token_if_toks_register:N,
%   }
%   Each of these conditionals tests whether its argument's
%   \tn{meaning} starts with a given string.  This is essentially done
%   by having an auxiliary grab an argument delimited by the string and
%   testing whether the argument was empty.  Of course, a copy of this
%   string must first be added to the end of the \tn{meaning} to avoid
%   a runaway argument in case it does not contain the string.  Two
%   complications arise.  First, the escape character is not fixed, and
%   cannot be included in the delimiter of the auxiliary function (this
%   function cannot be defined on the fly because tests must remain
%   expandable): instead the first argument of the auxiliary (plus the
%   delimiter to avoid complications with trailing spaces) is compared
%   using \cs{str_if_eq:eeTF} to the result of applying
%   \cs{token_to_str:N} to a control sequence.  Second, the
%   \tn{meaning} of primitives such as \tn{dimen} or \tn{dimendef}
%   starts in the same way as registers such as
%   \tn{dimen}\texttt{123}, so they must be tested for.
%
%   Characters used as delimiters must have catcode~$12$
%   and are obtained through \cs{tl_to_str:n}.  This requires doing all
%   definitions within \texttt{e}-expansion.  The temporary function
%   \cs{@@_tmp:w} used to define each conditional receives three
%   arguments: the name of the conditional, the auxiliary's delimiter
%   (also used to name the auxiliary), and the string to which one
%   compares the auxiliary's result.  Note that the \tn{meaning} of a
%   protected long macro starts with |\protected\long macro|, with no
%   space after |\protected| but a space after |\long|, hence the
%   mixture of \cs{token_to_str:N} and \cs{tl_to_str:n}.
%
%   For the first six conditionals, \cs{cs_if_exist:cT} turns out to
%   be \texttt{false} (thanks to the leading space for \texttt{font}),
%   and the code boils down to a string comparison
%   between the result of the auxiliary on the \tn{meaning} of the
%   conditional's argument~|####1|, and~|#3|.  Both are evaluated at
%   run-time, as this is important to get the correct escape character.
%
%   The other five conditionals have additional code that compares the
%   argument~|####1| to two \TeX{} primitives which would wrongly be
%   recognized as registers otherwise.  Despite using \TeX{}'s
%   primitive conditional construction, this does not break
%   when~|####1| is itself a conditional, because branches of the
%   conditionals are only skipped if |####1|~is one of the two
%   primitives that are tested for (which are not \TeX{} conditionals).
%    \begin{macrocode}
\group_begin:
\cs_set_protected:Npn \@@_tmp:w #1#2#3
  {
    \use:e
      {
        \prg_new_conditional:Npnn \exp_not:c { token_if_ #1 :N } ##1
          { p , T , F , TF }
          {
            \cs_if_exist:cT { tex_ #2 :D }
              {
                \exp_not:N \if_meaning:w ##1 \exp_not:c { tex_ #2 :D }
                \exp_not:N \prg_return_false:
                \exp_not:N \else:
                \exp_not:N \if_meaning:w ##1 \exp_not:c { tex_ #2 def:D }
                \exp_not:N \prg_return_false:
                \exp_not:N \else:
              }
            \exp_not:N \str_if_eq:eeTF
              {
                \exp_not:N \exp_after:wN
                \exp_not:c { @@_delimit_by_ #2 :w }
                \exp_not:N \token_to_meaning:N ##1
                ? \tl_to_str:n {#2} \s_@@_stop
              }
              { \exp_not:n {#3} }
              { \exp_not:N \prg_return_true: }
              { \exp_not:N \prg_return_false: }
            \cs_if_exist:cT { tex_ #2 :D }
              {
                \exp_not:N \fi:
                \exp_not:N \fi:
              }
          }
      }
  }
\@@_tmp:w { chardef } { char" } { \token_to_str:N \char" }
\@@_tmp:w { mathchardef } { char" } { \token_to_str:N \mathchar" }
\@@_tmp:w { long_macro } { macro } { \tl_to_str:n { \long } macro }
\@@_tmp:w { protected_macro } { macro }
  { \tl_to_str:n { \protected } macro }
\@@_tmp:w { protected_long_macro } { macro }
  { \token_to_str:N \protected \tl_to_str:n { \long } macro }
\@@_tmp:w { font_selection } { ~ font } { select ~ font }
\@@_tmp:w { dim_register } { dimen } { \token_to_str:N \dimen }
\@@_tmp:w { int_register } { count } { \token_to_str:N \count }
\@@_tmp:w { muskip_register } { muskip } { \token_to_str:N \muskip }
\@@_tmp:w { skip_register } { skip } { \token_to_str:N \skip }
\@@_tmp:w { toks_register } { toks } { \token_to_str:N \toks }
\group_end:
%    \end{macrocode}
% \end{macro}
%
%
\begin{macro}[pTF]{\token_if_primitive:N} % \begin{macro}{\@@_if_primitive:NNw, % \@@_if_primitive_space:w, % \@@_if_primitive_nullfont:N, % \@@_if_primitive_loop:N, % \@@_if_primitive:Nw, % \@@_if_primitive_undefined:N, % \@@_if_primitive_lua:N} %^^A See http://groups.google.com/group/comp.text.tex/browse_thread/thread/0a72666873f8753d# % % We filter out macros first, because they cause endless trouble later % otherwise. % % Primitives are almost distinguished by the fact that the result % of \cs{token_to_meaning:N} is formed from letters only. Every other % token has either a space (e.g., |the letter A|), a digit % (e.g., |\count123|) or a double quote (e.g., |\char"A|). % % Ten exceptions: on the one hand, \cs{tex_undefined:D} is not a % primitive, but its meaning is |undefined|, only letters; % on the other hand, \tn{space}, \tn{italiccorr}, % \tn{hyphen}, \tn{firstmark}, \tn{topmark}, % \tn{botmark}, \tn{splitfirstmark}, \tn{splitbotmark}, % and \tn{nullfont} are primitives, but have non-letters % in their meaning. % % We start by removing the first two (non-space) characters from % the meaning. This removes the escape character (which may be % nonexistent depending on \tn{escapechar}), and takes care % of three of the exceptions: \tn{space}, \tn{italiccorr} % and \tn{hyphen}, whose meaning is at most two characters. % This leaves a string terminated by some |:|, and \cs{s_@@_stop}. % % The meaning of each one of the five \tn[no-index]{...mark} primitives % has the form \meta{letters}|:|\meta{user material}. In other words, % the first non-letter is a colon. We remove everything after the first % colon. % % We are now left with a string, which we must analyze. For primitives, % it contains only letters. For non-primitives, it contains either % |"|, or a space, or a digit. Two exceptions remain: \cs{tex_undefined:D}, % which is not a primitive, and \tn{nullfont}, which is a primitive.
%
%   Spaces cannot be grabbed in an undelimited way, so we check them
%   separately.  If there is a space, we test for \tn{nullfont}.
%   Otherwise, we go through characters one by one, and stop at the
%   first character less than |`A| (this is not quite a test for
%   \enquote{only letters}, but is close enough to work in this context).
%   If this first character is |:| then we have a primitive, or
%   \cs{tex_undefined:D}, and if it is |"| or a digit, then the token
%   is not a primitive.
%
%   For \LuaTeX{} we use a different implementation which just looks at the
%   command code for the token and compares it to a list of non-primitives.
%   Again, \tn{nullfont} is a special case because it is the only primitive
%   with the normally non-primitive |set_font| command code.
%
%   In LuaMeta\TeX{} some of the command names are different, so we check for
%   both versions.  The first one is always the \LuaTeX{} version.
%
%    \begin{macrocode}
\sys_if_engine_luatex:TF
  {
%</tex>
%<*lua>
do
  local get_next = token.get_next
  local get_command = token.get_command
  local get_index = token.get_index
  local get_mode = token.get_mode or token.get_index
  local cmd = command_id
  local set_font = cmd'get_font'
  local biggest_char = token.biggest_char and token.biggest_char()
                    or status.getconstants().max_character_code

  -- Unique sentinel tables: they are compared by identity in the
  -- function below to select the extra test a command code needs.
  local mode_below_biggest_char = {}
  local index_not_nil = {}
  local mode_not_null = {}
  local non_primitive = {
    [cmd'left_brace'] = true,
    [cmd'right_brace'] = true,
    [cmd'math_shift'] = true,
    [cmd'mac_param' or cmd'parameter'] = mode_below_biggest_char,
    [cmd'sup_mark' or cmd'superscript'] = true,
    [cmd'sub_mark' or cmd'subscript'] = true,
    [cmd'endv' or cmd'ignore'] = true,
    [cmd'spacer'] = true,
    [cmd'letter'] = true,
    [cmd'other_char'] = true,
    [cmd'tab_mark' or cmd'alignment_tab'] = mode_below_biggest_char,
    [cmd'char_given'] = true,
    -- The alternative name has to be looked up with cmd() as well: a
    -- bare string used as the key can never equal the command code
    -- returned by get_command().  The string is kept only as a
    -- last-resort key so that the constructor cannot fail with a nil
    -- index if neither name is known.
    [cmd'math_given' or cmd'math_char_given' or 'math_char_given'] = true,
    [cmd'xmath_given' or cmd'math_char_xgiven' or 'math_char_xgiven'] = true,
    [cmd'set_font'] = mode_not_null,
    [cmd'undefined_cs'] = true,
    [cmd'call'] = true,
    [cmd'long_call' or cmd'protected_call'] = true,
    [cmd'outer_call' or cmd'tolerant_call'] = true,
    [cmd'long_outer_call' or cmd'tolerant_protected_call'] = true,
    [cmd'assign_glue' or cmd'register_glue'] = index_not_nil,
    [cmd'assign_mu_glue' or cmd'register_mu_glue'] = index_not_nil,
    [cmd'assign_toks' or cmd'register_toks'] = index_not_nil,
    [cmd'assign_int' or cmd'register_int'] = index_not_nil,
    [cmd'assign_attr' or cmd'register_attribute'] = true,
    [cmd'assign_dimen' or cmd'register_dimen'] = index_not_nil,
  }

  luacmd("@@_if_primitive_lua:N", function()
    local tok = get_next()
    local is_non_primitive = non_primitive[get_command(tok)]
    return put_next(
           is_non_primitive == true
               and false_tok
        or is_non_primitive == nil
               and true_tok
        or is_non_primitive == mode_not_null
               and (get_mode(tok) == 0 and true_tok or false_tok)
        or is_non_primitive == index_not_nil
               and (get_index(tok) and false_tok or true_tok)
        or is_non_primitive == mode_below_biggest_char
               and (get_mode(tok) > biggest_char and true_tok or false_tok))
  end, "global")
end
%</lua>
%<*tex>
    \prg_new_conditional:Npnn \token_if_primitive:N #1 { p , T , F , TF }
      {
        \@@_if_primitive_lua:N #1
      }
  }
  {
    \tex_global:D \tex_chardef:D \c_@@_A_int = `A ~ %
    \use:e
      {
        \prg_new_conditional:Npnn \exp_not:N \token_if_primitive:N #1
          { p , T , F , TF }
          {
            \exp_not:N \token_if_macro:NTF #1
              \exp_not:N \prg_return_false:
              {
                \exp_not:N \exp_after:wN \exp_not:N \@@_if_primitive:NNw
                \exp_not:N \token_to_meaning:N #1
                  \tl_to_str:n { : : : } \s_@@_stop #1
              }
          }
        \cs_new:Npn \exp_not:N \@@_if_primitive:NNw
            #1#2 #3 \c_colon_str #4 \s_@@_stop
          {
            \exp_not:N \tl_if_empty:oTF
              { \exp_not:N \@@_if_primitive_space:w #3 ~ }
              {
                \exp_not:N \@@_if_primitive_loop:N #3
                  \c_colon_str \s_@@_stop
              }
              { \exp_not:N \@@_if_primitive_nullfont:N }
          }
      }
    \cs_new:Npn \@@_if_primitive_space:w #1 ~ { }
    \cs_new:Npn \@@_if_primitive_nullfont:N #1
      {
        \if_meaning:w \tex_nullfont:D #1
          \prg_return_true:
        \else:
          \prg_return_false:
        \fi:
      }
    \cs_new:Npn \@@_if_primitive_loop:N #1
      {
        \if_int_compare:w `#1 < \c_@@_A_int %
          \exp_after:wN \@@_if_primitive:Nw
          \exp_after:wN #1
        \else:
          \exp_after:wN \@@_if_primitive_loop:N
        \fi:
      }
    \cs_new:Npn \@@_if_primitive:Nw #1 #2 \s_@@_stop
      {
        \if:w : #1
          \exp_after:wN \@@_if_primitive_undefined:N
        \else:
          \prg_return_false:
          \exp_after:wN \use_none:n
        \fi:
      }
    \cs_new:Npn \@@_if_primitive_undefined:N #1
      {
        \if_cs_exist:N #1
          \prg_return_true:
        \else:
          \prg_return_false:
        \fi:
      }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP, noTF]
%   {\token_case_catcode:Nn, \token_case_charcode:Nn, \token_case_meaning:Nn}
% \begin{macro}[EXP]{\@@_case:NNnTF, \@@_case:NNw, \@@_case_end:nw}
%   The aim here is to allow the case statement to be evaluated
%   using a known number of expansion steps (two), and without
%   needing to use an explicit \enquote{end of recursion} marker.
%   That is achieved by using the test input as the final case,
%   as this is always true.  The trick is then to tidy up
%   the output such that the appropriate case code plus either
%   the \texttt{true} or \texttt{false} branch code is inserted.
%    \begin{macrocode}
\cs_new:Npn \token_case_catcode:Nn #1#2
  {
    \exp:w
    \@@_case:NNnTF \token_if_eq_catcode:NNTF #1 {#2} { } { }
  }
\cs_new:Npn \token_case_catcode:NnT #1#2#3
  {
    \exp:w
    \@@_case:NNnTF \token_if_eq_catcode:NNTF #1 {#2} {#3} { }
  }
\cs_new:Npn \token_case_catcode:NnF #1#2
  {
    \exp:w
    \@@_case:NNnTF \token_if_eq_catcode:NNTF #1 {#2} { }
  }
\cs_new:Npn \token_case_catcode:NnTF
  { \exp:w \@@_case:NNnTF \token_if_eq_catcode:NNTF }
\cs_new:Npn \token_case_charcode:Nn #1#2
  {
    \exp:w
    \@@_case:NNnTF \token_if_eq_charcode:NNTF #1 {#2} { } { }
  }
\cs_new:Npn \token_case_charcode:NnT #1#2#3
  {
    \exp:w
    \@@_case:NNnTF \token_if_eq_charcode:NNTF #1 {#2} {#3} { }
  }
\cs_new:Npn \token_case_charcode:NnF #1#2
  {
    \exp:w
    \@@_case:NNnTF \token_if_eq_charcode:NNTF #1 {#2} { }
  }
\cs_new:Npn \token_case_charcode:NnTF
  { \exp:w \@@_case:NNnTF \token_if_eq_charcode:NNTF }
\cs_new:Npn \token_case_meaning:Nn #1#2
  {
    \exp:w
    \@@_case:NNnTF \token_if_eq_meaning:NNTF #1 {#2} { } { }
  }
\cs_new:Npn \token_case_meaning:NnT #1#2#3
  {
    \exp:w
    \@@_case:NNnTF \token_if_eq_meaning:NNTF #1 {#2} {#3} { }
  }
\cs_new:Npn \token_case_meaning:NnF #1#2
  {
    \exp:w
    \@@_case:NNnTF \token_if_eq_meaning:NNTF #1 {#2} { }
  }
\cs_new:Npn \token_case_meaning:NnTF
  { \exp:w \@@_case:NNnTF \token_if_eq_meaning:NNTF }
\cs_new:Npn \@@_case:NNnTF #1#2#3#4#5
  {
    \@@_case:NNw #1 #2 #3 #2 { }
    \s_@@_mark {#4}
    \s_@@_mark {#5}
    \s_@@_stop
  }
\cs_new:Npn \@@_case:NNw #1#2#3#4
  {
    #1 #2 #3
      { \@@_case_end:nw {#4} }
      { \@@_case:NNw #1 #2 }
  }
%    \end{macrocode}
%   To tidy up the recursion, there are two outcomes.  If there was a hit to
%   one of the cases searched for, then |#1| is the code to insert,
%   |#2| is the \emph{next} case to check on and |#3| is all of
%   the rest of the cases code.  That means that |#4| is the \texttt{true}
%   branch code, and |#5| tidies up the spare \cs{s_@@_mark} and the
%   \texttt{false} branch.  On the other hand, if none of the cases matched
%   then we arrive here using the \enquote{termination} case of comparing
%   the search with itself.  That means that |#1| is empty, |#2| is
%   the first \cs{s_@@_mark} and so |#4| is the \texttt{false} code (the
%   \texttt{true} code is mopped up by |#3|).
%    \begin{macrocode}
\cs_new:Npn \@@_case_end:nw #1#2#3 \s_@@_mark #4#5 \s_@@_stop
  { \exp_end: #1 #4 }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Peeking ahead at the next token}
%
%    \begin{macrocode}
%<@@=peek>
%    \end{macrocode}
%
% Peeking ahead is implemented using a two-part mechanism.  The
% outer level provides a defined interface to the lower level material.
% This allows a large amount of code to be shared.  There are four
% cases:
% \begin{enumerate}
%   \item peek at the next token;
%   \item peek at the next non-space token;
%   \item peek at the next token and remove it;
%   \item peek at the next non-space token and remove it.
% \end{enumerate}
%
% \begin{variable}{\l_peek_token}
% \begin{variable}{\g_peek_token}
%   Storage tokens which are publicly documented: the token peeked.
%    \begin{macrocode}
\cs_new_eq:NN \l_peek_token ?
\cs_new_eq:NN \g_peek_token ?
%    \end{macrocode}
% \end{variable}
% \end{variable}
%
% \begin{variable}{\l_@@_search_token}
%   The token to search for as an implicit token:
%   \emph{cf.}~\cs{l_@@_search_tl}.
%    \begin{macrocode}
\cs_new_eq:NN \l_@@_search_token ?
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_search_tl}
%   The token to search for as an explicit token:
%   \emph{cf.}~\cs{l_@@_search_token}.
%    \begin{macrocode}
\tl_new:N \l_@@_search_tl
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}
%   {\@@_true:w, \@@_true_aux:w, \@@_false:w, \@@_tmp:w}
%   Functions used by the branching and space-stripping code.
%    \begin{macrocode}
\cs_new:Npn \@@_true:w { }
\cs_new:Npn \@@_true_aux:w { }
\cs_new:Npn \@@_false:w { }
\cs_new:Npn \@@_tmp:w { }
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}{\s_@@_mark,\s_@@_stop}
%   Internal scan marks.
%    \begin{macrocode}
\scan_new:N \s_@@_mark
\scan_new:N \s_@@_stop
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}[EXP]{\@@_use_none_delimit_by_s_stop:w}
%   Gobbles everything up to and including the scan mark
%   \cs{s_@@_stop}.
%    \begin{macrocode}
\cs_new:Npn \@@_use_none_delimit_by_s_stop:w #1 \s_@@_stop { }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\peek_after:Nw}
% \begin{macro}{\peek_gafter:Nw}
%   Simple wrappers for \tn{futurelet}: no arguments absorbed
%   here.  The local version sets \cs{l_peek_token}, the global one
%   \cs{g_peek_token}.
%    \begin{macrocode}
\cs_new_protected:Npn \peek_after:Nw
  { \tex_futurelet:D \l_peek_token }
\cs_new_protected:Npn \peek_gafter:Nw
  { \tex_global:D \tex_futurelet:D \g_peek_token }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\@@_true_remove:w}
%   A function to remove the next token and then regain control: the
%   token is absorbed by an assignment to the scratch function
%   \cs{@@_tmp:w}, after which \cs{@@_true_aux:w} is run.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_true_remove:w
  {
    \tex_afterassignment:D \@@_true_aux:w
    \cs_set_eq:NN \@@_tmp:w
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\peek_remove_spaces:n, \@@_remove_spaces:}
%   Repeatedly use \cs{@@_true_remove:w} to remove a space and call
%   \cs{@@_true_aux:w}.
%    \begin{macrocode}
\cs_new_protected:Npn \peek_remove_spaces:n #1
  {
    \cs_set:Npe \@@_false:w { \exp_not:n {#1} }
    \group_align_safe_begin:
    \cs_set:Npn \@@_true_aux:w { \peek_after:Nw \@@_remove_spaces: }
    \@@_true_aux:w
  }
\cs_new_protected:Npn \@@_remove_spaces:
  {
    \if_meaning:w \l_peek_token \c_space_token
      \exp_after:wN \@@_true_remove:w
    \else:
      \group_align_safe_end:
      \exp_after:wN \@@_false:w
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\peek_remove_filler:n}
% \begin{macro}{\@@_remove_filler:w}
% \begin{macro}{\@@_remove_filler:}
% \begin{macro}{\@@_remove_filler_expand:w}
%   Here we expand the input, removing spaces and \cs{scan_stop:} tokens until
%   we reach a non-expandable token.  At that stage we re-insert the payload.
%   To deal with the problem of |&| tokens, we have to put the align-safe
%   group in the correct place.
%    \begin{macrocode}
\cs_new_protected:Npn \peek_remove_filler:n #1
  {
    \cs_set:Npn \@@_true_aux:w { \@@_remove_filler:w }
    \cs_set:Npe \@@_false:w
      {
        \exp_not:N \group_align_safe_end:
        \exp_not:n {#1}
      }
    \group_align_safe_begin:
      \@@_remove_filler:w
  }
\cs_new_protected:Npn \@@_remove_filler:w
  {
    \exp_after:wN \peek_after:Nw \exp_after:wN \@@_remove_filler:
      \exp:w \exp_end_continue_f:w
  }
%    \end{macrocode}
%   Here we can nest conditionals as \cs{l_peek_token} is only skipped over in
%   the nested one if it's a space: no problems with conditionals or outer
%   tokens.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_remove_filler:
  {
    \if_catcode:w \exp_not:N \l_peek_token \c_space_token
      \exp_after:wN \@@_true_remove:w
    \else:
      \if_meaning:w \l_peek_token \scan_stop:
        \exp_after:wN \exp_after:wN \exp_after:wN
          \@@_true_remove:w
      \else:
        \exp_after:wN \exp_after:wN \exp_after:wN
          \@@_remove_filler_expand:w
      \fi:
    \fi:
  }
%    \end{macrocode}
%   To deal with undefined control sequences in the same way \TeX{} does,
%   we need to check for expansion manually.
%   The \cs{if_meaning:w} test compares \cs{l_peek_token} with itself
%   preceded by \cs{exp_not:N}: if the two agree, \cs{@@_false:w}
%   delivers the payload, otherwise \cs{@@_remove_filler:w} takes
%   another round.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_remove_filler_expand:w
  {
    \exp_after:wN \if_meaning:w \exp_not:N \l_peek_token \l_peek_token
      \exp_after:wN \@@_false:w
    \else:
      \exp_after:wN \@@_remove_filler:w
    \fi:
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\@@_token_generic_aux:NNNTF}
%   The generic functions store the test token in both implicit and
%   explicit modes, and the \texttt{true} and \texttt{false} code as
%   token lists, more or less.  The two branches have to be absorbed here
%   as the input stream needs to be cleared for the peek function itself.
%   Here, |#1| is \cs{@@_true_remove:w} when removing the token and
%   \cs{@@_true_aux:w} otherwise.  The remaining arguments are: |#2|,
%   the function run by \cs{peek_after:Nw} once \cs{l_peek_token} is
%   set; |#3|, the search token, stored both in
%   \cs{l_@@_search_token} (implicit) and \cs{l_@@_search_tl}
%   (explicit); |#4| and |#5|, the \texttt{true} and \texttt{false}
%   code.  Both stored branches start by closing the align-safe group
%   opened here.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_token_generic_aux:NNNTF #1#2#3#4#5
  {
    \group_align_safe_begin:
    \cs_set_eq:NN \l_@@_search_token #3
    \tl_set:Nn \l_@@_search_tl {#3}
    \cs_set:Npe \@@_true_aux:w
      {
        \exp_not:N \group_align_safe_end:
        \exp_not:n {#4}
      }
    \cs_set_eq:NN \@@_true:w #1
    \cs_set:Npe \@@_false:w
      {
        \exp_not:N \group_align_safe_end:
        \exp_not:n {#5}
      }
    \peek_after:Nw #2
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[TF]{\@@_token_generic:NN, \@@_token_remove_generic:NN}
%   For token removal there needs to be a call to the auxiliary
%   function which does the work.
%   The |TF| versions only prepend the appropriate first argument of
%   \cs{@@_token_generic_aux:NNNTF}; the |T| and |F| versions call the
%   |TF| version with an empty missing branch.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_token_generic:NNTF
  { \@@_token_generic_aux:NNNTF \@@_true_aux:w }
\cs_new_protected:Npn \@@_token_generic:NNT #1#2#3
  { \@@_token_generic:NNTF #1 #2 {#3} { } }
\cs_new_protected:Npn \@@_token_generic:NNF #1#2#3
  { \@@_token_generic:NNTF #1 #2 { } {#3} }
\cs_new_protected:Npn \@@_token_remove_generic:NNTF
  { \@@_token_generic_aux:NNNTF \@@_true_remove:w }
\cs_new_protected:Npn \@@_token_remove_generic:NNT #1#2#3
  { \@@_token_remove_generic:NNTF #1 #2 {#3} { } }
\cs_new_protected:Npn \@@_token_remove_generic:NNF #1#2#3
  { \@@_token_remove_generic:NNTF #1 #2 { } {#3} }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_execute_branches_meaning:}
%   The meaning test is straightforward: \cs{l_peek_token} is compared
%   with \cs{l_@@_search_token} using \cs{if_meaning:w}, and
%   \cs{@@_true:w} or \cs{@@_false:w} is selected accordingly.
%    \begin{macrocode}
\cs_new:Npn \@@_execute_branches_meaning:
  {
    \if_meaning:w \l_peek_token \l_@@_search_token
      \exp_after:wN \@@_true:w
    \else:
      \exp_after:wN \@@_false:w
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_execute_branches_catcode:, \@@_execute_branches_charcode:}
% \begin{macro}
%   {
%     \@@_execute_branches_catcode_aux:    ,
%     \@@_execute_branches_catcode_auxii:N ,
%     \@@_execute_branches_catcode_auxiii:
%   }
%   The catcode and charcode tests are very similar, and in order to use
%   the same auxiliaries we do something a little bit odd, firing
%   \cs{if_catcode:w} and \cs{if_charcode:w} before finding the operands
%   for those tests, which are only given in the |auxii:N| and
%   |auxiii:| auxiliaries.  For our purposes, three kinds of tokens may
%   follow the peeking function:
%   \begin{itemize}
%     \item control sequences which are not equal to a non-active
%       character token (\emph{e.g.}, macro, primitive);
%     \item active characters which are not equal to a non-active
%       character token (\emph{e.g.}, macro, primitive);
%     \item explicit non-active character tokens, or control sequences
%       or active characters set equal to a non-active character token.
%   \end{itemize}
%   The first two cases are not distinguishable simply using \TeX{}'s
%   \tn{futurelet}, because we can only access the \tn{meaning} of
%   tokens in that way.  In those cases, detected thanks to a
%   comparison with \cs{scan_stop:}, we grab the following token, and
%   compare it explicitly with the explicit search token stored in
%   \cs{l_@@_search_tl}.  The \cs{exp_not:N} prevents outer macros
%   (coming from non-\LaTeX3 code) from blowing up.  In the third case,
%   \cs{l_peek_token} is good enough for the test, and we compare it
%   again with the explicit search token.  Just like the peek token, the
%   search token may be of any of the three types above, hence the need
%   to use the explicit token that was given to the peek function.
%    \begin{macrocode}
\cs_new:Npn \@@_execute_branches_catcode:
  { \if_catcode:w \@@_execute_branches_catcode_aux: }
\cs_new:Npn \@@_execute_branches_charcode:
  { \if_charcode:w \@@_execute_branches_catcode_aux: }
\cs_new:Npn \@@_execute_branches_catcode_aux:
  {
    \if_catcode:w \exp_not:N \l_peek_token \scan_stop:
      \exp_after:wN \exp_after:wN
      \exp_after:wN \@@_execute_branches_catcode_auxii:N
      \exp_after:wN \exp_not:N
    \else:
      \exp_after:wN \@@_execute_branches_catcode_auxiii:
    \fi:
  }
\cs_new:Npn \@@_execute_branches_catcode_auxii:N #1
  {
    \exp_not:N #1
    \exp_after:wN \exp_not:N \l_@@_search_tl
      \exp_after:wN \@@_true:w
    \else:
      \exp_after:wN \@@_false:w
    \fi:
    #1
  }
\cs_new:Npn \@@_execute_branches_catcode_auxiii:
  {
    \exp_not:N \l_peek_token
    \exp_after:wN \exp_not:N \l_@@_search_tl
      \exp_after:wN \@@_true:w
    \else:
      \exp_after:wN \@@_false:w
    \fi:
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[TF]
%   {
%     \peek_catcode:N,
%     \peek_catcode_remove:N,
%     \peek_charcode:N,
%     \peek_charcode_remove:N,
%     \peek_meaning:N,
%     \peek_meaning_remove:N,
%   }
%   The public functions themselves cannot be defined using
%   \cs{prg_new_conditional:Npnn}.
%   Instead, the |TF|, |T|, |F| variants
%   are defined in terms of corresponding variants of
%   \cs{@@_token_generic:NNTF} or \cs{@@_token_remove_generic:NNTF},
%   with first argument one of \cs{@@_execute_branches_catcode:},
%   \cs{@@_execute_branches_charcode:}, or
%   \cs{@@_execute_branches_meaning:}.  The three nested mappings
%   below run over the kind of test, the optional |_remove| suffix and
%   the variant, and so create all eighteen functions.
%    \begin{macrocode}
\tl_map_inline:nn { { catcode } { charcode } { meaning } }
  {
    \tl_map_inline:nn { { } { _remove } }
      {
        \tl_map_inline:nn { { TF } { T } { F } }
          {
            \cs_new_protected:cpe { peek_ #1 ##1 :N ####1 }
              {
                \exp_not:c { @@_token ##1 _generic:NN ####1 }
                \exp_not:c { @@_execute_branches_ #1 : }
              }
          }
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[TF]{\peek_N_type:}
% \begin{macro}
%   {\@@_execute_branches_N_type:, \@@_N_type:w, \@@_N_type_aux:nnw}
%   All tokens are \texttt{N}-type tokens, except in four cases:
%   begin-group tokens, end-group tokens, space tokens with character
%   code~$32$, and outer tokens.  Since \cs{l_peek_token} might be
%   outer, we cannot use the convenient \cs{bool_if:nTF} function, and
%   must resort to the old trick of using \tn{ifodd} to expand a set of
%   tests.  The \texttt{false} branch of this test is taken if the token
%   is one of the first three kinds of non-\texttt{N}-type tokens
%   (explicit or implicit), thus we call \cs{@@_false:w}.  In the
%   \texttt{true} branch, we must detect outer tokens, without impacting
%   performance too much for non-outer tokens.  The first filter is to
%   search for \texttt{outer} in the \tn{meaning} of \cs{l_peek_token}.
%   If that is absent, \cs{@@_use_none_delimit_by_s_stop:w} cleans up, and
%   we call \cs{@@_true:w}.  Otherwise, the token can be a non-outer
%   macro or a primitive mark whose parameter or replacement text
%   contains \texttt{outer}, it can be the primitive \tn{outer}, or it
%   can be an outer token.
%   Macros and marks would have \texttt{ma} in
%   the part before the first occurrence of \texttt{outer}; the meaning
%   of \tn{outer} has nothing after \texttt{outer}, unlike outer
%   macros; and that covers all cases, calling \cs{@@_true:w} or
%   \cs{@@_false:w} as appropriate.  Here, there is no \meta{search
%   token}, so we feed a dummy \cs{scan_stop:} to the
%   \cs{@@_token_generic:NNTF} function.  The temporary function
%   \cs{@@_tmp:w} is only used to pass the string \texttt{outer}
%   (obtained with \cs{tl_to_str:n}) as |#1| into the two definitions
%   it contains.
%    \begin{macrocode}
\group_begin:
  \cs_set_protected:Npn \@@_tmp:w #1 \s_@@_stop
    {
      \cs_new_protected:Npn \@@_execute_branches_N_type:
        {
          \if_int_odd:w
              \if_catcode:w \exp_not:N \l_peek_token {   \c_zero_int \fi:
              \if_catcode:w \exp_not:N \l_peek_token }   \c_zero_int \fi:
              \if_meaning:w \l_peek_token \c_space_token \c_zero_int \fi:
              \c_one_int
            \exp_after:wN \@@_N_type:w
              \token_to_meaning:N \l_peek_token
              \s_@@_mark \@@_N_type_aux:nnw
              #1 \s_@@_mark \@@_use_none_delimit_by_s_stop:w
              \s_@@_stop
            \exp_after:wN \@@_true:w
          \else:
            \exp_after:wN \@@_false:w
          \fi:
        }
      \cs_new_protected:Npn \@@_N_type:w ##1 #1 ##2 \s_@@_mark ##3
        { ##3 {##1} {##2} }
    }
  \exp_after:wN \@@_tmp:w \tl_to_str:n { outer } \s_@@_stop
\group_end:
\cs_new_protected:Npn \@@_N_type_aux:nnw #1 #2 #3 \fi:
  {
    \fi:
    \tl_if_in:noTF {#1} { \tl_to_str:n {ma} }
      { \@@_true:w }
      { \tl_if_empty:nTF {#2} { \@@_true:w } { \@@_false:w } }
  }
\cs_new_protected:Npn \peek_N_type:TF
  {
    \@@_token_generic:NNTF
      \@@_execute_branches_N_type: \scan_stop:
  }
\cs_new_protected:Npn \peek_N_type:T
  { \@@_token_generic:NNT \@@_execute_branches_N_type: \scan_stop: }
\cs_new_protected:Npn \peek_N_type:F
  { \@@_token_generic:NNF \@@_execute_branches_N_type: \scan_stop: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% ^^A NOTE(review): the two macrocode environments below are empty in
% ^^A this copy; presumably each held a closing docstrip guard line
% ^^A that has been lost -- confirm against the upstream file.
%    \begin{macrocode}
%
%    \end{macrocode}
%
%    \begin{macrocode}
%
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex