% \iffalse meta-comment % % File: akshar.dtx % --------------------------------------------------------------------------- % Package: akshar % Author: Vu Van Dung % Description: Support for syllables in the Devanagari script % Repository: https://github.com/joulev/akshar % Bug tracker: https://github.com/joulev/akshar/issues % License: The LaTeX Project Public License v1.3c or later % --------------------------------------------------------------------------- % This work may be distributed and/or modified under the conditions of the % LaTeX Project Public License, either version 1.3c of this license or (at % your option) any later version. % % The latest version of this license is in % % http://www.latex-project.org/lppl.txt. % % This work has the LPPL maintenance status `maintained'. % % The Current Maintainer of this work is Vu Van Dung. % % This work consists of the files akshar.dtx % akshar.ins % and the derived file akshar.sty. % --------------------------------------------------------------------------- % The manual uses the following fonts: % * Siddhanta for serif font. % Source: https://sites.google.com/site/bayaryn % * Biolinum for sans font. % Source: Package libertine (https://ctan.org/pkg/libertine) % * Hack Nerd Font for monospace font. % Source: https://www.nerdfonts.com/font-downloads % --------------------------------------------------------------------------- % \fi % \iffalse %<*internal> \iffalse % %<*readme> Package: akshar Author: Vu Van Dung Version: 0.2 (06 Sep 2020) Description: Support for syllables in the Devanagari script Repository: https://github.com/joulev/akshar Bug tracker: https://github.com/joulev/akshar/issues License: The LaTeX Project Public License v1.3c or later. See the source file akshar.dtx for more information. 
% %<*internal> \fi % %<*driver|package> \def\aksharPackageName{akshar} \def\aksharPackageVersion{0.2} \def\aksharPackageDate{2020/09/06} \def\aksharPackageDescription{Support for syllables in the Devanagari script (JV)} % %<*driver> \documentclass{l3doc} \renewcommand\MacroLongFont{\ttfamily\small} \usepackage[margin=1cm,left=7cm,bottom=1.5cm]{geometry} \usepackage{fontspec} \usepackage{biolinum} \setmainfont{siddhanta}[Scale=0.9, Script=Devanagari, Extension=.ttf] \setmonofont{HackNF}[Scale=0.85, Extension=.ttf, UprightFont=*_R, BoldFont=*_B, ItalicFont=*_I, BoldItalicFont=*_BI] \newfontfamily{\aksharDocFontFallback}{siddhanta} [Scale=0.9,Script=Devanagari,Extension=.ttf] \usepackage[libertine]{newtxmath} \renewcommand\meta[1]{$\langle${\itshape\rmfamily#1}$\rangle$} \usepackage{newunicodechar} \ExplSyntaxOn \tl_map_inline:nn { ािीुूेैोौंःॢृॅँ़ॆॊॉॎॏ॓ॕॖॗॄऺऻ꣠꣡꣢꣣꣤꣥꣦꣧꣨꣩꣪꣫꣬꣭꣮꣯꣰꣱ꣿ्कनमस्कारमंळीममडळीममडमंमममडडमंळीममडममंममडडमंळीममड ॑ ॒ ॓ ॔} { \newunicodechar{#1}{{\iffontchar\font`#1\else\aksharDocFontFallback\fi#1}} } \ExplSyntaxOff \usepackage{akshar} \usepackage[listings]{tcolorbox} \definecolor{myred}{HTML}{C0392B} \definecolor{myblue}{HTML}{2980B9} \definecolor{mygreen}{HTML}{27AE60} \newtcblisting{codeexample}[1][]{size=minimal, sidebyside gap=1cm,lefthand width=6.5cm,lower separated=false, fontupper=\raggedleft,colback=white,colframe=white, verbatim ignore percent,grow to left by=7cm,text side listing, listing options={basicstyle=\ttfamily\small,keywordstyle=\ttfamily\small,columns=fullflexible, numbers=left,numberstyle=\sffamily\tiny\color{gray},stepnumber=1,numbersep=5pt},#1} \pgfkeys{/tcb/.cd,code only/.code={\pgfkeysalso{listing only,grow to left by=-5mm}}} \hypersetup{linkcolor=mygreen,urlcolor=myblue,linktoc=page} ^^A Hacking the index a bit. \IndexPrologue { \section*{Index} \markboth{Index}{Index} \addcontentsline{toc}{section}{Index} Underlined page numbers point to the definition, all others indicate the places where it is used or described. 
} \usepackage{sectsty} \sectionfont{\LARGE\color{myred}} \subsectionfont{\large\color{myblue}} \usepackage{fontawesome5} \let\bfseries\mdseries^^A For consistency \parindent=0pt\relax \IndexMin=100pt\relax \EnableCrossrefs \CodelineIndex \RecordChanges \begin{document} \DocInput{\aksharPackageName.dtx} \end{document} % % \fi % % % % \title{The \pkg{\aksharPackageName} package} % \author{Vu Van Dung} % \date^^A % {^^A % Version \aksharPackageVersion\ --- \aksharPackageDate\\[1ex]^^A % {\small\faIcon{link}\quad\url{https://ctan.org/pkg/akshar}}\\^^A % {\small\faIcon{github}\quad\url{https://github.com/joulev/akshar}}^^A % } % % \maketitle % % \begin{abstract} % This package provides tools to deal with special characters in a Devanagari % string. % \end{abstract} % % \tableofcontents % \parskip=1ex % % \begin{documentation} % \section{Introduction} % When processing strings in the Devanagari script, normal \LaTeX\ % commands usually have difficulty distinguishing ``normal'' % characters, like क, and ``special'' characters, for example ् or ी. Let's % consider this example code: % \begin{codeexample}[] %\ExplSyntaxOn %\tl_set:Nn \l_tmpa_tl { की } %\tl_count:N \l_tmpa_tl \c_space_token tokens. %\ExplSyntaxOff % \end{codeexample} % The output is 2, but the number of characters in it is only one! The reason % is quite simple: the compiler treats ी as a normal character, and it shouldn't % do so. % % To tackle that, this package provides \pkg{expl3} functions to ``convert'' a % given string, written in the Devanagari script, to a sequence of token lists. % Each of these token lists is a ``true'' Devanagari character. You can now do % anything you want with this sequence; and this package does provide some % front-end macros for some simple actions on the input string. % % \section{User manual} % % Due to the current implementation, \emph{none} of these macros and functions % is expandable. 
% % \subsection{\LaTeXe\ macros}\label{latex2e-macros} % \begin{function}{\aksharStrLen} % \begin{syntax} % \cs{aksharStrLen} \marg{token list} % \end{syntax} % Return the number of Devanagari characters in the \meta{token list}. % \end{function} % \begin{codeexample}[] %There are \aksharStrLen{ नमस्कार } characters in नमस्कार.\par %\ExplSyntaxOn %\pkg{expl3}~returns~\tl_count:n { नमस्कार },~which~is~wrong. %\ExplSyntaxOff % \end{codeexample} % \begin{function}{\aksharStrHead} % \begin{syntax} % \cs{aksharStrHead} \marg{token list} % \end{syntax} % Return the first character of the token list. % \end{function} % \begin{codeexample}[] %\aksharStrHead { मंळीममड } % \end{codeexample} % \begin{function}{\aksharStrTail} % \begin{syntax} % \cs{aksharStrTail} \marg{token list} % \end{syntax} % Return the last character of the token list. % \end{function} % \begin{codeexample}[] %\aksharStrTail { ळीममडमं } % \end{codeexample} % \begin{function}{\aksharStrChar} % \begin{syntax} % \cs{aksharStrChar} \marg{token list} \marg{n} % \end{syntax} % Return the $n$-th character of the token list. % \end{function} % \begin{codeexample}[] %3rd character of नमस्कार is \aksharStrChar{ नमस्कार}{3}.\par %\ExplSyntaxOn %It~is~not~\tl_item:nn { नमस्कार } {3}. %\ExplSyntaxOff % \end{codeexample} % \begin{function}{\aksharStrReplace, \aksharStrReplace*} % \begin{syntax} % \cs{aksharStrReplace} \marg{tl 1} \marg{tl 2} \marg{tl 3} % \end{syntax} % Replace all occurrences of \meta{tl 2} in \meta{tl 1} with \meta{tl 3}, and % leave the modified \meta{tl 1} in the input stream. % % The starred variant will replace only the first occurrence of \meta{tl 2}; % all others are left intact. 
% \end{function} % \begin{codeexample}[] %\ExplSyntaxOn %\pkg{expl3} ~ output:\par %\tl_set:Nn \l_tmpa_tl { मममडडमंळीममड } %\tl_replace_all:Nnn \l_tmpa_tl { म } { स्का } %\tl_use:N \l_tmpa_tl\par %\ExplSyntaxOff %\cs{aksharStrReplace} output:\par %\aksharStrReplace { मममडडमंळीममड } { म } { स्का } % \end{codeexample} % \begin{codeexample}[] %\ExplSyntaxOn %\pkg{expl3} ~ output:\par %\tl_set:Nn \l_tmpa_tl { ममंममडडमंळीममड } %\tl_replace_once:Nnn \l_tmpa_tl { मम } { स्का } %\tl_use:N \l_tmpa_tl\par %\ExplSyntaxOff %\cs{aksharStrReplace*} output:\par %\aksharStrReplace* { ममंममडडमंळीममड } { मम } { स्का } % \end{codeexample} % \begin{function}{\aksharStrRemove, \aksharStrRemove*} % \begin{syntax} % \cs{aksharStrRemove} \marg{tl 1} \marg{tl 2} % \end{syntax} % Remove all occurrences of \meta{tl 2} in \meta{tl 1}, and leave the modified % \meta{tl 1} in the input stream. % % The starred variant will remove only the first occurrence of \meta{tl 2}; % all others are left intact. % \end{function} % \begin{codeexample}[] %\ExplSyntaxOn %\pkg{expl3} ~ output:\par %\tl_set:Nn \l_tmpa_tl { मममडडमंळीममड } %\tl_remove_all:Nn \l_tmpa_tl { म } %\tl_use:N \l_tmpa_tl\par %\ExplSyntaxOff %\cs{aksharStrRemove} output:\par %\aksharStrRemove { मममडडमंळीममड } { म } % \end{codeexample} % \begin{codeexample}[] %\ExplSyntaxOn %\pkg{expl3} ~ output:\par %\tl_set:Nn \l_tmpa_tl { ममंममडडमंळीममड } %\tl_remove_once:Nn \l_tmpa_tl { मम } %\tl_use:N \l_tmpa_tl\par %\ExplSyntaxOff %\cs{aksharStrRemove*} output:\par %\aksharStrRemove* { ममंममडडमंळीममड } { मम } % \end{codeexample} % \subsection{\pkg{expl3} functions} % This section assumes that you have a basic knowledge of \LaTeX3 programming. % All macros in section~\ref{latex2e-macros} directly depend on the following function, % so it is much more powerful than all features we have described above. 
% \begin{function}{ % \akshar_convert:Nn, \akshar_convert:cn, % \akshar_convert:Nx, \akshar_convert:cx} % \begin{syntax} % \cs{akshar_convert:Nn} \meta{seq var} \marg{token list} % \end{syntax} % This function converts \meta{token list} to a sequence of characters; that % sequence is stored in \meta{seq var}. % \end{function} % \begin{codeexample}[] %\ExplSyntaxOn %\akshar_convert:Nn \l_tmpa_seq { नमस्कार } %\seq_use:Nnnn \l_tmpa_seq { ~and~ } { ,~ } { ,~and~ } %\ExplSyntaxOff % \end{codeexample} % \end{documentation} % % \StopEventually{\PrintIndex} % % \begin{implementation} % \section{Implementation} % \begin{macrocode} %<@@=akshar> %<*package> % \end{macrocode} % Declare the package. Loading \pkg{fontspec} also loads \pkg{xparse} and, in % turn, \pkg{expl3}. % \begin{macrocode} \RequirePackage{fontspec} \ProvidesExplPackage {\aksharPackageName} {\aksharPackageDate} {\aksharPackageVersion} {\aksharPackageDescription} % \end{macrocode} % \subsection{Variable declarations} % \begin{variable}{\c_@@_joining_tl, \c_@@_diacritics_tl} % These variables store the special characters we need to take into account: % \begin{itemize} % \item \cs{c_@@_joining_tl} is the ``connecting'' character ्. % \item \cs{c_@@_diacritics_tl} is the list of all diacritics, stored % separated by commas: ा, ि, ी, ु, % ू, े, ै, ो, ौ, ं, ः, ॢ, ृ, ॅ, ँ, ़, ॆ, ॊ, ॉ, ॎ, ॏ, ॑, % ॒, ॓, ॔, ॕ, ॖ, ॗ, ॄ, ऺ, ऻ, ꣠, ꣡, ꣢, ꣣, ꣤, ꣥, ꣦, ꣧, ꣨, ꣩, % ꣪, ꣫, ꣬, ꣭, ꣮, ꣯, ꣰, ꣱, ꣿ. % \end{itemize} % \begin{macrocode} \tl_const:Nn \c_@@_joining_tl { ्} \tl_const:Nn \c_@@_diacritics_tl { ा, ि, ी, ु, ू, े, ै, ो, ौ, ं, ः, ॢ, ृ, ॅ, ँ, ़, ॆ, ॊ, ॉ, ॎ, ॏ, ॑, ॒, ॓, ॔, ॕ, ॖ, ॗ, ॄ, ऺ, ऻ, ꣠, ꣡, ꣢, ꣣, ꣤, ꣥, ꣦, ꣧, ꣨, ꣩, ꣪, ꣫, ꣬, ꣭, ꣮, ꣯, ꣰, ꣱, ꣿ } % \end{macrocode} % \end{variable} % \begin{variable}{\l_@@_prev_joining_bool} % When we get to a normal character, we need to know whether it is joined, i.e. % whether the previous character is the joining character. This boolean variable % takes care of that. 
%    \begin{macrocode}
\bool_new:N \l_@@_prev_joining_bool
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\l_@@_char_seq}
% This local sequence stores the output of the converter.
%    \begin{macrocode}
\seq_new:N \l_@@_char_seq
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\l_@@_map_tl}
% The loop variable that holds the current item while mapping over the input
% token list in \cs{akshar_convert:Nn} and over the converted sequence in
% \cs{@@_replace:NnnnN}. It is declared here so that it is not used
% undeclared.
%    \begin{macrocode}
\tl_new:N \l_@@_map_tl
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\c_@@_str_g_tl, \c_@@_str_seq_tl, \c_@@_str_comma_tl}
% The strings |g|, |seq| and |,| after \cs{tl_to_str:n}. The first two
% are compared with the pieces of a variable name in
% \cs{@@_var_if_global:NTF}; the last one is compared with the current
% character in \cs{@@_tl_if_in_ncomma:NNTF}.
%    \begin{macrocode}
\tl_const:Nx \c_@@_str_g_tl     { \tl_to_str:n {g} }
\tl_const:Nx \c_@@_str_seq_tl   { \tl_to_str:n {seq} }
\tl_const:Nx \c_@@_str_comma_tl { \tl_to_str:n {,} }
%    \end{macrocode}
% \end{variable}
% \begin{variable}{\l_@@_tmpa_tl, \l_@@_tmpb_tl, \l_@@_tmpa_seq, \l_@@_tmpb_seq,
%                  \l_@@_tmpc_seq, \l_@@_tmpd_seq, \l_@@_tmpe_seq,
%                  \l_@@_tmpa_int, \l_@@_tmpb_int}
% Some temporary variables.
%    \begin{macrocode}
\tl_new:N  \l_@@_tmpa_tl
\tl_new:N  \l_@@_tmpb_tl
\seq_new:N \l_@@_tmpa_seq
\seq_new:N \l_@@_tmpb_seq
\seq_new:N \l_@@_tmpc_seq
\seq_new:N \l_@@_tmpd_seq
\seq_new:N \l_@@_tmpe_seq
\int_new:N \l_@@_tmpa_int
\int_new:N \l_@@_tmpb_int
%    \end{macrocode}
% \end{variable}
% \subsection{Messages}
% In \cs{akshar_convert:Nn} and friends, the argument needs to be a sequence
% variable. There will be an error if it isn't.
% ^^A I use underscore here because my editor has a good support for it.
%    \begin{macrocode}
\msg_new:nnnn { akshar } { err_not_a_sequence_variable }
  { #1 ~ is ~ not ~ a ~ valid ~ LaTeX3 ~ sequence ~ variable. }
  {
    You ~ have ~ requested ~ me ~ to ~ assign ~ some ~ value ~ to ~
    the ~ control ~ sequence ~ #1, ~ but ~ it ~ is ~ not ~ a ~ valid ~
    sequence ~ variable. ~ Read ~ the ~ documentation ~ of ~ expl3 ~
    for ~ more ~ information. ~ Proceed ~ and ~ I ~ will ~ pretend ~
    that ~ #1 ~ is ~ a ~ local ~ sequence ~ variable ~ (beware ~ that ~
    unexpected ~ behaviours ~ may ~ occur).
  }
%    \end{macrocode}
% In \cs{aksharStrChar}, we need to guard against accessing an `out-of-bound'
% character (like trying to get the 8th character in a 5-character string.)
%    \begin{macrocode}
\msg_new:nnnn { akshar } { err_character_out_of_bound }
  { Character ~ index ~ out ~ of ~ bound. }
  {
    You ~ are ~ trying ~ to ~ get ~ the ~ #2 ~ character ~ of ~ the ~
    string ~ #1. ~ However ~ that ~ character ~ doesn't ~ exist. ~
    Make ~ sure ~ that ~ you ~ use ~ a ~ number ~ between ~ and ~ not ~
    including ~ 0 ~ and ~ #3, ~ so ~ that ~ I ~ can ~ return ~ a ~
    good ~ output. ~ Proceed ~ and ~ I ~ will ~ return ~
    \token_to_str:N \scan_stop:.
  }
%    \end{macrocode}
% In \cs{aksharStrHead} and \cs{aksharStrTail}, the string must not be blank.
% The only argument of this message is the word describing the requested
% character (``first'' or ``last''). Spaces are ignored in this code, so
% every visible space in the text, including the one after a full stop, has
% to be written as |~|.
%    \begin{macrocode}
\msg_new:nnnn { akshar } { err_string_empty }
  { The ~ input ~ string ~ is ~ empty. }
  {
    To ~ get ~ the ~ #1 ~ character ~ of ~ a ~ string, ~ that ~ string ~
    must ~ not ~ be ~ empty, ~ but ~ the ~ input ~ string ~ is ~ empty. ~
    Make ~ sure ~ the ~ string ~ contains ~ something, ~ or ~ proceed ~
    and ~ I ~ will ~ use ~ \token_to_str:N \scan_stop:.
  }
%    \end{macrocode}
% \subsection{Utilities}
% \begin{macro}[TF,internal]{\tl_if_in:No}
% When we get to a character which is not the joining one, we need to know if
% it is a diacritic. The current character is stored in a variable, so an
% expanded variant is needed. We need it to expand only \textbf{o}nce.
%    \begin{macrocode}
\prg_generate_conditional_variant:Nnn \tl_if_in:Nn { No } { TF }
%    \end{macrocode}
% \end{macro}
% \begin{macro}[internal]{\seq_set_split:Nxx}
% A variant we will need in \cs{@@_var_if_global:NTF}.
%    \begin{macrocode}
\cs_generate_variant:Nn \seq_set_split:Nnn { Nxx }
%    \end{macrocode}
% \end{macro}
% \begin{macro}[internal]{\msg_error:nnx, \msg_error:nnnxx}
% Some variants of \pkg{l3msg} functions that we will need when issuing error
% messages.
%    \begin{macrocode}
\cs_generate_variant:Nn \msg_error:nnn { nnx }
\cs_generate_variant:Nn \msg_error:nnnnn { nnnxx }
%    \end{macrocode}
% \end{macro}
% \begin{macro}[TF]{\@@_tl_if_in_ncomma:NN}
% This conditional is essentially \cs{tl_if_in:Nn}, but if |#2| is a comma this
% conditional always returns false. |#1| is the token list variable to search
% in and |#2| is a token list variable whose content is searched for.
% \cs{tl_if_in:NnTF} is not expandable, so the conditional is declared as a
% protected one.
%    \begin{macrocode}
\prg_new_protected_conditional:Npnn \@@_tl_if_in_ncomma:NN #1 #2 { T, F, TF }
  {
    \tl_if_eq:NNTF \c_@@_str_comma_tl #2
      { \prg_return_false: }
      {
        \tl_if_in:NoTF #1 {#2}
          { \prg_return_true: }
          { \prg_return_false: }
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}[TF]{\@@_var_if_global:N}
% This conditional checks if |#1| is a global sequence variable or not. In other
% words, it returns true iff |#1| is a control sequence in the format
% |\g_|\meta{name}|_seq|. If |#1| is not a sequence variable (its name contains
% a colon, or its last |_|-separated part is not |seq|), this function issues
% the error |err_not_a_sequence_variable| and returns false.
%
% The test performs assignments to \cs{l_@@_tmpb_seq}, \cs{l_@@_tmpa_tl} and
% \cs{l_@@_tmpb_tl} and may raise an error, so it is declared as a protected
% conditional.
%    \begin{macrocode}
\prg_new_protected_conditional:Npnn \@@_var_if_global:N #1 { T, F, TF }
  {
%    \end{macrocode}
% The third item returned by \cs{cs_split_function:N} is used as the test here;
% when it is true we reject |#1|.
%    \begin{macrocode}
    \bool_if:nTF
      { \exp_last_unbraced:Nf \use_iii:nnn { \cs_split_function:N #1 } }
      {
        \msg_error:nnx { akshar } { err_not_a_sequence_variable }
          { \token_to_str:N #1 }
        \prg_return_false:
      }
      {
%    \end{macrocode}
% Split the name at every underscore; the leftmost part is the scope and the
% rightmost part is the type.
%    \begin{macrocode}
        \seq_set_split:Nxx \l_@@_tmpb_seq { \token_to_str:N _ }
          { \exp_last_unbraced:Nf \use_i:nnn { \cs_split_function:N #1 } }
        \seq_get_left:NN \l_@@_tmpb_seq \l_@@_tmpa_tl
        \seq_get_right:NN \l_@@_tmpb_seq \l_@@_tmpb_tl
        \tl_if_eq:NNTF \c_@@_str_seq_tl \l_@@_tmpb_tl
          {
            \tl_if_eq:NNTF \c_@@_str_g_tl \l_@@_tmpa_tl
              { \prg_return_true: }
              { \prg_return_false: }
          }
          {
            \msg_error:nnx { akshar } { err_not_a_sequence_variable }
              { \token_to_str:N #1 }
            \prg_return_false:
          }
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\@@_int_append_ordinal:n}
% Append st, nd, rd or th to integer |#1|. Will be needed in error messages.
% The suffix is chosen from the absolute value of |#1|: numbers ending in 11,
% 12 or 13 (including 111, 212, \dots) take th; otherwise the last digit
% decides. The function is expandable, so it can be used in an |x|-type
% argument.
%    \begin{macrocode}
\cs_new:Npn \@@_int_append_ordinal:n #1
  {
    #1
    \int_case:nnF { \int_mod:nn { \int_abs:n {#1} } { 100 } }
      {
        { 11 } { th }
        { 12 } { th }
        { 13 } { th }
      }
      {
        \int_case:nnF { \int_mod:nn { \int_abs:n {#1} } { 10 } }
          {
            { 1 } { st }
            { 2 } { nd }
            { 3 } { rd }
          }
          { th }
      }
  }
%    \end{macrocode}
% \end{macro}
% \subsection{The \cs{akshar_convert:Nn} function and its variants}
% \begin{macro}{\@@_char_seq_append_to_last:}
% Append the current loop character \cs{l_@@_map_tl} to the last item of
% \cs{l_@@_char_seq}. If the sequence is still empty (the input starts with a
% diacritic or with the joining character) there is no item to pop, so we
% start from an empty token list instead of using the undefined result of the
% pop.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_char_seq_append_to_last:
  {
    \seq_pop_right:NNF \l_@@_char_seq \l_@@_tmpa_tl
      { \tl_clear:N \l_@@_tmpa_tl }
    \seq_put_right:Nx \l_@@_char_seq { \l_@@_tmpa_tl \l_@@_map_tl }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\akshar_convert:Nn, \akshar_convert:cn,
%               \akshar_convert:Nx, \akshar_convert:cx}
% This converts |#2| to a sequence of \emph{true} Devanagari characters. The
% sequence is set to |#1|, which should be a sequence variable. The function
% performs assignments, hence it is protected.
%    \begin{macrocode}
\cs_new_protected:Npn \akshar_convert:Nn #1 #2
  {
%    \end{macrocode}
% Clear anything stored in advance. We don't want different calls of the
% function to conflict with each other.
%    \begin{macrocode}
    \seq_clear:N \l_@@_char_seq
    \bool_set_false:N \l_@@_prev_joining_bool
%    \end{macrocode}
% Loop through every token of the input. |#2| is a braced token list, so the
% |n|-type mapping function is the right one.
%    \begin{macrocode}
    \tl_map_variable:nNn {#2} \l_@@_map_tl
      {
        \@@_tl_if_in_ncomma:NNTF \c_@@_diacritics_tl \l_@@_map_tl
          {
%    \end{macrocode}
% It is a diacritic. We append the current diacritic to the last item of the
% sequence instead of pushing the diacritic to a new sequence item.
%    \begin{macrocode}
            \@@_char_seq_append_to_last:
          }
          {
            \tl_if_eq:NNTF \l_@@_map_tl \c_@@_joining_tl
              {
%    \end{macrocode}
% In this case, the character is the joining character, ्. What we do is
% similar to the above case, but \cs{l_@@_prev_joining_bool} is set to true so
% that the next character is also appended to this item.
%    \begin{macrocode}
                \@@_char_seq_append_to_last:
                \bool_set_true:N \l_@@_prev_joining_bool
              }
              {
%    \end{macrocode}
% Now the character is normal. We see if we can push to a new item or not. It
% depends on the boolean variable.
%    \begin{macrocode}
                \bool_if:NTF \l_@@_prev_joining_bool
                  {
                    \@@_char_seq_append_to_last:
                    \bool_set_false:N \l_@@_prev_joining_bool
                  }
                  { \seq_put_right:Nx \l_@@_char_seq { \l_@@_map_tl } }
              }
          }
      }
%    \end{macrocode}
% Set |#1| to \cs{l_@@_char_seq}. The package automatically determines whether
% the variable is a global one or a local one.
%    \begin{macrocode}
    \@@_var_if_global:NTF #1
      { \seq_gset_eq:NN #1 \l_@@_char_seq }
      { \seq_set_eq:NN #1 \l_@@_char_seq }
  }
%    \end{macrocode}
% Generate variants that might be helpful for some.
%    \begin{macrocode}
\cs_generate_variant:Nn \akshar_convert:Nn { cn, Nx, cx }
%    \end{macrocode}
% \end{macro}
% \subsection{Other internal functions}
% \begin{macro}{\@@_seq_push_seq:NN}
% Append the items of sequence |#2| to the end of sequence |#1|; |#2| itself
% is left unchanged. The assignment is local.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_seq_push_seq:NN #1 #2
  { \seq_concat:NNN #1 #1 #2 }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\@@_replace:NnnnN}
% If |#5| is \cs{c_false_bool}, this function replaces \emph{all} occurrences of
% |#3| in |#2| by |#4| and stores the output sequence to |#1|. If |#5| is
% \cs{c_true_bool}, the replacement only happens \emph{once}.
%
% The algorithm used in this function: We will use \cs{l_@@_tmpa_int} to store
% the ``current position'' in the sequence of |#3|. At first it is set to |1|.
%
% We will store any subsequence of |#2| that may match |#3| to a temporary
% sequence. If it doesn't match, we push this temporary sequence to the output,
% but if it matches, |#4| is pushed instead.
%
% We loop over |#2|.
% For each of these loops, we need to make sure the
% \cs{l_@@_tmpa_int}-th item must indeed appear in |#3|. So we need to compare
% that with the length of |#3|.
% \begin{itemize}
%   \item If now \cs{l_@@_tmpa_int} is greater than the length of |#3|, the
%   whole of |#3| has been matched somewhere, so we reinitialize the integer to
%   |1| and push |#4| to the output.
%
%   Note that it is possible that the current character might be the start of
%   another match, so we have to compare it to the first character of |#3|.
%   If they are not the same, we may now push the current mapping character to
%   the output and proceed; otherwise the current character is pushed to the
%   temporary variable.
%   \item Otherwise, we compare the current loop character of |#2| with the
%   \cs{l_@@_tmpa_int}-th character of |#3|.
%   \begin{itemize}
%     \item If they are the same, we still have a chance that it will match, so
%     we increase the ``iterator'' \cs{l_@@_tmpa_int} by |1| and push the
%     current mapping character to the temporary sequence.
%     \item If they are not the same, the temporary sequence won't match. Let's
%     push that sequence to the output and set the iterator back to |1|.
%
%     Note that now the iterator has changed. Who knows whether the current
%     character may start a match? Let's compare it to the first character of
%     |#3|, and do as in the case of \cs{l_@@_tmpa_int} is greater than the
%     length of |#3|.
%   \end{itemize}
% \end{itemize}
% When the loop ends, the temporary sequence may hold a \emph{complete} match
% (the match ends exactly at the last character of |#2|). In that case |#4|
% has to be pushed; otherwise the temporary sequence itself is pushed.
%
% In the ``once'' mode, \cs{l_@@_tmpb_int} becomes positive after the first
% replacement, and from then on every character is simply collected in the
% temporary sequence, which is flushed unchanged at the end.
%
% The complexity of this algorithm is $O(m\max(n,p))$, where $m,n,p$ are the
% lengths of the sequences created from |#2|, |#3| and |#4|. As |#3| and |#4|
% are generally short strings, this is (almost) linear to the length of the
% original sequence |#2|.
%
% NOTE(review): when a partial match fails, the whole temporary sequence is
% flushed and only the current character is re-examined. A match that starts
% \emph{inside} the flushed part is therefore missed (for instance searching
% for |aab| in |aaab|). Fixing this needs backtracking and is left open here.
%
% \cs{@@_replace_restart:} is the step shared by both ``restart'' situations
% above: reset the iterator and decide whether the current character starts a
% new candidate match or goes straight to the output.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_replace_restart:
  {
    \int_set:Nn \l_@@_tmpa_int { 1 }
    \tl_set:Nx \l_@@_tmpa_tl { \seq_item:Nn \l_@@_tmpd_seq { 1 } }
    \tl_if_eq:NNTF \l_@@_map_tl \l_@@_tmpa_tl
      {
        \int_incr:N \l_@@_tmpa_int
        \seq_put_right:NV \l_@@_tmpb_seq \l_@@_map_tl
      }
      { \seq_put_right:NV \l_@@_tmpa_seq \l_@@_map_tl }
  }
%    \end{macrocode}
% The main function. \cs{l_@@_tmpc_seq}, \cs{l_@@_tmpd_seq} and
% \cs{l_@@_tmpe_seq} hold the converted |#2|, |#3| and |#4|;
% \cs{l_@@_tmpa_seq} is the output and \cs{l_@@_tmpb_seq} the temporary
% sequence.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_replace:NnnnN #1 #2 #3 #4 #5
  {
    \akshar_convert:Nn \l_@@_tmpc_seq {#2}
    \akshar_convert:Nn \l_@@_tmpd_seq {#3}
    \akshar_convert:Nn \l_@@_tmpe_seq {#4}
    \seq_clear:N \l_@@_tmpa_seq
    \seq_clear:N \l_@@_tmpb_seq
    \int_set:Nn \l_@@_tmpa_int { 1 }
    \int_set:Nn \l_@@_tmpb_int { 0 }
    \seq_map_variable:NNn \l_@@_tmpc_seq \l_@@_map_tl
      {
        \int_compare:nNnTF { \l_@@_tmpb_int } > { 0 }
          { \seq_put_right:NV \l_@@_tmpb_seq \l_@@_map_tl }
          {
            \int_compare:nNnTF
              { \l_@@_tmpa_int } = { 1 + \seq_count:N \l_@@_tmpd_seq }
              {
%    \end{macrocode}
% A complete match has just ended before the current character.
%    \begin{macrocode}
                \bool_if:NT #5 { \int_incr:N \l_@@_tmpb_int }
                \seq_clear:N \l_@@_tmpb_seq
                \@@_seq_push_seq:NN \l_@@_tmpa_seq \l_@@_tmpe_seq
                \@@_replace_restart:
              }
              {
                \tl_set:Nx \l_@@_tmpa_tl
                  { \seq_item:Nn \l_@@_tmpd_seq { \l_@@_tmpa_int } }
                \tl_if_eq:NNTF \l_@@_map_tl \l_@@_tmpa_tl
                  {
                    \int_incr:N \l_@@_tmpa_int
                    \seq_put_right:NV \l_@@_tmpb_seq \l_@@_map_tl
                  }
                  {
%    \end{macrocode}
% The candidate match failed: flush it to the output unchanged.
%    \begin{macrocode}
                    \@@_seq_push_seq:NN \l_@@_tmpa_seq \l_@@_tmpb_seq
                    \seq_clear:N \l_@@_tmpb_seq
                    \@@_replace_restart:
                  }
              }
          }
      }
%    \end{macrocode}
% End of input. If no ``once'' replacement has switched the matching off, the
% pattern is not empty and the iterator has run past the end of the pattern,
% the temporary sequence is a complete match and is replaced by |#4|.
% Otherwise the temporary sequence is flushed as it is.
%    \begin{macrocode}
    \bool_if:nTF
      {
        \int_compare_p:nNn { \l_@@_tmpb_int } = { 0 } &&
        \int_compare_p:nNn { \seq_count:N \l_@@_tmpd_seq } > { 0 } &&
        \int_compare_p:nNn
          { \l_@@_tmpa_int } = { 1 + \seq_count:N \l_@@_tmpd_seq }
      }
      { \@@_seq_push_seq:NN \l_@@_tmpa_seq \l_@@_tmpe_seq }
      { \@@_seq_push_seq:NN \l_@@_tmpa_seq \l_@@_tmpb_seq }
    \@@_var_if_global:NTF #1
      { \seq_gset_eq:NN #1 \l_@@_tmpa_seq }
      { \seq_set_eq:NN #1 \l_@@_tmpa_seq }
  }
%    \end{macrocode}
% \end{macro}
% \subsection{Front-end \LaTeXe\ macros}
% \begin{macro}{\aksharStrLen}
% Return the number of Devanagari characters in the string.
%    \begin{macrocode}
\NewDocumentCommand \aksharStrLen {m}
  {
    \akshar_convert:Nn \l_@@_tmpa_seq {#1}
    \seq_count:N \l_@@_tmpa_seq
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\aksharStrChar}
% Returns the $n$-th character of the string. |#1| is the string and |#2| is
% $n$, which must lie between 1 and the number of characters; otherwise the
% error |err_character_out_of_bound| is raised and \cs{scan_stop:} is left
% in the input stream.
%    \begin{macrocode}
\NewDocumentCommand \aksharStrChar {mm}
  {
    \akshar_convert:Nn \l_@@_tmpa_seq {#1}
    \bool_if:nTF
      {
        \int_compare_p:nNn {#2} > {0} &&
        \int_compare_p:nNn {#2} < {1 + \seq_count:N \l_@@_tmpa_seq}
      }
      { \seq_item:Nn \l_@@_tmpa_seq { #2 } }
      {
        \msg_error:nnnxx { akshar } { err_character_out_of_bound }
          { #1 } { \@@_int_append_ordinal:n { #2 } }
          { \int_eval:n { 1 + \seq_count:N \l_@@_tmpa_seq } }
        \scan_stop:
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\aksharStrHead}
% Return the first character of the string. An empty string raises the error
% |err_string_empty|, whose only argument is the word describing the
% requested character.
%    \begin{macrocode}
\NewDocumentCommand \aksharStrHead {m}
  {
    \akshar_convert:Nn \l_@@_tmpa_seq {#1}
    \int_compare:nNnTF { \seq_count:N \l_@@_tmpa_seq } = {0}
      {
        \msg_error:nnn { akshar } { err_string_empty } { first }
        \scan_stop:
      }
      { \seq_item:Nn \l_@@_tmpa_seq { 1 } }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\aksharStrTail}
% Return the last character of the string. An empty string is handled as in
% \cs{aksharStrHead}.
%    \begin{macrocode}
\NewDocumentCommand \aksharStrTail {m}
  {
    \akshar_convert:Nn \l_@@_tmpa_seq {#1}
    \int_compare:nNnTF { \seq_count:N \l_@@_tmpa_seq } = {0}
      {
        \msg_error:nnn { akshar } { err_string_empty } { last }
        \scan_stop:
      }
      { \seq_item:Nn \l_@@_tmpa_seq {\seq_count:N \l_@@_tmpa_seq} }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\aksharStrReplace, \aksharStrReplace*}
% Replace occurrences of |#3| of a string |#2| with another string |#4|. The
% star (|#1|) selects the ``replace once'' mode. The result is typeset by
% joining the items of the output sequence with nothing in between.
%    \begin{macrocode}
\NewDocumentCommand \aksharStrReplace {smmm}
  {
    \IfBooleanTF {#1}
      { \@@_replace:NnnnN \l_@@_tmpa_seq {#2} {#3} {#4} \c_true_bool }
      { \@@_replace:NnnnN \l_@@_tmpa_seq {#2} {#3} {#4} \c_false_bool }
    \seq_use:Nn \l_@@_tmpa_seq {}
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\aksharStrRemove, \aksharStrRemove*}
% Remove occurrences of |#3| in |#2|. This is just a special case of
% \cs{aksharStrReplace}, with an empty replacement.
%    \begin{macrocode}
\NewDocumentCommand \aksharStrRemove {smm}
  {
    \IfBooleanTF {#1}
      { \@@_replace:NnnnN \l_@@_tmpa_seq {#2} {#3} {} \c_true_bool }
      { \@@_replace:NnnnN \l_@@_tmpa_seq {#2} {#3} {} \c_false_bool }
    \seq_use:Nn \l_@@_tmpa_seq {}
  }
%    \end{macrocode}
% \end{macro}
%    \begin{macrocode}
%</package>
%    \end{macrocode}
% \end{implementation}
%
% \Finale