@q Copyright 2012-2024, Alexander Shibakov@> @q This file is part of SPLinT@> @q SPLinT is free software: you can redistribute it and/or modify@> @q it under the terms of the GNU General Public License as published by@> @q the Free Software Foundation, either version 3 of the License, or@> @q (at your option) any later version.@> @q SPLinT is distributed in the hope that it will be useful,@> @q but WITHOUT ANY WARRANTY; without even the implied warranty of@> @q MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the@> @q GNU General Public License for more details.@> @q You should have received a copy of the GNU General Public License@> @q along with SPLinT. If not, see .@> @*1\eatone{Bison}\bison\ specific routines. The placeholder code left blank in the common routines is filed in with the code relevant to the output of parser tables in the following sections. @*2Tables. \namedspot{bsfile}Here are all the parser table names. Some tables are not output but adding one to the list in the future will be easy: it does not even have to be done here. @= _register_table_d(yytranslate)@; _register_table_d(yyr1)@; _register_table_d(yyr2)@; _register_table_d(yydefact)@; _register_table_d(yydefgoto)@; _register_table_d(yypact)@; _register_table_d(yypgoto)@; _register_table_d(yytable)@; _register_table_d(yycheck)@; _register_table_d(yystos)@; _register_table_d(yytname)@; _register_table_d(yyprhs)@; _register_table_d(yyrhs)@; @ One special table requires a little bit more preparation. This is a table that lists the depth of the stack before an implicit terminal. It is not one of the tables that is used by \bison\ itself but is needed if the symbolic name processing is to be implemented (\bison\ has access to this information `on the fly'). The `new' \bison\ (starting with version~\.{3.0}) does not generate |yyprhs| and |yyrhs| or any other arrays that contain similar information, so we fake them here if such a crippled version of \bison\ is used. The |yyrimplicit| array will be used by the table output code, together with the postprocessor to output right hand side lengths for the term references that require them in the case when the `native' \bison\ references are used. @= unsigned int yyrthree[YYNRULES + 1] = { 0 }; int yyrimplicit[YYNRULES + 1] = { 0 }; #ifdef BISON_IS_CRIPPLED unsigned int yyrhs[YYNRULES + 1] = { -1 }; unsigned int yyprhs[YYNRULES + 1] = { 0 }; #endif @ We populate this table below $\ldots$ @= #ifndef BISON_IS_CRIPPLED assert( YYNRULES + 1 == sizeof(yyprhs)/sizeof(yyprhs[0]) ); { int i, j; for ( i = 1; i <= YYNRULES; i++ ) { for ( j = 0; yyrhs[ yyprhs[i] + j ] != -1; j++ ) { assert( yyprhs[i] + j < sizeof(yyrhs) ); assert( j < yyr1[i] ); if ( @ ) { @@; } } } } #endif @ @= ( strlen( yytname[ yyrhs[yyprhs[i]+j] ] ) > 1 ) && ( yytname[ yyrhs[yyprhs[i]+j] ][0] == '$' ) && ( yytname[ yyrhs[yyprhs[i]+j] ][1] == '@@' ) @ @= int rule_number; for ( rule_number = 1; rule_number < YYNRULES; rule_number++ ) { if ( yyr1[rule_number] == yyrhs[yyprhs[i]+j] ) { yyrthree[rule_number] = j; break; } } assert( rule_number < YYNRULES ); @ $\ldots$ and add its name to the list. @= _register_table_d(yyrthree)@; @ We list some macros that are used to assist the post processor and take advantage of the |yyrimlicit| array. As at this time the size of the array is unknown (the preamble is included before the parser file by \.{mkeparser.w} so the number of rules is unknown at this point), we declare the array as a pointer. @d BZ( term, anchor ) ( ((YYSTYPE *)&(term)) - ((YYSTYPE *)&(anchor)) + 1 ) @d BZZ( term, anchor ) ( (yyrimplicit_p[yyn] = ((yyrimplicit_p[yyn] < 0) ? yyrimplicit_p[yyn] : ((YYSTYPE *)&(term)) - ((YYSTYPE *)&(anchor)) + 1)), ((YYSTYPE *)&(term)) - ((YYSTYPE *)&(anchor)) + 1 ) @<\Cee\ setup code specific to \bison@>= int *yyrimplicit_p; @ @= yyrimplicit_p = yyrimplicit; @*2Actions. There are several ways of making |yyparse()| execute all portions of the action code. The one chosen here makes sure that none of the tables gets written past its last element. To see how it works, it might be helpful to `walk through' \bison's output to see how each change affects the generated parser. @= if ( output_desc.output_actions ) { int i, j; fprintf( tables_out, "%s", action_desc.preamble ); if ( !bare_actions ) { yypact[0] = YYPACT_NINF; yypgoto[0] = -1; yydefgoto[0] = YYFINAL; } for ( i = 1; i < sizeof(yyr1)/sizeof(yyr1[0]); i++ ) { fprintf( tables_out, action_desc.act_setup, i, yyr2[i] - 1 ); if ( action_desc.print_rule ) { action_desc.print_rule( i ); } if ( yyr2[i] > 0 ) { if ( action_desc.action1 ) { fprintf( tables_out, "%s", action_desc.action1 ); } } for ( j = 2; j <= yyr2[i]; j++ ) { if ( action_desc.actionn ) { fprintf( tables_out, action_desc.actionn, j ); } } if ( !bare_actions ) { yyr1[i] = YYNTOKENS; yydefact[0] = i; yyrimplicit[i] = -yyr2[i]; yyr2[i] = 0; yyparse(YYPARSE_PARAMETERS); } fprintf( tables_out, action_desc.act_suffix, i, yyr2[i] - 1 ); } fprintf( tables_out, "%s", action_desc.postamble ); if ( action_desc.cleanup ) { action_desc.cleanup( &action_desc ); } } for ( int i = 1; i < YYNRULES + 1; i++ ) { if ( yyrimplicit[i] > 0 ) { fprintf( tables_out, "\\yyimplicitlengthset{%d}{%d}%%\n", i, yyrimplicit[i] ); } } @*2Constants. A generic list of constants to be used later in different contexts is defined below. As before, the appropriate macro will be defined generically to do what is required with these names (for example, we can turn each name into a string for reporting purposes). @= _register_const_d(YYEMPTY)@; _register_const_d(YYPACT_NINF)@; _register_const_d(YYLAST)@; _register_const_d(YYNTOKENS)@; _register_const_d(YYNRULES)@; _register_const_d(YYNSTATES)@; _register_const_d(YYFINAL)@; #ifndef YYEOF _register_const_d(YYSYMBOL_YYEOF)@; #endif @ Constants defined to maintain compatibility with the older versions of \bison. @= _register_const_d(YYSYMBOL_YYEOF, YYEOF)@; @*2Tokens. Similar techniques are employed in token output. Tokens are parser specific (the scanner only needs their numeric values) so we need {\it some\/} flexibility to output them in a desired format. For special purposes (say changing the way tokens are typeset) we can control the format tokens are output in. @= char *token_format_char = NULL; char *token_format_affix = NULL; char *token_format_suffix = NULL; char *bootstrap_token_format = NULL; @ @= register_option_("token-format-char", required_argument, 0, TOKEN_FORMAT_CHAR, "")@; register_option_("token-format-affix", required_argument, 0, TOKEN_FORMAT_AFFIX, "")@; register_option_("token-format-suffix", required_argument, 0, TOKEN_FORMAT_SUFFIX, "")@; register_option_("bootstrap-token-format", required_argument, 0, BOOTSTRAP_TOKEN_FORMAT, "")@; @ @= case TOKEN_FORMAT_CHAR:@; token_format_char = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) ); strcpy(token_format_char, optarg); break; case TOKEN_FORMAT_AFFIX:@; token_format_affix = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) ); strcpy(token_format_affix, optarg); break; case TOKEN_FORMAT_SUFFIX:@; token_format_suffix = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) ); strcpy(token_format_suffix, optarg); break; case BOOTSTRAP_TOKEN_FORMAT:@; bootstrap_token_format = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) ); strcpy(bootstrap_token_format, optarg); break; @ @= bool output_tokens:1; @ No tokens are output by default. @= @[@].output_tokens = 0,@[@] @ The only part of the code below that needs any explanation is the `bootstrap' token output. In \bison\ every token has three attributes: its `macro name' (say, \.{STRING}) that is used by the parse code internally, its `print name' (\.{"string"} to continue the example) that \bison\ uses to print the token names in its diagnostic messages, and its numeric value (that can be assigned implicitly by \bison\ itself or explicitly by the user). Only the `print names' are kept in the |yytname| array so to reuse the scanner used by \bison\ we either have to extract the token `macro names' from the \Cee\ code ourselves to pass them on to the lexer, or use a special `stripped down' version of a \bison\ grammar parser to extract the names from the parser's \bison\ grammar. To do this, some token names would still need to be known to the scanner. These tokens are selected by hand to make the `bootstrapping' parser operational. The token list for the \bison\ grammar parser can be examined as part of the appropriate \locallink{bootstraptokens}driver file\endlink. @= if ( output_desc.output_tokens ) { int i; int length; char token; char *token_name; bool too_creative = false; for ( i = 258; i < sizeof(yytranslate)/sizeof(yytranslate[0]) ; i++ ) { token_name = yytname[yytranslate[i]]; if ( token_name ) { fprintf( tables_out, token_format_affix, yytranslate[i], i ); length = 0; while ( (token = *token_name) ) { if ( token_format_char ) { length += fprintf( tables_out, token_format_char, (unsigned int)token ); } if ( token < 040 || token == 0177 ) { too_creative = true; } token_name++; } fprintf( tables_out, token_format_suffix, too_creative ? ".unprintable." : yytname[yytranslate[i]] ); } } } #ifdef BISON_BOOTSTRAP_MODE fprintf( tables_out, "\\bootstrapmodetrue\n" ); fprintf( tables_out, "%% token values needed to bootstrap the parser\n" ); bootstrap_tokens( bootstrap_token_format ); #endif @ The size of the token name table is useful to determine, say, how many `named' tokens the parser uses. @= fprintf( tables_out, "\\constset{YYTRANSLATESIZE}{%d}%%\n", (int)(sizeof(yytranslate)/sizeof(yytranslate[0])) ); @*2Output modes. The code below can be easily extended and modified to output parser tables, actions, and constants in a language of one's choice. We are only interested in \TeX, however, thus other modes are very rudimentary or non-existent at this point. @*3 Token only mode. Token only output mode does exactly what is expected: outputs token names and values in the format of your choosing. @= TOKEN_ONLY_OUT,@[@] @ @= case TOKEN_ONLY_OUT:@; @@; break; @ @= register_option_("token-only-mode", no_argument, 0, TOKEN_ONLY_MODE, "")@; @ @= case TOKEN_ONLY_MODE:@; mode = TOKEN_ONLY_OUT; break; @ @= if ( !token_format_char ) { token_format_char = "{%u}"; } if ( !token_format_affix ) { token_format_affix = "%% token: %d, token value: %d\n\\prettytoken@@{"; } if ( !token_format_suffix ) { token_format_suffix = "}%% %s\n"; } output_desc.output_tokens = 1; @*3Generic output. Generic output is not programmed yet. @= GENERIC_OUT,@[@] @ @= case GENERIC_OUT:@; printf( "This mode is not supported yet\n" ); exit(0); break; @*3\TeX\ output. The \TeX\ mode is the main reason for this software. @= TEX_OUT,@[@] @ @= case TEX_OUT:@; @@; @@; @@; @@; break; @ Some tables require name adjustments due to \TeX's reluctance to treat digits as part of a name. @= #define _register_table_d(name) tex_table(name); @@; #undef _register_table_d yyr1_desc.name = "yyrone"; yyr2_desc.name = "yyrtwo"; @ The memory allocated for the |yytname| table is released at the end. @= void yytname_cleanup( struct table_d *table ); int yytname_formatter_tex( FILE *stream, int index ); int yytname_formatter( FILE *stream, int index ); @ There are a number of helper functions to output complicated names in \TeX. The safest way seems to be to output those as sequences of {\sc ASCII} codes to accommodate names like \.{\$}\.{end} safely. \TeX's \.{\^\^}$\ldots$ convention is supported as well. @= void yytname_cleanup( struct table_d *table ) { free( table->separator ); free( table->null ); } int yytname_formatter_tex( FILE *stream, int index ) { char *token_name = yytname[index]; unsigned char token; int length = 0; fprintf( stream, "\\addname " ); while ( (token = *token_name) ) { if ( token < 040 || token == 0177 ) { /* unprintable characters */ fprintf( stream, "^^%c", token < 0100 ? (unsigned char)(token + 0100) : (unsigned char)(token - 100) ); length += 3; } else { fprintf( stream, "%c", token ); length++; } token_name++; } fprintf( stream, "\n" ); return length; } int yytname_formatter( FILE *stream, int index ) { char *token_name; unsigned char token; int length = 0; bool too_creative = false; /* to indicate if the name is too dangerous to print */ fprintf( stream, "\\addname" ); if ( index >= 0 ) { /* this is not the last name */ token_name = yytname[index]; if ( token_name == NULL ) { token_name = "$impossible"; } while ( (token = *token_name) ) { length += fprintf( stream, "{%u}", (unsigned int)token ); if ( token < 040 || token == 0177 ) { too_creative = true; } token_name++; } fprintf( stream, "%% %s\n", too_creative ? ".unprintable." : yytname[index] ); } else { /* this is the last name */ token_name = yytname[-index]; if ( token_name == NULL ) { token_name = "$impossible"; } while ( (token = *token_name) ) { length += fprintf( stream, "{%u}", (unsigned int)token ); token_name++; if ( token < 040 || token == 0177 ) { too_creative = true; } } fprintf( stream, "%% %s\n\\end\n%%\n", too_creative ? ".unprintable." : ( yytname[-index] ? yytname[-index] : "end of array" ) ); } return length; } @ @= yytname_desc.preamble = "%%\n\\newtable{yytname}{}\\tempca0\\relax%% a robust way to add the yytname array\n"; yytname_desc.separator = NULL; yytname_desc.postamble = NULL; yytname_desc.null = NULL; yytname_desc.null_postamble = NULL; yytname_desc.optimized_numeric = NULL; yytname_desc.prettify = false; yytname_desc.formatter = yytname_formatter; yytname_desc.cleanup = NULL; output_desc.output_yytname = 1; @ @= if ( optimize_actions ) { action_desc.preamble = "%\n% the big switch\n%\n"@/ "\\catcode`\\/=0\\relax % see the documentation for an explanation of this trick\n"@/ "\\def\\yybigswitch#1{%%\n"@/ " \\csname dobisonaction\\number #1\\parsernamespace\\endcsname\n"@; "}\\stashswitch{yybigswitch}%%\n"; action_desc.act_setup = "\n\\expandafter\\def\\csname dobisonaction%d\\parsernamespace\\endcsname{%%\n%%"; action_desc.act_suffix = "}%% end of rule %d\n"; action_desc.action1 = NULL; action_desc.actionn = NULL; action_desc.postamble = "\n\\catcode`\\/=12\\relax\n\n"; action_desc.print_rule = print_rule; action_desc.cleanup = NULL; output_desc.output_actions = 1; } else { action_desc.preamble = "%\n% the big switch\n%\n"@/ "\\catcode`\\/=0\\relax % see the documentation for an explanation of this trick\n"@/ "\\def\\yybigswitch#1{%%\n"@; " \\ifcase#1\\relax\n"; action_desc.act_setup = " \\or %% (rule %d) "; action_desc.act_suffix = ""; action_desc.action1 = NULL; action_desc.actionn = NULL; action_desc.postamble = " \\else\n \\fi\n}\\stashswitch{yybigswitch}%%\n\\catcode`\\/=12\\relax\n\n"; action_desc.print_rule = print_rule; action_desc.cleanup = NULL; output_desc.output_actions = 1; } @ Grammar rules are listed in a readable form alongside the action code to make it possible to quickly find an appropriate action. The rules are not output if a crippled \bison\ is used. @= void print_rule( int n ) { fprintf( tables_out, "%s%s: ", (n < 10 && !optimize_actions ? " " : ""), yytname[yyr1[n]] ); #ifndef BISON_IS_CRIPPLED int i; i = yyprhs[n]; if ( yyrhs[i] < 0 ) { fprintf( tables_out, "" ); } else { while( yyrhs[i] > 0 ) { fprintf( tables_out, "%s ", yytname[yyrhs[i]] ); i++; } } #endif fprintf( tables_out, "\n" ); } @ \TeX\ constant output is another place where the techniques described above are applied. As before, the macro handles the repetitive work of initialization, declaration, etc in each place where the corresponding constant is mentioned. The exceptions are \.{YYPACT\_NINF} and \.{YYSYMBOL\_YYEOF} that have to be handled separately because the underscore in its name makes it difficult to use it as a command sequence name. \def\YYPACTxNINFxdesc{\.{YYPACT\_NINF\_}\\{desc}} \def\YYSYMBOLxYYEOFxdesc{\.{YYSYMBOL\_YYEOF\_}\\{desc}} @s YYPACT_NINF_desc TeX @s YYSYMBOL_YYEOF_desc TeX @= #define _register_const_d(c_name) @[c_name##_desc.format = "\\constset{%s}{%d}%%\n"; \ c_name##_desc.name = #c_name; \ c_name##_desc.value = c_name; \ output_desc.output_##c_name = 1;@] @@; #undef _register_const_d #ifdef YYEOF /* other values have already been set correctly */ #define _register_const_d(c_name,vvalue) @[c_name##_desc.format = "\\constset{%s}{%d}%%\n"; \ c_name##_desc.name = #c_name; \ c_name##_desc.value = vvalue; \ output_desc.output_##c_name = 1;@] @@; #undef _register_const_d #endif YYPACT_NINF_desc.name = "YYPACTNINF"; YYSYMBOL_YYEOF_desc.name = "YYSYMBOLxYYEOF"; @ Token definitions round off the \TeX\ output mode. @= token_format_char = NULL; /* do not output individual characters */ if ( !token_format_affix ) { token_format_affix = "\\tokenset{%d}{%d}"; } if ( !token_format_suffix ) { token_format_suffix = "%% %s\n"; } if ( !bootstrap_token_format ) { bootstrap_token_format = "\\expandafter\\def\\csname token\\parsernamespace %s\\endcsname{%d}%% %s\n"; } /* |output_desc.output_tokens = 1;| is no longer necessary as it is done entirely in \TeX */ @*2 Command line options. We start with the most obvious option, the one begging for help. @ @= register_option_("help", no_argument, 0, LONG_HELP, "")@; @ @= @[@[@], 'h'@] @ @= case 'h': /* short help */@; fprintf(stderr, "Usage: %s [options] output_file\n", argv[0]); exit(0); break; /* should not be needed */ case LONG_HELP:@; fprintf(stderr, "%s [--mode=TeX:options] output_file outputs tables\n" " and constants for a TeX parser\n", argv[0]); exit(0); break; /* should not be needed */ @ @= register_option_("debug", optional_argument, 0, 'b', "")@; register_option_("mode", required_argument, 0, 'm', "")@; register_option_("table-separator", required_argument, 0, 'z', "")@; register_option_("format", required_argument, 0, 'f', "")@; /* name? */ register_option_("table", required_argument, 0, 't', "")@; /* specific table */ register_option_("constant", required_argument, 0, 'c', "")@; /* specific constant */ register_option_("name-length", required_argument, 0, 'l', "")@; /* change |MAX_NAME_LENGTH| */ register_option_("token", required_argument, 0, 'n', "")@; /* specific token */ register_option_("run-parse", required_argument, 0, 'p', "")@; /* run the parser */ register_option_("parse-file", required_argument, 0, 'i', "")@; /* input for the parser */ @ The string below is a list of short options. @ A few options can be discussed immediately. @= char *table_separator = "%s "; @ @= case 'm': /* output mode */@; switch( optarg[0] ) { case 'T': case 't':@; mode = TEX_OUT; break; case 'b': case 'B': case 'g': case 'G':@; mode = GENERIC_OUT; break; default:@; break; } break; case 'z': table_separator = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) ); strcpy(table_separator, optarg); break;