/* Tango/Weevil - A WEB Tangler and Weaver Copyright (C) 1995 Corey Minyard This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. Corey Minyard - minyard@metronet.com */ #include #include #include #include "tango.h" #define GLOBAL 1 #define PROC_DEFN 2 #define INPROC 3 #define PROC_CAN_DEF 4 #define DEFINE 3 #define VARDEF 4 #define FOUND_OPER 5 #define OPER 6 #define NAME 7 #define NONE 8 #define PERIOD 9 #define RESERVED_WORD 10 #define SEMI 11 #define NUMSIGN 12 #define STRUCT 13 typedef struct { int code_state; int last_token; int nest_level; int sqbrace_nest; int paren_level; bool name_seen; bool is_static; bool in_define; bool in_number; bool define_name_found; bool lastescape; char quotechar; bool in_struct; bool in_enum; bool in_enum_define; bool in_extern_c; int struct_nest; bool comment_after_star; t_namelist *local_vars; t_namelist *last_name; int rword; int extern_c_nest; } t_codeinfo_c; #define WHILE_RW 1 #define DO_RW 2 #define FOR_RW 3 #define IF_RW 4 #define SWITCH_RW 5 #define CASE_RW 6 #define DEFAULT_RW 7 #define GOTO_RW 8 #define CONTINUE_RW 9 #define BREAK_RW 10 #define RETURN_RW 11 #define AUTO_RW 12 #define REGISTER_RW 13 #define STATIC_RW 14 #define EXTERN_RW 15 #define TYPEDEF_RW 16 #define CONST_RW 17 #define VOLATILE_RW 18 #define STRUCT_RW 19 #define UNION_RW 20 #define SIZEOF_RW 21 #define ELSE_RW 22 #define ENUM_RW 23 static struct rword_s { char *name; int val; int length; } rwords[] = { { "while", WHILE_RW, 5 }, { "do", DO_RW, 2 }, { "for", FOR_RW, 3 }, { "if", IF_RW, 2 }, { "switch", SWITCH_RW, 6 }, { "case", CASE_RW, 4 }, { "default", DEFAULT_RW, 7 }, { "goto", GOTO_RW, 4 }, { "continue", CONTINUE_RW, 8 }, { "break", BREAK_RW, 5 }, { "return", RETURN_RW, 6 }, { "auto", AUTO_RW, 4 }, { "register", REGISTER_RW, 8 }, { "static", STATIC_RW, 6 }, { "extern", EXTERN_RW, 6 }, { "typedef", TYPEDEF_RW, 7 }, { "const", CONST_RW, 5 }, { "volatile", VOLATILE_RW, 8 }, { "struct", STRUCT_RW, 6 }, { "union", UNION_RW, 5 }, { "sizeof", SIZEOF_RW, 6 }, { "else", ELSE_RW, 4 }, { "enum", ENUM_RW, 4 } }; static const int num_rw = (sizeof(rwords) / sizeof(struct rword_s)); static int is_reserved_word(char *name, int length) { int i; for (i=0; iauto_xref)) { free_namelist_item(lptd, item); } else if (ci->in_define) { list_insert_unique(lptd, &(lptd->curr_macro->pounddefs), item); } else if (! ci->in_struct) { switch(ci->code_state) { case GLOBAL: if (ci->is_static) { list_insert_unique(lptd, &(lptd->curr_macro->staticdefs), item); } else { list_insert_unique(lptd, &(lptd->curr_macro->globaldefs), item); } break; case PROC_DEFN: case PROC_CAN_DEF: list_insert_unique(lptd, &(ci->local_vars), item); item->u.nest_val = ci->nest_level; break; default: free_namelist_item(lptd, item); } } else { free_namelist_item(lptd, item); } } static void add_name_use(t_lptangodat *lptd, t_namelist *item, t_codeinfo_c *ci) { if (! (lptd->auto_xref)) { free_namelist_item(lptd, item); } else if (find_name_in_list(lptd, ci->local_vars, item) != NULL) { /* Don't track uses of local variables. */ free_namelist_item(lptd, item); } else if (!is_intrinsic_type(item->name)) { list_insert_unique(lptd, &(lptd->curr_macro->uses), item); } else { free_namelist_item(lptd, item); } } static void destroy_locals(t_lptangodat *lptd, t_codeinfo_c *ci) { t_namelist *hold; while ( (ci->local_vars != NULL) && (ci->local_vars->u.nest_val >= ci->nest_level)) { hold = ci->local_vars; ci->local_vars = hold->next; free_namelist_item(lptd, hold); } } void c_scan_input(t_lptangodat *lptd, char *line, int length, int lineno) { char *cp; char *start_tok; int count; t_namelist *defname; t_codeinfo_c *ci; ci = lptd->code_info; count = 0; cp = line; start_tok = NULL; while (count < length) { if (lptd->instring) { while (TRUE) { if ( (*cp == ci->quotechar) && (! ci->lastescape)) { lptd->instring = FALSE; break; } if (count >= length) { break; } if ( (*cp == '\\') && (! ci->lastescape)) { ci->lastescape = TRUE; } else { ci->lastescape = FALSE; } cp++; count++; } if ( (ci->last_token == RESERVED_WORD) && (ci->rword == EXTERN_RW) && (count > 2) && ((*(cp-1) == 'C') && (*(cp-2) == '"'))) { ci->in_extern_c = TRUE; ci->extern_c_nest = ci->nest_level; } ci->last_token = NAME; } else if (lptd->in_comment) { while (TRUE) { if (count >= length) { break; } else { if ( (ci->comment_after_star) && (*cp == '/')) { lptd->in_comment = FALSE; break; } if (*cp == '*') { ci->comment_after_star = TRUE; } else { ci->comment_after_star = FALSE; } } cp++; count++; } } else if ( ((*cp >= 'a') && (*cp <= 'z')) || ((*cp >= 'A') && (*cp <= 'Z')) || ((start_tok != NULL) && ((*cp >= '0') && (*cp <= '9'))) || (*cp == '_')) { if ((start_tok == NULL) && (!ci->in_number)) { start_tok = cp; } } else if ( ((*cp >= '0') && (*cp <= '9')) || ( ci->in_number && ( (*cp == '.') || (*cp == '+') || (*cp == '-')))) { ci->in_number = TRUE; } else { ci->in_number = FALSE; if (ci->in_define) { if ( (*cp == '/') && ((count+1) < length) && (*(cp+1) == '*')) { cp++; count++; lptd->in_comment = TRUE; } else if (ci->last_token == NUMSIGN) { ci->last_token = NONE; /* got a # directive. We are only really interested in defines */ if ( (start_tok != NULL) && ((cp - start_tok) == 6) && (strncmp(start_tok, "define", 6) == 0)) { ci->define_name_found = FALSE; } else { ci->define_name_found = TRUE; } } else if ( (start_tok != NULL) && (! ci->define_name_found)) { defname = create_namelist_item(lptd, start_tok, cp - start_tok); add_name_define(lptd, defname, ci); ci->define_name_found = TRUE; } else if ( (*cp == '\\') && ((count+1) < length)) { cp++; count++; } else if (*cp == '\n') { ci->in_define = FALSE; } start_tok = NULL; } else { if ( (start_tok != NULL) /* We just finished with a name */ && (ci->last_token != PERIOD)) /* Ignore struct elements */ { ci->rword = is_reserved_word(start_tok, cp - start_tok); if (ci->rword != -1) { ci->last_token = RESERVED_WORD; /* If static, save that for later. */ if (ci->rword == STATIC_RW) { ci->is_static = TRUE; } if ( ( (ci->rword == STRUCT_RW) || (ci->rword == UNION_RW)) && (! ci->in_struct)) { ci->in_struct = TRUE; ci->struct_nest = ci->nest_level; ci->last_token = STRUCT; } else if (ci->rword == ENUM_RW) { ci->in_enum = TRUE; } /* no other reserved words really matter. */ } /* The first name in a line is a name use, the rest must be definitions unless then are in {}. Also, a name right after struct or union is a define */ else { if (ci->code_state == GLOBAL) { /* The nesting level must be 0 or at the same level as an extern "C", a name see, and not in square braces. */ if ( ( (ci->nest_level == 0) || ( (ci->in_extern_c) && (ci->nest_level <= (ci->extern_c_nest+1))) || (ci->in_struct)) && (ci->name_seen) && (ci->sqbrace_nest == 0)) { /* This is a variable or type declaration. */ ci->last_name = create_namelist_item(lptd, start_tok, cp - start_tok); add_name_define(lptd, ci->last_name, ci); } /* Everything in an enum is a definition */ else if (ci->in_enum_define) { /* This is a variable or type declaration. */ ci->last_name = create_namelist_item(lptd, start_tok, cp - start_tok); add_name_define(lptd, ci->last_name, ci); } else { /* This is a variable or type use. */ ci->name_seen = TRUE; ci->last_name = create_namelist_item(lptd, start_tok, cp - start_tok); add_name_use(lptd, ci->last_name, ci); } } else { /* Handle a name in a procedure or parameter list. */ if ( ( (ci->code_state == PROC_CAN_DEF) || (ci->code_state == PROC_DEFN)) && (ci->name_seen) && (ci->sqbrace_nest == 0)) { /* This is a variable or type declaration. */ ci->last_name = create_namelist_item(lptd, start_tok, cp - start_tok); add_name_define(lptd, ci->last_name, ci); } else { /* This is a variable or type use. */ ci->name_seen = TRUE; ci->last_name = create_namelist_item(lptd, start_tok, cp - start_tok); add_name_use(lptd, ci->last_name, ci); } } ci->last_token = NAME; } } start_tok = NULL; switch(*cp) { case ' ': case '\t': break; case ';': if ( ( (ci->nest_level == 0) || ( (ci->in_extern_c) && (ci->nest_level <= (ci->extern_c_nest+1)))) && (ci->code_state == INPROC)) { destroy_locals(lptd, ci); ci->code_state = GLOBAL; } if ( (ci->in_struct) && (ci->nest_level <= ci->struct_nest)) { ci->in_struct = FALSE; } ci->last_name = NULL; ci->is_static = FALSE; ci->name_seen = FALSE; ci->last_token = SEMI; break; case ',': if (ci->code_state == PROC_DEFN) { /* In a list of procedure parameters, make commas look like semicolons. */ ci->last_name = NULL; ci->last_token = SEMI; ci->is_static = FALSE; ci->name_seen = FALSE; } else if (ci->last_token != NAME) { /* Probably passing parameters to a procedure call, just ignore. */ } break; case ')': ci->paren_level--; if ( (ci->paren_level == 0) && (ci->code_state == PROC_DEFN)) { ci->code_state = INPROC; } break; case '=': if (ci->code_state == PROC_CAN_DEF) { /* We've seen an assign, we are no longer in definition mode. */ ci->code_state = INPROC; if ( (ci->last_name != NULL) && (ci->last_name == ci->local_vars)) { /* I thought the last name was a define, it is acually a use. */ ci->local_vars = ci->last_name->next; add_name_use(lptd, ci->last_name, ci); } } ci->last_token = OPER; break; case '(': if ( (ci->code_state == GLOBAL) && (ci->last_token == NAME)) { ci->code_state = PROC_DEFN; ci->paren_level = 1; ci->last_name = NULL; ci->name_seen = FALSE; } else { ci->paren_level++; if ( (ci->last_token == NAME) && (ci->code_state == PROC_CAN_DEF) && (ci->last_name != NULL) && (ci->last_name == ci->local_vars)) { /* I thought the last name was a define, it is acually a use. */ ci->local_vars = ci->last_name->next; add_name_use(lptd, ci->last_name, ci); } } break; case '.': ci->last_token = PERIOD; break; case '-': if ( ((count+1) < length) && (*(cp+1) == '>')) { cp++; count++; /* Even though '->' is not a period, it works about the same for our purposes. */ ci->last_token = PERIOD; } else { ci->last_token = OPER; } break; case '{': if (ci->code_state == INPROC) { ci->code_state = PROC_CAN_DEF; /* Can define local vars now. */ } else if ( (ci->code_state == GLOBAL) && (ci->last_name != NULL) && (lptd->curr_macro->uses != NULL) && (lptd->curr_macro->uses == ci->last_name) && ( ( (ci->in_struct) && (ci->struct_nest == ci->nest_level)) || (ci->in_enum))) { /* I thought the last item was a use, it was actually a structure or enum define. */ lptd->curr_macro->uses = ci->last_name->next; if (ci->in_struct) { ci->in_struct = FALSE; add_name_define(lptd, ci->last_name, ci); ci->in_struct = TRUE; } else { ci->in_enum_define = TRUE; add_name_define(lptd, ci->last_name, ci); } } ci->nest_level++; ci->last_name = NULL; ci->name_seen = FALSE; ci->last_token = NONE; break; case '}': /* Make sure to get rid of anything declared in this block */ destroy_locals(lptd, ci); ci->nest_level--; ci->in_enum = FALSE; /* an rbrace will alway finish an enum */ ci->in_enum_define = FALSE; /* check to see if this is the end of an extern "C" { } */ if ((ci->in_extern_c) && (ci->extern_c_nest == ci->nest_level)) { ci->in_extern_c = FALSE; } /* An end brace can be like a name, such as at the end of a structure definition. */ ci->last_token = NAME; ci->name_seen = TRUE; if ( (ci->in_struct) && (ci->struct_nest >= ci->nest_level)) { ci->in_struct = FALSE; } if ( ( (ci->nest_level == 0) || ( (ci->in_extern_c) && (ci->nest_level <= (ci->extern_c_nest+1)))) && ( (ci->code_state == INPROC) || (ci->code_state == PROC_CAN_DEF))) { ci->code_state = GLOBAL; ci->name_seen = FALSE; ci->last_token = NONE; } else if (ci->code_state == PROC_CAN_DEF) { ci->code_state = INPROC; } break; case '"': case '\'': ci->quotechar = *cp; lptd->instring = TRUE; ci->lastescape = FALSE; break; case '[': (ci->sqbrace_nest)++; break; case ']': (ci->sqbrace_nest)--; break; case '#': ci->in_define = TRUE; ci->last_token = NUMSIGN; break; case '/': if ( ((count+1) < length) && (*(cp+1) == '*')) { cp++; count++; lptd->in_comment = TRUE; ci->comment_after_star = FALSE; } else { ci->last_token = OPER; } break; default: ci->last_token = OPER; break; } } } cp++; count++; } } void c_output_linenum(t_lptangodat *lptd, int lineno, char *filename) { fprintf(lptd->outfile, "#line %d \"%s\"\n", lineno, filename); } void init_c_lang(t_lptangodat *lptd) { t_codeinfo_c *ci; lptd->code_info = malloc(sizeof(t_codeinfo_c)); if (lptd->code_info == NULL) { fprintf(stderr, "Unable to allocate enough memory\n"); exit(1); } ci = lptd->code_info; ci->code_state = GLOBAL; ci->last_token = NONE; ci->nest_level = 0; ci->sqbrace_nest = 0; ci->paren_level = 0; ci->name_seen = FALSE; ci->is_static = FALSE; ci->in_define = FALSE; ci->in_number = FALSE; ci->define_name_found = FALSE; ci->lastescape = FALSE; ci->comment_after_star = FALSE; ci->quotechar = '\0'; ci->local_vars = NULL; ci->last_name = NULL; ci->in_struct = FALSE; ci->in_enum = FALSE; ci->in_enum_define = FALSE; ci->in_extern_c = FALSE; ci->struct_nest = 0; }