/* #module IdxParse "3-001" *********************************************************************** * * * The software was developed at the Monsanto Company and is provided * * "as-is". Monsanto Company and the auther disclaim all warranties * * on the software, including without limitation, all implied warran- * * ties of merchantabilitiy and fitness. * * * * This software does not contain any technical data or information * * that is proprietary in nature. It may be copied, modified, and * * distributed on a non-profit basis and with the inclusion of this * * notice. * * * *********************************************************************** */ /* * Module Name: IdxParse * * Author: R L Aurbach CR&DS MIS Group 26-Apr-1986 * * Function: * Parse an input line produced by LaTeX \index command into a form usable * for the generation of an automatic index. * * Modification History: * * Version Initials Date Description * ------------------------------------------------------------------------ * 1-001 RLA 26-Apr-1986 Original Code * 2-002 RLA 10-Apr-1987 Add support for the Idx_Extract routine. * 2-003 RLA 15-Apr-1987 Add support for cross-referencing * 2-004 RLA 20-Apr-1987 Change the cross reference symbol to * 3-001 F.H. 
17-May-1991 converted to portable C */ /* * Module IdxParse - Module-Wide Data Description Section * * Include Files: */ #ifdef MSDOS #include #include #define F_OK 0 /* access(): File exists */ #else #include extern char *sprintf(); #endif #include #include #include "IdxDef.h" /* * Module Definitions: */ #define TRUE 1 #define FALSE 0 /* * Global Declarations: */ /* * Static Declarations: */ #ifdef MSDOS void idx_parse(char *linebf, char *token_1, char *token_2, char *token_3, char *page_no, int *token_ct, int *flag); int idx_extract(char *string, int *start, int *length); #else void idx_parse(); int idx_extract(); #endif /* * External References: */ /* * Functions Called: */ /* * Function Idx_Parse - Documentation Section * * Discussion: * Parse an input line which is of the general form * \indexentry{token_1>token_2>token_3}{page_no} * into tokens for "token_1", "token_2", "token_3", and "page_no". * Handle missing tokens and allow for imbedded LaTeX commands. * * Calling Synopsis: * Call Idx_Parse (linebf, token_1, token_2, token_3, page_no, token_ct, * flag) * * Inputs: * linebf -> is the input string containing the line to parse. * ASCIZ string passed by reference. * * Outputs: * token_1 -> is the first token seen (if any). * ASCIZ string passed by reference. * * token_2 -> is the second token seen (if any). * ASCIZ string passed by reference. * * token_3 -> is the third token seen (if any). * ASCIZ string passed by reference. * * page_no -> is the page number token (should always be present). * ASCIZ string passed by refernce. * * token_ct -> is the number of tokens seen. * Integer passed by reference. * * flag -> indicates if the page reference is a cross-reference. * Boolean passed by reference. * * Return Value: * none * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * returns with the record processed. * * Error Conditions: * none * * Algorithm: * A. Initialize variables. * B. 
Verify that the line begins with \indexentry. * C. Extract the index item substring. * D. Parse it into tokens. * E. Extract the page reference substring. * * Special Notes: * This logic is designed to: * * copy sequences such as \{, \}, and $>$ from the input stream * into tokens without recognizing them as token delimiters. * * allow sequences such as {\bf text} in token without recognizing * the special characters as delimiters. * * allow up to three levels of index and one level of page-no, * ignoring all other characters (which shouldn't exist...) */ /* * Function Idx_Parse - Code Section */ void idx_parse(linebf,token_1,token_2,token_3,page_no,token_ct,flag) char *linebf; /* Input string to be parsed */ char *token_1; /* First token string */ char *token_2; /* Second token string */ char *token_3; /* Third token string */ char *page_no; /* Page number token string */ int *token_ct; /* Number of item tokens seen */ int *flag; /* Cross-Reference flag */ { /* * Local Declarations */ int i; /* linebf counter */ int next; /* beginning of next substring */ int len; /* length of substring */ int math_mode = FALSE; /* math-mode flag */ char *ptr; /* pointer to token buffer */ int index; /* index into token buffer */ int end; /* end of substring */ int dummy; /* * Module Body */ /* Initialize assorted variables, flags, etc. */ token_1[0] = '\0'; token_2[0] = '\0'; token_3[0] = '\0'; page_no[0] = '\0'; *token_ct = 0; *flag = FALSE; ptr = token_1; index = 0; /* Verify that the string begins with \indexentry */ if (strncmp(linebf, "\\indexentry{", 12) != 0) return; /* Locate the first substring. */ i = 11; next = idx_extract(linebf, &i, &len); if (len == 0) return; *token_ct = 1; end = i + len; /* For all characters in the substring, parse out the tokens. 
*/ for ( ; i < end; i++) { switch (linebf[i]) { case '$' : if (linebf[i-1] != '\\') { if (math_mode) math_mode = FALSE; else math_mode = TRUE; } ptr[index++] = linebf[i]; break; case '>' : if (math_mode) ptr[index++] = linebf[i]; else { if (index == 0) break; ptr[index] = '\0'; index = 0; switch (*token_ct) { case 1 : ptr = token_2; *token_ct = 2; break; case 2 : ptr = token_3; *token_ct = 3; break; } } break; case '&' : if (linebf[i-1] == '\\') { ptr[index++] = linebf[i]; break; } ptr[index] = '\0'; dummy = end-i; (void)sprintf(page_no, "\\indexentry{%.*s}{}", dummy, &linebf[++i]); *flag = TRUE; return; default : ptr[index++] = linebf[i]; break; } } ptr[index] = '\0'; /* Now parse the second substring and copy it to the page-ref. */ i = next; next = idx_extract(linebf, &i, &len); (void)strncpy (page_no, &linebf[i], len); page_no[len] = '\0'; } /* * Function Idx_Extract - Documentation Section * * Discussion: * Extract a string contained within a {} pair, which may contain imbedded * LaTeX commands. * * Calling Synopsis: * next = Idx_Extract (string, start, length) * * Inputs: * string -> is an ASCIZ string containing a substring enclosed in * matching braces ({}). * * start -> is the string index of the first character of the * string to be searched. It is passed by reference. * * Outputs: * start -> is the string index of the first character of the * substring, passed by reference. * * length -> is the number of characters in the substring, passed by * reference. * * Return Value: * next -> is the string index of the first character after the * '}' which terminates the substring. If the substring * is not terminated by '}', next is the string index of * the '\0' character. * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * none * * Error Conditions: * none * * Algorithm: * A. Scan the string for the first '{'. * B. For all succeeding characters in the string, * 1. 
/* If the character is '{' (not preceded by '\\'),
 *              a. Increment the brace count.
 *          2. If the character is '}' (not preceded by '\\'),
 *              a. Decrement the brace count.
 *              b. If the brace count = 0,
 *                  1. Return the start and length of the substring.
 *
 *  Special Notes:
 *      none
 */

/*
 *  Function Idx_Extract - Code Section
 *
 *  Locate the next brace-delimited substring of `string`, searching from
 *  index *start.
 *
 *  string  - NUL-terminated text to search; it is only read.
 *  start   - in:  index at which the search begins.  The code reads
 *                 string[*start] straight away, so the index must lie
 *                 within the string (at most the index of its '\0').
 *            out: index of the first character after the opening '{', or
 *                 the index of the '\0' when no opening '{' exists.
 *  length  - out: number of characters between the opening '{' and its
 *                 matching '}'; nested, unescaped braces are included.
 *                 0 when no opening '{' exists.  When the closing '}' is
 *                 missing, everything up to the '\0' is counted.
 *
 *  Returns the index of the character following the matching '}', or the
 *  index of the '\0' when there is no opening '{' or no matching '}'.
 *
 *  A brace directly preceded by '\' is treated as ordinary text.
 */

int idx_extract (string, start, length)
    char    *string;
    int     *start;
    int     *length;
{
/*
 * Local Declarations
 */
    int     i;                  /* string index                         */
    int     brace_ct = 0;       /* brace count                          */

/*
 * Module Body
 */

/*
 * Scan the string for the first occurrence of a '{' character which is not
 * preceded by a '\\'.  The next character marks the beginning of the
 * substring to be extracted.
 */

    for (i = *start; string[i] != '\0'; i++) {
        if (string[i] == '{' && (i == 0 || string[i-1] != '\\')) {
            *start = i + 1;
            brace_ct = 1;
            break;
        }
    }

/*
 * If brace_ct == 0, no leading { was found.  The loop above then ran to the
 * terminator, so i is the index of the '\0'.  (sizeof(string) would only be
 * the size of the pointer, not the length of the text.)  Return
 * start = next = end-of-string and length = 0.
 */

    if (brace_ct == 0) {
        *start = i;
        *length = 0;
        return (i);
    }

/*
 * Now search the string for a matching }.  *start is at least 1 here, so
 * string[i-1] is always a valid look-behind.
 */

    for (i = *start; string[i] != '\0'; i++) {
        if (string[i-1] == '\\') continue;      /* escaped: plain text  */
        if (string[i] == '{') {
            brace_ct++;
        }
        else if (string[i] == '}') {
            brace_ct--;
            if (brace_ct == 0) {
                *length = i - *start;
                return (i + 1);
            }
        }
    }

/*
 * If we get here, no closing } was found.  Treat the end of string as a
 * matching }.  i is again the index of the '\0'.
 */

    *length = i - *start;
    return (i);
}