/*- * Copyright 1987 Jeff Sparkes * Department of Computer Science * Memorial University of Newfoundland * St. John's, Nfld. * garfield!jeff1, jeff1@garfield.mun.cdn * * Permission is granted to distribute and/or modify this code, provided * this copyright notice remains intact. * If you use it, let me know. If change it let me know. If you * make money from it, send me a share. */ #include #include #include "token.h" #include "table.h" #define SENTENCE 12 #define WORD 13 #define PARAGRAPH 14 #define SENTENCE2 15 #define SENTENCE3 16 #define SENTENCE4 17 #define Getc(x) x = nextchar(); if (x == 0) return WORD; else if (x == -1) goto top; #define Ungetc(c) current_char-- /*- sws why are these static? static int current_char, current; static char current_word[100]; static int blank_flag = 0, blank_next = 0; static int slashes, slashes_next; */ int current_char, current; char current_word[100]; int blank_flag = 0, blank_next = 0; int slashes, slashes_next; char nextchar(); int word_out = 0; int pending_punct = 0; int word_count, pos = 0; /*struct token token[10];*/ struct token token[20]; extern struct tab base_table[]; extern struct fix superfixes[], subfixes[]; extern struct special special[]; /* sws see if a char from tword is a terminal or not */ int isterm(c) char c; { switch (c) { case WORD: case PARAGRAPH: case SENTENCE: case SENTENCE2: case SENTENCE3: case SENTENCE4: return(1); default: return(0); } } tparse() { char c, c1; int tok, i, j; /* sws initialize */ slashes = 0; slashes_next = 0; blank_flag = 0; blank_next = 0; pos = 0; for (i = 0; i < 100; i++) { current_word[i] = ' '; } for (i = 0; i < 10; i++) { for (j = 0; j < 10; j++) token[i].str[j] = 0; /* sws trouble here */ /* token[i].char_num = 0; */ token[i].char_num = -1; token[i].vowel = V_NONE; token[i].special = 0; } /* sws changed format for readability */ /*- sws some definitions moved to tibdef.tex */ printf("\\bgroup\\tibetan\n"); while (1) { /*- * If we get a %, check for another immediately 
following. * This is the end of tibetan mode. Otherwise, it is just a * comment, but I figure it should be left in, just in case * some one needs to look at the filter output. */ /* initialize */ for (i = 0; i < 100; i++) current_word[i] = 0; /* now get stuff */ /*- sws rewritten to allow more general checking - including TeX commands inside now tex stuff moved to getchar */ while(1) { c = getc(input); switch(c) { case ' ': /* ignore blanks here (before a word) */ continue; case '#': blank_next = 1; continue; case '\n': c1 = getc(input); if (c1 == '\n') { output_special(PARAGRAPH); continue; } else ungetc(c1, input); continue; case '%': c1 = getc(input); if (c1 == '%') { /* end of tibetan mode */ output_pending_punct(); printf("\\egroup "); return; } else { /* a comment within tibetan mode, pass it through */ ungetc(c1, input); printf("%% "); while ((c = getc(input)) != '\n') putchar(c); putchar('\n'); } continue; case EOF: printf("[tparse] Missing closing %%%%\n"); exit(1); continue; default: /*- ok, we found some stuff that's a word candidate, go to the next phase */ ungetc(c, input); break; } /* we get here only from the default */ break; } /* we get here from the above default case */ /* now read a word */ if (fscanf(input, "%s", current_word) == EOF) { printf("Missing %%%%\n"); exit(1); } current_char = 0; current = -1; tok = 0; tok = tword(); output(current); if (tok != 0) output_special(tok); } } /*- Parse the word, as designated by white space. The tokens are put into an array so that some tokens can affect previous ones. The argument is the number of the current token in array. */ tword() { char c, c1, s[10]; int ind = 0; int i, mtch; /* sws */ int current_save; top: ind = 0; c = Getc(c); while (1) { /*- * Check for end of word delimiters. If it's also end of * sentence, then do the appropriate thing. 
*/ switch (c) { case '#': /* don't output the word separator */ blank_next = 1; return (WORD); case '/': /*#define sldb*/ /* sws to keep track */ current_save = current; #ifdef sldb printf("%%\n%%{\\rm found slash, current = %d}\n", current); #endif /* sws watch out for nextchar picking up specials */ slashes_next = 0; c1 = nextchar(); while (c1 == '/') { slashes_next++; c1 = nextchar(); } #ifdef sldb printf("%%\n%%{\\rm found %d slashes, current = %d}\n", slashes_next,current); #endif /*- * If the slashes are at the end of a word, * keep the count in slashes_next, and * return an end of sentence. */ /* sws old version, loses if we picked up a special */ /* if (isspace(c1) || c1 == 0 || c1 == -1) { return (SENTENCE); } */ /* look for space, formfeed, nl, tab, vtab */ if (isspace(c1) || c1 == 0 || c1 == -1) { #ifdef sldb printf("%%\n%%{\\rm found space, current = %d,", current); printf(" current_save = %d}\n", current_save); #endif if (current != current_save) { /* sws slashes came first */ output_pending_punct(); for (i = 0; i <= slashes_next; i++) printf("\\tibsp\\char115\\tibetan"); slashes_next = 0; return (WORD); } else { /* really end of sentence */ return (SENTENCE); } } /* end of sentence, no space */ if (c1 == '*') { #ifdef sldb printf("%%\n%%{\\rm found [*], current = %d}\n", current); #endif /* must be end of sentence */ return (SENTENCE4); } /*- * Otherwise, the slashes are at the beginning * so output them now. */ /* sws debug*/ /* printf("\n{\\rm current = %d}\n", current); if ( current_char >= 0) printf("{\\rm current char = %c}\n", current_word[current_char]); */ /* sws old version */ output_pending_punct(); for (i = 0; i <= slashes_next; i++) printf("\\tibsp\\char115\\tibetan"); /*sws alternate... 
*/ /*- output(current); for (i=0; i<=slashes; i++) printf("\\tibsp\\char115\\tibetan"); slashes = 0; */ c = c1; slashes_next = 0; continue; case '!': output(current); return (SENTENCE2); case '|': output(current); return (SENTENCE3); case '*': /*output(current);*/ return (SENTENCE4); case '%': Ungetc(c); return (0); case '\n': return (WORD); default: break; } if (isspace(c)) { return (WORD); } current++; /*- If we've found a superfix, parse the next token. If it is a token that the superfix can modify, then use the modified char_num, otherwise the superfix is merely a vowel-less base character */ if (super(c)) { int cur, tok; if ((c1 = current_word[current_char++]) == 0) { Ungetc(c1); goto not_super; } if (sub(c1) || (c == 's' && c1 == 'h') || (vowel(c1) != V_NONE)) { Ungetc(c1); goto not_super; } if (!isalpha(c1)) { Ungetc(c1); goto not_super; } Ungetc(c1); sprintf(s, "%c", c); cur = current; /* fix so that next_char doesn't overwrite with special */ token[current].char_num = 0; /* sws attempt to fix this... note ends get missed here as tok gets forgotten just why is this here? */ /* tword();*/ tok = tword(); /* printf("%%\n%% found tok = %c [%d]\n", tok,tok);*/ #if 0 /* looses super or sub base still ...*/ /* stop if we found an end of sentence etc */ if (isterm(tok)) return(tok); #endif /* apparently fixed later ? */ #if 0 if (tok != 0) { /* looses super base */ /*return(tok);*/ output_special(tok); } #endif /* sws... need to fix this sometime... 
*/ /* requires a space after a / */ /*- * For some reason, // at the end get bypassed */ /* backup till last char */ /* sws also include brackets */ /*while (!isalpha(current_word[current_char])) { current_char--; } current_char++; */ #if 0 while ((NULL == current_word[current_char]) || ('/' == current_word[current_char]) || (!isalpha(current_word[current_char])) && (!current_word[current_char] == '{' ) && (!current_word[current_char] == '}' ) ) { current_char--; } #endif current_char++; if ((token[cur].char_num = match(SUPER, s, token[cur + 1].char_num)) != -1) { int j; sprintf(token[cur].str, "%c%s", c, token[cur + 1].str); token[cur + 1].str[0] = 0; token[cur].vowel = token[cur + 1].vowel; token[cur + 1].char_num = -1; token[cur + 1].special = 0; token[cur + 1].vowel = V_NONE; return (tword()); } else { token[cur].char_num = match(BASE, s, -1); strcat(token[cur].str, s); token[cur].vowel = V_NONE; return (tword()); } } not_super: mtch = 0; while (1) { /*- * Match the g.y case. */ if (c == '.') { token[current].str[ind] = 0; token[current].vowel = V_A; return (tword()); } /*- * Check for a subfix.. */ if (sub(c)) { int t, t1; sprintf(s, "%c", c); t = match(SUB, s, token[current].char_num); if (t != ERROR) { c1 = Getc(c1); if ((t1 = vowel(c1)) == V_NONE) { Ungetc(c1); Ungetc(c); token[current].vowel = V_NONE; return (tword()); } else { token[current].vowel = t1; token[current].char_num = t; strcat(token[current].str, s); return (tword()); } } } token[current].vowel = vowel(c); /*- * We've matched the a+ glyph. */ if (mtch == 0 && token[current].vowel != V_NONE) { if (ind == 0) { /*- * We've matched a single vowel * glyph. */ return (tword()); } token[current].str[ind++] = c; token[current].str[ind] = 0; break; /*- * We've hit a vowel, which is the end of the * glyph. */ } else if (token[current].vowel != V_NONE) { token[current].str[ind] = 0; return (tword()); /*- * Check to see if what we have so far + the * next is a glyph. 
If not, then this * character is the beginning of the next * one. */ } else { int n; token[current].str[ind++] = c; token[current].str[ind] = 0; if ((n = match(BASE, token[current].str, -1)) != ERROR) { token[current].char_num = n; mtch++; } else if (mtch != 0) { token[current].str[--ind] = 0; Ungetc(c); return (tword()); } else { bad_word(); return (0); } } c = Getc(c); } } } /* sws The actual output routine. 1. dumps any pending punctuation 2. reset some parameters 3. print (count) tokens */ output(count) int count; { int i, shift, cn, ch; char fs[20]; /*- * Indicate that output has occurred. */ output_pending_punct(); blank_flag = blank_next; blank_next = 0; slashes = slashes_next; slashes_next = 0; word_out = 1; for (i = 0; i <= count; i++) { /*- * Check for a single vowel glyph. The output is different * for a single vowel since it has nothing to modify. */ shift = 0; if (token[i].special == SPECIAL) { printf("%s%%\n", token[i].str); continue; } if (token[i].char_num > 127) { cn = token[i].char_num - 128; strcpy(fs, "\\tibsp"); } else { cn = token[i].char_num; strcpy(fs, "\\tibetan"); } if (token[i].str[0] == 0) if (token[i].vowel != V_NONE) shift = 1; else continue; /* save the character so that we can find the last one */ ch = token[i].char_num; switch (token[i].vowel) { case V_A: case V_NONE: if (shift) printf("\\char29"); else printf("\\char%d", cn); break; case V_E: if (shift) printf("\\tibsp\\accent127\\tibetan\\char29"); else printf("\\tibsp\\accent127%s\\char%d", fs, cn); break; case V_I: if (shift) printf("\\tibsp\\accent126\\tibetan\\char29"); else printf("\\tibsp\\accent126%s\\char%d", fs, cn); break; case V_O: if (shift) printf("\\tibsp\\accent125\\tibetan\\char29"); else printf("\\tibsp\\accent125%s\\char%d", fs, cn); break; case V_U: if (shift) printf("\\u{\\char29}"); else printf("\\u{\\char%d}", cn); break; default: break; } } for (i = 0; i < 10; i++) { int j; for (j = 0; j < 10; j++) token[i].str[j] = 0; token[i].char_num = -1; token[i].vowel 
= V_NONE; token[i].special = 0; } } /* sws This only does something if we are at the end of a paragraph. Else it just sets up some pending punctuation. */ output_special(c) /*sws char c;*/ int c; { /*- * Don't output any special markers unless output has done something * since the last time we've been called. */ if (word_out) { switch (c) { case SENTENCE: if (pending_punct == 0 || pending_punct == WORD) pending_punct = SENTENCE; break; case SENTENCE2: if (pending_punct == 0 || pending_punct == WORD) pending_punct = SENTENCE2; break; case SENTENCE3: if (pending_punct == 0 || pending_punct == WORD) pending_punct = SENTENCE3; break; case SENTENCE4: if (pending_punct == 0 || pending_punct == WORD) pending_punct = SENTENCE4; break; case WORD: if (pending_punct == 0) pending_punct = WORD; break; case PARAGRAPH: output_pending_punct(); printf("\n\n"); break; default: break; } } } /* sws this prints out any ending stuff that is in the pipeline */ output_pending_punct() { int i; switch (pending_punct) { /* this controls the amount of space at the end of sentences */ case SENTENCE: for (i = 0; i < slashes; i++) printf("\\tibsp\\char115\\tibetan"); printf("\\filler\\tibsp\\char115\\tspace\\tibetan\n"); break; case SENTENCE2: printf("\\filler\\tibsp\\char121\\tspace\\tibetan\n"); break; case SENTENCE3: printf("\\filler\\tspace\\tibetan\n"); break; case SENTENCE4: /* same as sentence, but no space at end */ printf("\\filler"); for (i = 0; i <= slashes; i++) printf("\\tibsp\\char115\\tibetan"); break; case WORD: if (blank_flag) { printf("\\filler\\tenrm\\ \\tibetan\n"); blank_flag = 0; } else { /*printf("\\filler\\tibsp\\char114\\tenrm\\ \\tibetan\n");*/ printf("\\filler"); printf("\\twspace"); printf("\\tibsp\\char114"); printf("\\twspace"); printf("\\tibetan\n"); } break; default: break; } pending_punct = 0; } /* sws new function to check for tex brackets */ /* {} */ int isbracket(c) char c; { return ( (c == '{') || (c == '}') ); } /* sws new function to check for tex 
termination */ /* terminate with ' ,/{}\#' and \0 */ /*
 * istexterm: report whether c is one of the characters that end a TeX
 * token. Yields 1 when c is a space, comma, slash, opening or closing
 * brace, backslash, hash sign, or compares equal to NULL; otherwise 0.
 * NOTE(review): NULL is a pointer constant. The intent is presumably the
 * string terminator, which is portably spelled as the zero character
 * constant; where NULL expands to a void pointer this comparison mixes
 * pointer and integer operands - confirm and fix.
 * End of file is not handled here (open question left by the author).
 */ int istexterm(c) char c; { return ( (c == ' ') || (c == ',') || (c == '/') || (c == '{') || (c == '}') || (c == '\\') || (c == '#') || (c == NULL) ); } /*
 * nextchar (sws)
 * NOTE(review): this definition is incomplete in this copy of the file;
 * see the note at the shad output loop further down.
 */ /*
 * Fetch the next character from current_word. Specials (runs of
 * slashes) are caught here and stored as tokens so that tword never
 * sees them. current_char is expected to index one position past the
 * character most recently consumed from current_word.
 */ char nextchar() { int i,j; int no_space; /* end of string: hand back the terminator without advancing current_char (NOTE(review): compared against NULL, same concern as in istexterm) */ if ( current_word[current_char] == NULL ) { return (current_word[current_char]); } /* look for /'s */ /* report the character just after */ else if ( current_word[current_char] == '/' ) { /* count the run of slashes starting at current_char */ i = 1; while ( current_word[current_char+i] == '/' ) { i++; } /* i is now the number of slashes */ /* sws... need to check for //x or // (space - end of word)*/ no_space = 0; if ( ( current_word[current_char+i] == NULL ) || ( isspace(current_word[current_char+i]) ) ) { blank_next = 1; /* printf("%%\n%% found blank after %d\n",i);*/ } else if ( current_word[current_char+i]=='*' ) { /* no space after last shad */ no_space = 1; blank_next = 0; } else { /* printf("%%\n%% found [%c] after %d\n", current_word[current_char+i],i);*/ } if (current == -1) current = 0; else if (token[current].char_num != -1) current++; token[current].char_num = 0; token[current].special = SPECIAL; /* token[current].str = "\\tibsp\\char115\\tibetan\0";*/ /* strcpy(token[current].str, "\\tibsp\\char115\\tibetan\0");*/ /* setup: the token slot chosen above is now marked SPECIAL and its string is seeded below */ /* slashes that follow earlier text in the word (current_char above zero) get a filler in front; slashes at the very start do not */ if (current_char >0) strcpy(token[current].str, "\\filler\\tibsp\0"); else strcpy(token[current].str, "\\tibsp\0"); /* output the shads
 * NOTE(review): the text that follows is damaged in this copy. The loop
 * header breaks off after its initialiser and the line resumes inside
 * what looks like a printf format string, so the remainder of nextchar
 * and the beginning of the next function are missing. Restore this
 * region from a pristine copy of the source before building.
 */ for (j=0; j\n", current_word); } /* end of tparse.c */