/*
 * Copyright 1987 Jeff Sparkes
 * Department of Computer Science
 * Memorial University of Newfoundland
 * St. John's, Nfld.
 * garfield!jeff1, jeff1@garfield.mun.cdn
 *
 * Permission is granted to distribute and/or modify this code, provided
 * this copyright notice remains intact.
 * If you use it, let me know.  If change it let me know.  If you
 * make money from it, send me a share.
 */

/*
 * Parser for the tibetan-mode section of the input.  Words are read from
 * the stream `input`, split into glyph tokens, and written to stdout as
 * TeX commands.
 *
 * NOTE(review): the header names of the first two #include directives are
 * missing in this copy.  The code uses printf/getc/fscanf and
 * isspace/isalpha, so they were presumably <stdio.h> and <ctype.h> --
 * confirm against a pristine copy.
 */
#include
#include
#include "token.h"
#include "table.h"

/*
 * Codes returned by tword() and accepted by output_special().  PARAGRAPH
 * is only generated by tparse() when it sees two consecutive newlines.
 */
#define SENTENCE 12
#define WORD 13
#define PARAGRAPH 14
#define SENTENCE2 15
#define SENTENCE3 16

/*
 * Getc(x): fetch the next character of the current word into x.
 * This expands to several statements with hidden control flow: it makes
 * the enclosing function return WORD when nextchar() yields 0, and jumps
 * to a label named `top` when it yields -1.  It can therefore only be
 * used inside a function that has a `top:` label (tword), and must not be
 * used as the unbraced body of an if/else.  The call sites write
 * `c = Getc(c);`, which expands to `c = c = nextchar(); ...`.
 *
 * Ungetc(c): step the word cursor back by one; the argument is ignored.
 */
#define Getc(x) x = nextchar(); if (x == 0) return WORD; else if (x == -1) goto top;
#define Ungetc(c) current_char--

/* current_char: cursor into current_word.  current: index of the token
 * being built in token[] (-1 means no token yet). */
static int current_char, current;
/* The whitespace-delimited word currently being parsed. */
static char current_word[100];
/* blank_next is set when a '#' is seen; output() moves it to blank_flag. */
static int blank_flag = 0, blank_next = 0;
/* slashes_next counts the extra '/' characters following a first '/';
 * output() moves it to slashes. */
static int slashes, slashes_next;
/* word_out: set to 1 by output().  pending_punct: punctuation code waiting
 * to be emitted.  nextchar() is defined outside the visible text.
 * NOTE(review): nextchar() is declared to return char but is compared with
 * -1, which only works where plain char is signed. */
char word_out, nextchar(), pending_punct;
/* Not referenced anywhere in the visible code. */
int word_count, pos = 0;
/* Glyph tokens of the current word; filled by tword(), consumed and
 * cleared by output(). */
struct token token[10];
extern struct tab base_table[];
extern struct fix superfixes[], subfixes[];
extern struct special special[];

/*
 * tparse: main loop of tibetan mode.
 *
 * Clears the token table, prints the TeX preamble, then repeatedly reads
 * one whitespace-delimited word from `input` (declared outside the visible
 * text), parses it with tword() and emits it with output().  Returns when
 * a word starting with "%%" is read; exits with status 1 if the input
 * ends first.
 */
tparse()
{
	char c;
	int tok, i, j;

	/* Start with an empty token table (char_num is 0 here, whereas
	 * output() resets it to -1). */
	for (i = 0; i < 10; i++) {
		for (j = 0; j < 10; j++)
			token[i].str[j] = 0;
		token[i].char_num = 0;
		token[i].vowel = V_NONE;
		token[i].special = 0;
	}
	/* TeX preamble: open a group, select the font, define the \u macro
	 * used by output() for the V_U vowel, and define \filler. */
	printf("\\bgroup\\tibetan ");
	printf("\\def\\u#1{\\vtop{\\baselineskip0pt\\hbox{#1}\\hbox{\\tibsp\\char123}}}");
	printf("\\hyphenpenalty=10000\\parindent=0pt ");
	printf("\\newbox\\fillerbox\\setbox\\fillerbox\\hbox{\\vrule height0.4cm depth0.4cm width0pt}\\def\\filler{\\copy\\fillerbox}");
	while (1) {
		/*
		 * If we get a %, check for another immediately following.
		 * This is the end of tibetan mode.  Otherwise, it is just a
		 * comment, but I figure it should be left in, just in case
		 * some one needs to look at the filter output.
		 */
		for (i=0; i<100; i++)
			current_word[i] = 0;
		/* NOTE(review): getc() returns int but c is a char, so EOF
		 * cannot be told apart reliably from a data byte here. */
		c = getc(input);
		/* Skip leading whitespace; two newlines in a row are reported
		 * as a paragraph break.  After the break c is still '\n', so
		 * the second loop below carries on skipping. */
		while (isspace(c)) {
			char c1;

			if (c == '\n') {
				c1 = getc(input);
				if (c1 == '\n') {
					output_special(PARAGRAPH);
					break;
				} else
					ungetc(c1, input);
			}
			c = getc(input);
		}
		/* Skip remaining whitespace and any '#' marks; a '#' sets
		 * blank_next. */
		while (isspace(c) || c == '#') {
			if (c == '#')
				blank_next = 1;
			c = getc(input);
		}
		ungetc(c, input);
		/* NOTE(review): %s has no field width, so a word of 100 or
		 * more characters overruns current_word. */
		if (fscanf(input, "%s", current_word) == EOF) {
			printf("Missing %%%%\n");
			exit(1);
		}
		current_char = 0;
		if (current_word[0] == '%') {
			if (current_word[1] == '%') {
				/* "%%": flush punctuation, close the TeX group
				 * opened in the preamble, and leave. */
				output_pending_punct();
				printf("\\egroup ");
				return;
			} else {
				/* Single '%': copy the comment through to the
				 * end of the line.
				 * NOTE(review): there is no EOF test, so this
				 * loop never ends if the input stops before a
				 * newline. */
				printf("%% %s", current_word + 1);
				while ((c = getc(input)) != '\n')
					putchar(c);
				putchar('\n');
			}
		}
		/* Parse and emit the word.  For a comment word control also
		 * reaches this point; tword() returns 0 when it meets '%'. */
		current = -1;
		tok = 0;
		tok = tword();
		output(current);
		if (tok != 0)
			output_special(tok);
	}
}

/*
 * Parse the word, as designated by white space.  The tokens are put into
 * an array so that some tokens can affect previous ones.
 *
 * tword takes no argument; it works on the file-scope state current_word,
 * current_char (cursor in the word) and current (index of the token being
 * built), and calls itself to parse each following glyph.
 *
 * Returns WORD, SENTENCE, SENTENCE2 or SENTENCE3 according to how the word
 * ended, or 0 when it meets a '%' or a sequence it cannot match (after
 * calling bad_word()).
 */
tword()
{
	char c, c1, s[10];
	int ind = 0;
	int i, mtch;

top:	/* target of the goto hidden inside Getc() */
	ind = 0;
	c = Getc(c);
	while (1) {
		/*
		 * Check for end of word delimiters.  If it's also end of
		 * sentence, then do the appropriate thing.
		 */
		switch (c) {
		case '#':
			/* don't output the word separator */
			blank_next = 1;
			return(WORD);
		case '/':
			/* Count the slashes that follow this one. */
			slashes_next = 0;
			c1 = nextchar();
			while (c1 == '/') {
				slashes_next++;
				c1 = nextchar();
			}
			/*
			 * If the slashes are at the end of a word,
			 * keep the count in slashes_next, and
			 * return an end of sentence.
			 */
			if (isspace(c1) || c1 == 0 || c1 == -1) {
				return (SENTENCE);
			}
			/*
			 * Otherwise, the slashes are at the beginning
			 * so output them now.  The loop runs
			 * slashes_next + 1 times: once for the first
			 * slash plus once for each one counted above.
			 */
			output_pending_punct();
			for (i=0; i<=slashes_next; i++)
				printf("\\tibsp\\char115\\tibetan");
			c = c1;
			slashes_next = 0;
			continue;
		case '!':
			output(current);
			return (SENTENCE2);
		case '|':
			output(current);
			return (SENTENCE3);
		case '%':
			Ungetc(c);
			return (0);
		case '\n':
			return (WORD);
		default:
			break;
		}
		if (isspace(c)) {
			return (WORD);
		}
		/* Start a new token. */
		current++;
		/*
		 * If we've found a superfix, parse the next token.  If it is
		 * a token that the superfix can modify, then use the
		 * modified char_num, otherwise the superfix is merely a
		 * vowel-less base character
		 */
		if (super(c)) {
			int cur;

			/* Peek at the following character.  End of word, a
			 * subfix, the pair "sh", a vowel, or a non-letter
			 * all mean c is handled as an ordinary character. */
			if ((c1 = current_word[current_char++]) == 0) {
				Ungetc(c1);
				goto not_super;
			}
			if (sub(c1) || (c == 's' && c1 == 'h') || (vowel(c1) != V_NONE)) {
				Ungetc(c1);
				goto not_super;
			}
			if (!isalpha(c1)) {
				Ungetc(c1);
				goto not_super;
			}
			Ungetc(c1);
			sprintf(s, "%c", c);
			/* Remember this slot: the recursive call advances
			 * current. */
			cur = current;
			/* fix so that next_char doesn't overwrite with special */
			token[current].char_num = 0;
			tword();
			/*
			 * For some reason, // at the end get bypassed
			 */
			/* Back the cursor up to just past the last letter. */
			while(!isalpha(current_word[current_char]))
				current_char--;
			current_char++;
			if ((token[cur].char_num = match(SUPER, s, token[cur+1].char_num)) != -1) {
				int j;	/* NOTE(review): unused */

				/* The superfix combines with the following
				 * token: merge that token into slot cur and
				 * blank out slot cur+1. */
				sprintf(token[cur].str, "%c%s", c, token[cur + 1].str);
				token[cur+1].str[0] = 0;
				token[cur].vowel = token[cur+1].vowel;
				token[cur+1].char_num = -1;
				token[cur+1].special = 0;
				token[cur+1].vowel = V_NONE;
				return (tword());
			} else {
				/* No combined form: emit c as a plain base
				 * character without a vowel. */
				token[cur].char_num = match(BASE, s, -1);
				strcat(token[cur].str, s);
				token[cur].vowel = V_NONE;
				return (tword());
			}
		}
not_super:
		/* mtch counts successful BASE matches for this token. */
		mtch = 0;
		while (1) {
			/*
			 * Match the g.y case.
			 */
			if (c == '.') {
				token[current].str[ind] = 0;
				token[current].vowel = V_A;
				return (tword());
			}
			/*
			 * Check for a subfix..
			 */
			if (sub(c)) {
				int t, t1;

				sprintf(s, "%c", c);
				t = match(SUB, s, token[current].char_num);
				if (t != ERROR) {
					c1 = Getc(c1);
					if ((t1 = vowel(c1)) == V_NONE) {
						/* Not followed by a vowel:
						 * push both characters back
						 * and close this token. */
						Ungetc(c1);
						Ungetc(c);
						token[current].vowel = V_NONE;
						return(tword());
					} else {
						/* Subfix plus vowel: adopt
						 * the subfixed glyph. */
						token[current].vowel = t1;
						token[current].char_num = t;
						strcat(token[current].str, s);
						return (tword());
					}
				}
			}
			token[current].vowel = vowel(c);
			/*
			 * We've matched the a+ glyph.
			 */
			if (mtch == 0 && token[current].vowel != V_NONE) {
				if (ind == 0) {
					/*
					 * We've matched a single vowel
					 * glyph.
					 */
					return (tword());
				}
				token[current].str[ind++] = c;
				token[current].str[ind] = 0;
				/* Leaves the inner loop only; the outer loop
				 * then runs again with the same c. */
				break;
			/*
			 * We've hit a vowel, which is the end of the
			 * glyph.
			 */
			} else if (token[current].vowel != V_NONE) {
				token[current].str[ind] = 0;
				return (tword());
			/*
			 * Check to see if what we have so far + the
			 * next is a glyph.  If not, then this
			 * character is the beginning of the next
			 * one.
			 */
			} else {
				int n;

				token[current].str[ind++] = c;
				token[current].str[ind] = 0;
				if ((n = match(BASE, token[current].str, -1)) != ERROR) {
					token[current].char_num = n;
					mtch++;
				} else if (mtch != 0) {
					/* The longer string is not a glyph:
					 * drop c, push it back, and start the
					 * next token. */
					token[current].str[--ind] = 0;
					Ungetc(c);
					return (tword());
				} else {
					/* Not even the first character
					 * matches. */
					bad_word();
					return (0);
				}
			}
			c = Getc(c);
		}
	}
}

/*
 * output: emit tokens 0..count of token[] as TeX, then clear the whole
 * token table.
 *
 * Any pending punctuation is flushed first, and the blank_next and
 * slashes_next values are moved into blank_flag and slashes.  Calling
 * with count == -1 emits no glyphs but still performs those steps and
 * sets word_out.
 */
output(count)
int count;
{
	int i, shift, cn, ch;
	char fs[20];

	/*
	 * Indicate that output has occurred.
	 */
	output_pending_punct();
	blank_flag = blank_next;
	blank_next = 0;
	slashes = slashes_next;
	slashes_next = 0;
	word_out = 1;
	for (i = 0; i <= count; i++) {
		/*
		 * Check for a single vowel glyph.  The output is different
		 * for a single vowel since it has nothing to modify.
		 */
		shift = 0;
		if (token[i].special == SPECIAL) {
			/* Special tokens are written out verbatim. */
			printf("%s%%\n", token[i].str);
			continue;
		}
		/* Codes above 127 are printed with \tibsp after subtracting
		 * 128; all others are printed with \tibetan. */
		if (token[i].char_num > 127) {
			cn = token[i].char_num - 128;
			strcpy(fs, "\\tibsp");
		} else {
			cn = token[i].char_num;
			strcpy(fs, "\\tibetan");
		}
		/* The else belongs to the inner if: an empty token with a
		 * vowel is a standalone vowel (shift), an empty token
		 * without one is skipped. */
		if (token[i].str[0] == 0)
			if (token[i].vowel != V_NONE)
				shift = 1;
			else
				continue;
		/* save the character so that we can find the last one */
		/* NOTE(review): ch is never read in the visible code. */
		ch = token[i].char_num;
		/* A standalone vowel is printed on \char29 in place of the
		 * token's own glyph code. */
		switch (token[i].vowel) {
		case V_A:
		case V_NONE:
			if (shift)
				printf("\\char29");
			else
				printf("\\char%d", cn);
			break;
		case V_E:
			if (shift)
				printf("\\tibsp\\accent127\\tibetan\\char29");
			else
				printf("\\tibsp\\accent127%s\\char%d", fs, cn);
			break;
		case V_I:
			if (shift)
				printf("\\tibsp\\accent126\\tibetan\\char29");
			else
				printf("\\tibsp\\accent126%s\\char%d", fs, cn);
			break;
		case V_O:
			if (shift)
				printf("\\tibsp\\accent125\\tibetan\\char29");
			else
				printf("\\tibsp\\accent125%s\\char%d", fs, cn);
			break;
		case V_U:
			/* Uses the \u macro defined in the tparse() preamble. */
			if (shift)
				printf("\\u{\\char29}");
			else
				printf("\\u{\\char%d}", cn);
			break;
		default:
			break;
		}
	}
	/* Reset every token slot for the next word. */
	for (i = 0; i < 10; i++) {
		int j;

		for (j = 0; j < 10; j++)
			token[i].str[j] = 0;
		token[i].char_num = -1;
		token[i].vowel = V_NONE;
		token[i].special = 0;
	}
}

/*
 * output_special: record the punctuation code c (one of the SENTENCE*,
 * WORD or PARAGRAPH values) for later emission.
 *
 * A sentence mark replaces a pending WORD or an empty slot; WORD is only
 * stored when nothing is pending.  PARAGRAPH flushes the pending mark at
 * once and prints a blank line.  Nothing happens while word_out is 0.
 */
output_special(c)
char c;
{
	/*
	 * Don't output any special markers unless output has done something
	 * since the last time we've been called.
	 */
	if (word_out) {
		switch (c) {
		case SENTENCE:
			if (pending_punct == 0 || pending_punct == WORD)
				pending_punct = SENTENCE;
			break;
		case SENTENCE2:
			if (pending_punct == 0 || pending_punct == WORD)
				pending_punct = SENTENCE2;
			break;
		case SENTENCE3:
			if (pending_punct == 0 || pending_punct == WORD)
				pending_punct = SENTENCE3;
			break;
		case WORD:
			if (pending_punct == 0)
				pending_punct = WORD;
			break;
		case PARAGRAPH:
			output_pending_punct();
			printf("\n\n");
			break;
		default:
			break;
		}
	}
}

/*
 * output_pending_punct: switches on pending_punct; the rest of its body
 * is not visible.
 *
 * NOTE(review): this copy of the file is damaged from here on.  The text
 * breaks off inside the for-loop header below and resumes at the tail of
 * a printf call that prints current_word -- presumably the end of
 * bad_word(), which tword() calls but which is not otherwise visible.
 * Restore the missing text from a pristine copy; do not guess at it.
 */
output_pending_punct()
{
	int i;

	switch (pending_punct) {
	/* this controls the amount of space at the end of sentences */
	case SENTENCE:
		for (i=0; i\n", current_word); }