/* rtf2text - read rtf input, write text of document (text extraction). This installs callbacks for the ascii and control token classes. The control class is necessary so that special characters such as \par, \tab, \sect, etc. can be converted. It's problematic what to do with text in headers and footers, and what to do about tables. This really is quite a stupid program, for instance, it could keep track of the current leader character and dump that out when a tab is encountered. 04 Feb 91 Paul DuBois dubois@primate.wisc.edu 04 Feb 91 V1.0. Created. 27 Feb 91 V1.01. Updated for distribution 1.05. */ # include # include "rtf.h" /* structure for mapping character values >= 128 to text strings for different character sets. */ typedef struct CharMap CharMap; struct CharMap { int charVal; char *charStr; }; extern CharMap ansiCharMap[]; /* these are defined below */ extern CharMap macCharMap[]; extern CharMap pcCharMap[]; extern CharMap pcaCharMap[]; /* Default is ANSI but I hope we don't see \ansi, since its char map is empty... */ CharMap *charMap = ansiCharMap; static void Text (); static void Control (); static void CharSet (); static void Destination (); static void SpecialChar (); int main (argc, argv) int argc; char **argv; { RTFInit (); --argc; ++argv; /* not clever; only allows stdin or one named file to be read */ if (argc > 0) { if (freopen (argv[0], "r", stdin) == NULL) { fprintf (stderr, "Can't open \"%s\"\n", argv[0]); exit (1); } } /* install class callbacks and process the input stream */ RTFSetClassCallback (rtfText, Text); RTFSetClassCallback (rtfControl, Control); RTFRead (); exit (0); } static void Text () { PutChar (rtfMajor); } static void Control () { switch (rtfMajor) { case rtfCharSet: CharSet (); break; case rtfDestination: Destination (); break; case rtfSpecialChar: SpecialChar (); break; } } static void CharSet () { switch (rtfMinor) { case rtfAnsiCharSet: charMap = ansiCharMap; break; case rtfMacCharSet: charMap = macCharMap; break; case rtfPcCharSet: charMap = pcCharMap; break; case rtfPcaCharSet: charMap = pcaCharMap; break; } } /* This function notices destinations that should be ignored and skips to their ends. This keeps, for instance, picture data from being considered as plain text. */ static void Destination () { switch (rtfMinor) { case rtfPict: case rtfFNContSep: case rtfFNContNotice: case rtfInfo: case rtfIndexRange: case rtfITitle: case rtfISubject: case rtfIAuthor: case rtfIOperator: case rtfIKeywords: case rtfIComment: case rtfIVersion: case rtfIDoccomm: RTFSkipGroup (); break; } } static void SpecialChar () { switch (rtfMinor) { case rtfPage: case rtfSect: case rtfRow: case rtfLine: case rtfPar: PutChar ('\n'); break; case rtfCell: PutChar (' '); /* make sure cells are separated */ break; case rtfNoBrkSpace: PutChar (' '); break; case rtfTab: PutChar ('\t'); break; case rtfNoBrkHyphen: PutChar ('-'); break; } } /* Eventually this should keep track of the destination of the current state and only write text when in the initial state. */ PutChar (c) int c; { CharMap *cmp; char *p = "X"; if (c < 128) putchar (c); else { for (cmp = charMap; cmp->charStr != NULL; cmp++) { if (c == cmp->charVal) { p = cmp->charStr; break; } } fputs (p, stdout); } } CharMap ansiCharMap [] = { 0, NULL }; CharMap macCharMap [] = { 0xa0, "+", /* dagger */ 0xa1, "deg.", /* degree */ 0xa2, "cents", /* cent */ 0xa5, "o", /* bullet */ 0xa7, "B", /* German B? */ 0xa8, "reg.", /* registered */ 0xa9, "(c)", /* copyright */ 0xaa, "(TM)", /* trademark */ 0xab, "'", /* acute accent */ 0xad, "!=", /* not equal */ 0xae, "AE", /* joined A-E */ 0xb1, "+/-", /* plus or minus */ 0xb2, "<=", /* less than or equal */ 0xb3, ">=", /* greater than or equal */ 0xb5, "u", /* micro */ 0xb6, "d", /* delta */ 0xbe, "ae", /* joined a-e */ 0xc5, "~", /* approximately */ 0xc7, "<<", /* alternate quote */ 0xc8, ">>", /* alternate end-quote*/ 0xc9, "...", /* ellipsis */ 0xca, " ", /* unbreakable space */ 0xd0, "-", /* short dash */ 0xd1, "--", /* long dash */ 0xd2, "\"", /* left curly double quote */ 0xd3, "\"", /* right curly double quote */ 0xd4, "`", /* left curly single quote */ 0xd5, "'", /* right curly single quote */ 0xd6, "/", /* divide */ 0, NULL }; CharMap pcCharMap [] = { 0, NULL }; CharMap pcaCharMap [] = { 0, NULL };