/******************************************
 dosToTex - converts 8bit encodings to (La)TeX multibyte escape sequences.

 usage:
   dosToTex [ -1 | -u ] < inputfile.dos > outputfile.tex
   dosToTex -h

 Switches:
   -1 : assume that the input file has been encoded in the cp1252 (Windows)
        character set
   -u : assume that the input file has been encoded in the "US ASCII"
        (EndNote terminology) character set
   -h : print a help line
   neither -1 nor -u: assume that the input file has been encoded in the
        ANSI (EndNote terminology) character set

 Note that it does NOT convert line feeds etc as does dos2unix; therefore
 piping in addition through dos2unix may be useful:
   dosToTex [ -1 | -u ] < inputfile.dos | dos2unix > outputfile.tex
   dosToTex [ -1 | -u ] < inputfile.dos | sed 's/\r//' > outputfile.tex

 As a more versatile alternative, one might combine the converter 'recode'
 with the use of \usepackage[latin1]{inputenc} in the LaTeX source:
   http://recode.progiciels-bpi.ca/
   http://directory.fsf.org/recode.html

 See also
   http://www.cs.uu.nl/wais/html/na-dir/internationalization/font-faq.html
   http://wwwvms.mppmu.mpg.de/FAQ/iso-charset.faq
   http://budling.nytud.hu/~szigetva/etcetera/Hungarian/converters/dos2tex
   http://www.ctan.org/tex-archive/support/xtexshell/tfc.cc
   http://billposer.org/Software/uni2ascii.html

 Richard J. Mathar, http://www.mpia.de/~mathar
 Dec 07, 2015
*****************************************/

#include "config.h"

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

/* Number of elements of a true array (not a pointer), as an int. */
#define TABLE_LEN(t) ((int)(sizeof(t) / sizeof((t)[0])))

/* Byte value that corresponds to element 0 of each translation table. */
enum {
    CP1252_FIRST  = 0x80,
    ENDNOTE_FIRST = 129,
    USASCII_FIRST = 128
};

/* Input encodings the filter understands. */
enum encoding {
    ENC_ENDNOTE,    /* default: "ANSI" table, page 108 of the EndNote 7 manual */
    ENC_USASCII,    /* -u: "US ASCII" table, page 109 of the EndNote 7 manual */
    ENC_CP1252      /* -1: http://czyborra.com/charsets/cp1252.gif */
};

/*
 * CP1252 (Windows) bytes 0x80..0xff, according to
 * http://czyborra.com/charsets/cp1252.gif
 * A NULL entry means "no replacement, copy the byte unchanged".
 */
static const char *const cp1252_tex[] = {
    /* 0x80 */ NULL, NULL, ",", "$f$", "``", "$\\ldots$", "\\dag ", "\\ddag ",
    /* 0x88 */ "\\symbol{94}", NULL, "\\v{S}", "<", "{\\OE}", NULL, "\\v{Z}", NULL,
    /* 0x90 */ NULL, "'", "'", "``", "''", NULL, "-", "--",
    /* 0x98 */ "\\symbol{126}", NULL, "\\v{s}", ">", "{\\oe}", NULL, "\\v{z}", "{\\\"Y}",
    /* 0xa0 */ NULL, "!`", NULL, "\\pounds ", NULL, NULL, "$\\mid$", "\\S ",
    /* 0xa8 */ "\\symbol{127}", "\\copyright ", NULL, "$\\ll$", "$\\neg$", NULL, NULL, "\\symbol{22}",
    /* 0xb0 */ "$^0$", "$\\pm$", "$^2$", "$^3$", "'", "$\\mu$", "\\P ", "$\\cdot$",
    /* 0xb8 */ NULL, "$^1$", NULL, "$\\gg$", "1/4", "1/2", "3/4", "?`",
    /* 0xc0 */ "\\`A", "\\'A", "\\^A", "\\~A", "{\\\"A}", "{\\AA}", "{\\AE}", "\\c{C}",
    /* 0xc8 */ "\\`E", "\\'E", "\\^E", "{\\\"E}", "\\`I", "\\'I", "\\^I", "{\\\"I}",
    /* 0xd0 */ NULL, "\\~N", "\\`O", "\\'O", "\\^O", "\\~O", "{\\\"O}", "$\\times$",
    /* 0xd8 */ "{\\O}", "\\`U", "\\'U", "\\^U", "{\\\"U}", "\\'Y", NULL, "\\ss ",
    /* 0xe0 */ "\\`a", "\\'a", "\\^a", "\\~a", "{\\\"a}", "{\\aa}", "{\\ae}", "\\c{c}",
    /* 0xe8 */ "\\`e", "\\'e", "\\^e", "{\\\"e}", "\\`{\\i}", "\\'{\\i}", "\\^{\\i}", "{\\\"{\\i}}",
    /* 0xf0 */ NULL, "\\~n", "\\`o", "\\'o", "\\^o", "\\~o", "\\\"o", "$\\div$",
    /* 0xf8 */ "{\\o}", "\\`u", "\\'u", "\\^u", "{\\\"u}", "\\'y", NULL, "{\\\"y}"
};

/*
 * EndNote "ANSI" bytes 129..255 (127 entries), according to page 108 of the
 * EndNote 7 Manual.  Element 0 belongs to byte 129, NOT 128; indexing this
 * table from 128 shifts every character by one and reads past the end for
 * byte 255.
 */
static const char *const endnote_tex[] = {
    /* 129 */ "\\_", NULL, NULL, NULL, "$\\ldots$", "\\dag ", "\\ddag ",
    /* 136 */ "\\symbol{94}", NULL, NULL, NULL, "{\\OE}", NULL, NULL, NULL,
    /* 144 */ NULL, NULL, NULL, "``", "''", NULL, NULL, "--",
    /* 152 */ "\\symbol{126}", NULL, NULL, NULL, "{\\oe}", NULL, NULL, "{\\\"Y}",
    /* 160 */ NULL, "!`", NULL, "\\pounds ", NULL, NULL, "$\\mid$", "\\S ",
    /* 168 */ "\\symbol{127}", "\\copyright ", NULL, "$\\ll$", "$\\neg$", NULL, NULL, "\\symbol{22}",
    /* 176 */ "\\symbol{23}", "$\\pm$", "$^2$", NULL, "'", "$\\mu$", "\\P ", "$\\cdot$",
    /* 184 */ NULL, "$^1$", "$^0$", "$\\gg$", "1/4", "1/2", "3/4", "?`",
    /* 192 */ "\\`A", "\\'A", "\\^A", "\\~A", "{\\\"A}", "{\\AA}", "{\\AE}", "\\c{C}",
    /* 200 */ "\\`E", "\\'E", "\\^E", "{\\\"E}", "\\`I", "\\'I", "\\^I", "{\\\"I}",
    /* 208 */ NULL, "\\~N", "\\`O", "\\'O", "\\^O", "\\~O", "{\\\"O}", "$\\times$",
    /* 216 */ "{\\O}", "\\`U", "\\'U", "\\^U", "{\\\"U}", "\\'Y", NULL, "\\ss ",
    /* 224 */ "\\`a", "\\'a", "\\^a", "\\~a", "{\\\"a}", "{\\aa}", "{\\ae}", "\\c{c}",
    /* 232 */ "\\`e", "\\'e", "\\^e", "{\\\"e}", "\\`{\\i}", "\\'{\\i}", "\\^{\\i}", "{\\\"{\\i}}",
    /* 240 */ NULL, "\\~n", "\\`o", "\\'o", "\\^o", "\\~o", "{\\\"o}", "$\\div$",
    /* 248 */ "{\\o}", "\\`u", "\\'u", "\\^u", "{\\\"u}", "\\'y", NULL, "{\\\"y}"
};

/*
 * EndNote "US ASCII" bytes 128..175 (48 entries).  A few isolated codes above
 * 175 are handled by usascii_extra().
 */
static const char *const usascii_tex[] = {
    /* 128 */ "\\c{C}", "{\\\"u}", "\\'e", "\\^a", "{\\\"a}", "\\`a", "{\\aa}", "\\c{c}",
    /* 136 */ "\\^e", "{\\\"e}", "\\`e", "{\\\"{\\i}}", "\\^{\\i}", "\\`{\\i}", "{\\\"A}", "{\\AA}",
    /* 144 */ "\\'E", "{\\ae}", "{\\AE}", "\\^o", "{\\\"o}", "\\`o", "\\^u", "\\`u",
    /* 152 */ "{\\\"y}", "{\\\"O}", "{\\\"U}", NULL, "\\pounds ", NULL, "P", "$f$",
    /* 160 */ "\\'a", "\\'{\\i}", "\\'o", "\\'u", "\\~n", "\\~N", NULL, NULL,
    /* 168 */ "?`", "\\_", "$\\neg$", "1/2", "1/4", "!`", "$\\ll$", "$\\gg$"
};

/*
 * Print the command line synopsis to the standard output.
 * argv0: the program name as invoked, inserted into each usage line.
 */
void usage(char *argv0)
{
    printf("usage: %s ; # decode EndNote\n", argv0);
    printf("\t %s -1 ; # decode the CP1252 Windows character set\n", argv0);
    printf("\t %s -u ; # decode US ASCII\n", argv0);
    printf("\t %s -h ; # help: print these usage lines here\n", argv0);
    printf("Reads from the standard input, writes to the standard output.\n");
}

/*
 * Bounds-checked table lookup.
 * table/count: translation table and its number of elements.
 * first:       byte value that corresponds to table[0].
 * c:           input byte as returned by getchar().
 * Returns the replacement string, or NULL if c lies outside the table or the
 * table has no replacement for it.
 */
static const char *table_lookup(const char *const *table, int count,
                                int first, int c)
{
    if (c < first || c - first >= count) {
        return NULL;
    }
    return table[c - first];
}

/*
 * Replacements for the scattered "US ASCII" codes beyond the contiguous
 * table.  Returns NULL when c is not one of them.
 */
static const char *usascii_extra(int c)
{
    switch (c) {
    case 179: return "$^3$";
    case 225: return "\\ss ";
    case 227: return "\\P ";
    case 230: return "$\\mu$";
    case 241: return "$\\pm$";
    case 246: return "$\\div$";
    case 253: return "$^2$";
    default:  return NULL;
    }
}

/* Return the TeX replacement for byte c under encoding enc, or NULL. */
static const char *translate(enum encoding enc, int c)
{
    const char *tex;

    switch (enc) {
    case ENC_CP1252:
        return table_lookup(cp1252_tex, TABLE_LEN(cp1252_tex), CP1252_FIRST, c);
    case ENC_USASCII:
        tex = table_lookup(usascii_tex, TABLE_LEN(usascii_tex), USASCII_FIRST, c);
        return tex != NULL ? tex : usascii_extra(c);
    case ENC_ENDNOTE:
    default:
        return table_lookup(endnote_tex, TABLE_LEN(endnote_tex), ENDNOTE_FIRST, c);
    }
}

/*
 * Filter: copy the standard input to the standard output, replacing 8-bit
 * characters by (La)TeX escape sequences according to the selected encoding.
 * Bytes without a known replacement are copied unchanged.
 *
 * Returns 0 on success (and after -h), 1 on an invalid option or I/O error.
 */
int main(int argc, char *argv[])
{
    int use_cp1252 = 0;
    int use_us = 0;
    enum encoding enc;
    int opt;    /* int, not char: getopt() returns -1, which an unsigned char can never equal */
    int c;

    while ((opt = getopt(argc, argv, "1hu")) != -1) {
        switch (opt) {
        case '1':
            use_cp1252 = 1;
            break;
        case 'u':
            use_us = 1;
            break;
        case 'h':
            usage(argv[0]);
            return 0;
        case '?':
        default:
            /* getopt() returns '?' and stores the offending letter in optopt. */
            fprintf(stderr, "Invalid command line option %c\n", optopt);
            usage(argv[0]);
            /* Stop here so the usage text is not mixed into converted output. */
            return 1;
        }
    }

    /* As before, -1 takes precedence when both -1 and -u are given. */
    if (use_cp1252) {
        enc = ENC_CP1252;
    } else if (use_us) {
        enc = ENC_USASCII;
    } else {
        enc = ENC_ENDNOTE;
    }

    while ((c = getchar()) != EOF) {
        const char *tex = translate(enc, c);

        if (tex != NULL) {
            fputs(tex, stdout);
        } else {
            putchar(c);
        }
    }

    if (ferror(stdin) || fflush(stdout) == EOF) {
        fprintf(stderr, "%s: I/O error\n", argv[0]);
        return 1;
    }
    return 0;
}