#!/usr/bin/perl -w # $Id: exceltex 155 2006-04-30 11:25:16Z pez $ # # helper script for the exceltex latex package. reads contents of # M$ Excel files using the Spreadsheet::ParseExcel perl module. # # (c) 2004-2006 by Hans-Peter Doerr # # exceltex is free software. you can redistribute or modify it under # the terms of the GNU GENERAL PUBLIC LICENSE Version 2. See COPYING for # details. use strict; my $VERSION = "0.5.1"; my $DEBUG = 0; # this is the default encoding for all files written by exceltex my $ENCODING = "latin1"; # some lookup tables my %L2N; { my $i = 1; for ('A' .. 'Z') { $L2N{$_} = $i; ++$i } } my @N2L; { for ('A' .. 'Z') { push(@N2L, $_) } } #XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX package util; #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub DEBUG { my ($msg) = @_; if ($DEBUG) { chomp($msg); print STDERR "DEBUG: $msg\n"; } } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub cell2xy { my ($cell) = @_; $cell = uc($cell); return undef if (! ($cell =~ m/^([A-Z]+)([0-9]+)$/)); # return (column, row) return (col2x($1), $2-1); } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub xy2cell { my ($x, $y) = @_; # is is likely to be slow as hell return undef if (! ($x =~ m/^[0-9]+$/g && $y =~ m/^[0-9]+$/g)); ++$y; return x2col($x) . "$y"; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub col2x { my ($l) = @_; my $len = length($l); my @S = split //, $l; my $n = 0; if ($len == 1) { $n = $L2N{$l} - 1 } if ($len == 2) { $n = 26 * $L2N{$S[0]} + $L2N{$S[1]} - 1 } return $n; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub x2col { my ($n) = @_; return undef if (! $n =~ m/[0-9]+/); my $l; if ($n > 25) { $l = $N2L[int($n/26)-1] . $N2L[$n % 26] } else { $l = $N2L[$n] } return $l; } #XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX package main; use strict; use Getopt::Long; use Encode; my $Workbooks = {}; # workbook cache my $nItems = 0; # number of processed items my $nCells = 0; # number of processed cells my $nTabs = 0; # number of processed tabs my $Warnings = 0; # number of warnings my $Jobname = ""; # jobname of .tex file my $inputDir = ""; # data goes here my $indexFile = ""; my $optFormat = 1; # use formatting? my $optColor = 1; # use colors? my $optComma = 0; # use comma for decimals my $optFmtSci = 0; # reformat scientific numbers? my $ignWarn = 0; # ignore warnings? my $euroSymbol = "EUR"; # use this for the EUR currency symbol my $cleanup = 0; # cleanup inputDir my $EXTNS = "excltx"; # read global and local config files # will be overwritten by commandline options readConfigFile("$ENV{HOME}/.exceltexrc") if (-f "$ENV{HOME}/.exceltexrc"); readConfigFile(".exceltexrc") if (-f ".exceltexrc"); # handle commandline switches GetOptions('format!' => \$optFormat, 'color!' => \$optColor, 'comma!' => \$optComma, 'reformat-sn!' => \$optFmtSci, 'p|plain' => sub{$optFormat=0; $optColor=0; $optFmtSci=0;}, 'd|debug' => \$DEBUG, 'c|cleanup' => \$cleanup, 'e|encoding=s' => \$ENCODING, 'o|euro-symbol=s' => \$euroSymbol, 'h|help' => sub{ usage(); exit 0;}, 'v|version' => sub{ print "exceltex, version $VERSION\n"; exit 0}, 'w|ignore-warnings' => \$ignWarn, ) || exit 2; if (@ARGV == 0) { usage(); exit 2; } # encoding of output files $ENCODING = lc($ENCODING); if (! ($ENCODING eq "latin1" || $ENCODING eq "iso-8859-1" || $ENCODING eq "latin9" || $ENCODING eq "iso-8859-15" || $ENCODING eq "utf8")) { print STDERR "Unsupported encoding: $ENCODING\n"; print STDERR "currently supported: iso-8859-1 (latin1), iso-8859-15 " . "(latin9), utf8\n"; exit_error(); } print "exceltex helper script, version $VERSION\n"; # get jobname from first argument if ($ARGV[0] =~ m/(\w+)\.tex/g) { $Jobname = $1; } else { $Jobname = $ARGV[0]; } if (! defined($Jobname)) { print STDERR "can't determine jobname.\n"; exit_error(); } util::DEBUG("jobname is: $Jobname\n"); # data from spreadsheet goes here $inputDir = $Jobname . '-' . $EXTNS; $indexFile = $Jobname . '.' . $EXTNS; # cleanup? if ($cleanup) { cleanup(); exit 0; } if (! -f $indexFile) { print STDERR "can't read index '$indexFile: $!.\n"; print STDERR "run latex first to create the index.\n"; exit_error(); } # create data dir if (! -d $inputDir) { if (! mkdir $inputDir) { print STDERR "can't create data directory '$inputDir': $!\n"; exit_error(); } } # rock'n'roll processIndex($indexFile); # exiting message if ($Warnings) { print STDERR "exceltex finished with $nItems items ($nCells cells " . "and $nTabs tabulars)\n"; print STDERR "*** not all items proccessed fine, there were " . "$Warnings Warnings\n"; exit 0 if ($ignWarn); exit 1; } else { print "exceltex successfully finished with $nItems items ($nCells cells " . "and $nTabs tabulars)\n"; } exit 0; #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub exit_error { print STDERR "aborting.\n"; exit 2; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub usage { print <<__EOF; exceltex version $VERSION usage: exceltex [options] file[.tex] options: -h|--help show this help -v|--version show program version and exit -c|--cleanup remove temporary files created by previous runs -w|--ignore-warnings exit with status zero, even on warnings -o|--euro-symbol=sym use sym for displaying the euro currency symbol [EUR] -e|--encoding=enc set encoding to enc. Currently supported encodings are: latin1, latin9, utf8 [latin1] --[no]reformat-sn (dont) reformat scientific numbers to A X 10^B notation --[no]comma (dont) use comma for decimal numbers --[no]format (dont) use formatting --[no]nocolor (dont) use colors -p|--plain shorthand for --noformat --nocolor --noreformat-sn __EOF } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub readConfigFile { my ($file) = @_; open(I, "<$file") || return; while() { chomp(); my ($key, $val) = split('=', $_); if ($key eq "encoding") { $ENCODING = $val } elsif ($key eq "reformat-sn") { $optFmtSci = $val } elsif ($key eq "color") { $optColor = $val } elsif ($key eq "format") { $optFormat = $val } elsif ($key eq "comma") { $optComma = $val } elsif ($key eq "euro-symbol") { $euroSymbol = $val } elsif ($key eq "plain") { $optColor=0; $optFormat=0} else { print STDERR "unsupported config option in '$file': " . "$key, ignoring\n" } } close(I); } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub cleanup { if (-f $indexFile) { print "removing $indexFile\n"; unlink($indexFile); } return if (! defined($inputDir)); return if (! -d $inputDir); print "cleaning up $inputDir/\n"; unlink <$inputDir/c-*>; unlink <$inputDir/t-*>; } # read index file, extract data, write to files #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub processIndex { my ($index) = @_; if (! open(I, "< $index")) { print STDERR "can't read index '$index': $!. Run latex first?\n"; exit_error(); } my $cellrefs = 0; while() { chomp($_); # if cellrefs is switched on, we store data in files with name # inputDir/[c,t]-file!sheet!cell if ($. == 1 && $_ eq ";cellrefs") { print "using cell referencing by name\n"; $cellrefs = 1; } # ignore lines beginning with ; next if ($_ =~ m/^;/); my $type; my $idx = undef; my $source; my @flags; if ($cellrefs == 1) {($type, $source, @flags) = split(':', $_)} else {($type, $idx, $source, @flags) = split(':', $_)} # parse flags # not used yet if (@flags != 0) { my $ok = 0; foreach (@flags) { $ok = 1 if ($_ eq 'plain'); } print STDERR "Index '$indexFile' corrupt at line $.: bad flag\n"; exit_error(); } if (! defined($source)) { print STDERR "Index '$indexFile' corrupt at line $.\n"; exit_error(); } my ($f, $s, $c1, $c2) = parseSource($source, $type); if (! $s) { print STDERR "Index '$indexFile' corrupt at line $.\n"; exit_error(); } my $string = ""; if ($type eq 't') { $string = getTabString($f, $s, $c1, $c2); } elsif ($type eq 'c') { $string = getCellString($f, $s, $c1); } else { print STDERR "Index '$indexFile' corrupt at line $.: " . "unknown $type '$type'\n"; exit_error(); } # no need to write empty data next if (! defined($string)); $string = decode_utf8($string, 0); $string = encode($ENCODING, $string, 0); if (! writeBuf($type, $idx, $string, $source)) { print STDERR "can't write cell data: $!.\n"; exit_error(); } } close(I); return 1; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub getFile { my $file = $Jobname . ".xls"; return $file if (-f $file); return undef; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub parseSource { my ($string, $type) = @_; my @tokens = split('!', $string); my $file; my $sheet; my $cel1; my $cel2; if ($type eq 'c') { if (@tokens == 2) { $file = getFile(); $sheet = $tokens[0]; $cel1 = $tokens[1]; } elsif (@tokens == 3) { $file = $tokens[0]; $sheet = $tokens[1]; $cel1 = $tokens[2]; } else { return undef; } } elsif ($type eq 't') { if (@tokens == 3) { $file = getFile(); $sheet = $tokens[0]; $cel1 = $tokens[1]; $cel2 = $tokens[2]; } elsif (@tokens == 4) { $file = $tokens[0]; $sheet = $tokens[1]; $cel1 = $tokens[2]; $cel2 = $tokens[3]; } else { return undef; } } else { return (undef, undef, undef, undef); } return ($file, $sheet, $cel1, $cel2); } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub writeBuf { my ($type, $idx, $string, $source) = @_; return 1 if ($string eq ""); my $wfile; if (! defined($idx)) { $wfile = $inputDir . "/" . $type . "-" . $source } else { $wfile = $inputDir . "/" . $type . "-" . $idx } return 0 if (! open (O, ">$wfile")); return 0 if (! print O $string); return 0 if (! close(O)); return 1; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub getWorkbook { my ($file) = @_; if (! -f $file) { print STDERR "can't read '$file': it does not exists.\n"; exit_error(); } if (! defined($Workbooks->{$file})) { my $type = "xls"; if ($file =~ m/^\w+\.(\w+)$/g) { $type = $1 }; if ($type eq "xls") { print("reading '$file'\n"); # readExcel->new() will exit, if anything bad happens $Workbooks->{$file} = readExcel->new($file); } } return $Workbooks->{$file}; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub texifyCell { my ($cel) = @_; my $MATH = 0; # wether to enclose string in $$ for math mode my $string = $cel->{value}; return "" if (! defined($string) || $string eq ""); # escape latex control characters $string =~ s/\{/\\\{/g; $string =~ s/\}/\\\}/g; # escape backslash if not followed by { or } $string =~ s/\\(?!({|}))/\\ensuremath\{\\backslash\}/g; $string =~ s/#/\\#/g; $string =~ s/\$/\\\$/g; $string =~ s/%/\\%/g; $string =~ s/&/\\&/g; $string =~ s/~/\\~/g; $string =~ s/_/\\_/g; $string =~ s/\^/\{\\verb ^ \}/g; # FIXME: needs more testing! # hack for EUR sign, works for me but is definetly ugly :( # will need some information on how exactly this # user-entered formatting stuff works (in excel) $string =~ s/\[.+\xac-{0,1}\d{1,3}\]/$euroSymbol/g; # EUR in currency cells $string =~ s/\[.+EUR\]/$euroSymbol/g; # " $string =~ s/.{1,1}\xac/$euroSymbol/g; # single EUR sign # use decimal comma if requested $string =~ s/(\d)\.(\d)/$1,$2/g if ($optComma == 1); # reformat scientific numbers if requested if ($optFmtSci == 1) { $MATH = 1 if ($string =~ s/([0-9]+)[eE]([+-][0-9]+)/ simplifyScientificNumber($1, $2)/ge); } # apply cell formatting and colors if not requested otherweise $string = applyFormatting($string, $cel, $MATH) if($optFormat == 1); # same goes for colors $string = applyColor($string, $cel) if ($optColor == 1); util::DEBUG " texified => '$string'"; return "\$$string\$" if ($MATH); return $string; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub applyFormatting { my ($s, $c, $m) = @_; # font bold? if ($c->{bold} && $m) { $s = "\\mathbf{$s}"; } elsif ($c->{bold}) { $s = "\\textbf{$s}"; } # font italic? if ($c->{italic} && $m) { $s = "\\mathit{$s}"; } elsif ($c->{italic}) { $s = "\\textit{$s}"; } # underlined? striked out? $s = "\\uline{$s}" if ($c->{underline}); $s = "\\sout{$s}" if ($c->{strikeout}); return $s; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub applyColor { my ($s, $c) = @_; # only evaluate color if it is not black anyway if ($c->{color} != 8) { # convert rrggbb hex color triplet to float rgb $s = "\\textcolor[rgb]{" . colormap($c->{color}) . "}{$s}"; } return $s; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub colormap { my ($color) = @_; util::DEBUG(" color => $color\n"); # get rgb triplet as floats return join(', ', map {hex($_)/255.0} unpack('a2a2a2', Spreadsheet::ParseExcel->ColorIdxToRGB($color))); } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub simplifyScientificNumber { my ($f, $e) = @_; # extract signum my $s = substr($e, 0, 1); $s = '' if ($s eq "+"); # remove signum & trailing zeros $e =~ s/^[+-]*//; $e =~ s/^0*(\d+)/$1/; return $f if ($e == 0); return sprintf("%s \\times 10^{%s%s}", $f, $s, $e); } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub warning { my ($msg) = @_; chomp($msg); print STDERR "warning: " . $msg . "\n"; ++$Warnings; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub getCellString { my ($file, $sheet, $cell) = @_; util::DEBUG("get $file -> $sheet -> $cell\n"); my $w = getWorkbook($file); return undef if (! defined($w)); my ($x, $y) = util::cell2xy($cell); if (! defined($x) || ! defined($y)) { warning("cell '$cell' is invalid\n"); return undef; } my $c = $w->getCell($sheet, $x, $y); warning($w->error()) if (! defined($c->{value})); ++$nItems; ++$nCells; return texifyCell($c); } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub getTabString { my ($file, $sheet, $cell1, $cell2) = @_; # cell1 is upper-left and cell2 is lower-right corner # of the selected cell-range my ($lfx, $upy) = util::cell2xy($cell1); my ($rtx, $loy) = util::cell2xy($cell2); if (! defined($lfx)) { warning "cell '$cell1' is invalid\n"; return undef; } if (! defined($rtx)) { warning "cell '$cell2' is invlaid\n"; return undef; } my $book = getWorkbook($file); my $buf; my $empty = 1; for (my $y = $upy; $y <= $loy; ++$y) { for (my $x = $lfx; $x <= $rtx; ++$x) { my $c = $book->getCell($sheet, $x, $y); warning($book->error()) if (! defined($c)); $empty = 0 if(defined($c->{value})); $buf .= texifyCell($c); $buf .= " & " if ($x < $rtx); } $buf .= " \\\\\n"; } warning("table $cell1!$cell2 is empty\n") if ($empty); ++$nItems; ++$nTabs; return $buf; } #XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX package readExcel; use strict; use Spreadsheet::ParseExcel; use Encode; #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub new { my ($class, $file) = @_; my $self = {}; $self->{ERROR} = ""; if (! -f $file) { print STDERR "can't read '$file', it does not exists.\n"; exit_error(); } $self->{book} = Spreadsheet::ParseExcel::Workbook->Parse($file); if (! defined($self->{book})) { print STDERR "SpreadSheet::ParseExcel Error: " . "can't parse '$file' : $!\n"; exit_error(); } $self->{file} = $file; bless $self, $class; return $self; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub getCell { my ($self, $sheet, $x, $y) = @_; my $ws = $self->{book}->Worksheet($sheet); if (! defined($ws)) { $self->_setError("sheet '$sheet' does not exist"); return undef; } my $cell = $ws->Cell($y, $x); if(! $cell) { $self->_setError("cell '$sheet!" . util::xy2cell($x, $y) . "' is emtpy " . "or out of range"); return { value => undef } } # we're internally operating with utf8, for now my $value = encode_utf8($cell->Value()); util::DEBUG " value => '$value'"; # return hash containing value and formatting return { value => $value, bold => $cell->{Format}->{Font}->{Bold}, italic => $cell->{Format}->{Font}->{Italic}, color => $cell->{Format}->{Font}->{Color}, underline => $cell->{Format}->{Font}->{Underline}, strikeout => $cell->{Format}->{Font}->{Strikeout} }; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub _setError { my ($self, $err) = @_; $self->{ERROR} = $err; } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: sub error { my $self = shift; my $err = $self->{ERROR}; $self->{ERROR} = undef; return $err; }