#! /usr/bin/env perl # Use: # $ perldoc texdirflatten # to see embedded documentation. Alternatively, you can create manual # or HTML pages using pod2man and pod2html. =pod =head1 NAME B - Collects all components of a (La)TeX file in a single output directory -- i.e., flattens its hierarchy. =head1 SYNOPSIS texdirflatten [-1|--onetex] [-f|--file input.tex] [-o outputdir] [--imgexts .ext1,.ext2,...] [--debug] [-V|--version] [-?|--help] =head1 DESCRIPTION This Perl script parses a LaTeX file recursively, scanning all child files, and collects details of any included and other data files, such as graphics and BiBTeX bibliography files. These component files, are then all put into a single directory (thus "flattening" the document's directory tree). This is useful in distributing manuscripts to collaborators or in submitting to journals. =for comment Run without parameters to see usage. =head1 OPTIONS =over =item B<--file>, B<-f> I Specifies input (La)TeX file. =item B<--onetex>, B<-1> If specified, produces a single TeX file by expanding all \input and \include commands in place. =item B<--output>, B<-o> I Directory to collect all files. B will copy each source file, graphics and bibliography file to this directory. It will be created if it is unexistent. If unspecified, it defaults to C. =item B<--imgexts> I<.ext1,.ext2[,...]> Prepends to the prioritized list of image extensions to search when trying to find the image to copy to output folder. The first file with the extension found will be copied and search stopped. Default order is C<"", ".eps", ".pdf", ".pstex"> (note that it includes files with no extension). If using pdflatex, one may want to add PDF before EPS with C<--imgexts .pdf>. Don't forget the dot before the extension! =item B<--debug> Enables copious amounts of debugging output - useful if something is going wrong. =item B<--version>, B<-V> Displays the current version number and the usage and exits. =item B<--help>, B<-?> Show this manual page. =back =head1 EXAMPLES The following example scans C in the current directory and gathers it and all its components in the C directory: $ texdirflatten -f manuscript.tex -o submit_01 =head1 CAVEATS Please take backups before running this command. No warranties whatsoever provided. You may need to run C on EPS files if you are using C: $ for i in *.eps; do epstopdf $i; done =head1 BUGS Bug reports and patches are welcome. =head1 AUTHOR Cengiz Gunay users.sf.net> =head1 COPYRIGHT AND LICENSE Copyleft 2003-2017, Cengiz Gunay This library is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =cut # TODO: # - parse BIBINPUTS environment variable and search figures and such # there as well. # - also copy bibtex style file package texdirflatten; use Getopt::Long qw(GetOptions); use Pod::Usage; use strict; use warnings; use re 'eval'; # Global my $outputdir = "flat"; # Output directory my $file = ""; # File to read my @imgexts = ("", ".eps", ".pdf", ".pstex"); # Ordered list of image extensions to search and copy my $imgexts_pre; # Prepend to @imgexts my $onetex = 0; # Indicates all input and include commands be expanded my $version = "1.3"; # 2017-02-02 my $DEBUG = 0; # Enable debugging output my $help; # parse helpers my %texfiles; # GetOpt::Long:: my $result = GetOptions ("output|o=s" => \$outputdir, "onetex|1" => \$onetex, "file|f=s" => \$file, "imgexts=s" => \$imgexts_pre, "debug" => \$DEBUG, "version|V" => sub { print "texdirflatten version $version\n"; pod2usage( -section => "SYNOPSIS"); }, "help|?" => \$help ); # regexp for none or an even number of backslashes (\): my $unescaped = '(? 2 ) if ($help); pod2usage( -section => "SYNOPSIS") if ($file eq ""); if ($imgexts_pre) { unshift(@imgexts, split(/,/, $imgexts_pre)); print "imgexts: ". join(",", @imgexts) . "\n" if $DEBUG; } system "mkdir $outputdir" if (not -d $outputdir); # start recursing parseTeX($file, "", popfile($file), 0); sub parseTeX { my $file = shift; my $inputdir = shift; my $outfile = "$outputdir/" . shift; my $level = shift; # Add a proper TeX suffix if it's missing $outfile .= '.tex' if ! ($outfile =~ /\.tex\s*$/i ); my @flats; my @longs; my $popped; open my $FILE, $file or die "Cannot find file to read $file\n"; # Read to whole file first, and then scan for regexps my $contents = ""; while(<$FILE>) { $contents = $contents . $_; } close $FILE; ## First, remove comments so that input commands inside are ignored. # Also remove trailing newline and all whitespace at the beginning # of the following line like TeX does it. Only for %'s following # the proper backslash pattern. $contents =~ s/( $unescaped )\%.*?\n[ \t]*/$1/gx; # Look for graphics include statement if ($contents =~ /\\input\@path\{\{([^}]*)\}\}/) { $inputdir = $1; print "Found input directory: $inputdir\n" if $DEBUG; } print "Parsing '$file' in directory '$inputdir'\n"; # Default value #$inputdir = './' if (-z $inputdir); # three cases: graphics, inputs and bibs # an \includegraphics statement @::flats = (); @::longs = (); #$::popped = ""; $contents =~ s/\\includegraphics(\[[^\]]*\])?\{([^}]*)\} (?{ $::popped = flattenfilename($2); push @::longs, $2; push @::flats, $::popped }) /\\includegraphics$1\{$::popped\}/gmx; if ($#::flats > -1) { my ($long, $flat); foreach $long (@::longs) { $flat = shift @::flats; # convert LyX directory dots $long =~ s/\\lyxdot /./g; $flat =~ s/\\lyxdot /./g; print "Looking for graphics: '$long'\n" if $DEBUG; my @dirs = ("", "$inputdir", $inputdir . '../figures/'); seekfile($long, $flat, \@imgexts, ["", "$inputdir", $inputdir . '../figures/']); } } # Must do bibs before input commands because, when expanded, they # add new bibs inside. replacebibs(\$contents, $inputdir); # an \input or \include statement @flats = (); @longs = (); $contents =~ s/\\(input|include)\{([^}]*)\}/"\\${1}\{" . ($popped = flattenfilename($2) and push @longs, $2 and push @flats,$popped and $popped) . "\}"/egm; if ($#flats > -1) { #print "Found " . scalar @longs . " items on line: '@flats'.\n"; my ($long, $flat); foreach $long (@longs) { $flat = shift @flats; print "Found Input/Include: '$long'\n" if $DEBUG; # recurse to parse that tex file unless it's done already if (not exists $texfiles{$flat}) { if (! -r $long ) { if (-r $inputdir . $long) { $long = $inputdir . $long; } elsif (-r "$long.tex") { $long .= '.tex'; } } $texfiles{$flat} = $long; my $newcontents = parseTeX($long, basedir($long), $flat, $level + 1); #later if ( $onetex ) { # If specified, expand inclusion directives die "Failed to find input/include to expand for '$flat'!\n" if (! ( $contents =~ s/\\(input|include) \{ \Q$flat\E \} (?{ print "===Found \\$1\{$flat\} to expand.\n" if $DEBUG; }) / ($1 eq 'include' ? "\\clearpage\n" : '') . "$newcontents"/gxem ) ); } } } } #print "Writing to \"$outfile\"\n" . # "----------------------------------------\n" . # "$contents\n" . # "----------------------------------------\n"; if (! $onetex || $level == 0) { # do output at zero recurse level # open flat output file open my $COPY, ">$outfile" or die "Cannot write file $outfile\n"; # write to flat copy print $COPY $contents; close $COPY; } else { return $contents; } } # Look for file to copy sub seekfile { my ($file, $dest, $exts, $dirs) = @_; my @exts = @$exts; my @dirs = @$dirs; OUT: foreach my $dir (@dirs) { foreach my $ext (@exts) { my $tryfile = "$dir$file$ext"; if (-r $tryfile) { # add the extension to $dest only if it hasn't have any dots #(! ($dest =~ /\./)) ? $ext : ""; => better not to have this my $dext = $ext; if (system("cp $tryfile $outputdir/$dest$dext") != 0) { die "Cannot copy $tryfile!\n"; } # If we found it, then get out of the loop print "Copied: '$tryfile' (ext: $ext) -> '$dest$dext'\n" if $DEBUG; last OUT; } } } } # Return everything after the last / sub popfile { my $file = shift (@_); my @a = split /\//, $file; #print "Popping $file... split: @a ($#a), pop: " . pop(@a) . "\n"; pop @a; } # Make a new file by flattening directory names sub flattenfilename { local $_ = shift; # convert slashes to pluses, dots to Xs tr|/|+|; tr|\.|X|; # add a unique number at the beginning so files don't start with a # dot $_ = sprintf("%03d_", ++$::filecount) . "$_"; print "===Flattened file to '$_'\n" if $DEBUG; return $_; } sub basedir { my $file = shift (@_); my @a = split /\//, $file; #print "Popping $file... split: @a ($#a), pop: " . pop(@a) . "\n"; pop @a; # remove file part my $basedir = join('/', @a); # combine the rest with /'s $basedir .= '/' if (! $basedir =~ /^$/); # add a trailing / if not empty $basedir; } # get the bibliography files sub replacebibs { my ($contentsref, $inputdir) = @_; #my $contents = $$contentsref; my $popped; #print "bib contents: $contents\n"; local @::flats = (); local @::longs = (); local @::refs; $" = ','; # set list item separator # search for \bibliography statements $$contentsref =~ s/\\bibliography(\[[^\]]*\])? { (?{ @::refs = (); #print "Found bibliography!\n"; #initialize ref list }) (?: \s* ([^}, ]+)(?=[,}]),? (?{ # for each word do: #local (@_flats, @_longs, @_refs); #print "Word: \"$+\", "; $popped = flattenfilename($+); push @::longs, $+; push @::flats,$popped; push @::refs, $popped; }) )+ \s* } (?{ #print "\n+++ $#::flats; $#::refs; $#::longs\n"; #push @::flats, @_flats; push @longs, @_longs; push @refs, @_refs; #print "Refs: @::refs\n"; })/"\\bibliography" . ($1 || "") . "{@::refs}"/egmx;# && #print "bib contents: $$contentsref\n"; #print "\n*** $#::flats; $#::refs; $#::longs\n"; $" = ' '; # restore list item separator if ($#::flats > -1) { #print "Found " . scalar @::longs . " items on line: '@::flats'.\n"; my ($long, $flat); foreach $long (@::longs) { $flat = shift @::flats; print "Found bib. file: '$long'\n" if $DEBUG; # recurse to parse that tex file unless it's done already # convert LyX directory dots $long =~ s/\\lyxdot /./g; seekfile($long, "$flat", [".bib"], ["", "$inputdir"]); } } }