#! /usr/bin/env perl

##########################################################
# Bundle all the files needed to build a LaTeX document. #
# By Scott Pakin                                         #
##########################################################

########################################################################
# bundledoc                                                            #
# Copyright (C) 2018-2019 Scott Pakin                                  #
#                                                                      #
# This program may be distributed and/or modified under the conditions #
# of the LaTeX Project Public License, either version 1.3c of this     #
# license or (at your option) any later version.                       #
#                                                                      #
# The latest version of this license is in:                            #
#                                                                      #
#     http://www.latex-project.org/lppl.txt                            #
#                                                                      #
# and version 1.3c or later is part of all distributions of LaTeX      #
# version 2008/05/04 or later.                                         #
#                                                                      #
# This program consists of the file bundledoc and all the files listed #
# in the Files section of the associated README file.                  #
########################################################################

use 5.006;                  # Fail gracefully if we're not using Perl v5.6.0+.
our $VERSION = "3.4";       # Specify the version of bundledoc.
use File::Basename;
use File::Copy;
use File::Spec::Functions qw(abs2rel catfile devnull rel2abs rootdir updir);
use File::Path;
use File::Temp qw(tempdir);
use Getopt::Long;
use Pod::Usage;
use warnings;
use strict;

######################################################################

# Variables the user can override with a configuration file.  Each value
# is a shell command (or command suffix) in which $BDINPUTS/$BDBASE (or
# %BDINPUTS%/%BDBASE%) are substituted before execution.
# DEFAULTS: Linux + kpathsea
my %uservariable;
$uservariable{"bundle"} = '(tar -cvf - $BDINPUTS | gzip --best > $BDBASE.tar.gz)';
$uservariable{"find"}   = 'kpsewhich -progname=latex $BDINPUTS';
$uservariable{"sink"}   = '> ' . devnull() . ' 2>&1';

# OS path separator  (Is there a better way to get this?)
my $pathsep = catfile ("", "");

# File that lists original directory names
my $manifest = "MANIFEST";

# Cleaned-up name of this Perl script
my $progname = basename($0);

# Other global variables
my ($verbose, $keepdirs, $depfile, $localonly);   # Taken from the command line
my (@exclude_files, @include_files, @listdeps);   # Taken from the command line
my ($texfile, $docdirname);                       # Derived from $depfile
my %subdirs;                # Map from a qualified name to its subdirectory

######################################################################

# Find a file in the LaTeX hierarchy and fully qualify it,
# or abort if we can't.
#
# Argument: a file name as it appears in the dependency file (or on the
#           command line).
# Returns:  the absolute path printed by the user's "find" command.
# Dies:     if the "find" command produces no output.
sub qualifyname ($)
{
    my $filename = $_[0];

    # Quote names containing whitespace so the shell sees a single word.
    $filename = "\"$filename\""
        if $filename =~ /\s/ && substr($filename, 0, 1) ne '"';

    # Expand both Unix-style ($VAR) and Windows-style (%VAR%) references.
    my $command = $uservariable{find};
    $command =~ s/[\$\%]BDINPUTS\%?/$filename/g;
    $command =~ s/[\$\%]BDBASE\%?/$ENV{"BDBASE"}/g;

    my $qualified = `$command`
        || die "${progname}: \"$uservariable{find}\" (BDINPUTS=\"$_[0]\") failed to find $_[0]\n";
    chomp $qualified;
    return rel2abs($qualified);
}


# Execute a command through the shell and abort the program if it fails.
# Unless --verbose is in effect, the user's "sink" suffix is appended so
# that the command's output is discarded.
#
# Argument: a command template that may reference BDINPUTS and BDBASE.
# Dies:     if the command cannot be started, is killed by a signal, or
#           exits with a nonzero status.
sub executecmd ($)
{
    my $command = ($verbose ? $_[0] : "$_[0] $uservariable{sink}");
    $command =~ s/[\$\%]BDINPUTS\%?/$ENV{"BDINPUTS"}/g;
    $command =~ s/[\$\%]BDBASE\%?/$ENV{"BDBASE"}/g;
    print "EXECUTING $command WITH BDINPUTS = $ENV{BDINPUTS}\n" if $verbose;

    my $retval;
    if ($pathsep eq "\\") {
        # Dirty trick to work around idiotic "\" --> "/" conversion
        $retval = system qq("$command");
    }
    else {
        $retval = system $command;
    }
    return if $retval == 0;

    # $! is meaningful only when system() itself failed (return value -1);
    # otherwise the wait status has to be decoded.
    my $reason;
    if ($retval == -1) {
        $reason = "failed to execute ($!)";
    }
    elsif ($retval & 127) {
        $reason = sprintf "died with signal %d", $retval & 127;
    }
    else {
        $reason = sprintf "exited with status %d", $retval >> 8;
    }
    die "${progname}: command $reason.  Failing command was:\n\t$command\n";
}


# Append ".dep" to the dependency file if necessary.
#
# Argument: the dependency-file name given on the command line.
# Returns:  the name to open: the argument itself if it exists or already
#           ends in ".dep", else "<argument>.dep" if that exists, else the
#           argument unchanged.
sub find_dependency_file ($)
{
    my $depfile = $_[0];

    # Return the file as is if it exists.
    return $depfile if -e $depfile || $depfile =~ /\.dep$/;

    # Return the file with ".dep" appended if that exists.
    return "${depfile}.dep" if -e "${depfile}.dep";

    # Give up and return the file as specified.
    return $depfile;
}


# Try hard to find a configuration file.  Return the name of the actual
# file we located, or the name as given if nothing was found.
sub find_config_file ($)
{
    my $configfile = $_[0];
    no warnings;   # Don't complain if kpsewhich isn't found.

    # Attempt 1: See if the file exists as named.
    return $configfile if -e $configfile;

    # Attempt 2: Append ".cfg" if it's not already there.
    my $config_cfg = $configfile;
    if ($config_cfg !~ /\.cfg$/) {
        $config_cfg = "${configfile}.cfg";
        return $config_cfg if -e $config_cfg;
    }

    # Attempt 3: Use kpsewhich to find the file with ".cfg" appended.  (We
    # don't search for it without ".cfg" because kpsewhich may find a
    # version with ".tex" appended.)
    my $kpfound = `kpsewhich $config_cfg`;
    if (defined $kpfound) {
        chomp $kpfound;
        return $kpfound if -e $kpfound;
    }

    # Give up.  Return the file name as specified.
    return $configfile;
}


# Read a configuration file, and set %uservariable accordingly.
#
# Argument: the configuration-file name given with --config.
# Dies:     if the file cannot be opened.
# Lines that are neither comments, blank, nor of the form "name: value"
# are reported with a warning and skipped.
sub process_config_file ($)
{
    my $configfile = find_config_file($_[0]);
    printf("CONFIGURATION FILE: %s\n", rel2abs($configfile)) if $verbose;

    open (my $cfg_fh, "<", $configfile) || do {
        my $open_error = "$!";     # Save before -e can overwrite $!.
        my $configlist = $configfile;
        if (! -e $configfile) {
            # Error is "file not found".  List the files we considered.
            $configlist .= "; also considered ${configfile}.cfg"
                if $configfile !~ /\.cfg$/;
        }
        die "${progname}: $open_error ($configlist)\n";
    };

    my $prevline = "";       # Text accumulated from continuation lines
    while (my $oneline = <$cfg_fh>) {
        # Read a line and trim it.
        chomp $oneline;
        $oneline =~ s/^\s+//;
        $oneline =~ s/\s+$//;

        # Determine if we should process the line now, later, or never.
        next if $oneline =~ /^\#.*$/;          # Discard comments.
        if ($oneline =~ m|^(.*)\\$|) {         # Handle line continuations.
            $prevline .= $1;
            next;
        }
        $oneline = $prevline . $oneline;
        $prevline = "";
        next if $oneline =~ /^\s*$/;           # Discard blank lines.

        # Parse the line.  Only touch %uservariable on a successful match;
        # otherwise $1 and $2 would still hold values from an earlier match.
        if ($oneline =~ /^(\w+)\s*:\s*(.*)$/) {
            $uservariable{lc($1)} = $2;
        }
        else {
            warn "${progname}: ignoring malformed line $. of $configfile: $oneline\n";
        }
    }
    close $cfg_fh;
}


# Find all dependencies listed in the input file.
#
# Reads $depfile, qualifies every package, class, and file entry, then
# applies --localonly, --exclude, and --include (in that order), removes
# duplicates, and honors --listdeps.
#
# Returns: the sorted list of fully qualified file names to archive.
# Side effects: fills in %subdirs; may print the list and exit if
#               --listdeps=only was given.
sub find_dependencies ()
{
    open (my $dep_fh, "<", $depfile) || die "${progname}: $! ($depfile)\n";
    my $braced = "\\{([^\\}]*)\\}";   # Regular expression for a braced name

    # Extension to append for each file type we know how to locate.
    # NOTE: The following file types are currently ignored:
    #
    #   * application
    #   * tfm
    #   * format
    my %suffix_for = (package => ".sty",
                      class   => ".cls",
                      file    => "");

    # The main .tex file is not listed in the dependency file, so add it.
    my @dependencies = (qualifyname($texfile));

    while (my $oneline = <$dep_fh>) {
        # Parse an input line into its component fields (file type, file
        # name, and version information, the last of which is unused).
        next if $oneline !~ /^\s*\*$braced\s*$braced\s*$braced\s*$/;
        my ($filetype, $filename) = ($1, $2);
        next unless exists $suffix_for{$filetype};

        # Add the qualified name to the list of dependencies.  In
        # addition, if it is in a proper subdirectory, add an entry to
        # the subdirs hash.
        my $subdir = dirname($filename);
        my $qname = qualifyname($filename . $suffix_for{$filetype});
        push @dependencies, $qname;
        $subdirs{$qname} = $subdir unless $subdir eq ".";
    }
    close $dep_fh;
    @dependencies = sort @dependencies;

    # If --localonly was specified, discard dependencies that are not
    # in the same directory as the .tex file (or a subdirectory
    # thereof).
    if ($localonly) {
        # Terminate the prefix with a path separator so that a sibling
        # directory sharing a name prefix (e.g., "doc2" vs. "doc") is not
        # mistaken for a subdirectory.
        my $texdir = dirname(qualifyname($texfile));
        $texdir .= $pathsep
            unless substr($texdir, -length($pathsep)) eq $pathsep;
        my $lentexdir = length $texdir;

        my @localdeps = grep { substr($_, 0, $lentexdir) eq $texdir } @dependencies;
        my $numdeleted = @dependencies - @localdeps;
        @dependencies = @localdeps;
        print "OMITTING $numdeleted non-local file(s)\n" if $verbose;
    }

    # If --exclude was specified, discard all dependencies containing
    # excluded text as a substring.
    my $numexcluded = 0;
    foreach my $excluded (@exclude_files) {
        # An empty string would match every file; ignore it.
        next if $excluded eq "";
        my @kept = grep { index($_, $excluded) < 0 } @dependencies;
        $numexcluded += @dependencies - @kept;
        @dependencies = @kept;
    }
    print "OMITTING $numexcluded excluded file(s)\n" if $verbose && $numexcluded;

    # If --include was specified, include the corresponding files.
    my $numextras = 0;
    foreach my $include_glob (@include_files) {
        foreach my $include_file (glob $include_glob) {
            push @dependencies, qualifyname($include_file);
            $numextras++;
        }
    }
    print "INCLUDING $numextras additional file(s)\n" if $verbose && $numextras;

    # Remove duplicates.
    my %uniquedeps = map { $_ => 1 } @dependencies;
    my $numdups = @dependencies - keys %uniquedeps;
    print "REMOVING $numdups duplicate file(s)\n" if $verbose && $numdups;
    @dependencies = sort keys %uniquedeps;

    # If --listdeps was specified, output the list of dependencies.
    if (@listdeps) {
        @dependencies = map {abs2rel($_)} @dependencies if grep /^rel$/, @listdeps;
        print join("\n", @dependencies), "\n";
        exit 0 if grep /^only$/, @listdeps;
    }

    # Return the list of dependencies.
    return @dependencies;
}

######################################################################

# Parse and process the command-line options.
my $showhelp = "";
$keepdirs = 0;
$verbose = 0;
$localonly = 0;
my $configfile;
Getopt::Long::Configure("bundling");
GetOptions ('texfile=s'   => \$texfile,
            'directory=s' => \$docdirname,
            'manifest:s'  => \$manifest,
            'keepdirs!'   => \$keepdirs,
            'localonly!'  => \$localonly,
            'exclude=s'   => \@exclude_files,
            'include=s'   => \@include_files,
            'config=s'    => \$configfile,
            'v|verbose!'  => \$verbose,
            'listdeps=s'  => \@listdeps,
            # Reporting the version is a successful outcome, so exit with 0.
            'V|version'   => sub { print "bundledoc $VERSION\n"; exit 0 },
            'help'        => \$showhelp)
    || pod2usage (-exitval => 1, -verbose => 0);
pod2usage (-exitval => 0, -verbose => 1) if $showhelp && $verbose;
pod2usage (-message => "(For more detailed help, enter \"$0 --help --verbose\".)",
           -exitval => 0,
           -verbose => 0)
    if $showhelp;
pod2usage (-message => "${progname}: Too few arguments",
           -exitval => 1,
           -verbose => 0)
    if !@ARGV;

$depfile = find_dependency_file(shift @ARGV);   # Dependencies from snapshot.sty
process_config_file($configfile) if defined $configfile;
($texfile = $depfile) =~ s/\.[^.]*$/.tex/ if !$texfile;   # Main LaTeX source file
($docdirname = basename($depfile)) =~ s/\.[^.]*$// if !$docdirname;   # Name to use for the document directory
($ENV{"BDBASE"} = rel2abs($depfile)) =~ s/\.[^.]*$//;   # May be needed by user-defined variables
print "BDBASE = '$ENV{BDBASE}'\n" if $verbose;
$ENV{"BDBASE"} = "\"$ENV{BDBASE}\"";

# Expand and sanity check @listdeps.
@listdeps = map {s/\s+//g; split /,/} @listdeps;
foreach my $ldep (@listdeps) {
    if ($ldep !~ /^(rel|only|yes|no)$/) {
        pod2usage (-message => "${progname}: Invalid --listdeps option \"$ldep\"",
                   -exitval => 1,
                   -verbose => 0);
    }
}
@listdeps = () if grep /^no$/, @listdeps;

# Create a bundled file.
my @dependencies = find_dependencies();
if ($keepdirs) {
    # Put absolute pathnames in the tar file.
    # QUESTIONS: Does this work with multiple drive letters in Windows?
    #            What about UNC names?
    chdir(rootdir()) || die "${progname}: $! (".rootdir().")\n";
    $ENV{"BDINPUTS"} = join (" ", map {s|^/+||; s/\"//g; "\"$_\""} @dependencies);
    executecmd($uservariable{"bundle"});
}
else {
    # Copy each of the dependencies to a temporary directory and tar it up.
    my $tempdir = tempdir("bundledoc-XXXXXX", TMPDIR => 1, CLEANUP => 1);
    my $tempdir2 = catfile($tempdir, $docdirname);
    print "CREATING $tempdir\n" if $verbose;     # White lie: already created
    print "CREATING $tempdir2\n" if $verbose;
    mkdir ($tempdir2, 0777) || die "${progname}: $! ($tempdir2)\n";
    foreach my $dep (@dependencies) {
        # If the current dependency is in a subdirectory of the
        # original location, create the same subdirectory in the
        # temporary directory and copy the file there.  Otherwise just
        # copy it to the root of the temporary directory.
        my $targetdir = $tempdir2;
        if (my $sd = $subdirs{$dep}) {
            $targetdir = catfile($targetdir, $sd);
            unless (-d $targetdir) {
                print "CREATING $targetdir\n" if $verbose;
                mkpath($targetdir, 0, 0777) || die "${progname}: $! ($targetdir)\n";
            }
        }
        copy ($dep, $targetdir) || die "${progname}: $! ($targetdir)\n";
    }

    # Write the manifest (the list of original file names) unless the
    # user disabled it with --manifest="".
    if ($manifest ne "") {
        my $manifestpath = catfile($tempdir2, $manifest);
        my $manifestdir = dirname($manifestpath);
        unless (-d $manifestdir) {
            print "CREATING $manifestdir\n" if $verbose;
            mkpath($manifestdir, 0, 0777) || die "${progname}: $! ($manifestdir)\n";
        }
        print "WRITING $manifestpath\n" if $verbose;
        open (my $manifest_fh, ">", $manifestpath)
            || die "${progname}: $! ($manifestpath)\n";
        print {$manifest_fh} join ("\n", @dependencies), "\n";
        # Buffered write errors surface only when the handle is closed.
        close ($manifest_fh) || die "${progname}: $! ($manifestpath)\n";
    }

    chdir($tempdir) || die "${progname}: $! ($tempdir)\n";
    $ENV{"BDINPUTS"} = "\"$docdirname\"";
    executecmd($uservariable{"bundle"});

    # Clean up our mess.
    # NOTE: We rmtree $tempdir2 and rmdir $tempdir because rmtree
    #       complained on WinNT when trying to remove a top-level
    #       directory (e.g., "\s4r.").
    print "REMOVING $tempdir2\n" if $verbose;
    rmtree $tempdir2, 0, 1;
    chdir (updir());
    print "REMOVING $tempdir\n" if $verbose;
    rmdir($tempdir) || die "${progname}: $! ($tempdir)\n";
}
print "FINISHED.\n" if $verbose;
exit 0;

__END__

######################################################################

=head1 NAME

bundledoc - bundle all the files needed by a LaTeX document


=head1 SYNOPSIS

bundledoc
[B<--version>]
[B<--help>]
[B<-->[B<no>]B<verbose>]
[B<--texfile>=I<file.tex>]
[B<--directory>=I<directory>]
[B<-->[B<no>]B<localonly>]
[B<--exclude>=I<string>]
[B<--include>=I<filespec>]
[B<--manifest>=I<file>]
[B<-->B<listdeps>=[yes|no|only|rel]...]
[B<-->[B<no>]B<keepdirs>]
[B<--config>=I<file.cfg>]
I<.dep file>


=head1 DESCRIPTION

B<bundledoc> is a post-processor for the B<snapshot> package that
bundles together all the classes, packages, and files needed to build
a given LaTeX document.  It reads the F<.dep> file that B<snapshot>
produces, finds each of the files mentioned therein, and packages them
into a single archive file (e.g., a F<.tar.gz> file), suitable for
moving across systems, transmitting to a colleague, etc.

As the simplest example possible, consider a LaTeX file called, say,
F<hello.tex>:

    \RequirePackage{snapshot}       % Needed by bundledoc
    \documentclass[11pt]{article}

    \begin{document}
    Hello, world!
    \end{document}

The C<\RequirePackage{snapshot}> causes a F<hello.dep> file to be
produced.  When B<bundledoc> is then given C<hello.dep> as an
argument, it locates the dependent files -- F<snapshot.sty>,
F<article.cls>, and F<size11.clo> -- and bundles them into a single
archive file, along with F<hello.tex> and a F<MANIFEST> file
(described in L<"OPTIONS">, below).


=head1 OPTIONS

In the following descriptions, I<somefile> refers to the name of your
main LaTeX document (no extension).

B<bundledoc> requires the name of the dependency file produced by
B<snapshot> (normally I<somefile>F<.dep>).  (For convenience, the file
can be specified without its F<.dep> extension.)  The following
options may also be given:

=over 4

=item B<--version>

Output the B<bundledoc> script's version number.  This overrides all
of the remaining options.

=item B<--help>

Give a brief usage message.  This overrides all of the remaining
options.

=item B<-->[B<no>]B<verbose>          (default: C<noverbose>)

B<bundledoc> normally does not output anything except error messages.
With C<--verbose>, it outputs copious status messages.

=item B<--texfile>=I<main .tex file>
(default: I<somefile>F<.tex>)

B<snapshot>'s dependency file does not list the main LaTeX file (the
one that gets passed to B<latex>).  In order for B<bundledoc> to find
and bundle that file, B<bundledoc> assumes it has the same name as the
B<snapshot> dependency file but with a F<.tex> extension.  If this is
not the case, then use C<--texfile> to specify the correct filename.

=item B<--directory>=I<archive directory>   (default: I<somefile>)

When B<bundledoc> creates an archive (e.g., a F<.tar> or F<.zip> file)
containing the document's files, it puts all of them in a directory to
avoid cluttering the current directory with files.  If the given
dependency file is called I<somefile>F<.dep> then the resulting
archive will, by default, store all the dependent files in a
I<somefile> directory.  To change the directory name use the
C<--directory> option.

=item B<-->[B<no>]B<localonly>        (default: C<nolocalonly>)

Although B<bundledoc> normally archives all of the files named in the
F<.dep> file, the C<--localonly> option tells B<bundledoc> to exclude
all files located in a directory other than the F<.tex> file's
directory or one of its subdirectories.

=item B<--exclude>=I<string>          (default: I<none>)

While C<--localonly> causes files outside of the F<.tex> file's
directory tree to be omitted from the archive, C<--exclude> provides
finer-grained control over files to omit from the archive.  The
C<--exclude> option, which can be specified repeatedly on the command
line, causes all files whose name contains I<string> to be omitted
from the archive.

=item B<--include>=I<filespec>        (default: I<none>)

The C<--include> option, which can be specified repeatedly on the
command line, instructs B<bundledoc> to include in the archive all of
the files matching I<filespec>, even if they're not referenced in the
F<.dep> file.

=item B<--manifest>=I<manifest file>    (default: F<MANIFEST>)

In addition to the dependent files, B<bundledoc> includes in the
archive file one extra file called, by default, ``F<MANIFEST>''.
F<MANIFEST> is a text file that lists the original filenames of all
the dependencies.  To change the filename from ``F<MANIFEST>'' to
something else, use the C<--manifest> option.  As a special case,
C<--manifest=""> tells B<bundledoc> not to include a manifest file at
all.

=item B<--listdeps>=[yes|no|only|rel]...   (default: C<no>)

C<--listdeps> accepts one or more of C<yes>, C<no>, C<only>, or
C<rel> as a comma-separated list.  As long as C<no> does not appear in
this list, B<bundledoc> outputs all of the main LaTeX file's
dependencies.  If the list contains C<rel>, then B<bundledoc> outputs
the list of dependencies with relative pathnames.  If the list
contains C<only>, then B<bundledoc> exits after displaying the list,
without producing an archive.

=item B<-->[B<no>]B<keepdirs>         (default: C<nokeepdirs>)

Normally, the archive file that B<bundledoc> produces contains a
single directory -- and subdirectories, if the document refers
explicitly to them -- in which all the dependent files lie.  If
C<--keepdirs> is specified, all the dependent files are stored with
their original pathnames.  For example, if F<somefile.tex> depends on
F<figures/somefigure.eps>, F<article.cls>, and F<snapshot.sty>, then
the I<somefile> archive will normally contain the following files:

=over 4

=item *

F<somefile/somefile.tex>

=item *

F<somefile/figures/somefigure.eps>

=item *

F<somefile/article.cls>

=item *

F<somefile/snapshot.sty>

=item *

F<somefile/MANIFEST>

=back

However, C<--keepdirs> will cause the I<somefile> archive to contain
the following sorts of filenames instead:

=over 4

=item *

F<home/me/mydocs/somefile.tex>

=item *

F<home/me/mydocs/figures/somefigure.eps>

=item *

F<usr/share/texmf/tex/latex/base/article.cls>

=item *

F<usr/share/texmf/tex/latex/snapshot/snapshot.sty>

=back

C<--directory> is not used when C<--keepdirs> is in effect.  In
addition, no manifest file is written to the archive file as it
contains redundant information.

=item B<--config>=I<configuration file>   (default: E<lt>noneE<gt>)

The C<--config> option is used to point B<bundledoc> to the
appropriate configuration (F<.cfg>) file for your TeX distribution and
operating system.  B<bundledoc> comes with a few configuration files
and it's easy to write more.  See L<"CONFIGURATION FILES"> (below) for
a description of the configuration file format.  For convenience, the
file can be specified without its F<.cfg> extension.

=back


=head1 CONFIGURATION FILES

=head2 Format

Configuration files follow a fairly simple format.  Lines beginning
with C<#> are comments.  Blank lines are ignored.
All other lines are of the form:

    variable: value

The current version of B<bundledoc> recognizes the following
variables:

=over 4

=item B<bundle>

The command to use to bundle a set of files into a single archive file

=item B<sink>

The affix to a command to discard its output

=item B<find>

The command to find a file within the TeX tree(s).

=back

Values that are too long for one line can be split across multiple
lines by using C<\> as the line-continuation symbol.

There are two environment variables that B<bundledoc> makes available
for use by configuration-file commands: C<BDBASE>, which is set to
I<somefile> (as in L<"OPTIONS">), and C<BDINPUTS>, which is set to a
space-separated list of files that a command is to operate upon.  That
is, when the command associated with C<bundle> is running, C<BDINPUTS>
contains the list of all the files that are to be archived.  In
contrast, when the command associated with C<find> is running,
C<BDINPUTS> contains the name of the file to search for.

=head2 Examples

The following configuration file parallels B<bundledoc>'s default
values of the various configuration-file variables, which represents a
kpathsea-based TeX distribution running on a generic Unix system,
which doesn't necessarily have any of the GNU tools, such as B<gzip>
or GNU B<tar>:

    # "Default" configuration file
    # By Scott Pakin

    bundle: (tar -cvf - $BDINPUTS | compress > $BDBASE.tar.Z)
    sink:   > /dev/null 2>&1
    find:   kpsewhich -progname=latex $BDINPUTS

The parentheses in the C<bundle> line tell the Unix shell to run the
command in a subshell.  This is to make the C<sink> affix work
properly (i.e., so there aren't two C<E<gt>>'s in the same command).

Notice how the commands treat C<BDBASE> and C<BDINPUTS> like any other
environment variables in a Unix shell, using C<$> to take their value.
Other operating systems use different conventions for referring to
environment variables.  For instance, a configuration file for a
Windows-based TeX distribution would use C<%BDBASE%> and
C<%BDINPUTS%> instead.

The value for C<sink> is specific to an operating system.  The value
for C<find> is specific to a TeX distribution.
C<bundle> is where the most opportunity for customization lies.  You
can use C<bundle> to specify your favorite archive format.  For
example, you can produce a shar file on Unix with something like:

    bundle: (shar --archive-name="$BDBASE" $BDINPUTS > $BDBASE.sh)

or a CAB file on Microsoft Windows with something like:

    bundle: cabarc -r -p N %BDBASE%.cab %BDINPUTS%


=head1 EXAMPLES

Assume that F<myfile.dep> was produced from F<myfile.tex> by following
the instructions in L<"DESCRIPTION">.  The following command produces
a F<.zip> file with the MikTeX TeX distribution running on Microsoft
Windows:

    bundledoc --config=miktex.cfg myfile.dep

This can be abbreviated to

    bundledoc --config=miktex myfile

The following builds a F<.tar.gz> archive with the TeX Live
distribution running on a Unix-like operating system.  B<bundledoc>
will produce verbose output describing its operations.  All files not
in the same directory tree as F<myfile.tex> and all files containing
".fd" or ".cfg" in their names are omitted.  However, all F<.bib>
files in the current directory will be included in the archive even
though none of them are referenced by F<myfile.dep>.  Finally, no
F<MANIFEST> file will be produced.

    bundledoc --config=texlive-unix.cfg --verbose --localonly \
      --exclude=.fd --exclude=.cfg --include="*.bib" --manifest="" \
      myfile.dep


=head1 FILES

The user must have previously installed F<snapshot.sty> and used it to
produce a dependency file for his document.  Besides that, the set of
external files needed by B<bundledoc> is system-specific and depends
on the configuration file used.  (See L<"CONFIGURATION FILES">,
above.)

B<bundledoc> currently comes with the following configuration files:

=over 4

=item F<texlive-unix.cfg>

Configuration file for TeX Live installations on Unix or Linux.  TeX
Live is a kpathsea-based TeX distribution that runs on various flavors
of Unix and Microsoft Windows.  F<texlive-unix.cfg> assumes you have
B<gzip> and uses it to produce a F<.tar.gz> archive file.  The
configuration file has B<bundledoc> use B<kpsewhich> to find LaTeX
files.

=item F<miktex.cfg>

Configuration file for MikTeX installations.  MikTeX is a popular TeX
distribution for Microsoft Windows.
F<miktex.cfg> assumes you have B<zip> and uses it to produce a F<.zip>
archive file.  The configuration file now has B<bundledoc> use
B<kpsewhich> to find LaTeX files; older versions of MikTeX required
the rather nonstandard B<initexmf> for this purpose.

=item F<miktex-arlatex.cfg>

This is a variant of F<miktex.cfg> that uses B<arlatex> instead of
B<zip> to archive files.  B<arlatex> is a script included in the
B<bundledoc> distribution that generates a self-extracting F<.tex>
file based on LaTeX's C<filecontents> environment.

=back


=head1 NOTES

=head2 Including and excluding files

The C<--localonly>, C<--exclude>, and C<--include> options provide
control over the archive's contents.  C<--exclude> and C<--include>
can be specified repeatedly on the command line.  The order in which
these options are specified is immaterial; B<bundledoc> processes file
inclusions and exclusions in the following order:

=over 4

=item 1.

All files referenced by the F<.dep> file are added to the list of
files to archive.

=item 2.

If C<--localonly> is specified, all files not found in the F<.tex>
file's directory (or one of its subdirectories) are removed from the
list.

=item 3.

For each C<--exclude> string specified, all files containing that
string are removed from the list.

=item 4.

For each C<--include> file specification, the set of files designated
by its expansion are added to the list.

=back

=head2 Issues When Running Under Microsoft Windows

First, because B<bundledoc> is a Perl script, you should do one of the
following to run it under Windows:

=over 4

=item *

C<perl bundledoc>

=item *

Rename F<bundledoc> to F<bundledoc.pl> and run C<bundledoc.pl>.  (This
is assuming you have a file association set up for F<.pl>.)

=item *

Run the B<pl2bat> script (if you have it) to convert F<bundledoc> to
F<bundledoc.bat>, then run C<bundledoc>.

=back

Second, Windows uses a multi-rooted filesystem (i.e., multiple drive
letters).  I wouldn't be surprised if bad things were to happen if the
files to be bundled are scattered across drives.  In addition, Windows
supports ``UNC'' filenames, which have no drive letter at all, just a
machine and share name.  UNC filenames are also untested waters for
B<bundledoc>.  Be careful!

=head2 Testing Status

I have tested B<bundledoc> only with Perl v5.6.0 and later and only on
the following platforms:

=over 4

=item *

Linux + TeX Live

=item *

Linux + teTeX

=item *

Windows NT + MiKTeX

=item *

Solaris + ??? (something kpathsea-based)

=back

It is my hope that B<bundledoc> works on many more platforms than
those.  I tried to make the program itself fairly independent of the
operating system; only the configuration files should have to change
to run B<bundledoc> on a different system.

=head2 Future Work

I'd like B<bundledoc> to work on as wide a variety of TeX
distributions as possible.  If your platform is significantly
different from the ones listed in L<"Testing Status"> (e.g., if you're
running S<OS X>) and you need to create a substantially different
configuration file from F<texlive-unix.cfg> and F<miktex.cfg>, please
send it to me at the address listed in L<"AUTHOR"> so I can include it
in a future version of B<bundledoc>.  (I make no promises, though).

Once B<bundledoc> works on all the major operating systems and TeX
distributions it would be really convenient if I could get
B<bundledoc> to detect the platform it's running on and automatically
select an appropriate configuration file.

Finally, it would be handy for B<bundledoc> to include fonts in the
archive file.  At a minimum, it should include F<.tfm> files, but it
would be even better if it included F<.mf>, F<.pfb>, F<.ttf>, and
other common font formats, as well.

=head2 Acknowledgments

Thanks to Fabien Vignes-Tourneret for suggesting what became the
C<--localonly> option and for a discussion that led to the
C<--exclude> and C<--include> options; to Marius Kleiner for updating
B<bundledoc> to properly handle document subdirectories; and to Frank
Mittelbach for suggesting using Kpathsea to help find F<.cfg> files
and to automatically append F<.cfg> and F<.dep> extensions if
necessary.


=head1 SEE ALSO

arlatex(1), gzip(1), kpsewhich(1), latex(1), perl(1), zip(1), the
B<snapshot> documentation


=head1 AUTHOR

Scott Pakin, I<scott+bdoc@pakin.org>