#! /usr/local/bin/perl
$VERSION=1.01; # 12 Sep 1999 13:57:21
use vars qw($VERSION);
$Author='Michael Friendly (friendly@yorku.ca)';   # Copyright 1999
$License = 'LPPL';                                 # released under the LPPL license

########################################################################
# texref: a Perl script for finding cross-refs in a LaTeX file
#
# texref reads a .tex file, and (recursively) all \input{} and
# \include{} files referenced therein, collecting the names of all
# \labels and \*ref{}s as it goes.  It then prints the list of labels
# together with information about where those labels are referenced
# in the .tex file(s).
#
# Pretty documentation: run pod2man or pod2text on this script,
# or just read the documentation after __END__
#
# A C/C++ program of the same name was written by Maurizio Loreti
# and is available by ftp at
#   cmssun1.pd.infn.it/pub/MLO/texref.tar.gz
#
# Changes:
#  1.00  Initial version. [Thanks to Bernd Schandl
#        for suggestions.]
#  1.01  Added $opt_sortby = input, and made it the default.

###################### Start of configuration ######################
$opt_aux    = 1;          # Search .aux files for extended label info
$opt_sort   = 1;          # Sort labels
$opt_sortby = 'input';    # Sort by label|def|input
$VERBOSE    = 0;          # Lots of messages...

# How to search for files on the various TeX paths:
# If teTeX is installed, use kpsetool to locate tex/bst/sty files before
# trying the TEXINPUTS path, for efficiency.
$KPSEWHICH = '/usr/local/teTeX/bin/kpsewhich';     # won't fail if not -x
# Otherwise, uncomment the following statement
#undef $KPSEWHICH;

$TEXINPUTS = "TEXINPUTS";    # Name of TEXINPUTS environment variable

# These patterns are currently NOT USED; get_xrefs carries its own copies.
# They used to be single-quoted strings, in which '\\' collapses to a single
# backslash, so the regex engine saw e.g. "\i" instead of a literal
# backslash followed by "i".  Compiling them with qr// keeps the escapes.

# Patterns for include-type statements, leaving filename in $1 (or $2)
$include_pat = qr/\\input\b\{?(\S+)\}|\\include\s*\{(\S+)\}/;

# Patterns for references to a label, leaving label in $1
$ref_pat   = qr/\\[a-z]*ref\{([^}]+)\}/;                  # pattern for single ref
$refs_pat  = qr/\\[a-z]*refs\{([^}]+)\}/;                 # pattern for multiple refs
$ref_range = qr/\\[a-z]*range\{([^}]+)\}\{([^}]+)\}/;     # for ref range
###################### End of configuration ######################

######################### Initializations ########################

# remove path from our name
$progname = $0;
$progname =~ s@(.*)/@@i;

# NOTE(review): get_xrefs declares its own $in_preamble, so this global is
# never consulted; it is kept only for backward compatibility.
$in_preamble = 1;
@labels = ();       # labels in the order first encountered in the input

################## get and process command options ###############
use Getopt::Long;
# 'sort!' (negatable) so that the documented -nosort actually works.
$result = GetOptions ('aux!', 'help', 'match=s', 'out=s',
                      'sort!', 'sortby=s', 'unref', 'verbose');

&usage() if $opt_help;        # and exit
&usage() unless $result;      # unknown or malformed option

if ($opt_out) {
    open(STDOUT, '>', $opt_out)
        or die "$progname: -out $opt_out: can't create.\n";
}
else {
    select(STDOUT); $| = 1;
}

$VERBOSE=1 if $opt_verbose;

$texfile = $ARGV[0] || &usage();   # usage doesn't return
shift;

########################################################################
# Split the tex filename up into a path, name, and extension
#
($TEXFILEPATH,$TEXFILENAME,$TEXFILEEXT) = &splitfn($texfile,".tex");

########################################################################
# Locate the requested TeX file.  It's either in the current (or
# specified) directory or no path was specified and it's on the
# TEXINPUTS path.
#
$Qtexfile = &cleanup_texfilename($texfile);
if (! -e $Qtexfile ) {
    if ($texfile !~ /[\/\\]/) { # no path...
        $Qtexfile = &find_on_path("$ENV{$TEXINPUTS}", "$texfile", 'tex');
        $Qtexfile = &find_on_path("$ENV{$TEXINPUTS}", "$texfile".".tex", 'tex')
            if $Qtexfile eq "";
        die "Cannot find \"$texfile\[.tex\]\" on $TEXINPUTS path.\n"
            if $Qtexfile eq "";
        $texfile = $Qtexfile ;
    }
    else {
        die "Cannot find \"$texfile\[.tex\]\".\n";
    }
}
else {
    $texfile = $Qtexfile;
}

# Main
&get_xrefs($texfile);

if ($opt_aux) {
    &find_auxfiles($TEXFILENAME);
    # Verbose chatter belongs on STDERR so it never mixes with the report.
    print STDERR "Found AUX files: ", join(' ', @aux), "\n" if $VERBOSE;
    unshift (@aux, $TEXFILENAME) unless $AUXSEEN{$TEXFILENAME . '.aux'};
    foreach (@aux) {
        &parse_auxfile($_);
    }
}

#print "labels:\n", join(' ', @labels), "\n";

if ($opt_sort) {
    @keys = ($opt_sortby =~ /label/) ? (sort keys %label_def)
          : ($opt_sortby =~ /def/)   ? (sort { $label_def{$a} cmp $label_def{$b} } keys %label_def)
          :                            @labels;
}
else {
    @keys = keys %label_def;
}

foreach (@keys) {
    $lab = $_;
    if ($opt_match) {
        next unless $lab =~ m%$opt_match%;
    }
    if ($label_ref{$lab}) {
        print "$lab\t($label_def{$lab}): ", join(', ',@{$label_ref{$lab}}),"\n"
            unless $opt_unref;
    }
    else {
        print "$lab\t($label_def{$lab}): UNREF\n";
    }
}

exit;

########################################################################
# Parse the TeX file looking for \label and \ref.  Build the
# hashes %label_def and %label_ref to hold the definitions and
# references, and @labels to hold the labels in input order.
# Recurses into \input / \include files that are readable.
#
# Input:  name of the .tex file to read
# Dies if the file cannot be opened.
sub get_xrefs {
    my ($texfile) = @_;
    my $lineno = 0;

    # NOTE(review): the original reset $in_preamble with local(), so it was
    # never true and preamble lines were always scanned.  That behaviour is
    # preserved (fragment files without \begin{document} keep working);
    # initialise this to 1 for the top-level file if skipping the preamble
    # is really wanted.
    my $in_preamble;

    # Guard against a file that (directly or indirectly) includes itself.
    if ($XREFS_ACTIVE{$texfile}) {
        warn "$progname: $texfile includes itself; skipping.\n";
        return;
    }
    local $XREFS_ACTIVE{$texfile} = 1;

    open(my $fh, '<', $texfile) or die "Can't read $texfile.\n";

    while (my $line = <$fh>) {
        chomp $line;
        $lineno++;
        $line =~ s/(?<!\\)%.*//;            # decomment, but keep \%
        last if $line =~ /\\endinput/;      # are we done?
        if ($line =~ /\\begin\s*\{document\s*\}/) { $in_preamble = 0; }
        next if $in_preamble;

        my $where = "$texfile $lineno";

        # label definitions (all of them on the line)
        while ($line =~ /\\label\{([^}]+)\}/g) {
            my $label = $1;
            next if $label =~ /#\d/;        # avoid tex command defs
            push(@labels, $label) unless $seen{'LABEL' . $label}++;
            $label_def{$label} = $where;
        }

        # single refs: \ref{key}, \pageref{key}, \figref{key}, ...
        while ($line =~ /\\[a-z]*ref\{([^}]+)\}/g) {
            my $ref = $1;
            next if $ref =~ /#\d/;          # avoid tex command defs
            push @{ $label_ref{$ref} }, $where;
        }

        # multiple refs: \refs{fig:one,fig:two}
        while ($line =~ /\\[a-z]*refs\{([^}]+)\}/g) {
            foreach my $ref (split(',', $1)) {
                next if $ref =~ /#\d/;      # avoid tex command defs
                push @{ $label_ref{$ref} }, $where;
            }
        }

        # range refs: \figrange{fig:one}{fig:two}
        while ($line =~ /\\[a-z]*range\{([^}]+)\}\{([^}]+)\}/g) {
            foreach my $ref ($1, $2) {
                next if $ref =~ /#\d/;      # avoid tex command defs
                push @{ $label_ref{$ref} }, $where;
            }
        }

        # Search recursively in included files.  Collect the names first:
        # the recursive call would clobber the regex capture variables.
        my @includes = ($line =~ /\\(?:input|include)\b\s*\{?\s*([^\s{}%]+)/g);
        foreach my $p (@includes) {
            $p .= '.tex' unless $p =~ /\.tex$/;
            # $opt_expand has no command-line switch; it can only be
            # enabled by setting it in the configuration section.
            if ($opt_expand) {
                my $fullname = &find_on_path($ENV{"$TEXINPUTS"}, $p, 'tex');
                if ($fullname ne "" && -r $fullname) {
                    $p = $fullname;
                }
            }
            if (-r $p) {
                print STDERR "Recursing into $p\n" if $VERBOSE;
                &get_xrefs($p);
            }
        }
    }
    close($fh);
}

########################################################################
# Parse a TeX .aux file looking for \newlabel.  Append the label number
# and page location to the %label_def entry.
#
# Input:  base name (or .aux name) of the file; silently does nothing
#         if the .aux file is not readable.
sub parse_auxfile {
    my ($texfilename) = shift;
    my $auxfile = &auxfile($texfilename, "aux");
    return unless -r $auxfile;

    my $fh;
    unless (open($fh, '<', $auxfile)) {
        warn "$progname: can't read $auxfile: $!\n";
        return;
    }
    print STDERR "Reading $auxfile ..." if $VERBOSE;

    while (my $line = <$fh>) {
        chomp $line;
        # \newlabel{key}{{number}{page}...}
        if ($line =~ /\\newlabel\{([^{}]+)\}(.*)/) {
            my ($label, $rest) = ($1, $2);
            $rest =~ s/[{}]/ /g;
            my ($number, $page) = split(' ', $rest);
            $label_def{$label} .= " #$number P$page";
        }
    }
    close($fh);
    print STDERR "done\n" if $VERBOSE;
}

########################################################################
# Cleanup the TeX filename.  Add the extension ".tex" if it doesn't
# already have an extension.  A leading "./" path is dropped.
#
sub cleanup_texfilename {
    my ($texfile) = @_;
    my ($path, $base, $ext) = &splitfn($texfile, ".tex");

    $ext  = "tex" if $ext eq "";
    $path = ""    if $path eq './';

    return $path . $base . "." . $ext;
}

########################################################################
# This helpful little routine locates a file on a TeX path.  The path can
# be ":" or ";" delimited.  If the file is found, its fully qualified
# name is returned, otherwise the null string is returned.  If the
# input file name contains "/" or "\" then either it is returned (if the
# file specified exists) or the empty string is returned, the path _is not_
# searched.
#
sub find_on_path {
    my ($path, $file, $type) = @_;
    my $filename = "";

    if ($KPSEWHICH && -x $KPSEWHICH) {
        # NOTE(review): this goes through the shell; $file comes from the
        # command line or from \input{} arguments in the document.
        chomp($filename = `$KPSEWHICH $type $file`);
        #print "kpse: $filename\n";
        $filename = "" if $filename =~ /not found/;
    }

    unless ($filename) {
        if ($file =~ /\/|\\/) {
            $filename = $file if -e $file;
        }
        else {
            foreach my $dir (split(/;|:/, $path)) {
                next if $dir eq "";     # empty entry would probe "/$file"
                #print "looking for $file in $dir\n";
                $dir =~ s/\\/\//g;
                $filename = $dir . "/" . $file;
                last if -e $filename;
                $filename = "";
            }
        }
    }
    return $filename;
}

########################################################################
# Break a filename into its path, basename, and extension components.
# The path returned always ends with a slash.  "./" is returned if the
# file has no path.  If the filename passed in does not exist, the
# default extension passed in is tried (actually, is assumed to be
# correct).
#
# Returns the list (path, basename, extension).
sub splitfn {
    my ($filename, $defext) = @_;
    my ($path, $basename, $ext) = ("", "", "");

    $filename =~ tr/\\/\//;         # translate \ into /
    $filename = $filename . $defext if ! -r $filename;

    if ($filename =~ /\//) {
        ($path     = $filename) =~ s/\/[^\/]*$//;
        ($basename = $filename) =~ s/.*\///;
    }
    else {
        $path     = ".";
        $basename = $filename;
    }

    if ($basename =~ /\./) {
        ($ext = $basename) =~ s/.*\.//;
        $basename =~ s/\.[^.]*$//;
    }

    return ($path . "/", $basename, $ext);
}

########################################################################
# Read the .log file belonging to $texfilename and push the name of every
# "(xxx.aux)" it mentions onto @aux, once each (tracked in %AUXSEEN).
# Silently does nothing if the .log file is not readable.
#
sub find_auxfiles {
    my ($texfilename) = shift;
    my $logfile = &auxfile($texfilename, "log");
    return unless -r $logfile;

    my $fh;
    unless (open($fh, '<', $logfile)) {
        warn "$progname: can't read $logfile: $!\n";
        return;
    }
    print STDERR "Reading $logfile ..." if $VERBOSE;

    while (my $line = <$fh>) {
        chomp $line;
        while ($line =~ /\((\S+\.aux)\)/g) {
            my $curfile = $1;
            # "./foo.aux" and "foo.aux" are the same file; without this
            # the main .aux file could be parsed twice.
            $curfile =~ s{^\./}{};
            push(@aux, $curfile) unless $AUXSEEN{$curfile}++;
        }
    }
    close($fh);
    print STDERR "done\n" if $VERBOSE;
}

########################################################################
# Input: a file name and an extension, `ext' (with or without the dot)
# Output: the file name with `.ext' appended, unless it already ends
#         in that extension
# Note: auxiliary files are always placed in the current directory,
#       the path is ignored.
#
sub auxfile {
    my ($texfilename, $ext) = @_;
    my $dot    = ($ext ne "" && $ext !~ /^\./) ? "." : "";
    my $suffix = $dot . $ext;

    # Anchored and quoted: "myauxfile" must not count as ending in ".aux".
    return $texfilename if $suffix ne "" && $texfilename =~ /\Q$suffix\E$/;
    return $texfilename . $suffix;
}

########################################################################
# Print the usage message and exit with status 1.
#
sub usage {
    print <<"EOF";
Find LaTex cross references, Version: $VERSION, $Author
Usage: $progname <options> texfile[.tex]
  where <options> may be abbreviated to unique truncations, and are:
   -aux|noaux      do (dont) find & read .aux file(s) for label #, page
   -help           print this measly help
   -match=regexp   regexp to select labels printed
   -out=filename   send output to a file
   -sort|nosort    do (dont) sort the labels
   -sortby=label   if sorting, sort by the label string
          =def     by label def string
          =input   by order of appearance in the input (default)
   -unref          print only unreferenced labels
   -verbose        lots of messages
EOF
    exit 1;
}

__END__

=head1 NAME

texref - Find cross-references for a LaTeX file

=head1 SYNOPSIS

B<texref> [B<-(no)aux>] [B<-help>] [B<-match> I<regexp>] [B<-out> I<filename>]
[B<-(no)sort>] [B<-sortby label|def|input>] [B<-unref>] [B<-verbose>]
I<texfile>[.tex]

=head1 DESCRIPTION

B<texref> reads a .tex file, and (recursively) all \input{} and
\include{} files referenced therein, collecting the names of all
\labels and \*ref{}s as it goes.  It then prints the list of labels
together with information about where those labels are referenced
in the .tex file(s).

References to labels are recognized as commands of the form

   \[a-z]*ref{key}

This allows for custom LaTeX referencing commands, such as

   \newcommand{\figref}[1]{Figure~\ref{#1}}

as well as the varioref package, which uses the \vref{key} command.

In addition, the program recognizes lists of references, encoded as
commands of the form,

   \[a-z]*refs{key1,key2,key3}

which expand to something like 'Figure 1, Figure 2, and Figure 3'.
These could be provided by the LaTeX commands

   \newcommand{\figrefs}[1]{\dorefs{#1}{Figures}}
   \newcommand{\tabrefs}[1]{\dorefs{#1}{Tables}}
   \newcommand{\exrefs}[1]{\dorefs{#1}{Examples}}
   \makeatletter
   \newcommand{\dorefs}[2]{%
     \let\@dummy\@empty
     #2~%
     \@for\@term:=#1\do{%
       \@dummy
       \edef\@dummy{\ref{\@term}, }}%
     \expandafter\format@last\@dummy}
   \def\format@last#1, {and #1}
   \makeatother

Finally, the program recognizes reference ranges, like

   \figrange{key1}{key3}

which expand to 'Figure 1--3', from commands like

   \newcommand{\figrange}[2]{Figures~\ref{#1}--\ref{#2}}
   \newcommand{\tabrange}[2]{Tables~\ref{#1}--\ref{#2}}

Labels are described by the filename and line number where they are
defined; if the B<-aux> option is in effect, the corresponding I<.aux>
files are also read (\newlabel{} lines) to obtain the label number and
the page location in the document, assuming the file has been processed
by LaTeX.

=head1 OPTIONS AND ARGUMENTS

All options may be abbreviated to their unique truncations, so B<-h>,
B<-he>, B<-hel> all print help.  Options which take an argument may be
followed by a blank or '='.  Those values may need to be quoted if they
contain characters interpreted by your shell.

=over 4

=item B<-aux> | B<-noaux>

If the B<-aux> option is in effect, the .log file is read to find the
I<.aux> files corresponding to the .tex file.  Then, each .aux file is
read (looking for \newlabel{} lines) to obtain the label number and the
page location in the document.
This assumes that the .tex file has been processed by LaTeX so that the
information in the .aux files is up to date.  The default is B<-aux>
(unless changed in the configuration section).

=item B<-help>

Print a brief help message and exit.

=item B<-match>S< >I<regexp>

Print only the labels which match the I<regexp>.  If you use systematic
labels (e.g., fig:name for figures, sec:name for sections, etc.)
B<-match> 'fig:' will select only figure labels, etc.  Use a pipe to
grep to select more finely (e.g., to find figure labels in Chapter 4).

=item B<-out>S< >I<filename>

Send the output to a named file rather than to STDOUT.

=item B<-sort> | B<-nosort>

Sort (or do not sort) the labels in the output.  If sorting, the sort
order can be set by the B<-sortby> option.  If not sorting, the order in
the output is apparently haphazard (the order of perl keys for a hash).

=item B<-sortby>S< >[label|def|input]

If sorting, B<-sortby>=label means that the output is sorted by the
label string.  B<-sortby>=def means that the output is sorted by the
label definition string, which normally looks like

   filename linenumber labelnum pagenum

B<-sortby>=input (the default) simply lists the labels in the order
encountered in the input file(s).  This order is easiest to use for
resolving unreferenced labels.

=item B<-unref>

If specified, only labels for which no corresponding \*ref references
have been found are printed.

=item B<-verbose>

Lots of messages, written to STDERR.

=back

=head1 EXAMPLES

To show the output format, the following command uses all defaults:

   % texref drew
   sec:mosaic      (drew.tex 186 #2 P2): drew.tex 137
   eq:pij          (drew.tex 212 #1 P2): UNREF
   tab:hairdat     (tab/hairdat.tex 3 #1 P3): drew.tex 222, drew.tex 455
   fig:mosaic3i    (drew.tex 248 #1 P3): drew.tex 231, drew.tex 234
   fig:mosaic34    (drew.tex 306 #2 P5): drew.tex 282, drew.tex 311, drew.tex 322, drew.tex 486
   eq:pijkl        (drew.tex 344 #2 P5): drew.tex 348
   ...


To print only unreferenced labels:

   % texref -unr drew
   eq:pij          (drew.tex 212 #1 P2): UNREF

=head1 LIMITATIONS

The program is tuned to LaTeX, not TeX.

It does not handle references to subfigures, as provided by the
subfigure and subfigmat packages.

There is no -undef option to print labels which are referenced but not
defined.  But you did catch these when you LaTeX'd your file, right?

The output format is inflexible.

=head1 BUGS

Sorting by def sorts the label definitions as strings, so 'myfile 1020'
appears before 'myfile 723', which appears before 'myfile 9'.

=head1 SEE ALSO

   ftp://cmssun1.pd.infn.it/pub/MLO/texref.tar.gz
   texdepend:  CTAN support/texdepend
   texfind:    CTAN support/texfind

=head1 AUTHOR

Michael Friendly E<lt>friendly@yorku.caE<gt>

=head1 LICENSE

B<texref> is distributed under the terms of the LaTeX Project Public
License (LPPL).  This means that you may freely copy or distribute this
software, but if you modify it and distribute it (even locally) you must
change the name to avoid confusion.  See:

   CTAN:: help/Catalog/Licenses.html.