#!/usr/bin/perl
####################################################################
#
# File: bibweb
# Author: John H. Palmieri
# URL: http://www.math.washington.edu/~palmieri/Bibweb/
# Version: 0.49 of Wed Nov 19 12:58:29 PST 2003
# Description: retrieve bibliographical information from MathSciNet
#              automatically
# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003 John H. Palmieri
# License: distributed under GNU General Public License -- see below.
#
####################################################################
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# (see file COPYING) along with this program; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#
####################################################################
#
# Command line options:
#   FILE         use FILE.aux as input, FILE.bib as output
#   -i FILE      specify FILE as input (aux) file.
#   -o FILE      specify FILE as output (bib) file.  If FILE ends in
#                ".bib", write to FILE; otherwise, write to FILE.bib
#   -c CITATION  looks up CITATION, rather than using an auxfile for input
#   -b           get output in bibtex format (default)
#   -r           insert the review in the bibtex output
#   -d           get output in dvi format (written to MR#.dvi, where
#                'MR#' is the Math Reviews number of the reference)
#   -p           get output in postscript format (written to MR#.ps)
#   -pdf         get output in pdf format (written to MR#.pdf)
#   -t           get output in text format (written to MR#.txt)
#   -m NUM       return at most NUM entries (where NUM is rounded up
#                to 5, 10, 20, 50, 100, 1000)
#   -e WEB_SITE  use WEB_SITE for MathSciNet search
#   -L           use lynx instead of wget (note: lynx is slower)
#   -h           print brief help message
#
# If you use only one of the -i and -o options, bibweb makes a guess
# as to what the other file should be.
#
# At the moment, -d, -p, -pdf, -t only work if you have lynx available.
####################################################################

use strict;
use warnings;

use File::Spec;
use Getopt::Long;
use IO::Handle;

########## configuration

my $bibtex   = 'bibtex';
my $thisprog = 'bibweb';
my $version  = '0.49';

# good choices for e_math: 'www.ams.org', 'ams.rice.edu',
# 'ams.mathematik.uni-bielefeld.de', 'ams.mpim-bonn.mpg.de',
# 'ams.u-strasbg.fr', 'ams.impa.br'
my $e_math = $ENV{MATHSCINET_SITE} || 'www.ams.org';

# % is not the official BibTeX comment character yet, so you might want
# to change these to be absolutely compatible.  Of course, I forget what
# the official way is to do comments; maybe @comment?
my $bibtex_short_comment = '%%';
my $bsc                  = $bibtex_short_comment;
my $bibtex_long_comment  = '%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%';

# Pattern that print_review() treats as the second "start of review" marker.
# NOTE(review): in the copy of the script this was restored from, the marker
# had been reduced to a bare newline -- almost certainly an HTML tag that was
# stripped in transit.  It is reproduced as found; restore the real tag from
# the upstream script before relying on the -r option.
my $review_start_marker = "\n";

# NOTE(review): the usage text below was reconstructed from the option list in
# the file header; the original wording was lost along with the heredoc.
my $usage = <<"EOT";
$thisprog $version -- retrieve bibliographical information from MathSciNet

Usage: $thisprog [options] [FILE]

  FILE          use FILE.aux as input, FILE.bib as output
  -i FILE       specify FILE as input (aux) file
  -o FILE       specify FILE as output (bib) file
  -s            write the output to standard output
  -c CITATION   look up CITATION, rather than using an aux file for input
  -b            get output in bibtex format (default)
  -r            insert the review in the bibtex output
  -d            get output in dvi format (written to MR#.dvi)
  -p            get output in postscript format (written to MR#.ps)
  -pdf          get output in pdf format (written to MR#.pdf)
  -t            get output in text format (written to MR#.txt)
  -m NUM        return at most NUM entries (NUM is rounded up to
                5, 10, 20, 50, 100, or 1000)
  -e WEB_SITE   use WEB_SITE for the MathSciNet search
  -sep STRING   use STRING instead of ';' to separate citation fields
  -L            use lynx instead of wget (note: lynx is slower)
  -h            print this help message
EOT

########## command line

my ($auxfile, $bibfile, $use_stdout, $only_cite, $use_bibtex, $review,
    $use_dvi, $use_postscript, $use_pdf, $use_text, $user_max,
    $use_lynx, $new_separator, $help);

# NOTE(review): the "input|i=s" spec is inferred from the documented -i option
# and the surviving "\$auxfile" target; the original spec text was lost.
GetOptions(
    "input|i=s"       => \$auxfile,
    "output|o=s"      => \$bibfile,
    "stdout|std|s"    => \$use_stdout,
    "cite|c=s"        => \$only_cite,
    "bibtex|bib|b"    => \$use_bibtex,
    "review|rev|r"    => \$review,
    "dvi|d"           => \$use_dvi,
    "postscript|ps|p" => \$use_postscript,
    "pdf"             => \$use_pdf,
    "text|txt|t"      => \$use_text,
    "max|m=i"         => \$user_max,
    "emath|e=s"       => \$e_math,
    "lynx|Lynx|L"     => \$use_lynx,
    "separator|sep=s" => \$new_separator,
    "help|h"          => \$help,
) or die "$usage\n";

if ($help) { die "$usage\n" }

########## output format

# The first format whose flag is set wins; bibtex is the default.  Columns:
# flag, internal type name, value of the server's "return" parameter, and
# extension of the file written for non-bibtex formats.
my @formats = (
    [ $use_bibtex,     'bibtex',     'bibtex', 'bib' ],
    [ $use_dvi,        'dvi',        'dvi',    'dvi' ],
    [ $use_postscript, 'postscript', 'ps',     'ps'  ],
    [ $use_pdf,        'pdf',        'pdf',    'pdf' ],
    # The server calls the text format "hl", but the documented output
    # file is MR#.txt, so the extension differs from the return value.
    [ $use_text,       'text',       'hl',     'txt' ],
);
my ($chosen_format) = grep { $_->[0] } @formats;
unless ($chosen_format) {
    $use_bibtex    = 1;
    $chosen_format = $formats[0];
}
my (undef, $output_type, $getitem_return, $output_ext) = @$chosen_format;

# Everything except bibtex is fetched with "lynx -dump".
$use_lynx = 1 if $output_type ne 'bibtex';

########## program used to fetch web pages

my ($fetcher, @fetch_switches);
if (!$use_lynx and find_in_path('wget')) {
    $fetcher        = 'wget';
    @fetch_switches = ('--quiet', '-O', '-');
}
elsif (!$use_lynx and find_in_path('webget')) {
    $fetcher        = 'webget';
    @fetch_switches = ();
}
elsif (find_in_path('lynx')) {
    $fetcher        = 'lynx';
    @fetch_switches = ('-source');
}
else {
    die "I can't find wget, lynx, or webget. Your path may not be set correctly, or you may need to install one of these programs. Lynx must be available in order to use the -dvi, -ps, -pdf, and -txt options.\n";
}

########## MathSciNet site

my %site_for_alias = (
    ams       => 'www.ams.org',
    rice      => 'ams.rice.edu',
    bielefeld => 'ams.mathematik.uni-bielefeld.de',
    bonn      => 'ams.mpim-bonn.mpg.de',
    strasbg   => 'ams.u-strasbg.fr',
    impa      => 'ams.impa.br',
);
my %redirect_for_site = (
    'www.ams.org'                     => 'Providence%2C+RI+USA',
    'ams.rice.edu'                    => 'Houston%2C+TX+USA',
    'ams.mathematik.uni-bielefeld.de' => 'Bielefeld%2C+Germany',
    'ams.mpim-bonn.mpg.de'            => 'Bonn%2C+Germany',
    'ams.u-strasbg.fr'                => 'Strasbourg%2C+France',
    'ams.impa.br'                     => 'Rio+de+Janeiro%2C+Brazil',
);

$e_math = $site_for_alias{$e_math} if exists $site_for_alias{$e_math};

# Value of the "redirect" URL parameter; empty for unrecognised sites.
my $redirect = '';
if (exists $redirect_for_site{$e_math}) {
    $redirect = $redirect_for_site{$e_math};
    print "Using `$e_math' for the MathSciNet search.\n";
}
else {
    print "Warning: Your choice of `$e_math' for the MathSciNet site is not one of the recommended choices. Proceeding anyway...\n\n";
}

########## maximum number of matches

# NOTE(review): $max_matches only decides when to give up on a search; the
# search URL below asks the server for "l=20" regardless.  Confirm whether
# the two were meant to be linked.
my @match_limits = (5, 10, 20, 50, 100, 1000);
my $max_matches  = $match_limits[0];
if ($user_max) {
    my ($rounded) = grep { $user_max <= $_ } @match_limits;
    if (defined $rounded) {
        $max_matches = $rounded;
        if ($user_max != $max_matches) {
            print "Rounding -m argument up to $max_matches.\n\n";
        }
    }
    else {
        # Larger than the largest supported value: clamp rather than
        # silently falling back to the smallest one.
        $max_matches = $match_limits[-1];
        print "Reducing -m argument to $max_matches, the largest supported value.\n\n";
    }
}

########## input and output files

# True when a single citation was given with -c.  Computed before the quote
# characters are stripped from the citation below.
my $cite_mode = $only_cite ? 1 : 0;

# The positional FILE argument is only consulted when -i and -o do not
# already determine both files.
my $file = '';
unless ($auxfile and $bibfile) {
    my $arg = shift @ARGV;
    $file = $arg if defined $arg;
}
$file =~ s/\.aux$//;

$auxfile = '' unless defined $auxfile;
$bibfile = '' unless defined $bibfile;
$auxfile =~ s/\.aux$//;
$bibfile = $file if $bibfile eq '';

unless ($cite_mode) {
    # Guess whichever of the two files was not given from the other one.
    if ($auxfile eq '') {
        $auxfile = ($file ne '') ? $file : $bibfile;
        $auxfile =~ s/\.bib$//;
    }
    $bibfile = $auxfile if $bibfile eq '';
    die "No input file specified.\n\n$usage\n" if $auxfile eq '';
}
if ($bibfile ne '' and $bibfile !~ m/\.bib$/) {
    $bibfile = "$bibfile.bib";
}

# Append handle for $bibfile; opened on first use by open_bibfile().
my $bib_fh;

if ($cite_mode) {
    # Quote characters are never part of a search term.
    $only_cite =~ tr/'"//d;
    # With no output file named, a single citation goes to the terminal.
    $use_stdout = 1 if $bibfile eq '';
}
else {
    unless (-e "$auxfile.aux") { die "Couldn't read $auxfile.aux\n" }
    unless ($use_stdout) {
        open_bibfile();
        print "Appending output to $bibfile . . . \n\n";
    }
}

# Field separator inside a citation key, as a regular expression.
my $semicolon = $new_separator ? quotemeta($new_separator) : ";";

########## main loop

# Either the single -c citation, or every key bibtex reports as missing.
my @citations;
if ($cite_mode) {
    @citations = ($only_cite);
}
else {
    foreach my $warning (run_command($bibtex, $auxfile)) {
        if ($warning =~ /^Warning--I didn\'t find a database entry for \"([^\"]*)\"$/) {
            push @citations, $1;
        }
    }
}

foreach my $citation_orig (@citations) {
    # Keys from an aux file are skipped when an earlier run already
    # recorded a search for them in the bib file.
    next if !$cite_mode and check_bibfile($citation_orig);
    lookup_citation($citation_orig);
}

if ($bib_fh) {
    close($bib_fh) or warn "Couldn't close $bibfile: $!\n";
}

# run bibtex again, to make use of new keys
unless ($cite_mode) {
    exec($bibtex, $auxfile) or die "Couldn't run $bibtex: $!\n";
}

########## searching

# Split a citation key into MathSciNet search terms.
# Argument: the citation key.  Returns a hash (as a list) with the keys
# author, title, journal, misc (search terms, joined with '+and+'),
# dr (date range type), ord (year comparison operator) and yr (year).
sub parse_citation {
    my ($citation) = @_;

    $citation =~ s/-and-/-/g;
    # BibTeX keys cannot contain commas, so periods stand in for them.
    $citation =~ tr/./,/;

    my %query = (
        author => '', title => '', journal => '', misc => '',
        dr     => 'all', ord => '', yr => '',
    );
    my $year = '';

    if ($citation =~ m/$semicolon/) {
        # Structured form: authors;titles;journals;year
        my @fields = split(/$semicolon/, $citation);
        my ($authors, $titles, $journals, $given_year) =
            map { defined $fields[$_] ? $fields[$_] : '' } 0 .. 3;
        $query{author}  = join('+and+', split(/-/, $authors));
        $query{title}   = join('+and+', split(/-/, $titles));
        $query{journal} = join('+and+', split(/-/, $journals));
        $year           = $given_year;
    }
    else {
        # Short form: author-word-word-...[year]
        my ($author, $subcitation) = split(/-/, $citation, 2);
        $author      = '' unless defined $author;
        $subcitation = '' unless defined $subcitation;
        if ($subcitation =~ /([<>=]?\d+)\Z/) {
            $year        = $1;
            $subcitation = substr($subcitation, 0, $-[0]);
        }
        $query{author} = $author;
        $query{misc}   = join('+and+', split(/-/, $subcitation));
    }

    # parse year entry: an optional leading <, = or > selects the comparison
    if ($year ne '') {
        my %operator_for = ('<' => 'lt', '=' => 'eq', '>' => 'gt');
        my $prefix = substr($year, 0, 1);
        $query{dr} = 'pubyear';
        if (exists $operator_for{$prefix}) {
            $query{ord} = $operator_for{$prefix};
            $query{yr}  = substr($year, 1);
        }
        else {
            $query{ord} = 'eq';
            $query{yr}  = $year;
        }
    }

    return %query;
}

# Search MathSciNet for one citation and report or store what is found.
# Argument: the citation key exactly as it appeared in the aux file or on
# the command line.
sub lookup_citation {
    my ($citation_orig) = @_;
    my %query = parse_citation($citation_orig);

    # construct URL
    # NOTE(review): "preferred_language-en" (with a hyphen, not "=") is
    # reproduced as found -- confirm against the server's expectations.
    my $search_url =
        "http://$e_math/msnmain?fn=130&form=fullsearch&preferred_language-en&Submit=Start+Search&"
        . "pg4=AUCN&s4=$query{author}&co4=AND"
        . "&pg5=TI&s5=$query{title}&co5=AND"
        . "&pg6=JOUR&s6=$query{journal}&co6=AND"
        . "&pg7=ALLF&s7=$query{misc}"
        . "&dr=$query{dr}"
        . "&yrop=$query{ord}&arg3=$query{yr}"
        . "&yearRangeFirst=&yearRangeSecond="
        . "&pg3=ET&s3=All&l=20&reference_lists=show&redirect=$redirect";

    unless ($use_stdout) {
        bib_print("", "$bibtex_long_comment \n");
    }
    bib_print("working on citation '$citation_orig' \n",
              "$bsc citation '$citation_orig' \n");

    my $matches             = '';   # match count as reported by the server
    my $match_count         = 0;    # the same, as a number
    my $match_info_printed  = 0;
    my $done_getting_bibtex = 0;

    # NOTE(review): the HTML patterns below are reproduced exactly as found;
    # some opening tags may have been lost from them.  Verify them against
    # the upstream script.
    LINE:
    foreach my $line (run_command($fetcher, $search_url, @fetch_switches)) {

        # get number of matches
        if (   $line =~ /Number of Matches:.*<\/b> ([0-9]*)/
            or $line =~ /Item:.*([0-9]*)<\/strong>\s*$/) {
            $matches     = $1;
            $match_count = ($matches =~ /\A[0-9]+\z/) ? $matches : 0;
            if ($match_count > $max_matches) {
                bib_print("More than $max_matches matches found "
                          . "($matches); "
                          . "please refine your search criteria,\n"
                          . "or use the -m option. \n\n",
                          "$bsc More than $max_matches matches found "
                          . "($matches). \n$bsc\n");
                last LINE;
            }
            elsif (not $match_info_printed) {
                $match_info_printed = 1;
                my $plural = ($match_count == 1) ? "" : "es";
                bib_print("$matches match$plural found. \n\n", "");
            }
        }

        # given match, get MR number, convert to forms for use in URL.
        # With a single match only the first MR line is used.
        elsif ($line =~ /MR([^<]*)<\/strong>\s*\(([^)]*)\)<\/strong>/
               and ($match_count > 1
                    or ($match_count == 1 and not $done_getting_bibtex))) {
            # there are now new MR numbers: a unique 7 digit number
            # assigned to each item in the database.  Assign this to $mr,
            # and use it to search.  Assign old MR number to $old_mr, and
            # use it for printing.
            my ($mr, $old_mr) = ($1, $2);
            $done_getting_bibtex = 1 if $match_count == 1;

            bib_print("", "$bsc Math Reviews number: $old_mr \n");
            $old_mr =~ tr/ //d;
            $old_mr =~ tr/_#,/:/;

            my $item_url =
                "http://$e_math/mathscinet-getitem?mr=$mr&return=$getitem_return";

            if ($output_type eq 'bibtex') {
                print_bibtex_entry($item_url, $mr, $old_mr, $citation_orig,
                                   $match_count == 1);
            }
            else {
                save_item($item_url, $mr);
            }
        }

        # warning messages...
        elsif ($line =~ /Sorry, no matches found/) {
            bib_print("No matches found; please revise your search criteria. \n\n",
                      "$bsc No matches found. \n$bsc\n");
            last LINE;
        }
        elsif ($line =~ /Sorry, /) {
            bib_print("There was an unknown error. "
                      . "Please check for typos, or just try again.\n\n",
                      "$bsc Syntax or server error. \n$bsc\n");
        }
    }
    return;
}

# Fetch the BibTeX entry for one item and append it to the output.
# Arguments: URL of the item, its MR number, its old-style MR number (with
# separators already converted to ':'), the citation key being looked up,
# and a flag that is true when this item is the only match -- in that case
# the entry is given the citation key in place of its MR-number key.
sub print_bibtex_entry {
    my ($item_url, $mr, $old_mr, $citation_orig, $only_match) = @_;

    # The review page is looked up without the "CMP" prefix.
    $mr =~ s/CMP//;

    my $at_sign_found = 0;
    foreach my $entry_line (run_command($fetcher, $item_url, @fetch_switches)) {
        # look for @ at start of bibtex; everything before it is skipped
        $at_sign_found ||= ($entry_line =~ /@/);
        next unless $at_sign_found;

        # remove html tags from text and print it (if not blank)
        $entry_line = remove_html($entry_line);
        next if $entry_line =~ /^\s*$/ or $entry_line =~ /MathSciNet/;

        if ($only_match and length $old_mr and $entry_line =~ /\Q$old_mr\E/) {
            $entry_line =~ s/MR\Q$old_mr\E/$citation_orig/;
        }

        # The copyright line is replaced by a blank line.  Note that "(c)"
        # is a regex group here, so it matches a plain "c".
        if ($entry_line =~ /(c).*[0-9]*.*American Mathematical Society/) {
            bib_print("", "\n");
        }
        else {
            bib_print("", "$entry_line");
            if ($review && $entry_line =~ /MRREV/) {
                print_review($mr);
            }
        }
    }
    return;
}

# Fetch one item in dvi, ps, pdf or text format and write it to MR#.ext in
# the current directory.  Arguments: URL of the item and its MR number.
sub save_item {
    my ($item_url, $mr) = @_;

    # The MR number comes from the server; keep only characters that are
    # safe in a file name.
    (my $safe_mr = $mr) =~ s/[^\w.-]//g;
    my $outfile = "MR$safe_mr.$output_ext";

    my $out;
    unless (open($out, '>', $outfile)) {
        warn "Couldn't write $outfile: $!\n";
        return;
    }
    binmode($out);
    print {$out} run_command($fetcher, $item_url, '-dump');
    close($out) or warn "Couldn't close $outfile: $!\n";
    print "Output written to $outfile\n";
    return;
}

########## simple subroutines

# Return the full path of the first executable called $prog found on the
# search path, or the empty string if there is none.
sub find_in_path {
    my ($prog) = @_;
    foreach my $dir (File::Spec->path()) {
        my $candidate = File::Spec->catfile($dir, $prog);
        return $candidate if -f $candidate and -x $candidate;
    }
    return '';
}

# Run a command without going through the shell and return the lines it
# writes to standard output.  If the command cannot be started a warning is
# printed and the empty list is returned.
sub run_command {
    my @command = @_;
    my $pipe;
    unless (open($pipe, '-|', @command)) {
        warn "Couldn't run $command[0]: $!\n";
        return;
    }
    my @lines = <$pipe>;
    # The exit status is deliberately ignored: a failed fetch simply
    # yields no lines.
    close($pipe);
    return @lines;
}

# Open $bibfile for appending (once) and return the handle.  The handle is
# unbuffered so that check_bibfile() sees everything written so far.
sub open_bibfile {
    return $bib_fh if $bib_fh;
    open($bib_fh, '>>', $bibfile) or die "Couldn't open $bibfile\n";
    $bib_fh->autoflush(1);
    return $bib_fh;
}

# scan the bib file for citation, to see if you've looked for it before.
# Argument: the citation key.  Returns 1 (after telling the user) if the bib
# file records an earlier search for it, 0 otherwise.
sub check_bibfile {
    my ($citation) = @_;

    return 0 unless -e $bibfile;
    open(my $in, '<', $bibfile) or return 0;

    my $answer = 0;
    while (my $entry = <$in>) {
        if ($entry =~ /\Q$bsc\E\s*citation\s*\'\Q$citation\E\'/) {
            $answer = 1;
            last;
        }
    }
    close($in);

    if ($answer) { print "You've searched for $citation before.\n\n" }
    return $answer;
}

# remove html tags (strings like <tag> and </tag>).  Closing tags become a
# space, all other tags disappear.  Returns the cleaned-up text.
sub remove_html {
    my ($text) = @_;
    $text =~ s/<\/[^\>]*\>/ /g;
    $text =~ s/<[^\>]*\>//g;
    return $text;
}

# print to screen and to the bib file, unless writing to STDOUT.
# Arguments: the text for the screen and the text for the bib file.  When
# writing to STDOUT only one of the two is printed: the bib text for bibtex
# output, the screen text otherwise.
sub bib_print {
    my ($screen_text, $bib_text) = @_;
    if ($use_stdout) {
        print $use_bibtex ? $bib_text : $screen_text;
        return;
    }
    my $fh = open_bibfile();
    print $screen_text;
    print {$fh} $bib_text;
    return;
}

# print the review of the item with MR number $mr as a REVIEW field.
sub print_review {
    my ($mr) = @_;
    my $review_url = "http://$e_math/mathscinet-getitem?mr=$mr";

    # $count goes up at each of the two markers that precede the review
    # text; lines are copied while it equals 2, and the "Reviewed" line
    # closes the field.
    my $count = 0;
    bib_print("", " REVIEW = {");
    foreach my $line (run_command($fetcher, $review_url, @fetch_switches)) {
        if ($line =~ /Reviewed/) {
            $count++;
            chomp($line);
            bib_print("", remove_html($line) . " },\n");
        }
        if ($count == 2) { bib_print("", remove_html($line)) }
        if ($line =~ /citation to clipboard/) { $count++ }
        if ($line =~ $review_start_marker) { $count++ }
    }
    return;
}