#!/usr/bin/perl # create diagrams with the rst package in LaTeX from URML # args: urml2latex -i xmlfile docid analysis-id # change this for relname replacements sub replace_relname($) { my $rel = $_[0]; $rel =~ s/evaluation/eval/i; $rel =~ s/elaboration/elab/i; $rel =~ s/object/obj/i; $rel =~ s/attribute/attr/i; $rel =~ s/additional/addl/i; return $rel; } @args = (); $putSegmentsInTree=0; foreach(@ARGV) { if (/\-i/i) { $putSegmentsInTree=1; } else { push @args, $_; } } $fn = $args[0]; unless($fn) { print "Converts a fully specified URML analysis to LaTeX rst format.\nUsage: urml2latex [-i] xmlfile [docid [analysis-id]]\n"; exit; } warn "loading $fn\n"; { local(*INPUT, $/); open (INPUT, $fn) || die "can't open $fn: $!"; $f = ; close INPUT; } $docid = $args[1]; $anaid = $args[2]; if ($docid eq '') { print "printing all document ids: \n"; while ($f =~ /\]+)(\"?)\s*>/isg) { print $2."\n"; } } else { unless ($f =~ /(.*?)<\s*\/\s*document\s*>/is) { die "Document $docid not found."; } $doc = $3; $docid =~ s/_//g; # more than one analysis? my $mdoc; %ana = (); while ($doc =~/(.*?)<\/\s*analysis\s*>/isg) { $ana{$2} = $4; } if (scalar keys %ana >1) { if ($anaid && exists($ana{$anaid})) { $mdoc = $ana{$anaid}; } else { print "Please specify the analysis of this document as argument. Printing all available analyses:\n"; foreach (keys %ana) { print $_."\n"; } exit; } } else { $mdoc = $doc; } print $mdoc; print "% rhetorical diagram\n"; # store all relations in a hash while ($mdoc =~ /<\s*(relation|hypRelation|parRelation|segment)\s*(.*?)>(.*?)<\s*\/\1\s*>/isg) { my $relclass = $1; my $relattr = $2; my $relcontent = $3; if ($relattr =~ /id\s*=\s*\"(.+?)\"/) { my $relid = $1; # check relation if ($relclass eq 'hypRelation' && !($relcontent =~ /<\s*nucleus/is)) { warn "Node $relid is a $relclass, but has no nucleus -- ignoring that node!"; } else { $relid =~ s/_//g; $relsxml{$relid} = $relcontent; $relclass{$relid} = $relclass; if ($relattr =~ /type\s*\=\s*\"(.*?)\"/is) { $reltype{$relid} = &replace_relname($1); } if ($relattr =~ /group\s*=\s*\"(.*?)\"/is) { $relgroup{$relid} = $1; } } } } # bottom-up approach, start with segment # assign labels foreach(keys %relclass) { if ($relclass{$_} eq 'segment') { if ($putSegmentsInTree) { $rellatex{$_} = '\rstsegment{'.&conv_text($relsxml{$_}).'}'; } else { $rellatex{$_} = "\\rstsegment{\\refr{$_}}"; } $complete{$_} = 1; } else { $rellatex{$_} = $relsxml{$_}; $complete{$_} = 0; } } # there is no top node, so we need to expand nodes sucessively. while(1) { my $nodescompleted=0; # success count my $uncompletenodes=0; # number of non-complete nodes in the chart foreach(keys %relclass) { # if ($relclass{$_} eq 'segment') # { # my @parents = (); # getParents($relid, \@parents); # # } my $id = $_; $rt = $reltype{$_}; $rc = $relclass{$_}; #only un-complete nodes unless ($complete{$id}) { $parentcomplete=1; #print "before: $rellatex{$id} \n"; $rellatex{$id} =~ s/(<(satellite|nucleus|element)\s*(.*?)>)/&replaceDaughter($1,$2,$3);/isge; #print "after: $rellatex{$id} \n"; $rellatex{$id} =~ s/^\s+//sg; $rellatex{$id} =~ s/\s+$//sg; if ($parentcomplete) # is complete after conv. of XML to LaTeX code { if ($rc eq 'hypRelation') { $rellatex{$id} = '\dirrel'.$rellatex{$id} ; } elsif ($rc eq 'parRelation') { $rellatex{$id} = '\multirel{'.$rt.'}'.$rellatex{$id} ; } else { warn "unspecified relation type: tag used!"; $rellatex{$id} = '\multirel'.$rellatex{$id} ; } $complete{$id} = 1; $nodescompleted++; } $uncompletenodes++; } } print "uncomp: $uncompletenodes compl: $nodescompleted\n"; last if ($uncompletenodes == 0); if ($nodescompleted == 0) { warn "Not all nodes could be transformed to tree nodes -- circularities or undefined but referenced nodes in graph structure?\nPrinting largest existing analysis!"; last; } } #find biggest tree (with most nodes) my $maxnodes=0; my $largestnode = ''; foreach(keys %relclass) { if ($complete{$_}) { #count nodes my $nodes = 0; while($rellatex{$_} =~ /\\(dir|multi)rel/isg) { $nodes++; } if ($nodes > $maxnodes) { $maxnodes = $nodes; $largestnode = $_; } } } if($largestnode) { print $rellatex{$largestnode}; } else { warn "could not find largest node!"; foreach(keys %relclass) { if ($complete{$_}) { print $rellatex{$_}."\n\n"; } } } print "\n\n"; &pr_corpustext unless ($putSegmentsInTree); exit; } sub replaceDaughter($$$$$) { my $tag = $1; my $role = $2; my $attr = $3; # my $rc = $4; must be passed down as global vars # my $rt = $5; #print "replace daughter tag=$tag role=$role attr=$attr \n"; if ($attr =~ /id\s*=\s*\"(.*?)\"/is) { my $did = $1; # daughter id $did =~ s/_//g; if ($complete{$did}) { my $text = ""; if ($rc eq 'hypRelation') { if ($role eq 'satellite') { $text .= '{'.$rt.'}'; } else { $text .= '{}'; } } else { $text .= ''; } return $text.'{'.$rellatex{$did}.'}'; } } $parentcomplete=0; # parent is not complete because XML code returned return $tag; } # # sub getParents($$) # { # my \@parents = $_[1]; # my $id = $_[0]; # foreach(keys %relclass) # { # if ($relcontent =~ /id\s*=\s*\"$id\"/) # { # push @parents, $id; # } # } # # } sub pr_corpustext { print '\begin{rhetoricaltext}'."\n"; while($doc =~ /]+)(\"?)[^>]*>(.*?)<\/\s*segment\s*>/isg ) { my $segid = $2; my $seg = $4; #$segid =~ s/$docid\.?//; $segid =~ s/_//g; $seg = &conv_text($seg); if ($seg) { print "\\unit[$segid]{$seg}\n"; } } print '\source{'."$docid}".'\end{rhetoricaltext}'."\n"; return; } sub conv_text ($) { my $seg = $_[0]; $seg =~ s/\<\/?sign[^>]*>//isg; $seg =~ s/(Ä|\Ä)/\\\"\{A\}/sg; $seg =~ s/(Ö|\Ö)/\\\"\{O\}/sg; $seg =~ s/(Ü|\Ü)/\\\"\{U\}/sg; $seg =~ s/(ä|\ä)/\\\"\{a\}/sg; $seg =~ s/(ö|\ö)/\\\"\{o\}/sg; $seg =~ s/(ü|\ü)/\\\"\{u\}/sg; $seg =~ s/(ß|\ß)/\\\"\{ss\}/sg; $seg =~ s/\$/\\\$/sg; return $seg; }