loncom/build/xfml_parse.pl - view

File: [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.2: download - view: text, annotated - select for diffs
Fri Feb 1 10:56:41 2002 UTC (23 years, 5 months ago) by harris41
Branches: MAIN
CVS tags: stable_2002_spring, HEAD

a lot of cleaning up, debugging, and commenting

#!/usr/bin/perl # YEAR=2002 # 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison # ### # Read in 2 XML file; first is the filter specification, the second # is the XML file to be filtered ############################################################################### ## ## ## ORGANIZATION OF THIS PERL SCRIPT ## ## 1. Notes ## ## 2. Get command line arguments ## ## 3. First pass through (grab distribution-specific information) ## ## 4. Second pass through (parse out what is not necessary) ## ## 5. Third pass through (translate markup according to specified mode) ## ## 6. Functions (most all just format contents of different markup tags) ## ## 7. POD (plain old documentation, CPAN style) ## ## ## ############################################################################### # ----------------------------------------------------------------------- Notes # # I am using a multiple pass-through approach to parsing # the xfml file. This saves memory and makes sure the server # will never be overloaded. # # This is meant to parse files meeting the piml document type. # See xfml.dtd. XFML=XML Filtering Markup Language. use HTML::TokeParser; use strict; unless (@ARGV) { print <<END; Incorrect invocation. Example usages: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml END } my %eh; my %ih; my $tofilter=shift @ARGV; open IN,"<$tofilter"; my @lines=<IN>; my $parsestring=join('',@lines); undef @lines; close IN; my $parser = HTML::TokeParser->new(\$parsestring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); # Define handling methods for mode-dependent text rendering my %conditions; &cc; $parser->{textify}={ xfml => \&format_xfml, 'when:name' => \&format_when_name, 'when:attribute' => \&format_when_attribute, 'when:cdata' => \&format_when_cdata, 'choice:include' => \&format_choice_include, 'choice:exclude' => \&format_choice_exclude, }; my $text; my $xfml; my $wloc=0; my %eha; while (my $token = $parser->get_tag('xfml')) { &format_xfml(@{$token}); $text = $parser->get_text('/xfml'); $token = $parser->get_tag('/xfml'); } #open IN,"<$tofilter"; my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2; $parser = HTML::TokeParser->new(\$parsestring2) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); my $token; my $hloc=0; my %ts; my $tr; my $echild=0; my $exclude=0; my $excluden=0; my $excludea=0; my $et=0; my $cdata=''; my $excludenold=0; my $ign=0; while ($token = $parser->get_token()) { if ($token->[0] eq 'D') { print $token->[1]; } elsif ($token->[0] eq 'C') { print $token->[1]; } elsif ($token->[0] eq 'S') { $cdata=''; $hloc++; # if token can be excluded, then pretend it is until all conditions are # run (eha); then output during end tag processing # else, output # a token can be excluded when it is an eh key, or a child node of # an eh key if ($eh{$token->[1]}) { $echild=$token->[1]; } if ($echild) { # run through names for echild # then attributes and/or values and/or cdata my $name=$token->[1]; my @attributes=@{$token->[3]}; my %atthash=%{$token->[2]}; foreach my $namemlist (@{$eha{$echild}->{'name'}}) { foreach my $namematch (@{$namemlist}) { my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//; if ($name=~/$nm/) { $excludenold=$excluden; $excluden++; foreach my $attributemlist (@{$eha{$echild}->{'attribute'}}) { foreach my $attributematch (@{$attributemlist}) { my ($an,$am)= split(/\=/,$attributematch,2); $am=~s/^.//; $am=~s/.$//; if ($atthash{$an}) { if ($atthash{$an}=~/$am/) { $excludea++; } } } } } } } $tr.=$token->[4]; } else { print $token->[4]; } } elsif ($token->[0] eq 'E') { if ($echild) { $tr.=$token->[2]; if ($excluden) { my $i=0; CDATALOOP: foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) { $i++; my $j; foreach my $cdatamatch (@{$cdatamlist}) { $j++; # print "CDATA: $cdatamatch, $cdata\n"; my $cm=$cdatamatch; my $not=0; if ($cm=~/\!/) { $not=1; $cm=~s/^.//; } $cm=~s/^.//; $cm=~s/.$//; if ($not and $cdata=~/$cm/) { $ign=1; $exclude=0; } if ((!$not and $cdata!~/$cm/) or ($not and $cdata=~/$cm/)) { # nothing happens # $exclude=0; } elsif (($not and $cdata!~/$cm/) or (!$not and $cdata=~/$cm/)) { $exclude++ unless $ign; } } } } } if ($eh{$token->[1]}) { $ign=0; $echild=0; if (!$exclude and !$excludea) { print $tr; # print $token->[2]; $tr=''; } elsif ($exclude>0 or $excludea>0) { # print "EXCLUDING $token->[1] $exclude $excludea $excluden\n"; $exclude=0; $excluden=0; $excludea=0; $tr=''; } $exclude=0; $excluden=0; $excludea=0; } else { if ($echild) { # $tr.=$token->[2]; } else { print $token->[2]; $tr=''; } } $hloc--; } elsif ($token->[0] eq 'T') { if ($echild) { $tr.=$token->[1]; $cdata=$token->[1]; } else { print $token->[1]; $tr=''; } } } # ------------------------------------------------------------ clear conditions sub cc { @{$conditions{'name'}}=(); pop @{$conditions{'name'}}; @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; @{$conditions{'value'}}=(); pop @{$conditions{'value'}}; @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; } # --------------------------------------- remove starting and ending whitespace sub trim { my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; } # --------------------------------------------------------- Format xfml section sub format_xfml { my (@tokeninfo)=@_; return ''; } # ---------------------------------------------------- Format when:name section sub format_when_name { my (@tokeninfo)=@_; $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'name'}},$att_match; my $text=&trim($parser->get_text('/when:name')); $parser->get_tag('/when:name'); $wloc--; &cc unless $wloc; return ''; } # ----------------------------------------------- Format when:attribute section sub format_when_attribute { my (@tokeninfo)=@_; $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'attribute'}},$att_match; my $text=&trim($parser->get_text('/when:attribute')); $parser->get_tag('/when:attribute'); $wloc--; &cc unless $wloc; return ''; } # --------------------------------------------------- Format when:cdata section sub format_when_cdata { my (@tokeninfo)=@_; $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'cdata'}},$att_match; my $text=&trim($parser->get_text('/when:cdata')); $parser->get_tag('/when:cdata'); $wloc--; &cc unless $wloc; return ''; } # ----------------------------------------------- Format choice:include section sub format_choice_include { my (@tokeninfo)=@_; my $text=&trim($parser->get_text('/choice:include')); $parser->get_tag('/choice:include'); $ih{$tokeninfo[2]->{'match'}}++; return ''; } # ----------------------------------------------- Format choice:exclude section sub format_choice_exclude { my (@tokeninfo)=@_; my $text=&trim($parser->get_text('/choice:exclude')); $parser->get_tag('/choice:exclude'); $eh{$tokeninfo[2]->{'nodename'}}++; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, [@{$conditions{'name'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, [@{$conditions{'attribute'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, [@{$conditions{'value'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, [@{$conditions{'cdata'}}]; return ''; }