loncom/build/xfml_parse.pl - view

File: [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.1: download - view: text, annotated - select for diffs
Tue Jan 29 10:42:42 2002 UTC (23 years, 5 months ago) by harris41
Branches: MAIN
CVS tags: HEAD

parses xfml; there are some current limitations here (like I don't
match contextual conditions and am not yet supporting choice:include)

#!/usr/bin/perl # YEAR=2002 # 1/26,1/27,1/28 - Scott Harrison # Read in 2 XML file; first is the filter specification, the second # is the XML file to be filtered use HTML::TokeParser; use strict; unless (@ARGV) { print <<END; Incorrect invocation. Example usages: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml END } my %eh; my %ih; my $tofilter=shift @ARGV; my @lines=<>; my $parsestring=join('',@lines); undef @lines; my $parser = HTML::TokeParser->new(\$parsestring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); # Define handling methods for mode-dependent text rendering my %conditions; &cc; $parser->{textify}={ xfml => \&format_xfml, 'when:name' => \&format_when_name, 'when:attribute' => \&format_when_attribute, 'when:cdata' => \&format_when_cdata, 'choice:include' => \&format_choice_include, 'choice:exclude' => \&format_choice_exclude, }; my $text; my $xfml; my $wloc=0; my %eha; while (my $token = $parser->get_tag('xfml')) { &format_xfml(@{$token}); $text = $parser->get_text('/xfml'); # print $xfml; # print $text; $token = $parser->get_tag('/xfml'); } open IN,"<$tofilter"; my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2; $parser = HTML::TokeParser->new(\$parsestring2) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); my $token; my $hloc=0; my %ts; my $tr; my $echild=0; my $exclude=0; my $excluden=0; my $excludea=0; my $et=0; my $cdata=''; while ($token = $parser->get_token()) { # from HTML::TokeParser documentation: # ["S", $tag, %$attr, @$attrseq, $text] # ["E", $tag, $text] # ["T", $text, $is_data] # ["C", $text] # ["D", $text] # ["PI", $token0, $text] # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, # @{$conditions{'name'}}; # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, # @{$conditions{'attribute'}}; # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, # @{$conditions{'value'}}; # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, # @{$conditions{'cdata'}}; if ($token->[0] eq 'D') { print $token->[1]; } elsif ($token->[0] eq 'C') { print $token->[1]; } elsif ($token->[0] eq 'S') { $cdata=''; $hloc++; # if token can be excluded, then pretend it is until all conditions are # run (eha); then output during end tag processing # else, output # a token can be excluded when it is an eh key, or a child node of # an eh key if ($eh{$token->[1]}) { $echild=$token->[1]; # print "ECHILD=$echild\n"; } if ($echild) { # run through names for echild # then attributes and/or values and/or cdata my $name=$token->[1]; my @attributes=@{$token->[3]}; my %atthash=%{$token->[2]}; foreach my $namemlist (@{$eha{$echild}->{'name'}}) { foreach my $namematch (@{$namemlist}) { my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//; if ($name=~/$nm/) { # print "NMATCH: $nm ($name)\n"; $excluden++; foreach my $attributemlist (@{$eha{$echild}->{'attribute'}}) { foreach my $attributematch (@{$attributemlist}) { my ($an,$am)= split(/\=/,$attributematch,2); $am=~s/^.//; $am=~s/.$//; # print 'AM:'."($an,$am)\t"; # print 'ATT:'.join(',',%atthash)."\n"; if ($atthash{$an}) { if ($atthash{$an}=~/$am/) { $excludea++; # print "AMATCH: $am (". # join(',', # @attributes) # ."\n"; } } } } } } } $tr.=$token->[4]; } else { print $token->[4]; } } elsif ($token->[0] eq 'E') { if ($echild) { $tr.=$token->[2]; if ($excluden) { foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) { foreach my $cdatamatch (@{$cdatamlist}) { # print "CDATA: $cdatamatch, $cdata\n"; my $cm=$cdatamatch; my $not=0; if ($cm=~/\!/) { $not=1; $cm=~s/^.//; } $cm=~s/^.//; $cm=~s/.$//; if ((!$not and $cdata!~/$cm/) or ($not and $cdata=~/$cm/)) { # print "CMISMATCH: $cm ($cdata)\n"; } elsif (($not and $cdata!~/$cm/) or (!$not and $cdata=~/$cm/)) { $exclude++; } } } } } if ($eh{$token->[1]}) { $echild=0; if (!$exclude and !$excludea) { print $tr; # print $token->[2]; $tr=''; } elsif ($exclude>0 or $excludea>0) { # print "EXCLUDING $token->[1] $excludea $excluden\n"; $exclude=0; $excluden=0; $excludea=0; $tr=''; } $exclude=0; $excluden=0; $excludea=0; } else { if ($echild) { # $tr.=$token->[2]; } else { print $token->[2]; $tr=''; } } $hloc--; } elsif ($token->[0] eq 'T') { if ($echild) { $tr.=$token->[1]; $cdata=$token->[1]; } else { print $token->[1]; $tr=''; } } } # ------------------------------------------------------------ clear conditions sub cc { @{$conditions{'name'}}=(); pop @{$conditions{'name'}}; @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; @{$conditions{'value'}}=(); pop @{$conditions{'value'}}; @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; } # --------------------------------------- remove starting and ending whitespace sub trim { my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; } # --------------------------------------------------------- Format xfml section sub format_xfml { my (@tokeninfo)=@_; return ''; } # ---------------------------------------------------- Format when:name section sub format_when_name { my (@tokeninfo)=@_; $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'name'}},$att_match; my $text=&trim($parser->get_text('/when:name')); $parser->get_tag('/when:name'); # print 'Name Matching...'.$att_match; $wloc--; &cc unless $wloc; return ''; } # ----------------------------------------------- Format when:attribute section sub format_when_attribute { my (@tokeninfo)=@_; $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'attribute'}},$att_match; my $text=&trim($parser->get_text('/when:attribute')); $parser->get_tag('/when:attribute'); # print 'Attribute Matching...'.$att_match; $wloc--; &cc unless $wloc; return ''; } # --------------------------------------------------- Format when:cdata section sub format_when_cdata { my (@tokeninfo)=@_; $wloc++; my $att_match=$tokeninfo[2]->{'match'}; # print 'Cdata Matching...'.$att_match; push @{$conditions{'cdata'}},$att_match; my $text=&trim($parser->get_text('/when:cdata')); $parser->get_tag('/when:cdata'); $wloc--; &cc unless $wloc; return ''; } # ----------------------------------------------- Format choice:include section sub format_choice_include { my (@tokeninfo)=@_; my $text=&trim($parser->get_text('/choice:include')); $parser->get_tag('/choice:include'); $ih{$tokeninfo[2]->{'match'}}++; return ''; } # ----------------------------------------------- Format choice:exclude section sub format_choice_exclude { my (@tokeninfo)=@_; my $text=&trim($parser->get_text('/choice:exclude')); $parser->get_tag('/choice:exclude'); $eh{$tokeninfo[2]->{'nodename'}}++; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, [@{$conditions{'name'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, [@{$conditions{'attribute'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, [@{$conditions{'value'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, [@{$conditions{'cdata'}}]; return ''; }