loncom/build/xfml_parse.pl - view

File: [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.3: download - view: text, annotated - select for diffs
Wed Feb 20 00:21:42 2002 UTC (22 years, 3 months ago) by harris41
Branches: MAIN
CVS tags: HEAD

a cleaner leaner script (major rewrite)

#!/usr/bin/perl # YEAR=2002 # 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison # ### # Read in 2 XML file; first is the filter specification, the second # is the XML file to be filtered ############################################################################### ## ## ## ORGANIZATION OF THIS PERL SCRIPT ## ## 1. Notes ## ## 2. Read in filter file ## ## 3. Initialize and clear conditions ## ## 4. Run through and apply clauses ## ## ## ############################################################################### # ----------------------------------------------------------------------- Notes # # This is meant to parse files meeting the xfml document type. # See xfml.dtd. XFML=XML Filtering Markup Language. use HTML::TokeParser; use strict; unless (@ARGV) { print <<END; Incorrect invocation. Example usages: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml END } my %eh; # ---------------------------------------------- Read in filter file from @ARGV my $tofilter=shift @ARGV; open IN,"<$tofilter"; my @lines=<IN>; my $parsestring=join('',@lines); undef @lines; close IN; my $parser = HTML::TokeParser->new(\$parsestring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); # --------------------------------------------- initialize and clear conditions my %conditions; &cc; # Define handling methods for mode-dependent text rendering $parser->{textify}={ 'xfml' => \&format_xfml, 'when:name' => \&format_when_name, 'when:attribute' => \&format_when_attribute, 'when:cdata' => \&format_when_cdata, 'choice:exclude' => \&format_choice_exclude, 'clause' => \&format_clause, }; my $text; my $xfml; my $wloc=0; my %eha; # ----------------------------------------------- Run through and apply clauses my @lines2=<>; my $output=join('',@lines2); undef @lines2; my $lparser = HTML::TokeParser->new(\$output) or die('can\'t create TokeParser object'); $lparser->xml_mode('1'); my $parsestring2; while (my $token = $parser->get_tag('clause')) { $parsestring2=$output; $lparser = HTML::TokeParser->new(\$parsestring2); $lparser->xml_mode('1'); $output=''; &format_clause(@{$token}); $text = $parser->get_text('/clause'); $token = $parser->get_tag('/clause'); my $token=''; my $ttype=''; my $excludeflag=0; my $outcache=''; while ($token = $lparser->get_token()) { if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; } elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; } elsif ($token->[0] eq 'T') { if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S' or $ttype eq 'E') { $output.=$token->[1]; } else { $outcache.=$token->[1]; } } elsif ($token->[0] eq 'S') { if ($eh{$token->[1]} or $excludeflag==1) { $ttype=''; $excludeflag=1; $outcache.=$token->[4]; } else { $ttype='S'; $output.=$token->[4]; } if ($excludeflag==1) { } } elsif ($token->[0] eq 'E') { if ($eh{$token->[1]} and $excludeflag==1) { $ttype='E'; $excludeflag=0; $outcache.=$token->[2]; my $retval=&evalconditions($outcache); if (&evalconditions($outcache)) { $output.=$outcache; } else { $output.=''; } $outcache=''; } elsif ($excludeflag==1) { $ttype=''; $outcache.=$token->[2]; } else { $output.=$token->[2]; $ttype='E'; } } } &cc; } print $output; # -------------------------------------------------------------- evalconditions sub evalconditions { my ($parsetext)=@_; my $eparser = HTML::TokeParser->new(\$parsetext); unless (@{$conditions{'name'}} or @{$conditions{'attribute'}}) { return 0; } my $nameflag=0; my $cdataflag=0; my $matchflag=0; my $Ttoken=''; while (my $token = $eparser->get_token()) { if ($token->[0] eq 'S') { foreach my $name (@{$conditions{'name'}}) { my $flag=0; my $match=$name; if ($match=~/^\!/) { $match=~s/^\!//g; $flag=1; } $match=~s/^\///g; $match=~s/\/$//g; if ((!$flag and $token->[1]=~/$match/) or ($flag and $token->[1]!~/$match/)) { $nameflag=1; } } $Ttoken=''; } elsif ($token->[0] eq 'E') { foreach my $name (@{$conditions{'name'}}) { my $flag=0; my $match=$name; if ($match=~/^\!/) { $match=~s/^\!//g; $flag=1; } $match=~s/^\///g; $match=~s/\/$//g; if ((!$flag and $token->[1]=~/$match/) or ($flag and $token->[1]!~/$match/)) { foreach my $cdata (@{$conditions{'cdata'}}) { my $flag=0; my $match=$cdata; if ($match=~/^\!/) { $match=~s/^\!//g; $flag=1; } $match=~s/^\///g; $match=~s/\/$//g; if ((!$flag and $Ttoken=~/$match/) or ($flag and $Ttoken!~/$match/)) { $cdataflag=1; } } if (@{$conditions{'cdata'}}) { if ($cdataflag) { return 0; } } else { if ($nameflag) { return 0; } } $nameflag=0; } } } elsif ($token->[0] eq 'T') { if ($nameflag) { $Ttoken.=$token->[1]; } } } return 1; } # ------------------------------------------------------------ clear conditions sub cc { @{$conditions{'name'}}=(); pop @{$conditions{'name'}}; @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; @{$conditions{'value'}}=(); pop @{$conditions{'value'}}; @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; %eh=(1,1); delete $eh{1}; } # --------------------------------------- remove starting and ending whitespace sub trim { my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; } # --------------------------------------------------------- Format xfml section sub format_xfml { my (@tokeninfo)=@_; return ''; } # ------------------------------------------------------- Format clause section sub format_clause { my (@tokeninfo)=@_; return ''; } # ---------------------------------------------------- Format when:name section sub format_when_name { my (@tokeninfo)=@_; # $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'name'}},$att_match; my $text=&trim($parser->get_text('/when:name')); $parser->get_tag('/when:name'); # $wloc--; # &cc unless $wloc; return ''; } # --------------------------------------------------- Format when:cdata section sub format_when_cdata { my (@tokeninfo)=@_; $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'cdata'}},$att_match; my $text=&trim($parser->get_text('/when:cdata')); $parser->get_tag('/when:cdata'); $wloc--; # &cc unless $wloc; return ''; } # ----------------------------------------------- Format choice:exclude section sub format_choice_exclude { my (@tokeninfo)=@_; my $text=&trim($parser->get_text('/choice:exclude')); $parser->get_tag('/choice:exclude'); $eh{$tokeninfo[2]->{'nodename'}}++; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, [@{$conditions{'name'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, [@{$conditions{'attribute'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, [@{$conditions{'value'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, [@{$conditions{'cdata'}}]; return ''; }