Annotation of loncom/build/xfml_parse.pl, revision 1.2

1.1       harris41    1: #!/usr/bin/perl
                      2: 
                      3: # YEAR=2002
1.2     ! harris41    4: # 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
        !             5: #
        !             6: ###
1.1       harris41    7: 
                      8: # Read in 2 XML file; first is the filter specification, the second
                      9: # is the XML file to be filtered
                     10: 
1.2     ! harris41   11: ###############################################################################
        !            12: ##                                                                           ##
        !            13: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
        !            14: ## 1. Notes                                                                  ##
        !            15: ## 2. Get command line arguments                                             ##
        !            16: ## 3. First pass through (grab distribution-specific information)            ##
        !            17: ## 4. Second pass through (parse out what is not necessary)                  ##
        !            18: ## 5. Third pass through (translate markup according to specified mode)      ##
        !            19: ## 6. Functions (most all just format contents of different markup tags)     ##
        !            20: ## 7. POD (plain old documentation, CPAN style)                              ##
        !            21: ##                                                                           ##
        !            22: ###############################################################################
        !            23: 
        !            24: # ----------------------------------------------------------------------- Notes
        !            25: #
        !            26: # I am using a multiple pass-through approach to parsing
        !            27: # the xfml file.  This saves memory and makes sure the server
        !            28: # will never be overloaded.
        !            29: #
        !            30: # This is meant to parse files meeting the piml document type.
        !            31: # See xfml.dtd.  XFML=XML Filtering Markup Language.
        !            32: 
1.1       harris41   33: use HTML::TokeParser;
                     34: use strict;
                     35: 
                     36: unless (@ARGV) {
                     37:     print <<END;
                     38: Incorrect invocation.
                     39: Example usages:
                     40: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
                     41: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
                     42: END
                     43: }
                     44: 
                     45: my %eh;
                     46: my %ih;
                     47: my $tofilter=shift @ARGV;
1.2     ! harris41   48: open IN,"<$tofilter";
        !            49: my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
        !            50: close IN;
1.1       harris41   51: my $parser = HTML::TokeParser->new(\$parsestring) or
                     52:     die('can\'t create TokeParser object');
                     53: $parser->xml_mode('1');
                     54: 
                     55: # Define handling methods for mode-dependent text rendering
                     56: 
                     57: my %conditions; &cc;
                     58: 
                     59: $parser->{textify}={
                     60:     xfml => \&format_xfml,
                     61:     'when:name' => \&format_when_name,
                     62:     'when:attribute' => \&format_when_attribute,
                     63:     'when:cdata' => \&format_when_cdata,
                     64:     'choice:include' => \&format_choice_include,
                     65:     'choice:exclude' => \&format_choice_exclude,
                     66:     };
                     67: 
                     68: my $text;
                     69: my $xfml;
                     70: my $wloc=0;
                     71: my %eha;
                     72: 
                     73: while (my $token = $parser->get_tag('xfml')) {
                     74:     &format_xfml(@{$token});
                     75:     $text = $parser->get_text('/xfml');
                     76:     $token = $parser->get_tag('/xfml');
                     77: }
                     78: 
1.2     ! harris41   79: #open IN,"<$tofilter";
        !            80: my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
1.1       harris41   81: $parser = HTML::TokeParser->new(\$parsestring2) or
                     82:     die('can\'t create TokeParser object');
                     83: $parser->xml_mode('1');
                     84: 
                     85: my $token;
                     86: my $hloc=0;
                     87: my %ts;
                     88: my $tr;
                     89: my $echild=0;
                     90: my $exclude=0;
                     91: my $excluden=0;
                     92: my $excludea=0;
                     93: my $et=0;
                     94: my $cdata='';
1.2     ! harris41   95: my $excludenold=0;
        !            96: my $ign=0;
        !            97: 
1.1       harris41   98: while ($token = $parser->get_token()) {
                     99:     if ($token->[0] eq 'D') {
                    100: 	print $token->[1];
                    101:     }
                    102:     elsif ($token->[0] eq 'C') {
                    103: 	print $token->[1];
                    104:     }
                    105:     elsif ($token->[0] eq 'S') {
                    106: 	$cdata='';
                    107: 	$hloc++;
                    108: # if token can be excluded, then pretend it is until all conditions are
                    109: # run (eha); then output during end tag processing
                    110: # else, output
                    111: 
                    112: # a token can be excluded when it is an eh key, or a child node of
                    113: # an eh key
                    114: 
                    115: 	if ($eh{$token->[1]}) {
                    116: 	    $echild=$token->[1];
                    117: 	}
                    118: 	if ($echild) {
                    119: 	    # run through names for echild
                    120: 	    # then attributes and/or values and/or cdata
                    121: 	    my $name=$token->[1];
                    122: 	    my @attributes=@{$token->[3]};
                    123: 	    my %atthash=%{$token->[2]};
                    124: 	    foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
                    125: 		foreach my $namematch (@{$namemlist}) {
                    126: 		    my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
                    127: 		    if ($name=~/$nm/) {
1.2     ! harris41  128: 			$excludenold=$excluden;
1.1       harris41  129: 			$excluden++;
                    130: 			foreach my $attributemlist
                    131: 			    (@{$eha{$echild}->{'attribute'}}) {
                    132: 				foreach my $attributematch 
                    133: 				    (@{$attributemlist}) {
                    134: 					my ($an,$am)=
                    135: 					    split(/\=/,$attributematch,2);
                    136: 					$am=~s/^.//;
                    137: 					$am=~s/.$//;
                    138: 					if ($atthash{$an}) {
                    139: 					    if ($atthash{$an}=~/$am/) {
                    140: 						$excludea++;
                    141: 					    }
                    142: 					}
                    143: 				    }
                    144: 			    }
                    145: 		    }
                    146: 		}
                    147: 	    }
                    148: 	    $tr.=$token->[4];
                    149: 	}
                    150: 	else {
                    151: 	    print $token->[4];
                    152: 	}
                    153:     }
                    154:     elsif ($token->[0] eq 'E') {
                    155: 	if ($echild) {
                    156: 	    $tr.=$token->[2];
                    157: 	    if ($excluden) {
1.2     ! harris41  158: 		my $i=0;
        !           159: 		CDATALOOP:
1.1       harris41  160: 		foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
1.2     ! harris41  161: 		    $i++;
        !           162: 		    my $j;
1.1       harris41  163: 		    foreach my $cdatamatch (@{$cdatamlist}) {
1.2     ! harris41  164: 			$j++;
1.1       harris41  165: #				print "CDATA: $cdatamatch, $cdata\n";
                    166: 			my $cm=$cdatamatch;
                    167: 			my $not=0;
                    168: 			if ($cm=~/\!/) {
                    169: 			    $not=1;
                    170: 			    $cm=~s/^.//;
                    171: 			}
                    172: 			$cm=~s/^.//; $cm=~s/.$//;
1.2     ! harris41  173: 			if ($not and $cdata=~/$cm/) {
        !           174: 			    $ign=1; $exclude=0;
        !           175: 			}
1.1       harris41  176: 			if ((!$not and $cdata!~/$cm/)
                    177: 			    or ($not and $cdata=~/$cm/)) {
1.2     ! harris41  178: #				nothing happens
        !           179: #			    $exclude=0;
1.1       harris41  180: 			}
                    181: 			elsif (($not and $cdata!~/$cm/)
                    182: 			       or (!$not and $cdata=~/$cm/)) {
1.2     ! harris41  183: 			    $exclude++ unless $ign;
1.1       harris41  184: 			}
                    185: 		    }
                    186: 		}
                    187: 	    }
                    188: 	}
                    189: 	if ($eh{$token->[1]}) {
1.2     ! harris41  190: 	    $ign=0;
1.1       harris41  191: 	    $echild=0;
                    192: 	    if (!$exclude and !$excludea) {
                    193: 		print $tr;
                    194: #		print $token->[2];
                    195: 		$tr='';
                    196: 	    }
                    197: 	    elsif ($exclude>0 or $excludea>0) {
1.2     ! harris41  198: #		print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
1.1       harris41  199: 		$exclude=0; $excluden=0; $excludea=0;
                    200: 		$tr='';
                    201: 	    }
                    202: 	    $exclude=0; $excluden=0; $excludea=0;
                    203: 	}
                    204: 	else {
                    205: 	    if ($echild) {
                    206: #		$tr.=$token->[2];
                    207: 	    }
                    208: 	    else {
                    209: 		print $token->[2];
                    210: 		$tr='';
                    211: 	    }
                    212: 	}
                    213: 	$hloc--;
                    214:     }
                    215:     elsif ($token->[0] eq 'T') {
                    216: 	if ($echild) {
                    217: 	    $tr.=$token->[1];
                    218: 	    $cdata=$token->[1];
                    219: 	}
                    220: 	else {
                    221: 	    print $token->[1];
                    222: 	    $tr='';
                    223: 	}
                    224:     }
                    225: }
                    226: 
                    227: # ------------------------------------------------------------ clear conditions
                    228: sub cc {
                    229:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
                    230:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
                    231:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
                    232:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
                    233: }
                    234: 
                    235: # --------------------------------------- remove starting and ending whitespace
                    236: sub trim {
                    237:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
                    238: }
                    239: 
1.2     ! harris41  240: 
        !           241: 
1.1       harris41  242: # --------------------------------------------------------- Format xfml section
                    243: sub format_xfml {
                    244:     my (@tokeninfo)=@_;
                    245:     return '';
                    246: }
                    247: 
                    248: # ---------------------------------------------------- Format when:name section
                    249: sub format_when_name {
                    250:     my (@tokeninfo)=@_;
                    251:     $wloc++;
                    252:     my $att_match=$tokeninfo[2]->{'match'};
                    253:     push @{$conditions{'name'}},$att_match;
                    254:     my $text=&trim($parser->get_text('/when:name'));
                    255:     $parser->get_tag('/when:name');
                    256:     $wloc--;
                    257:     &cc unless $wloc;
                    258:     return '';
                    259: }
                    260: 
                    261: # ----------------------------------------------- Format when:attribute section
                    262: sub format_when_attribute {
                    263:     my (@tokeninfo)=@_;
                    264:     $wloc++;
                    265:     my $att_match=$tokeninfo[2]->{'match'};
                    266:     push @{$conditions{'attribute'}},$att_match;
                    267:     my $text=&trim($parser->get_text('/when:attribute'));
                    268:     $parser->get_tag('/when:attribute');
                    269:     $wloc--;
                    270:     &cc unless $wloc;
                    271:     return '';
                    272: }
                    273: 
                    274: # --------------------------------------------------- Format when:cdata section
                    275: sub format_when_cdata {
                    276:     my (@tokeninfo)=@_;
                    277:     $wloc++;
                    278:     my $att_match=$tokeninfo[2]->{'match'};
                    279:     push @{$conditions{'cdata'}},$att_match;
                    280:     my $text=&trim($parser->get_text('/when:cdata'));
                    281:     $parser->get_tag('/when:cdata');
                    282:     $wloc--;
                    283:     &cc unless $wloc;
                    284:     return '';
                    285: }
                    286: 
                    287: # ----------------------------------------------- Format choice:include section
                    288: sub format_choice_include {
                    289:     my (@tokeninfo)=@_;
                    290:     my $text=&trim($parser->get_text('/choice:include'));
                    291:     $parser->get_tag('/choice:include');
                    292:     $ih{$tokeninfo[2]->{'match'}}++;
                    293:     return '';
                    294: }
                    295: 
                    296: # ----------------------------------------------- Format choice:exclude section
                    297: sub format_choice_exclude {
                    298:     my (@tokeninfo)=@_;
                    299:     my $text=&trim($parser->get_text('/choice:exclude'));
                    300:     $parser->get_tag('/choice:exclude');
                    301:     $eh{$tokeninfo[2]->{'nodename'}}++;
                    302:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
                    303:          [@{$conditions{'name'}}];
                    304:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
                    305:          [@{$conditions{'attribute'}}];
                    306:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
                    307:          [@{$conditions{'value'}}];
                    308:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
                    309:          [@{$conditions{'cdata'}}];
                    310:     return '';
                    311: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>