Annotation of loncom/build/xfml_parse.pl, revision 1.6

1.1       harris41    1: #!/usr/bin/perl
                      2: 
1.4       harris41    3: # -------------------------------------------------------- Documentation notice
                      4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
                      5: # documentation internalized in this program.
                      6: 
                      7: # --------------------------------------------------------- License Information
                      8: # The LearningOnline Network with CAPA
                      9: # piml_parse.pl - Linux Packaging Markup Language parser
                     10: #
1.6     ! matthew    11: # $Id: xfml_parse.pl,v 1.5 2002/04/08 12:51:03 harris41 Exp $
1.4       harris41   12: #
                     13: # Written by Scott Harrison, codeharrison@yahoo.com
                     14: #
                     15: # Copyright Michigan State University Board of Trustees
                     16: #
                     17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                     18: #
                     19: # LON-CAPA is free software; you can redistribute it and/or modify
                     20: # it under the terms of the GNU General Public License as published by
                     21: # the Free Software Foundation; either version 2 of the License, or
                     22: # (at your option) any later version.
                     23: #
                     24: # LON-CAPA is distributed in the hope that it will be useful,
                     25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     27: # GNU General Public License for more details.
                     28: #
                     29: # You should have received a copy of the GNU General Public License
                     30: # along with LON-CAPA; if not, write to the Free Software
                     31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     32: #
                     33: # /home/httpd/html/adm/gpl.txt
                     34: #
                     35: # http://www.lon-capa.org/
                     36: #
1.1       harris41   37: # YEAR=2002
1.4       harris41   38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
1.2       harris41   39: #
                     40: ###
1.1       harris41   41: 
                     42: # Read in 2 XML file; first is the filter specification, the second
                     43: # is the XML file to be filtered
                     44: 
1.2       harris41   45: ###############################################################################
                     46: ##                                                                           ##
                     47: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
                     48: ## 1. Notes                                                                  ##
1.3       harris41   49: ## 2. Read in filter file                                                    ##
                     50: ## 3. Initialize and clear conditions                                        ##
                     51: ## 4. Run through and apply clauses                                          ##
1.2       harris41   52: ##                                                                           ##
                     53: ###############################################################################
                     54: 
                     55: # ----------------------------------------------------------------------- Notes
                     56: #
1.3       harris41   57: # This is meant to parse files meeting the xfml document type.
1.2       harris41   58: # See xfml.dtd.  XFML=XML Filtering Markup Language.
                     59: 
1.1       harris41   60: use HTML::TokeParser;
                     61: use strict;
                     62: 
                     63: unless (@ARGV) {
1.4       harris41   64:     print(<<END);
1.1       harris41   65: Incorrect invocation.
                     66: Example usages:
                     67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
                     68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
                     69: END
                     70: }
                     71: 
                     72: my %eh;
1.3       harris41   73: 
                     74: # ---------------------------------------------- Read in filter file from @ARGV
1.1       harris41   75: my $tofilter=shift @ARGV;
1.4       harris41   76: open(IN,"<$tofilter"); my @lines=<IN>;
1.3       harris41   77: my $parsestring=join('',@lines); undef @lines; close IN;
1.1       harris41   78: my $parser = HTML::TokeParser->new(\$parsestring) or
                     79:     die('can\'t create TokeParser object');
                     80: $parser->xml_mode('1');
                     81: 
1.3       harris41   82: # --------------------------------------------- initialize and clear conditions
1.1       harris41   83: my %conditions; &cc;
                     84: 
1.3       harris41   85: # Define handling methods for mode-dependent text rendering
1.1       harris41   86: $parser->{textify}={
1.3       harris41   87:     'xfml' => \&format_xfml,
1.1       harris41   88:     'when:name' => \&format_when_name,
                     89:     'when:attribute' => \&format_when_attribute,
                     90:     'when:cdata' => \&format_when_cdata,
                     91:     'choice:exclude' => \&format_choice_exclude,
1.3       harris41   92:     'clause' => \&format_clause,
1.1       harris41   93:     };
                     94: 
                     95: my $text;
                     96: my $xfml;
                     97: my $wloc=0;
                     98: my %eha;
                     99: 
1.3       harris41  100: # ----------------------------------------------- Run through and apply clauses
                    101: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
                    102: my $lparser = HTML::TokeParser->new(\$output) or
                    103:     die('can\'t create TokeParser object');
                    104: $lparser->xml_mode('1');
                    105: my $parsestring2;
                    106: while (my $token = $parser->get_tag('clause')) {
                    107:     $parsestring2=$output;
                    108:     $lparser = HTML::TokeParser->new(\$parsestring2);
                    109:     $lparser->xml_mode('1');
                    110:     $output='';
                    111:     &format_clause(@{$token});
                    112:     $text = $parser->get_text('/clause');
                    113:     $token = $parser->get_tag('/clause');
                    114: 
                    115:     my $token='';
                    116:     my $ttype='';
                    117:     my $excludeflag=0;
                    118:     my $outcache='';
                    119:     while ($token = $lparser->get_token()) {
                    120: 	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
                    121: 	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
                    122: 	elsif ($token->[0] eq 'T') {
                    123: 	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
                    124: 		or $ttype eq 'E') {
                    125: 		$output.=$token->[1];
                    126: 	    }
                    127: 	    else {
                    128: 		$outcache.=$token->[1];
                    129: 	    }
                    130: 	}
                    131: 	elsif ($token->[0] eq 'S') {
                    132: 	    if ($eh{$token->[1]} or $excludeflag==1) {
                    133: 		$ttype='';
                    134: 		$excludeflag=1;
                    135: 		$outcache.=$token->[4];
                    136: 	    }
                    137: 	    else {
                    138: 		$ttype='S';
                    139: 		$output.=$token->[4];
                    140: 	    }
                    141: 	    if ($excludeflag==1) {
                    142: 		
                    143: 	    }
                    144: 	}
                    145: 	elsif ($token->[0] eq 'E') {
                    146: 	    if ($eh{$token->[1]} and $excludeflag==1) {
                    147: 		$ttype='E';
                    148: 		$excludeflag=0;
                    149: 		$outcache.=$token->[2];
                    150: 		if (&evalconditions($outcache)) {
1.6     ! matthew   151: 		    $output.='<!-- FILTERED OUT -->';
1.3       harris41  152: 		}
                    153: 		else {
1.6     ! matthew   154: 		    $output.=$outcache;
1.3       harris41  155: 		}
                    156: 		$outcache='';
                    157: 	    }
                    158: 	    elsif ($excludeflag==1) {
                    159: 		$ttype='';
                    160: 		$outcache.=$token->[2];
                    161: 	    }
                    162: 	    else {
                    163: 		$output.=$token->[2];
                    164: 		$ttype='E';
                    165: 	    }
                    166: 	}
                    167:     }
                    168:     &cc;
1.1       harris41  169: }
1.3       harris41  170: print $output;
1.1       harris41  171: 
1.3       harris41  172: # -------------------------------------------------------------- evalconditions
                    173: sub evalconditions {
                    174:     my ($parsetext)=@_;
                    175:     my $eparser = HTML::TokeParser->new(\$parsetext);
                    176:     unless (@{$conditions{'name'}} or
                    177: 	    @{$conditions{'attribute'}}) {
1.6     ! matthew   178: 	return 1;
1.1       harris41  179:     }
1.3       harris41  180:     my $nameflag=0;
                    181:     my $cdataflag=0;
                    182:     my $matchflag=0;
                    183:     my $Ttoken='';
                    184:     while (my $token = $eparser->get_token()) {
                    185: 	if ($token->[0] eq 'S') {
                    186: 	    foreach my $name (@{$conditions{'name'}}) {
                    187: 		my $flag=0;
                    188: 		my $match=$name;
                    189: 		if ($match=~/^\!/) {
                    190: 		    $match=~s/^\!//g;
                    191: 		    $flag=1;
                    192: 		}
                    193: 		$match=~s/^\///g;
                    194: 		$match=~s/\/$//g;
                    195: 		if ((!$flag and $token->[1]=~/$match/) or
                    196: 		    ($flag and $token->[1]!~/$match/)) {
                    197: 		    $nameflag=1;
1.1       harris41  198: 		}
                    199: 	    }
1.3       harris41  200: 	    $Ttoken='';
1.1       harris41  201: 	}
1.3       harris41  202: 	elsif ($token->[0] eq 'E') {
                    203: 	    foreach my $name (@{$conditions{'name'}}) {
                    204: 		my $flag=0;
                    205: 		my $match=$name;
                    206: 		if ($match=~/^\!/) {
                    207: 		    $match=~s/^\!//g;
                    208: 		    $flag=1;
                    209: 		}
                    210: 		$match=~s/^\///g;
                    211: 		$match=~s/\/$//g;
                    212: 		if ((!$flag and $token->[1]=~/$match/) or
                    213: 		    ($flag and $token->[1]!~/$match/)) {
                    214: 		    foreach my $cdata (@{$conditions{'cdata'}}) {
                    215: 			my $flag=0;
                    216: 			my $match=$cdata;
                    217: 			if ($match=~/^\!/) {
                    218: 			    $match=~s/^\!//g;
                    219: 			    $flag=1;
1.1       harris41  220: 			}
1.3       harris41  221: 			$match=~s/^\///g;
                    222: 			$match=~s/\/$//g;
                    223: 			if ((!$flag and $Ttoken=~/$match/) or
                    224: 			    ($flag and $Ttoken!~/$match/)) {
                    225: 			    $cdataflag=1;
1.2       harris41  226: 			}
1.3       harris41  227: 		    }
                    228: 		    if (@{$conditions{'cdata'}}) {
                    229: 			if ($cdataflag) {
                    230: 			    return 0;
1.1       harris41  231: 			}
1.3       harris41  232: 		    }
                    233: 		    else {
                    234: 			if ($nameflag) {
                    235: 			    return 0;
1.1       harris41  236: 			}
                    237: 		    }
1.3       harris41  238: 		    $nameflag=0;
1.1       harris41  239: 		}
                    240: 	    }
                    241: 	}
1.3       harris41  242: 	elsif ($token->[0] eq 'T') {
                    243: 	    if ($nameflag) {
                    244: 		$Ttoken.=$token->[1];
1.1       harris41  245: 	    }
                    246: 	}
                    247:     }
1.3       harris41  248:     return 1;
1.1       harris41  249: }
                    250: 
                    251: # ------------------------------------------------------------ clear conditions
                    252: sub cc {
                    253:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
                    254:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
                    255:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
                    256:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
1.3       harris41  257:     %eh=(1,1); delete $eh{1};
1.1       harris41  258: }
                    259: 
                    260: # --------------------------------------- remove starting and ending whitespace
                    261: sub trim {
                    262:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
                    263: }
                    264: 
1.2       harris41  265: 
                    266: 
1.3       harris41  267: 
1.1       harris41  268: # --------------------------------------------------------- Format xfml section
                    269: sub format_xfml {
                    270:     my (@tokeninfo)=@_;
                    271:     return '';
                    272: }
                    273: 
1.3       harris41  274: # ------------------------------------------------------- Format clause section
                    275: sub format_clause {
                    276:     my (@tokeninfo)=@_;
                    277:     return '';
                    278: }
                    279: 
1.1       harris41  280: # ---------------------------------------------------- Format when:name section
                    281: sub format_when_name {
                    282:     my (@tokeninfo)=@_;
1.3       harris41  283: #    $wloc++;
1.1       harris41  284:     my $att_match=$tokeninfo[2]->{'match'};
                    285:     push @{$conditions{'name'}},$att_match;
                    286:     my $text=&trim($parser->get_text('/when:name'));
                    287:     $parser->get_tag('/when:name');
1.3       harris41  288: #    $wloc--;
                    289: #    &cc unless $wloc;
1.1       harris41  290:     return '';
                    291: }
                    292: 
                    293: # --------------------------------------------------- Format when:cdata section
                    294: sub format_when_cdata {
                    295:     my (@tokeninfo)=@_;
                    296:     $wloc++;
                    297:     my $att_match=$tokeninfo[2]->{'match'};
                    298:     push @{$conditions{'cdata'}},$att_match;
                    299:     my $text=&trim($parser->get_text('/when:cdata'));
                    300:     $parser->get_tag('/when:cdata');
                    301:     $wloc--;
1.3       harris41  302: #    &cc unless $wloc;
1.1       harris41  303:     return '';
                    304: }
                    305: 
                    306: # ----------------------------------------------- Format choice:exclude section
                    307: sub format_choice_exclude {
                    308:     my (@tokeninfo)=@_;
                    309:     my $text=&trim($parser->get_text('/choice:exclude'));
                    310:     $parser->get_tag('/choice:exclude');
                    311:     $eh{$tokeninfo[2]->{'nodename'}}++;
                    312:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
                    313:          [@{$conditions{'name'}}];
                    314:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
                    315:          [@{$conditions{'attribute'}}];
                    316:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
                    317:          [@{$conditions{'value'}}];
                    318:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
                    319:          [@{$conditions{'cdata'}}];
                    320:     return '';
                    321: }
1.4       harris41  322: 
                    323: # ----------------------------------- POD (plain old documentation, CPAN style)
                    324: 
                    325: =pod
                    326: 
                    327: =head1 NAME
                    328: 
1.5       harris41  329: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
1.4       harris41  330: 
                    331: =head1 SYNOPSIS
                    332: 
                    333: Usage is for lpml file to come in through standard input.
                    334: 
                    335: =over 4
                    336: 
                    337: =item * 
                    338: 
                    339: 1st argument is name of xfml file.
                    340: 
                    341: =back
                    342: 
                    343: Example:
                    344: 
                    345:  cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
                    346: 
                    347: or
                    348: 
                    349:  perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
                    350: 
                    351: =head1 DESCRIPTION
                    352: 
                    353: I am using a multiple pass-through approach to parsing
                    354: the xfml file.  This saves memory and makes sure the server
                    355: will never be overloaded.
                    356: 
                    357: =head1 README
                    358: 
                    359: I am using a multiple pass-through approach to parsing
                    360: the xfml file.  This saves memory and makes sure the server
                    361: will never be overloaded.
                    362: 
                    363: =head1 PREREQUISITES
                    364: 
                    365: HTML::TokeParser
                    366: 
                    367: =head1 COREQUISITES
                    368: 
                    369: =head1 OSNAMES
                    370: 
                    371: linux
                    372: 
                    373: =head1 SCRIPT CATEGORIES
                    374: 
                    375: Packaging/Administrative
                    376: 
                    377: =head1 AUTHOR
                    378: 
                    379:  Scott Harrison
                    380:  codeharrison@yahoo.com
                    381: 
                    382: Please let me know how/if you are finding this script useful and
                    383: any/all suggestions.  -Scott
                    384: 
                    385: =cut
                    386: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>