loncom/build/xfml_parse.pl - diff

Return to xfml_parse.pl CVS log

Up to [LON-CAPA] / loncom / build

Diff for /loncom/build/xfml_parse.pl between versions 1.2 and 1.3

-version 1.2, 2002/02/01 10:56:41
+version 1.3, 2002/02/20 00:21:42
  Line 12
  ##                                                                           ##
  ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
  ## 1. Notes                                                                  ##
- ## 2. Get command line arguments                                             ##
+ ## 2. Read in filter file                                                    ##
- ## 3. First pass through (grab distribution-specific information)            ##
+ ## 3. Initialize and clear conditions                                        ##
- ## 4. Second pass through (parse out what is not necessary)                  ##
+ ## 4. Run through and apply clauses                                          ##
- ## 5. Third pass through (translate markup according to specified mode)      ##
- ## 6. Functions (most all just format contents of different markup tags)     ##
- ## 7. POD (plain old documentation, CPAN style)                              ##
  ##                                                                           ##
  ###############################################################################
  # ----------------------------------------------------------------------- Notes
  #
- # I am using a multiple pass-through approach to parsing
+ # This is meant to parse files meeting the xfml document type.
- # the xfml file.  This saves memory and makes sure the server
- # will never be overloaded.
- #
- # This is meant to parse files meeting the piml document type.
  # See xfml.dtd.  XFML=XML Filtering Markup Language.
  use HTML::TokeParser;
- Line 43  END
+ Line 36  END
  }
  my %eh;
- my %ih;
+ # ---------------------------------------------- Read in filter file from @ARGV
  my $tofilter=shift @ARGV;
- open IN,"<$tofilter";
+ open IN,"<$tofilter"; my @lines=<IN>;
- my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
+ my $parsestring=join('',@lines); undef @lines; close IN;
- close IN;
  my $parser = HTML::TokeParser->new(\$parsestring) or
      die('can\'t create TokeParser object');
  $parser->xml_mode('1');
- # Define handling methods for mode-dependent text rendering
+ # --------------------------------------------- initialize and clear conditions
  my %conditions; &cc;
+ # Define handling methods for mode-dependent text rendering
  $parser->{textify}={
-     xfml => \&format_xfml,
+     'xfml' => \&format_xfml,
      'when:name' => \&format_when_name,
      'when:attribute' => \&format_when_attribute,
      'when:cdata' => \&format_when_cdata,
-     'choice:include' => \&format_choice_include,
      'choice:exclude' => \&format_choice_exclude,
+     'clause' => \&format_clause,
      };
  my $text;
- Line 70  my $xfml;
+ Line 63  my $xfml;
  my $wloc=0;
  my %eha;
- while (my $token = $parser->get_tag('xfml')) {
+ # ----------------------------------------------- Run through and apply clauses
-     &format_xfml(@{$token});
+ my @lines2=<>; my $output=join('',@lines2); undef @lines2;
-     $text = $parser->get_text('/xfml');
+ my $lparser = HTML::TokeParser->new(\$output) or
-     $token = $parser->get_tag('/xfml');
- }
- #open IN,"<$tofilter";
- my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
- $parser = HTML::TokeParser->new(\$parsestring2) or
      die('can\'t create TokeParser object');
- $parser->xml_mode('1');
+ $lparser->xml_mode('1');
+ my $parsestring2;
- my $token;
+ while (my $token = $parser->get_tag('clause')) {
- my $hloc=0;
+     $parsestring2=$output;
- my %ts;
+     $lparser = HTML::TokeParser->new(\$parsestring2);
- my $tr;
+     $lparser->xml_mode('1');
- my $echild=0;
+     $output='';
- my $exclude=0;
+     &format_clause(@{$token});
- my $excluden=0;
+     $text = $parser->get_text('/clause');
- my $excludea=0;
+     $token = $parser->get_tag('/clause');
- my $et=0;
- my $cdata='';
+     my $token='';
- my $excludenold=0;
+     my $ttype='';
- my $ign=0;
+     my $excludeflag=0;
+     my $outcache='';
- while ($token = $parser->get_token()) {
+     while ($token = $lparser->get_token()) {
-     if ($token->[0] eq 'D') {
+ 	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
- 	print $token->[1];
+ 	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
-     }
+ 	elsif ($token->[0] eq 'T') {
-     elsif ($token->[0] eq 'C') {
+ 	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
- 	print $token->[1];
+ 		or $ttype eq 'E') {
-     }
+ 		$output.=$token->[1];
-     elsif ($token->[0] eq 'S') {
+ 	    }
- 	$cdata='';
+ 	    else {
- 	$hloc++;
+ 		$outcache.=$token->[1];
- # if token can be excluded, then pretend it is until all conditions are
+ 	    }
- # run (eha); then output during end tag processing
- # else, output
- # a token can be excluded when it is an eh key, or a child node of
- # an eh key
- 	if ($eh{$token->[1]}) {
- 	    $echild=$token->[1];
  	}
- 	if ($echild) {
+ 	elsif ($token->[0] eq 'S') {
- 	    # run through names for echild
+ 	    if ($eh{$token->[1]} or $excludeflag==1) {
- 	    # then attributes and/or values and/or cdata
+ 		$ttype='';
- 	    my $name=$token->[1];
+ 		$excludeflag=1;
- 	    my @attributes=@{$token->[3]};
+ 		$outcache.=$token->[4];
- 	    my %atthash=%{$token->[2]};
+ 	    }
- 	    foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
+ 	    else {
- 		foreach my $namematch (@{$namemlist}) {
+ 		$ttype='S';
- 		    my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
+ 		$output.=$token->[4];
- 		    if ($name=~/$nm/) {
+ 	    }
- 			$excludenold=$excluden;
+ 	    if ($excludeflag==1) {
- 			$excluden++;
- 			foreach my $attributemlist
- 			    (@{$eha{$echild}->{'attribute'}}) {
- 				foreach my $attributematch
- 				    (@{$attributemlist}) {
- 					my ($an,$am)=
- 					    split(/\=/,$attributematch,2);
- 					$am=~s/^.//;
- 					$am=~s/.$//;
- 					if ($atthash{$an}) {
- 					    if ($atthash{$an}=~/$am/) {
- 						$excludea++;
- 					    }
- 					}
- 				    }
- 			    }
- 		    }
- 		}
  	    }
- 	    $tr.=$token->[4];
  	}
- 	else {
+ 	elsif ($token->[0] eq 'E') {
- 	    print $token->[4];
+ 	    if ($eh{$token->[1]} and $excludeflag==1) {
+ 		$ttype='E';
+ 		$excludeflag=0;
+ 		$outcache.=$token->[2];
+ 		my $retval=&evalconditions($outcache);
+ 		if (&evalconditions($outcache)) {
+ 		    $output.=$outcache;
+ 		}
+ 		else {
+ 		    $output.='<!-- FILTERED OUT -->';
+ 		}
+ 		$outcache='';
+ 	    }
+ 	    elsif ($excludeflag==1) {
+ 		$ttype='';
+ 		$outcache.=$token->[2];
+ 	    }
+ 	    else {
+ 		$output.=$token->[2];
+ 		$ttype='E';
+ 	    }
  	}
      }
-     elsif ($token->[0] eq 'E') {
+     &cc;
- 	if ($echild) {
+ }
- 	    $tr.=$token->[2];
+ print $output;
- 	    if ($excluden) {
- 		my $i=0;
+ # -------------------------------------------------------------- evalconditions
- 		CDATALOOP:
+ sub evalconditions {
- 		foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
+     my ($parsetext)=@_;
- 		    $i++;
+     my $eparser = HTML::TokeParser->new(\$parsetext);
- 		    my $j;
+     unless (@{$conditions{'name'}} or
- 		    foreach my $cdatamatch (@{$cdatamlist}) {
+ 	    @{$conditions{'attribute'}}) {
- 			$j++;
+ 	return 0;
- #				print "CDATA: $cdatamatch, $cdata\n";
+     }
- 			my $cm=$cdatamatch;
+     my $nameflag=0;
- 			my $not=0;
+     my $cdataflag=0;
- 			if ($cm=~/\!/) {
+     my $matchflag=0;
- 			    $not=1;
+     my $Ttoken='';
- 			    $cm=~s/^.//;
+     while (my $token = $eparser->get_token()) {
+ 	if ($token->[0] eq 'S') {
+ 	    foreach my $name (@{$conditions{'name'}}) {
+ 		my $flag=0;
+ 		my $match=$name;
+ 		if ($match=~/^\!/) {
+ 		    $match=~s/^\!//g;
+ 		    $flag=1;
+ 		}
+ 		$match=~s/^\///g;
+ 		$match=~s/\/$//g;
+ 		if ((!$flag and $token->[1]=~/$match/) or
+ 		    ($flag and $token->[1]!~/$match/)) {
+ 		    $nameflag=1;
+ 		}
+ 	    }
+ 	    $Ttoken='';
+ 	}
+ 	elsif ($token->[0] eq 'E') {
+ 	    foreach my $name (@{$conditions{'name'}}) {
+ 		my $flag=0;
+ 		my $match=$name;
+ 		if ($match=~/^\!/) {
+ 		    $match=~s/^\!//g;
+ 		    $flag=1;
+ 		}
+ 		$match=~s/^\///g;
+ 		$match=~s/\/$//g;
+ 		if ((!$flag and $token->[1]=~/$match/) or
+ 		    ($flag and $token->[1]!~/$match/)) {
+ 		    foreach my $cdata (@{$conditions{'cdata'}}) {
+ 			my $flag=0;
+ 			my $match=$cdata;
+ 			if ($match=~/^\!/) {
+ 			    $match=~s/^\!//g;
+ 			    $flag=1;
  			}
- 			$cm=~s/^.//; $cm=~s/.$//;
+ 			$match=~s/^\///g;
- 			if ($not and $cdata=~/$cm/) {
+ 			$match=~s/\/$//g;
- 			    $ign=1; $exclude=0;
+ 			if ((!$flag and $Ttoken=~/$match/) or
+ 			    ($flag and $Ttoken!~/$match/)) {
+ 			    $cdataflag=1;
  			}
- 			if ((!$not and $cdata!~/$cm/)
+ 		    }
- 			    or ($not and $cdata=~/$cm/)) {
+ 		    if (@{$conditions{'cdata'}}) {
- #				nothing happens
+ 			if ($cdataflag) {
- #			    $exclude=0;
+ 			    return 0;
  			}
- 			elsif (($not and $cdata!~/$cm/)
+ 		    }
- 			       or (!$not and $cdata=~/$cm/)) {
+ 		    else {
- 			    $exclude++ unless $ign;
+ 			if ($nameflag) {
+ 			    return 0;
  			}
  		    }
+ 		    $nameflag=0;
  		}
  	    }
  	}
- 	if ($eh{$token->[1]}) {
+ 	elsif ($token->[0] eq 'T') {
- 	    $ign=0;
+ 	    if ($nameflag) {
- 	    $echild=0;
+ 		$Ttoken.=$token->[1];
- 	    if (!$exclude and !$excludea) {
- 		print $tr;
- #		print $token->[2];
- 		$tr='';
- 	    }
- 	    elsif ($exclude>0 or $excludea>0) {
- #		print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
- 		$exclude=0; $excluden=0; $excludea=0;
- 		$tr='';
- 	    }
- 	    $exclude=0; $excluden=0; $excludea=0;
- 	}
- 	else {
- 	    if ($echild) {
- #		$tr.=$token->[2];
  	    }
- 	    else {
- 		print $token->[2];
- 		$tr='';
- 	    }
- 	}
- 	$hloc--;
-     }
-     elsif ($token->[0] eq 'T') {
- 	if ($echild) {
- 	    $tr.=$token->[1];
- 	    $cdata=$token->[1];
- 	}
- 	else {
- 	    print $token->[1];
- 	    $tr='';
  	}
      }
+     return 1;
  }
  # ------------------------------------------------------------ clear conditions
- Line 230  sub cc {
+ Line 221  sub cc {
      @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
      @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
      @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
+     %eh=(1,1); delete $eh{1};
  }
  # --------------------------------------- remove starting and ending whitespace
- Line 239  sub trim {
+ Line 231  sub trim {
  # --------------------------------------------------------- Format xfml section
  sub format_xfml {
      my (@tokeninfo)=@_;
      return '';
  }
+ # ------------------------------------------------------- Format clause section
+ sub format_clause {
+     my (@tokeninfo)=@_;
+     return '';
+ }
  # ---------------------------------------------------- Format when:name section
  sub format_when_name {
      my (@tokeninfo)=@_;
-     $wloc++;
+ #    $wloc++;
      my $att_match=$tokeninfo[2]->{'match'};
      push @{$conditions{'name'}},$att_match;
      my $text=&trim($parser->get_text('/when:name'));
      $parser->get_tag('/when:name');
-     $wloc--;
+ #    $wloc--;
-     &cc unless $wloc;
+ #    &cc unless $wloc;
-     return '';
- }
- # ----------------------------------------------- Format when:attribute section
- sub format_when_attribute {
-     my (@tokeninfo)=@_;
-     $wloc++;
-     my $att_match=$tokeninfo[2]->{'match'};
-     push @{$conditions{'attribute'}},$att_match;
-     my $text=&trim($parser->get_text('/when:attribute'));
-     $parser->get_tag('/when:attribute');
-     $wloc--;
-     &cc unless $wloc;
      return '';
  }
- Line 280  sub format_when_cdata {
+ Line 266  sub format_when_cdata {
      my $text=&trim($parser->get_text('/when:cdata'));
      $parser->get_tag('/when:cdata');
      $wloc--;
-     &cc unless $wloc;
+ #    &cc unless $wloc;
-     return '';
- }
- # ----------------------------------------------- Format choice:include section
- sub format_choice_include {
-     my (@tokeninfo)=@_;
-     my $text=&trim($parser->get_text('/choice:include'));
-     $parser->get_tag('/choice:include');
-     $ih{$tokeninfo[2]->{'match'}}++;
      return '';
  }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.2
changed lines
	Added in v.1.3