Diff for /loncom/build/xfml_parse.pl between versions 1.1 and 1.4

version 1.1, 2002/01/29 10:42:42 version 1.4, 2002/04/08 10:52:24
Line 1 Line 1
 #!/usr/bin/perl  #!/usr/bin/perl
   
   # -------------------------------------------------------- Documentation notice
   # Run "perldoc ./lpml_parse.pl" in order to best view the software
   # documentation internalized in this program.
   
   # --------------------------------------------------------- License Information
   # The LearningOnline Network with CAPA
   # piml_parse.pl - Linux Packaging Markup Language parser
   #
   # $Id$
   #
   # Written by Scott Harrison, codeharrison@yahoo.com
   #
   # Copyright Michigan State University Board of Trustees
   #
   # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   #
   # LON-CAPA is free software; you can redistribute it and/or modify
   # it under the terms of the GNU General Public License as published by
   # the Free Software Foundation; either version 2 of the License, or
   # (at your option) any later version.
   #
   # LON-CAPA is distributed in the hope that it will be useful,
   # but WITHOUT ANY WARRANTY; without even the implied warranty of
   # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   # GNU General Public License for more details.
   #
   # You should have received a copy of the GNU General Public License
   # along with LON-CAPA; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
   # /home/httpd/html/adm/gpl.txt
   #
   # http://www.lon-capa.org/
   #
 # YEAR=2002  # YEAR=2002
 # 1/26,1/27,1/28 - Scott Harrison  # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
   #
   ###
   
 # Read in 2 XML file; first is the filter specification, the second  # Read in 2 XML file; first is the filter specification, the second
 # is the XML file to be filtered  # is the XML file to be filtered
   
   ###############################################################################
   ##                                                                           ##
   ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
   ## 1. Notes                                                                  ##
   ## 2. Read in filter file                                                    ##
   ## 3. Initialize and clear conditions                                        ##
   ## 4. Run through and apply clauses                                          ##
   ##                                                                           ##
   ###############################################################################
   
   # ----------------------------------------------------------------------- Notes
   #
   # This is meant to parse files meeting the xfml document type.
   # See xfml.dtd.  XFML=XML Filtering Markup Language.
   
 use HTML::TokeParser;  use HTML::TokeParser;
 use strict;  use strict;
   
 unless (@ARGV) {  unless (@ARGV) {
     print <<END;      print(<<END);
 Incorrect invocation.  Incorrect invocation.
 Example usages:  Example usages:
 cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml  cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
Line 19  END Line 70  END
 }  }
   
 my %eh;  my %eh;
 my %ih;  
   # ---------------------------------------------- Read in filter file from @ARGV
 my $tofilter=shift @ARGV;  my $tofilter=shift @ARGV;
 my @lines=<>; my $parsestring=join('',@lines); undef @lines;  open(IN,"<$tofilter"); my @lines=<IN>;
   my $parsestring=join('',@lines); undef @lines; close IN;
 my $parser = HTML::TokeParser->new(\$parsestring) or  my $parser = HTML::TokeParser->new(\$parsestring) or
     die('can\'t create TokeParser object');      die('can\'t create TokeParser object');
 $parser->xml_mode('1');  $parser->xml_mode('1');
   
 # Define handling methods for mode-dependent text rendering  # --------------------------------------------- initialize and clear conditions
   
 my %conditions; &cc;  my %conditions; &cc;
   
   # Define handling methods for mode-dependent text rendering
 $parser->{textify}={  $parser->{textify}={
     xfml => \&format_xfml,      'xfml' => \&format_xfml,
     'when:name' => \&format_when_name,      'when:name' => \&format_when_name,
     'when:attribute' => \&format_when_attribute,      'when:attribute' => \&format_when_attribute,
     'when:cdata' => \&format_when_cdata,      'when:cdata' => \&format_when_cdata,
     'choice:include' => \&format_choice_include,  
     'choice:exclude' => \&format_choice_exclude,      'choice:exclude' => \&format_choice_exclude,
       'clause' => \&format_clause,
     };      };
   
 my $text;  my $text;
Line 44  my $xfml; Line 97  my $xfml;
 my $wloc=0;  my $wloc=0;
 my %eha;  my %eha;
   
 while (my $token = $parser->get_tag('xfml')) {  # ----------------------------------------------- Run through and apply clauses
     &format_xfml(@{$token});  my @lines2=<>; my $output=join('',@lines2); undef @lines2;
     $text = $parser->get_text('/xfml');  my $lparser = HTML::TokeParser->new(\$output) or
 #    print $xfml;  
 #    print $text;  
     $token = $parser->get_tag('/xfml');  
 }  
   
 open IN,"<$tofilter";  
 my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2;  
 $parser = HTML::TokeParser->new(\$parsestring2) or  
     die('can\'t create TokeParser object');      die('can\'t create TokeParser object');
 $parser->xml_mode('1');  $lparser->xml_mode('1');
   my $parsestring2;
 my $token;  while (my $token = $parser->get_tag('clause')) {
 my $hloc=0;      $parsestring2=$output;
 my %ts;      $lparser = HTML::TokeParser->new(\$parsestring2);
 my $tr;      $lparser->xml_mode('1');
 my $echild=0;      $output='';
 my $exclude=0;      &format_clause(@{$token});
 my $excluden=0;      $text = $parser->get_text('/clause');
 my $excludea=0;      $token = $parser->get_tag('/clause');
 my $et=0;  
 my $cdata='';      my $token='';
 while ($token = $parser->get_token()) {      my $ttype='';
 # from HTML::TokeParser documentation:      my $excludeflag=0;
 #             ["S",  $tag, %$attr, @$attrseq, $text]      my $outcache='';
 #             ["E",  $tag, $text]      while ($token = $lparser->get_token()) {
 #             ["T",  $text, $is_data]   if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
 #             ["C",  $text]   elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; }
 #             ["D",  $text]   elsif ($token->[0] eq 'T') {
 #             ["PI", $token0, $text]      if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
 #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},   or $ttype eq 'E') {
 #         @{$conditions{'name'}};   $output.=$token->[1];
 #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},      }
 #         @{$conditions{'attribute'}};      else {
 #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},   $outcache.=$token->[1];
 #         @{$conditions{'value'}};      }
 #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},  
 #         @{$conditions{'cdata'}};  
     if ($token->[0] eq 'D') {  
  print $token->[1];  
     }  
     elsif ($token->[0] eq 'C') {  
  print $token->[1];  
     }  
     elsif ($token->[0] eq 'S') {  
  $cdata='';  
  $hloc++;  
 # if token can be excluded, then pretend it is until all conditions are  
 # run (eha); then output during end tag processing  
 # else, output  
   
 # a token can be excluded when it is an eh key, or a child node of  
 # an eh key  
   
  if ($eh{$token->[1]}) {  
     $echild=$token->[1];  
 #    print "ECHILD=$echild\n";  
  }   }
  if ($echild) {   elsif ($token->[0] eq 'S') {
     # run through names for echild      if ($eh{$token->[1]} or $excludeflag==1) {
     # then attributes and/or values and/or cdata   $ttype='';
     my $name=$token->[1];   $excludeflag=1;
     my @attributes=@{$token->[3]};   $outcache.=$token->[4];
     my %atthash=%{$token->[2]};      }
     foreach my $namemlist (@{$eha{$echild}->{'name'}}) {      else {
  foreach my $namematch (@{$namemlist}) {   $ttype='S';
     my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;   $output.=$token->[4];
     if ($name=~/$nm/) {      }
 # print "NMATCH: $nm ($name)\n";      if ($excludeflag==1) {
  $excluden++;  
  foreach my $attributemlist  
     (@{$eha{$echild}->{'attribute'}}) {  
  foreach my $attributematch   
     (@{$attributemlist}) {  
  my ($an,$am)=  
     split(/\=/,$attributematch,2);  
  $am=~s/^.//;  
  $am=~s/.$//;  
 # print 'AM:'."($an,$am)\t";  
 # print 'ATT:'.join(',',%atthash)."\n";  
  if ($atthash{$an}) {  
     if ($atthash{$an}=~/$am/) {  
  $excludea++;  
 # print "AMATCH: $am (".  
 #    join(',',  
 # @attributes)  
 # ."\n";  
     }  
  }  
     }  
     }  
     }  
  }  
     }      }
     $tr.=$token->[4];  
  }   }
  else {   elsif ($token->[0] eq 'E') {
     print $token->[4];      if ($eh{$token->[1]} and $excludeflag==1) {
    $ttype='E';
    $excludeflag=0;
    $outcache.=$token->[2];
    my $retval=&evalconditions($outcache);
    if (&evalconditions($outcache)) {
       $output.=$outcache;
    }
    else {
       $output.='<!-- FILTERED OUT -->';
    }
    $outcache='';
       }
       elsif ($excludeflag==1) {
    $ttype='';
    $outcache.=$token->[2];
       }
       else {
    $output.=$token->[2];
    $ttype='E';
       }
  }   }
     }      }
     elsif ($token->[0] eq 'E') {      &cc;
  if ($echild) {  }
     $tr.=$token->[2];  print $output;
     if ($excluden) {  
  foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {  # -------------------------------------------------------------- evalconditions
     foreach my $cdatamatch (@{$cdatamlist}) {  sub evalconditions {
 # print "CDATA: $cdatamatch, $cdata\n";      my ($parsetext)=@_;
  my $cm=$cdatamatch;      my $eparser = HTML::TokeParser->new(\$parsetext);
  my $not=0;      unless (@{$conditions{'name'}} or
  if ($cm=~/\!/) {      @{$conditions{'attribute'}}) {
     $not=1;   return 0;
     $cm=~s/^.//;      }
       my $nameflag=0;
       my $cdataflag=0;
       my $matchflag=0;
       my $Ttoken='';
       while (my $token = $eparser->get_token()) {
    if ($token->[0] eq 'S') {
       foreach my $name (@{$conditions{'name'}}) {
    my $flag=0;
    my $match=$name;
    if ($match=~/^\!/) {
       $match=~s/^\!//g;
       $flag=1;
    }
    $match=~s/^\///g;
    $match=~s/\/$//g;
    if ((!$flag and $token->[1]=~/$match/) or
       ($flag and $token->[1]!~/$match/)) {
       $nameflag=1;
    }
       }
       $Ttoken='';
    }
    elsif ($token->[0] eq 'E') {
       foreach my $name (@{$conditions{'name'}}) {
    my $flag=0;
    my $match=$name;
    if ($match=~/^\!/) {
       $match=~s/^\!//g;
       $flag=1;
    }
    $match=~s/^\///g;
    $match=~s/\/$//g;
    if ((!$flag and $token->[1]=~/$match/) or
       ($flag and $token->[1]!~/$match/)) {
       foreach my $cdata (@{$conditions{'cdata'}}) {
    my $flag=0;
    my $match=$cdata;
    if ($match=~/^\!/) {
       $match=~s/^\!//g;
       $flag=1;
  }   }
  $cm=~s/^.//; $cm=~s/.$//;   $match=~s/^\///g;
  if ((!$not and $cdata!~/$cm/)   $match=~s/\/$//g;
     or ($not and $cdata=~/$cm/)) {   if ((!$flag and $Ttoken=~/$match/) or
 #    print "CMISMATCH: $cm ($cdata)\n";      ($flag and $Ttoken!~/$match/)) {
       $cdataflag=1;
  }   }
  elsif (($not and $cdata!~/$cm/)      }
        or (!$not and $cdata=~/$cm/)) {      if (@{$conditions{'cdata'}}) {
     $exclude++;   if ($cdataflag) {
       return 0;
    }
       }
       else {
    if ($nameflag) {
       return 0;
  }   }
     }      }
       $nameflag=0;
  }   }
     }      }
  }   }
  if ($eh{$token->[1]}) {   elsif ($token->[0] eq 'T') {
     $echild=0;      if ($nameflag) {
     if (!$exclude and !$excludea) {   $Ttoken.=$token->[1];
  print $tr;  
 # print $token->[2];  
  $tr='';  
     }  
     elsif ($exclude>0 or $excludea>0) {  
 # print "EXCLUDING $token->[1] $excludea $excluden\n";  
  $exclude=0; $excluden=0; $excludea=0;  
  $tr='';  
     }      }
     $exclude=0; $excluden=0; $excludea=0;  
  }  
  else {  
     if ($echild) {  
 # $tr.=$token->[2];  
     }  
     else {  
  print $token->[2];  
  $tr='';  
     }  
  }  
  $hloc--;  
     }  
     elsif ($token->[0] eq 'T') {  
  if ($echild) {  
     $tr.=$token->[1];  
     $cdata=$token->[1];  
  }  
  else {  
     print $token->[1];  
     $tr='';  
  }   }
     }      }
       return 1;
 }  }
   
 # ------------------------------------------------------------ clear conditions  # ------------------------------------------------------------ clear conditions
Line 215  sub cc { Line 255  sub cc {
     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};      @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};      @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};      @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
       %eh=(1,1); delete $eh{1};
 }  }
   
 # --------------------------------------- remove starting and ending whitespace  # --------------------------------------- remove starting and ending whitespace
Line 222  sub trim { Line 263  sub trim {
     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;      my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
 }  }
   
   
   
   
 # --------------------------------------------------------- Format xfml section  # --------------------------------------------------------- Format xfml section
 sub format_xfml {  sub format_xfml {
     my (@tokeninfo)=@_;      my (@tokeninfo)=@_;
     return '';      return '';
 }  }
   
   # ------------------------------------------------------- Format clause section
   sub format_clause {
       my (@tokeninfo)=@_;
       return '';
   }
   
 # ---------------------------------------------------- Format when:name section  # ---------------------------------------------------- Format when:name section
 sub format_when_name {  sub format_when_name {
     my (@tokeninfo)=@_;      my (@tokeninfo)=@_;
     $wloc++;  #    $wloc++;
     my $att_match=$tokeninfo[2]->{'match'};      my $att_match=$tokeninfo[2]->{'match'};
     push @{$conditions{'name'}},$att_match;      push @{$conditions{'name'}},$att_match;
     my $text=&trim($parser->get_text('/when:name'));      my $text=&trim($parser->get_text('/when:name'));
     $parser->get_tag('/when:name');      $parser->get_tag('/when:name');
 #    print 'Name Matching...'.$att_match;  #    $wloc--;
     $wloc--;  #    &cc unless $wloc;
     &cc unless $wloc;  
     return '';  
 }  
   
 # ----------------------------------------------- Format when:attribute section  
 sub format_when_attribute {  
     my (@tokeninfo)=@_;  
     $wloc++;  
     my $att_match=$tokeninfo[2]->{'match'};  
     push @{$conditions{'attribute'}},$att_match;  
     my $text=&trim($parser->get_text('/when:attribute'));  
     $parser->get_tag('/when:attribute');  
 #    print 'Attribute Matching...'.$att_match;  
     $wloc--;  
     &cc unless $wloc;  
     return '';      return '';
 }  }
   
Line 261  sub format_when_cdata { Line 296  sub format_when_cdata {
     my (@tokeninfo)=@_;      my (@tokeninfo)=@_;
     $wloc++;      $wloc++;
     my $att_match=$tokeninfo[2]->{'match'};      my $att_match=$tokeninfo[2]->{'match'};
 #    print 'Cdata Matching...'.$att_match;  
     push @{$conditions{'cdata'}},$att_match;      push @{$conditions{'cdata'}},$att_match;
     my $text=&trim($parser->get_text('/when:cdata'));      my $text=&trim($parser->get_text('/when:cdata'));
     $parser->get_tag('/when:cdata');      $parser->get_tag('/when:cdata');
     $wloc--;      $wloc--;
     &cc unless $wloc;  #    &cc unless $wloc;
     return '';  
 }  
   
 # ----------------------------------------------- Format choice:include section  
 sub format_choice_include {  
     my (@tokeninfo)=@_;  
     my $text=&trim($parser->get_text('/choice:include'));  
     $parser->get_tag('/choice:include');  
     $ih{$tokeninfo[2]->{'match'}}++;  
     return '';      return '';
 }  }
   
Line 295  sub format_choice_exclude { Line 320  sub format_choice_exclude {
          [@{$conditions{'cdata'}}];           [@{$conditions{'cdata'}}];
     return '';      return '';
 }  }
   
   # ----------------------------------- POD (plain old documentation, CPAN style)
   
   =pod
   
   =head1 NAME
   
   xfml_parse.pl - This is meant to parse files meeting the xfml document type.
   See xfml.dtd.  XFML=XML Filtering Markup Language.
   
   =head1 SYNOPSIS
   
   Usage is for lpml file to come in through standard input.
   
   =over 4
   
   =item * 
   
   1st argument is name of xfml file.
   
   =back
   
   Example:
   
    cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
   
   or
   
    perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
   
   =head1 DESCRIPTION
   
   I am using a multiple pass-through approach to parsing
   the xfml file.  This saves memory and makes sure the server
   will never be overloaded.
   
   =head1 README
   
   I am using a multiple pass-through approach to parsing
   the xfml file.  This saves memory and makes sure the server
   will never be overloaded.
   
   =head1 PREREQUISITES
   
   HTML::TokeParser
   
   =head1 COREQUISITES
   
   =head1 OSNAMES
   
   linux
   
   =head1 SCRIPT CATEGORIES
   
   Packaging/Administrative
   
   =head1 AUTHOR
   
    Scott Harrison
    codeharrison@yahoo.com
   
   Please let me know how/if you are finding this script useful and
   any/all suggestions.  -Scott
   
   =cut
   

Removed from v.1.1  
changed lines
  Added in v.1.4


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>