File:  [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.3: download - view: text, annotated - select for diffs
Wed Feb 20 00:21:42 2002 UTC (22 years, 3 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
a cleaner leaner script (major rewrite)

#!/usr/bin/perl

# YEAR=2002
# 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
#
###

# Read in 2 XML file; first is the filter specification, the second
# is the XML file to be filtered

###############################################################################
##                                                                           ##
## ORGANIZATION OF THIS PERL SCRIPT                                          ##
## 1. Notes                                                                  ##
## 2. Read in filter file                                                    ##
## 3. Initialize and clear conditions                                        ##
## 4. Run through and apply clauses                                          ##
##                                                                           ##
###############################################################################

# ----------------------------------------------------------------------- Notes
#
# This is meant to parse files meeting the xfml document type.
# See xfml.dtd.  XFML=XML Filtering Markup Language.

use HTML::TokeParser;
use strict;

unless (@ARGV) {
    print <<END;
Incorrect invocation.
Example usages:
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
END
}

my %eh;

# ---------------------------------------------- Read in filter file from @ARGV
my $tofilter=shift @ARGV;
open IN,"<$tofilter"; my @lines=<IN>;
my $parsestring=join('',@lines); undef @lines; close IN;
my $parser = HTML::TokeParser->new(\$parsestring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');

# --------------------------------------------- initialize and clear conditions
my %conditions; &cc;

# Define handling methods for mode-dependent text rendering
$parser->{textify}={
    'xfml' => \&format_xfml,
    'when:name' => \&format_when_name,
    'when:attribute' => \&format_when_attribute,
    'when:cdata' => \&format_when_cdata,
    'choice:exclude' => \&format_choice_exclude,
    'clause' => \&format_clause,
    };

my $text;
my $xfml;
my $wloc=0;
my %eha;

# ----------------------------------------------- Run through and apply clauses
my @lines2=<>; my $output=join('',@lines2); undef @lines2;
my $lparser = HTML::TokeParser->new(\$output) or
    die('can\'t create TokeParser object');
$lparser->xml_mode('1');
my $parsestring2;
while (my $token = $parser->get_tag('clause')) {
    $parsestring2=$output;
    $lparser = HTML::TokeParser->new(\$parsestring2);
    $lparser->xml_mode('1');
    $output='';
    &format_clause(@{$token});
    $text = $parser->get_text('/clause');
    $token = $parser->get_tag('/clause');

    my $token='';
    my $ttype='';
    my $excludeflag=0;
    my $outcache='';
    while ($token = $lparser->get_token()) {
	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
	elsif ($token->[0] eq 'T') {
	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
		or $ttype eq 'E') {
		$output.=$token->[1];
	    }
	    else {
		$outcache.=$token->[1];
	    }
	}
	elsif ($token->[0] eq 'S') {
	    if ($eh{$token->[1]} or $excludeflag==1) {
		$ttype='';
		$excludeflag=1;
		$outcache.=$token->[4];
	    }
	    else {
		$ttype='S';
		$output.=$token->[4];
	    }
	    if ($excludeflag==1) {
		
	    }
	}
	elsif ($token->[0] eq 'E') {
	    if ($eh{$token->[1]} and $excludeflag==1) {
		$ttype='E';
		$excludeflag=0;
		$outcache.=$token->[2];
		my $retval=&evalconditions($outcache);
		if (&evalconditions($outcache)) {
		    $output.=$outcache;
		}
		else {
		    $output.='<!-- FILTERED OUT -->';
		}
		$outcache='';
	    }
	    elsif ($excludeflag==1) {
		$ttype='';
		$outcache.=$token->[2];
	    }
	    else {
		$output.=$token->[2];
		$ttype='E';
	    }
	}
    }
    &cc;
}
print $output;

# -------------------------------------------------------------- evalconditions
sub evalconditions {
    my ($parsetext)=@_;
    my $eparser = HTML::TokeParser->new(\$parsetext);
    unless (@{$conditions{'name'}} or
	    @{$conditions{'attribute'}}) {
	return 0;
    }
    my $nameflag=0;
    my $cdataflag=0;
    my $matchflag=0;
    my $Ttoken='';
    while (my $token = $eparser->get_token()) {
	if ($token->[0] eq 'S') {
	    foreach my $name (@{$conditions{'name'}}) {
		my $flag=0;
		my $match=$name;
		if ($match=~/^\!/) {
		    $match=~s/^\!//g;
		    $flag=1;
		}
		$match=~s/^\///g;
		$match=~s/\/$//g;
		if ((!$flag and $token->[1]=~/$match/) or
		    ($flag and $token->[1]!~/$match/)) {
		    $nameflag=1;
		}
	    }
	    $Ttoken='';
	}
	elsif ($token->[0] eq 'E') {
	    foreach my $name (@{$conditions{'name'}}) {
		my $flag=0;
		my $match=$name;
		if ($match=~/^\!/) {
		    $match=~s/^\!//g;
		    $flag=1;
		}
		$match=~s/^\///g;
		$match=~s/\/$//g;
		if ((!$flag and $token->[1]=~/$match/) or
		    ($flag and $token->[1]!~/$match/)) {
		    foreach my $cdata (@{$conditions{'cdata'}}) {
			my $flag=0;
			my $match=$cdata;
			if ($match=~/^\!/) {
			    $match=~s/^\!//g;
			    $flag=1;
			}
			$match=~s/^\///g;
			$match=~s/\/$//g;
			if ((!$flag and $Ttoken=~/$match/) or
			    ($flag and $Ttoken!~/$match/)) {
			    $cdataflag=1;
			}
		    }
		    if (@{$conditions{'cdata'}}) {
			if ($cdataflag) {
			    return 0;
			}
		    }
		    else {
			if ($nameflag) {
			    return 0;
			}
		    }
		    $nameflag=0;
		}
	    }
	}
	elsif ($token->[0] eq 'T') {
	    if ($nameflag) {
		$Ttoken.=$token->[1];
	    }
	}
    }
    return 1;
}

# ------------------------------------------------------------ clear conditions
sub cc {
    @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
    @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
    @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
    @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
    %eh=(1,1); delete $eh{1};
}

# --------------------------------------- remove starting and ending whitespace
sub trim {
    my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
}




# --------------------------------------------------------- Format xfml section
sub format_xfml {
    my (@tokeninfo)=@_;
    return '';
}

# ------------------------------------------------------- Format clause section
sub format_clause {
    my (@tokeninfo)=@_;
    return '';
}

# ---------------------------------------------------- Format when:name section
sub format_when_name {
    my (@tokeninfo)=@_;
#    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'name'}},$att_match;
    my $text=&trim($parser->get_text('/when:name'));
    $parser->get_tag('/when:name');
#    $wloc--;
#    &cc unless $wloc;
    return '';
}

# --------------------------------------------------- Format when:cdata section
sub format_when_cdata {
    my (@tokeninfo)=@_;
    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'cdata'}},$att_match;
    my $text=&trim($parser->get_text('/when:cdata'));
    $parser->get_tag('/when:cdata');
    $wloc--;
#    &cc unless $wloc;
    return '';
}

# ----------------------------------------------- Format choice:exclude section
sub format_choice_exclude {
    my (@tokeninfo)=@_;
    my $text=&trim($parser->get_text('/choice:exclude'));
    $parser->get_tag('/choice:exclude');
    $eh{$tokeninfo[2]->{'nodename'}}++;
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
         [@{$conditions{'name'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
         [@{$conditions{'attribute'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
         [@{$conditions{'value'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
         [@{$conditions{'cdata'}}];
    return '';
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>