File:  [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.2: download - view: text, annotated - select for diffs
Fri Feb 1 10:56:41 2002 UTC (22 years, 3 months ago) by harris41
Branches: MAIN
CVS tags: stable_2002_spring, HEAD
a lot of cleaning up, debugging, and commenting

#!/usr/bin/perl

# YEAR=2002
# 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
#
###

# Read in 2 XML file; first is the filter specification, the second
# is the XML file to be filtered

###############################################################################
##                                                                           ##
## ORGANIZATION OF THIS PERL SCRIPT                                          ##
## 1. Notes                                                                  ##
## 2. Get command line arguments                                             ##
## 3. First pass through (grab distribution-specific information)            ##
## 4. Second pass through (parse out what is not necessary)                  ##
## 5. Third pass through (translate markup according to specified mode)      ##
## 6. Functions (most all just format contents of different markup tags)     ##
## 7. POD (plain old documentation, CPAN style)                              ##
##                                                                           ##
###############################################################################

# ----------------------------------------------------------------------- Notes
#
# I am using a multiple pass-through approach to parsing
# the xfml file.  This saves memory and makes sure the server
# will never be overloaded.
#
# This is meant to parse files meeting the piml document type.
# See xfml.dtd.  XFML=XML Filtering Markup Language.

use HTML::TokeParser;
use strict;

unless (@ARGV) {
    print <<END;
Incorrect invocation.
Example usages:
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
END
}

my %eh;
my %ih;
my $tofilter=shift @ARGV;
open IN,"<$tofilter";
my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
close IN;
my $parser = HTML::TokeParser->new(\$parsestring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');

# Define handling methods for mode-dependent text rendering

my %conditions; &cc;

$parser->{textify}={
    xfml => \&format_xfml,
    'when:name' => \&format_when_name,
    'when:attribute' => \&format_when_attribute,
    'when:cdata' => \&format_when_cdata,
    'choice:include' => \&format_choice_include,
    'choice:exclude' => \&format_choice_exclude,
    };

my $text;
my $xfml;
my $wloc=0;
my %eha;

while (my $token = $parser->get_tag('xfml')) {
    &format_xfml(@{$token});
    $text = $parser->get_text('/xfml');
    $token = $parser->get_tag('/xfml');
}

#open IN,"<$tofilter";
my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
$parser = HTML::TokeParser->new(\$parsestring2) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');

my $token;
my $hloc=0;
my %ts;
my $tr;
my $echild=0;
my $exclude=0;
my $excluden=0;
my $excludea=0;
my $et=0;
my $cdata='';
my $excludenold=0;
my $ign=0;

while ($token = $parser->get_token()) {
    if ($token->[0] eq 'D') {
	print $token->[1];
    }
    elsif ($token->[0] eq 'C') {
	print $token->[1];
    }
    elsif ($token->[0] eq 'S') {
	$cdata='';
	$hloc++;
# if token can be excluded, then pretend it is until all conditions are
# run (eha); then output during end tag processing
# else, output

# a token can be excluded when it is an eh key, or a child node of
# an eh key

	if ($eh{$token->[1]}) {
	    $echild=$token->[1];
	}
	if ($echild) {
	    # run through names for echild
	    # then attributes and/or values and/or cdata
	    my $name=$token->[1];
	    my @attributes=@{$token->[3]};
	    my %atthash=%{$token->[2]};
	    foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
		foreach my $namematch (@{$namemlist}) {
		    my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
		    if ($name=~/$nm/) {
			$excludenold=$excluden;
			$excluden++;
			foreach my $attributemlist
			    (@{$eha{$echild}->{'attribute'}}) {
				foreach my $attributematch 
				    (@{$attributemlist}) {
					my ($an,$am)=
					    split(/\=/,$attributematch,2);
					$am=~s/^.//;
					$am=~s/.$//;
					if ($atthash{$an}) {
					    if ($atthash{$an}=~/$am/) {
						$excludea++;
					    }
					}
				    }
			    }
		    }
		}
	    }
	    $tr.=$token->[4];
	}
	else {
	    print $token->[4];
	}
    }
    elsif ($token->[0] eq 'E') {
	if ($echild) {
	    $tr.=$token->[2];
	    if ($excluden) {
		my $i=0;
		CDATALOOP:
		foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
		    $i++;
		    my $j;
		    foreach my $cdatamatch (@{$cdatamlist}) {
			$j++;
#				print "CDATA: $cdatamatch, $cdata\n";
			my $cm=$cdatamatch;
			my $not=0;
			if ($cm=~/\!/) {
			    $not=1;
			    $cm=~s/^.//;
			}
			$cm=~s/^.//; $cm=~s/.$//;
			if ($not and $cdata=~/$cm/) {
			    $ign=1; $exclude=0;
			}
			if ((!$not and $cdata!~/$cm/)
			    or ($not and $cdata=~/$cm/)) {
#				nothing happens
#			    $exclude=0;
			}
			elsif (($not and $cdata!~/$cm/)
			       or (!$not and $cdata=~/$cm/)) {
			    $exclude++ unless $ign;
			}
		    }
		}
	    }
	}
	if ($eh{$token->[1]}) {
	    $ign=0;
	    $echild=0;
	    if (!$exclude and !$excludea) {
		print $tr;
#		print $token->[2];
		$tr='';
	    }
	    elsif ($exclude>0 or $excludea>0) {
#		print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
		$exclude=0; $excluden=0; $excludea=0;
		$tr='';
	    }
	    $exclude=0; $excluden=0; $excludea=0;
	}
	else {
	    if ($echild) {
#		$tr.=$token->[2];
	    }
	    else {
		print $token->[2];
		$tr='';
	    }
	}
	$hloc--;
    }
    elsif ($token->[0] eq 'T') {
	if ($echild) {
	    $tr.=$token->[1];
	    $cdata=$token->[1];
	}
	else {
	    print $token->[1];
	    $tr='';
	}
    }
}

# ------------------------------------------------------------ clear conditions
sub cc {
    @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
    @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
    @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
    @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
}

# --------------------------------------- remove starting and ending whitespace
sub trim {
    my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
}



# --------------------------------------------------------- Format xfml section
sub format_xfml {
    my (@tokeninfo)=@_;
    return '';
}

# ---------------------------------------------------- Format when:name section
sub format_when_name {
    my (@tokeninfo)=@_;
    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'name'}},$att_match;
    my $text=&trim($parser->get_text('/when:name'));
    $parser->get_tag('/when:name');
    $wloc--;
    &cc unless $wloc;
    return '';
}

# ----------------------------------------------- Format when:attribute section
sub format_when_attribute {
    my (@tokeninfo)=@_;
    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'attribute'}},$att_match;
    my $text=&trim($parser->get_text('/when:attribute'));
    $parser->get_tag('/when:attribute');
    $wloc--;
    &cc unless $wloc;
    return '';
}

# --------------------------------------------------- Format when:cdata section
sub format_when_cdata {
    my (@tokeninfo)=@_;
    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'cdata'}},$att_match;
    my $text=&trim($parser->get_text('/when:cdata'));
    $parser->get_tag('/when:cdata');
    $wloc--;
    &cc unless $wloc;
    return '';
}

# ----------------------------------------------- Format choice:include section
sub format_choice_include {
    my (@tokeninfo)=@_;
    my $text=&trim($parser->get_text('/choice:include'));
    $parser->get_tag('/choice:include');
    $ih{$tokeninfo[2]->{'match'}}++;
    return '';
}

# ----------------------------------------------- Format choice:exclude section
sub format_choice_exclude {
    my (@tokeninfo)=@_;
    my $text=&trim($parser->get_text('/choice:exclude'));
    $parser->get_tag('/choice:exclude');
    $eh{$tokeninfo[2]->{'nodename'}}++;
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
         [@{$conditions{'name'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
         [@{$conditions{'attribute'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
         [@{$conditions{'value'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
         [@{$conditions{'cdata'}}];
    return '';
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>