#!/usr/bin/perl

# YEAR=2002
# 1/26,1/27,1/28 - Scott Harrison

# Read in 2 XML file; first is the filter specification, the second
# is the XML file to be filtered

use HTML::TokeParser;
use strict;

unless (@ARGV) {
    print <<END;
Incorrect invocation.
Example usages:
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
END
}

my %eh;
my %ih;
my $tofilter=shift @ARGV;
my @lines=<>; my $parsestring=join('',@lines); undef @lines;
my $parser = HTML::TokeParser->new(\$parsestring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');

# Define handling methods for mode-dependent text rendering

my %conditions; &cc;

$parser->{textify}={
    xfml => \&format_xfml,
    'when:name' => \&format_when_name,
    'when:attribute' => \&format_when_attribute,
    'when:cdata' => \&format_when_cdata,
    'choice:include' => \&format_choice_include,
    'choice:exclude' => \&format_choice_exclude,
    };

my $text;
my $xfml;
my $wloc=0;
my %eha;

while (my $token = $parser->get_tag('xfml')) {
    &format_xfml(@{$token});
    $text = $parser->get_text('/xfml');
#    print $xfml;
#    print $text;
    $token = $parser->get_tag('/xfml');
}

open IN,"<$tofilter";
my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2;
$parser = HTML::TokeParser->new(\$parsestring2) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');

my $token;
my $hloc=0;
my %ts;
my $tr;
my $echild=0;
my $exclude=0;
my $excluden=0;
my $excludea=0;
my $et=0;
my $cdata='';
while ($token = $parser->get_token()) {
# from HTML::TokeParser documentation:
#             ["S",  $tag, %$attr, @$attrseq, $text]
#             ["E",  $tag, $text]
#             ["T",  $text, $is_data]
#             ["C",  $text]
#             ["D",  $text]
#             ["PI", $token0, $text]
#    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
#         @{$conditions{'name'}};
#    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
#         @{$conditions{'attribute'}};
#    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
#         @{$conditions{'value'}};
#    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
#         @{$conditions{'cdata'}};
    if ($token->[0] eq 'D') {
	print $token->[1];
    }
    elsif ($token->[0] eq 'C') {
	print $token->[1];
    }
    elsif ($token->[0] eq 'S') {
	$cdata='';
	$hloc++;
# if token can be excluded, then pretend it is until all conditions are
# run (eha); then output during end tag processing
# else, output

# a token can be excluded when it is an eh key, or a child node of
# an eh key

	if ($eh{$token->[1]}) {
	    $echild=$token->[1];
#	    print "ECHILD=$echild\n";
	}
	if ($echild) {
	    # run through names for echild
	    # then attributes and/or values and/or cdata
	    my $name=$token->[1];
	    my @attributes=@{$token->[3]};
	    my %atthash=%{$token->[2]};
	    foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
		foreach my $namematch (@{$namemlist}) {
		    my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
		    if ($name=~/$nm/) {
#			print "NMATCH: $nm ($name)\n";
			$excluden++;
			foreach my $attributemlist
			    (@{$eha{$echild}->{'attribute'}}) {
				foreach my $attributematch 
				    (@{$attributemlist}) {
					my ($an,$am)=
					    split(/\=/,$attributematch,2);
					$am=~s/^.//;
					$am=~s/.$//;
#					print 'AM:'."($an,$am)\t";
#					print 'ATT:'.join(',',%atthash)."\n";
					if ($atthash{$an}) {
					    if ($atthash{$an}=~/$am/) {
						$excludea++;
#						print "AMATCH: $am (".
#						    join(',',
#							 @attributes)
#							."\n";
					    }
					}
				    }
			    }
		    }
		}
	    }
	    $tr.=$token->[4];
	}
	else {
	    print $token->[4];
	}
    }
    elsif ($token->[0] eq 'E') {
	if ($echild) {
	    $tr.=$token->[2];
	    if ($excluden) {
		foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
		    foreach my $cdatamatch (@{$cdatamlist}) {
#				print "CDATA: $cdatamatch, $cdata\n";
			my $cm=$cdatamatch;
			my $not=0;
			if ($cm=~/\!/) {
			    $not=1;
			    $cm=~s/^.//;
			}
			$cm=~s/^.//; $cm=~s/.$//;
			if ((!$not and $cdata!~/$cm/)
			    or ($not and $cdata=~/$cm/)) {
#				    print "CMISMATCH: $cm ($cdata)\n";
			}
			elsif (($not and $cdata!~/$cm/)
			       or (!$not and $cdata=~/$cm/)) {
			    $exclude++;
			}
		    }
		}
	    }
	}
	if ($eh{$token->[1]}) {
	    $echild=0;
	    if (!$exclude and !$excludea) {
		print $tr;
#		print $token->[2];
		$tr='';
	    }
	    elsif ($exclude>0 or $excludea>0) {
#		print "EXCLUDING $token->[1] $excludea $excluden\n";
		$exclude=0; $excluden=0; $excludea=0;
		$tr='';
	    }
	    $exclude=0; $excluden=0; $excludea=0;
	}
	else {
	    if ($echild) {
#		$tr.=$token->[2];
	    }
	    else {
		print $token->[2];
		$tr='';
	    }
	}
	$hloc--;
    }
    elsif ($token->[0] eq 'T') {
	if ($echild) {
	    $tr.=$token->[1];
	    $cdata=$token->[1];
	}
	else {
	    print $token->[1];
	    $tr='';
	}
    }
}

# ------------------------------------------------------------ clear conditions
sub cc {
    @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
    @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
    @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
    @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
}

# --------------------------------------- remove starting and ending whitespace
sub trim {
    my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
}

# --------------------------------------------------------- Format xfml section
sub format_xfml {
    my (@tokeninfo)=@_;
    return '';
}

# ---------------------------------------------------- Format when:name section
sub format_when_name {
    my (@tokeninfo)=@_;
    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'name'}},$att_match;
    my $text=&trim($parser->get_text('/when:name'));
    $parser->get_tag('/when:name');
#    print 'Name Matching...'.$att_match;
    $wloc--;
    &cc unless $wloc;
    return '';
}

# ----------------------------------------------- Format when:attribute section
sub format_when_attribute {
    my (@tokeninfo)=@_;
    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'attribute'}},$att_match;
    my $text=&trim($parser->get_text('/when:attribute'));
    $parser->get_tag('/when:attribute');
#    print 'Attribute Matching...'.$att_match;
    $wloc--;
    &cc unless $wloc;
    return '';
}

# --------------------------------------------------- Format when:cdata section
sub format_when_cdata {
    my (@tokeninfo)=@_;
    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
#    print 'Cdata Matching...'.$att_match;
    push @{$conditions{'cdata'}},$att_match;
    my $text=&trim($parser->get_text('/when:cdata'));
    $parser->get_tag('/when:cdata');
    $wloc--;
    &cc unless $wloc;
    return '';
}

# ----------------------------------------------- Format choice:include section
sub format_choice_include {
    my (@tokeninfo)=@_;
    my $text=&trim($parser->get_text('/choice:include'));
    $parser->get_tag('/choice:include');
    $ih{$tokeninfo[2]->{'match'}}++;
    return '';
}

# ----------------------------------------------- Format choice:exclude section
sub format_choice_exclude {
    my (@tokeninfo)=@_;
    my $text=&trim($parser->get_text('/choice:exclude'));
    $parser->get_tag('/choice:exclude');
    $eh{$tokeninfo[2]->{'nodename'}}++;
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
         [@{$conditions{'name'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
         [@{$conditions{'attribute'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
         [@{$conditions{'value'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
         [@{$conditions{'cdata'}}];
    return '';
}