File:  [LON-CAPA] / nsdl / build / xfml_parse.pl
Revision 1.1: download - view: text, annotated - select for diffs
Sun May 5 02:44:57 2002 UTC (22 years, 1 month ago) by harris41
Branches: MAIN
CVS tags: HEAD
setting up the LPML toolset from http://lpml.sourceforge.net/

#!/usr/bin/perl

# -------------------------------------------------------- Documentation notice
# Run "perldoc ./lpml_parse.pl" in order to best view the software
# documentation internalized in this program.

# --------------------------------------------------------- License Information
# The LearningOnline Network with CAPA
# piml_parse.pl - Linux Packaging Markup Language parser
#
# $Id: xfml_parse.pl,v 1.1 2002/05/05 02:44:57 harris41 Exp $
#
# Written by Scott Harrison, codeharrison@yahoo.com
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
# YEAR=2002
# 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
#
###

# Read in 2 XML file; first is the filter specification, the second
# is the XML file to be filtered

###############################################################################
##                                                                           ##
## ORGANIZATION OF THIS PERL SCRIPT                                          ##
## 1. Notes                                                                  ##
## 2. Read in filter file                                                    ##
## 3. Initialize and clear conditions                                        ##
## 4. Run through and apply clauses                                          ##
##                                                                           ##
###############################################################################

# ----------------------------------------------------------------------- Notes
#
# This is meant to parse files meeting the xfml document type.
# See xfml.dtd.  XFML=XML Filtering Markup Language.

use HTML::TokeParser;
use strict;

unless (@ARGV) {
    print(<<END);
Incorrect invocation.
Example usages:
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
END
}

my %eh;

# ---------------------------------------------- Read in filter file from @ARGV
my $tofilter=shift @ARGV;
open(IN,"<$tofilter"); my @lines=<IN>;
my $parsestring=join('',@lines); undef @lines; close IN;
my $parser = HTML::TokeParser->new(\$parsestring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');

# --------------------------------------------- initialize and clear conditions
my %conditions; &cc;

# Define handling methods for mode-dependent text rendering
$parser->{textify}={
    'xfml' => \&format_xfml,
    'when:name' => \&format_when_name,
    'when:attribute' => \&format_when_attribute,
    'when:cdata' => \&format_when_cdata,
    'choice:exclude' => \&format_choice_exclude,
    'clause' => \&format_clause,
    };

my $text;
my $xfml;
my $wloc=0;
my %eha;

# ----------------------------------------------- Run through and apply clauses
my @lines2=<>; my $output=join('',@lines2); undef @lines2;
my $lparser = HTML::TokeParser->new(\$output) or
    die('can\'t create TokeParser object');
$lparser->xml_mode('1');
my $parsestring2;
while (my $token = $parser->get_tag('clause')) {
    $parsestring2=$output;
    $lparser = HTML::TokeParser->new(\$parsestring2);
    $lparser->xml_mode('1');
    $output='';
    &format_clause(@{$token});
    $text = $parser->get_text('/clause');
    $token = $parser->get_tag('/clause');

    my $token='';
    my $ttype='';
    my $excludeflag=0;
    my $outcache='';
    while ($token = $lparser->get_token()) {
	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
	elsif ($token->[0] eq 'T') {
	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
		or $ttype eq 'E') {
		$output.=$token->[1];
	    }
	    else {
		$outcache.=$token->[1];
	    }
	}
	elsif ($token->[0] eq 'S') {
	    if ($eh{$token->[1]} or $excludeflag==1) {
		$ttype='';
		$excludeflag=1;
		$outcache.=$token->[4];
	    }
	    else {
		$ttype='S';
		$output.=$token->[4];
	    }
	    if ($excludeflag==1) {
		
	    }
	}
	elsif ($token->[0] eq 'E') {
	    if ($eh{$token->[1]} and $excludeflag==1) {
		$ttype='E';
		$excludeflag=0;
		$outcache.=$token->[2];
		my $retval=&evalconditions($outcache);
		if (&evalconditions($outcache)) {
		    $output.=$outcache;
		}
		else {
		    $output.='<!-- FILTERED OUT -->';
		}
		$outcache='';
	    }
	    elsif ($excludeflag==1) {
		$ttype='';
		$outcache.=$token->[2];
	    }
	    else {
		$output.=$token->[2];
		$ttype='E';
	    }
	}
    }
    &cc;
}
print $output;

# -------------------------------------------------------------- evalconditions
sub evalconditions {
    my ($parsetext)=@_;
    my $eparser = HTML::TokeParser->new(\$parsetext);
    unless (@{$conditions{'name'}} or
	    @{$conditions{'attribute'}}) {
	return 0;
    }
    my $nameflag=0;
    my $cdataflag=0;
    my $matchflag=0;
    my $Ttoken='';
    while (my $token = $eparser->get_token()) {
	if ($token->[0] eq 'S') {
	    foreach my $name (@{$conditions{'name'}}) {
		my $flag=0;
		my $match=$name;
		if ($match=~/^\!/) {
		    $match=~s/^\!//g;
		    $flag=1;
		}
		$match=~s/^\///g;
		$match=~s/\/$//g;
		if ((!$flag and $token->[1]=~/$match/) or
		    ($flag and $token->[1]!~/$match/)) {
		    $nameflag=1;
		}
	    }
	    $Ttoken='';
	}
	elsif ($token->[0] eq 'E') {
	    foreach my $name (@{$conditions{'name'}}) {
		my $flag=0;
		my $match=$name;
		if ($match=~/^\!/) {
		    $match=~s/^\!//g;
		    $flag=1;
		}
		$match=~s/^\///g;
		$match=~s/\/$//g;
		if ((!$flag and $token->[1]=~/$match/) or
		    ($flag and $token->[1]!~/$match/)) {
		    foreach my $cdata (@{$conditions{'cdata'}}) {
			my $flag=0;
			my $match=$cdata;
			if ($match=~/^\!/) {
			    $match=~s/^\!//g;
			    $flag=1;
			}
			$match=~s/^\///g;
			$match=~s/\/$//g;
			if ((!$flag and $Ttoken=~/$match/) or
			    ($flag and $Ttoken!~/$match/)) {
			    $cdataflag=1;
			}
		    }
		    if (@{$conditions{'cdata'}}) {
			if ($cdataflag) {
			    return 0;
			}
		    }
		    else {
			if ($nameflag) {
			    return 0;
			}
		    }
		    $nameflag=0;
		}
	    }
	}
	elsif ($token->[0] eq 'T') {
	    if ($nameflag) {
		$Ttoken.=$token->[1];
	    }
	}
    }
    return 1;
}

# ------------------------------------------------------------ clear conditions
sub cc {
    @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
    @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
    @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
    @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
    %eh=(1,1); delete $eh{1};
}

# --------------------------------------- remove starting and ending whitespace
sub trim {
    my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
}




# --------------------------------------------------------- Format xfml section
sub format_xfml {
    my (@tokeninfo)=@_;
    return '';
}

# ------------------------------------------------------- Format clause section
sub format_clause {
    my (@tokeninfo)=@_;
    return '';
}

# ---------------------------------------------------- Format when:name section
sub format_when_name {
    my (@tokeninfo)=@_;
#    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'name'}},$att_match;
    my $text=&trim($parser->get_text('/when:name'));
    $parser->get_tag('/when:name');
#    $wloc--;
#    &cc unless $wloc;
    return '';
}

# --------------------------------------------------- Format when:cdata section
sub format_when_cdata {
    my (@tokeninfo)=@_;
    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'cdata'}},$att_match;
    my $text=&trim($parser->get_text('/when:cdata'));
    $parser->get_tag('/when:cdata');
    $wloc--;
#    &cc unless $wloc;
    return '';
}

# ----------------------------------------------- Format choice:exclude section
sub format_choice_exclude {
    my (@tokeninfo)=@_;
    my $text=&trim($parser->get_text('/choice:exclude'));
    $parser->get_tag('/choice:exclude');
    $eh{$tokeninfo[2]->{'nodename'}}++;
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
         [@{$conditions{'name'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
         [@{$conditions{'attribute'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
         [@{$conditions{'value'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
         [@{$conditions{'cdata'}}];
    return '';
}

# ----------------------------------- POD (plain old documentation, CPAN style)

=pod

=head1 NAME

xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)

=head1 SYNOPSIS

Usage is for lpml file to come in through standard input.

=over 4

=item * 

1st argument is name of xfml file.

=back

Example:

 cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml

or

 perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml

=head1 DESCRIPTION

I am using a multiple pass-through approach to parsing
the xfml file.  This saves memory and makes sure the server
will never be overloaded.

=head1 README

I am using a multiple pass-through approach to parsing
the xfml file.  This saves memory and makes sure the server
will never be overloaded.

=head1 PREREQUISITES

HTML::TokeParser

=head1 COREQUISITES

=head1 OSNAMES

linux

=head1 SCRIPT CATEGORIES

Packaging/Administrative

=head1 AUTHOR

 Scott Harrison
 codeharrison@yahoo.com

Please let me know how/if you are finding this script useful and
any/all suggestions.  -Scott

=cut


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>
500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.