Annotation of loncom/build/xfml_parse.pl, revision 1.2
1.1 harris41 1: #!/usr/bin/perl
2:
3: # YEAR=2002
1.2 ! harris41 4: # 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
! 5: #
! 6: ###
1.1 harris41 7:
8: # Read in 2 XML file; first is the filter specification, the second
9: # is the XML file to be filtered
10:
1.2 ! harris41 11: ###############################################################################
! 12: ## ##
! 13: ## ORGANIZATION OF THIS PERL SCRIPT ##
! 14: ## 1. Notes ##
! 15: ## 2. Get command line arguments ##
! 16: ## 3. First pass through (grab distribution-specific information) ##
! 17: ## 4. Second pass through (parse out what is not necessary) ##
! 18: ## 5. Third pass through (translate markup according to specified mode) ##
! 19: ## 6. Functions (most all just format contents of different markup tags) ##
! 20: ## 7. POD (plain old documentation, CPAN style) ##
! 21: ## ##
! 22: ###############################################################################
! 23:
! 24: # ----------------------------------------------------------------------- Notes
! 25: #
! 26: # I am using a multiple pass-through approach to parsing
! 27: # the xfml file. This saves memory and makes sure the server
! 28: # will never be overloaded.
! 29: #
! 30: # This is meant to parse files meeting the piml document type.
! 31: # See xfml.dtd. XFML=XML Filtering Markup Language.
! 32:
1.1 harris41 33: use HTML::TokeParser;
34: use strict;
35:
36: unless (@ARGV) {
37: print <<END;
38: Incorrect invocation.
39: Example usages:
40: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
41: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
42: END
43: }
44:
45: my %eh;
46: my %ih;
47: my $tofilter=shift @ARGV;
1.2 ! harris41 48: open IN,"<$tofilter";
! 49: my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
! 50: close IN;
1.1 harris41 51: my $parser = HTML::TokeParser->new(\$parsestring) or
52: die('can\'t create TokeParser object');
53: $parser->xml_mode('1');
54:
55: # Define handling methods for mode-dependent text rendering
56:
57: my %conditions; &cc;
58:
59: $parser->{textify}={
60: xfml => \&format_xfml,
61: 'when:name' => \&format_when_name,
62: 'when:attribute' => \&format_when_attribute,
63: 'when:cdata' => \&format_when_cdata,
64: 'choice:include' => \&format_choice_include,
65: 'choice:exclude' => \&format_choice_exclude,
66: };
67:
68: my $text;
69: my $xfml;
70: my $wloc=0;
71: my %eha;
72:
73: while (my $token = $parser->get_tag('xfml')) {
74: &format_xfml(@{$token});
75: $text = $parser->get_text('/xfml');
76: $token = $parser->get_tag('/xfml');
77: }
78:
1.2 ! harris41 79: #open IN,"<$tofilter";
! 80: my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
1.1 harris41 81: $parser = HTML::TokeParser->new(\$parsestring2) or
82: die('can\'t create TokeParser object');
83: $parser->xml_mode('1');
84:
85: my $token;
86: my $hloc=0;
87: my %ts;
88: my $tr;
89: my $echild=0;
90: my $exclude=0;
91: my $excluden=0;
92: my $excludea=0;
93: my $et=0;
94: my $cdata='';
1.2 ! harris41 95: my $excludenold=0;
! 96: my $ign=0;
! 97:
1.1 harris41 98: while ($token = $parser->get_token()) {
99: if ($token->[0] eq 'D') {
100: print $token->[1];
101: }
102: elsif ($token->[0] eq 'C') {
103: print $token->[1];
104: }
105: elsif ($token->[0] eq 'S') {
106: $cdata='';
107: $hloc++;
108: # if token can be excluded, then pretend it is until all conditions are
109: # run (eha); then output during end tag processing
110: # else, output
111:
112: # a token can be excluded when it is an eh key, or a child node of
113: # an eh key
114:
115: if ($eh{$token->[1]}) {
116: $echild=$token->[1];
117: }
118: if ($echild) {
119: # run through names for echild
120: # then attributes and/or values and/or cdata
121: my $name=$token->[1];
122: my @attributes=@{$token->[3]};
123: my %atthash=%{$token->[2]};
124: foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
125: foreach my $namematch (@{$namemlist}) {
126: my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
127: if ($name=~/$nm/) {
1.2 ! harris41 128: $excludenold=$excluden;
1.1 harris41 129: $excluden++;
130: foreach my $attributemlist
131: (@{$eha{$echild}->{'attribute'}}) {
132: foreach my $attributematch
133: (@{$attributemlist}) {
134: my ($an,$am)=
135: split(/\=/,$attributematch,2);
136: $am=~s/^.//;
137: $am=~s/.$//;
138: if ($atthash{$an}) {
139: if ($atthash{$an}=~/$am/) {
140: $excludea++;
141: }
142: }
143: }
144: }
145: }
146: }
147: }
148: $tr.=$token->[4];
149: }
150: else {
151: print $token->[4];
152: }
153: }
154: elsif ($token->[0] eq 'E') {
155: if ($echild) {
156: $tr.=$token->[2];
157: if ($excluden) {
1.2 ! harris41 158: my $i=0;
! 159: CDATALOOP:
1.1 harris41 160: foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
1.2 ! harris41 161: $i++;
! 162: my $j;
1.1 harris41 163: foreach my $cdatamatch (@{$cdatamlist}) {
1.2 ! harris41 164: $j++;
1.1 harris41 165: # print "CDATA: $cdatamatch, $cdata\n";
166: my $cm=$cdatamatch;
167: my $not=0;
168: if ($cm=~/\!/) {
169: $not=1;
170: $cm=~s/^.//;
171: }
172: $cm=~s/^.//; $cm=~s/.$//;
1.2 ! harris41 173: if ($not and $cdata=~/$cm/) {
! 174: $ign=1; $exclude=0;
! 175: }
1.1 harris41 176: if ((!$not and $cdata!~/$cm/)
177: or ($not and $cdata=~/$cm/)) {
1.2 ! harris41 178: # nothing happens
! 179: # $exclude=0;
1.1 harris41 180: }
181: elsif (($not and $cdata!~/$cm/)
182: or (!$not and $cdata=~/$cm/)) {
1.2 ! harris41 183: $exclude++ unless $ign;
1.1 harris41 184: }
185: }
186: }
187: }
188: }
189: if ($eh{$token->[1]}) {
1.2 ! harris41 190: $ign=0;
1.1 harris41 191: $echild=0;
192: if (!$exclude and !$excludea) {
193: print $tr;
194: # print $token->[2];
195: $tr='';
196: }
197: elsif ($exclude>0 or $excludea>0) {
1.2 ! harris41 198: # print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
1.1 harris41 199: $exclude=0; $excluden=0; $excludea=0;
200: $tr='';
201: }
202: $exclude=0; $excluden=0; $excludea=0;
203: }
204: else {
205: if ($echild) {
206: # $tr.=$token->[2];
207: }
208: else {
209: print $token->[2];
210: $tr='';
211: }
212: }
213: $hloc--;
214: }
215: elsif ($token->[0] eq 'T') {
216: if ($echild) {
217: $tr.=$token->[1];
218: $cdata=$token->[1];
219: }
220: else {
221: print $token->[1];
222: $tr='';
223: }
224: }
225: }
226:
227: # ------------------------------------------------------------ clear conditions
228: sub cc {
229: @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
230: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
231: @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
232: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
233: }
234:
235: # --------------------------------------- remove starting and ending whitespace
236: sub trim {
237: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
238: }
239:
1.2 ! harris41 240:
! 241:
1.1 harris41 242: # --------------------------------------------------------- Format xfml section
243: sub format_xfml {
244: my (@tokeninfo)=@_;
245: return '';
246: }
247:
248: # ---------------------------------------------------- Format when:name section
249: sub format_when_name {
250: my (@tokeninfo)=@_;
251: $wloc++;
252: my $att_match=$tokeninfo[2]->{'match'};
253: push @{$conditions{'name'}},$att_match;
254: my $text=&trim($parser->get_text('/when:name'));
255: $parser->get_tag('/when:name');
256: $wloc--;
257: &cc unless $wloc;
258: return '';
259: }
260:
261: # ----------------------------------------------- Format when:attribute section
262: sub format_when_attribute {
263: my (@tokeninfo)=@_;
264: $wloc++;
265: my $att_match=$tokeninfo[2]->{'match'};
266: push @{$conditions{'attribute'}},$att_match;
267: my $text=&trim($parser->get_text('/when:attribute'));
268: $parser->get_tag('/when:attribute');
269: $wloc--;
270: &cc unless $wloc;
271: return '';
272: }
273:
274: # --------------------------------------------------- Format when:cdata section
275: sub format_when_cdata {
276: my (@tokeninfo)=@_;
277: $wloc++;
278: my $att_match=$tokeninfo[2]->{'match'};
279: push @{$conditions{'cdata'}},$att_match;
280: my $text=&trim($parser->get_text('/when:cdata'));
281: $parser->get_tag('/when:cdata');
282: $wloc--;
283: &cc unless $wloc;
284: return '';
285: }
286:
287: # ----------------------------------------------- Format choice:include section
288: sub format_choice_include {
289: my (@tokeninfo)=@_;
290: my $text=&trim($parser->get_text('/choice:include'));
291: $parser->get_tag('/choice:include');
292: $ih{$tokeninfo[2]->{'match'}}++;
293: return '';
294: }
295:
296: # ----------------------------------------------- Format choice:exclude section
297: sub format_choice_exclude {
298: my (@tokeninfo)=@_;
299: my $text=&trim($parser->get_text('/choice:exclude'));
300: $parser->get_tag('/choice:exclude');
301: $eh{$tokeninfo[2]->{'nodename'}}++;
302: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
303: [@{$conditions{'name'}}];
304: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
305: [@{$conditions{'attribute'}}];
306: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
307: [@{$conditions{'value'}}];
308: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
309: [@{$conditions{'cdata'}}];
310: return '';
311: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>