#!/usr/local/bin/perl

#
# lon-capa.pl
# Parse the LON-CAPA metadata
#
# Andy Dong <adong@smete.org> 10/23/2002
#
# Contact Gerd Kortemeyer (korte@lite.msu.edu)

use strict;
use warnings;

use Digest::MD5 qw(md5_hex);
use Getopt::Std;
use IO::File;
use LWP::UserAgent;

# Directory under which the generated XML files are stored.
my $basepath = '/home/httpd/cgi-bin/OAI-XMLFile/XMLFile/nsdlexport/data';

# File-scope scratch variables (declared here; see the harvest loop for use).
my ( $pub_month, $pub_year );
my @loncapa;

# HTTP requests
my $content;
my $content_regex = 'File Not Found';

# Configuration
my $debug = 0;

# The list of servers is from the LON-CAPA CVS repository in
# /loncapa/loncom/production_hosts.tab
my @servers = qw(
    newscience.westshore.cc.mi.us
    s10.lite.msu.edu
    s12.lite.msu.edu
    lon-capa.chem.sunysb.edu
    schubert.tmcc.edu
    dalton.chem.sfu.ca
    capa2.phy.ohiou.edu
    pollux.physics.fsu.edu
    loncapa.physics.sc.edu
    loncapa.math.ucf.edu
    zappa.ags.udel.edu
    loncapa.gwu.edu
    neptune.physics.ndsu.nodak.edu
    capa1.uwsp.edu
    natasha.it.fit.edu
    loncapa.Mines.EDU
    loncapa.chm.nau.edu
);

#
# Harvest loop
#
# For every host in @servers, download the pipe-delimited metadata dump served
# by /cgi-bin/metadata_harvest.pl and write one oai_dc XML file per accepted
# record to $basepath/<domain>/<author>/<baseid>.xml.
#
foreach my $server (@servers) {
    my $url = 'http://' . $server . '/cgi-bin/metadata_harvest.pl';

    my $lines = fetch_metadata_lines($url);
    next unless $lines;    # request failed; the failure was already printed

    foreach my $line ( @{$lines} ) {
        my $record = parse_record($line);
        next unless $record;    # filtered out or unusable
        write_record( $basepath, $record );
    }
}

# fetch_metadata_lines($url)
#
# Performs an authenticated GET on $url (600 second timeout) and prints a
# SUCCESS / failure line to STDOUT.
# Returns a reference to the list of non-blank response lines on success,
# or nothing (undef in scalar context) when the request fails.
sub fetch_metadata_lines {
    my ($url) = @_;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(600);

    my $request = HTTP::Request->new( GET => $url );

    # NOTE(review): credentials are hard-coded in the source; consider moving
    # them to a protected configuration file.
    $request->authorization_basic( 'reaper', 'cat4u' );

    my $response = $ua->request($request);

    unless ( $response->is_success ) {
        print 'LON-CAPA request failed: ' . $response->message . ' for ' . $url . "\n\n";
        return;
    }

    print 'SUCCESS: ' . $response->message . ' for ' . $url . "\n\n";
    return [ split_metadata_lines( $response->content ) ];
}

# split_metadata_lines($content)
#
# Splits a response body into lines. Blank lines are removed and every
# carriage return becomes a space. The previous replacement text "\s" is not
# a space in a substitution replacement; it inserted the letter "s".
# Returns the list of lines (empty list for undefined input).
sub split_metadata_lines {
    my ($content) = @_;
    return () unless defined $content;

    # Delete all blank lines
    $content =~ s/(?<!.)\n//g;

    # Replace all ^M with spaces
    $content =~ tr/\r/ /;

    return split /\n/, $content;
}

# parse_record($line)
#
# Turns one pipe-delimited metadata line into a hash reference describing the
# XML file to write, or returns nothing when the record must be skipped.
#
# Field order: title | author | subject | path | keywords | version | notes |
# abstract | type | language | creation date | revision date | owner |
# copyright. Version, notes, type, creation date and owner are not used in
# the generated XML.
#
# A record is skipped when
#   * title, author or abstract is empty,
#   * subject is 'Sample' or 'Something',
#   * the resource URL contains '/demo/',
#   * language is not 'seniso',
#   * copyright is 'private' or 'domain',
#   * the path does not start with /res/<domain>/<author>/ (such records
#     were previously written straight into "$basepath//").
#
# Keys of the returned hash: title, creator, identifier, keywords, subject,
# language, description, date (undef when the revision date is unparseable),
# domain, author_dir, baseid.
sub parse_record {
    my ($line) = @_;
    return unless defined $line;

    chomp $line;

    # Everything except word characters, whitespace and . ; : , | / becomes a
    # space. This also removes XML markup characters and turns '-' into ' '.
    $line =~ s/[^\w\d\s\.\;\:\,\|\/]/ /gs;

    my @names = qw(
        title author subject path keywords version notes abstract
        type language creation_date revision_date owner copyright
    );
    my @values = split /\|/, $line;

    my %field;
    for my $i ( 0 .. $#names ) {
        my $value = defined $values[$i] ? $values[$i] : '';

        # Trim so that padding left by the sanitising step or by a trailing
        # carriage return cannot defeat the exact-match filters below.
        $value =~ s/^\s+//;
        $value =~ s/\s+$//;
        $field{ $names[$i] } = $value;
    }

    return if $field{title} eq '' || $field{author} eq '' || $field{abstract} eq '';
    return if $field{subject} eq 'Sample' || $field{subject} eq 'Something';

    my $identifier = 'http://nsdl.lon-capa.org' . $field{path};
    return if $identifier =~ m{/demo/};

    # Look only for seniso
    return if $field{language} ne 'seniso';

    # Private means open only to the author of the material; domain means
    # restricted to a particular LON-CAPA domain. Neither is exported.
    return if $field{copyright} eq 'private' || $field{copyright} eq 'domain';

    my ( $adom, $auname ) = $field{path} =~ m{^/res/(\w+)/(\w+)/};
    return unless defined $adom && defined $auname;

    ( my $baseid = $field{path} ) =~ s/\W/_/g;
    $baseid =~ s/^_res_//;

    # Keep the whole author string (whitespace-normalised) instead of only
    # its first two words.
    my $creator = join ' ', split ' ', $field{author};

    # The separators are spaces, not hyphens, because of the sanitising
    # substitution above.
    my ( $rev_year, $rev_month, $rev_day ) =
        $field{revision_date} =~ /^(\d{4}) (\d{2}) (\d{2})\s\d{2}:\d{2}:\d{2}$/;
    my $date = defined $rev_year ? "$rev_year-$rev_month-$rev_day" : undef;

    return {
        title       => $field{title},
        creator     => $creator,
        identifier  => $identifier,
        keywords    => $field{keywords},
        subject     => $field{subject},
        language    => 'en-US',
        description => $field{abstract},
        date        => $date,
        domain      => $adom,
        author_dir  => $auname,
        baseid      => $baseid,
    };
}

# xml_escape($text)
#
# Returns $text with &, < and > replaced by XML entities ('' for undef).
# Records coming from parse_record() never contain these characters; this is
# a safety net for any other caller.
sub xml_escape {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# render_record_xml($record)
#
# Returns the oai_dc XML document for a record produced by parse_record().
# The <date> element is left out when the record has no usable date, rather
# than emitting "<date>--</date>".
sub render_record_xml {
    my ($record) = @_;

    my %e = map { $_ => xml_escape( $record->{$_} ) }
        qw(title creator identifier keywords subject language description);

    my $date_element =
        defined $record->{date}
        ? '<date>' . xml_escape( $record->{date} ) . "</date>\n"
        : '';

    return <<"ENDMETA";
<?xml version="1.0" encoding="UTF-8"?>

<oaidc:dc xmlns="http://purl.org/dc/elements/1.1/"
xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/
http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
>
<title>$e{title}</title>
<creator>$e{creator}</creator>
<identifier>$e{identifier}</identifier>
<subject>$e{keywords}</subject>
<subject>$e{subject}</subject>
<language>$e{language}</language>
<description>$e{description}</description>
${date_element}</oaidc:dc>
ENDMETA
}

# write_record($base, $record)
#
# Creates $base/<domain>/<author> if necessary and writes the record's XML to
# <baseid>.xml inside it.
# Dies when a directory cannot be created. Warns and returns 0 when the file
# cannot be written; returns 1 on success.
sub write_record {
    my ( $base, $record ) = @_;

    my $domain_dir = $base . '/' . $record->{domain};
    my $author_dir = $domain_dir . '/' . $record->{author_dir};

    #
    # Create path
    #
    foreach my $dir ( $domain_dir, $author_dir ) {
        next if -d $dir;
        mkdir( $dir, 0777 ) or die 'Could not create ' . $dir . ': ' . $!;
    }

    my $file = $author_dir . '/' . $record->{baseid} . '.xml';

    my $fh;
    unless ( open( $fh, '>', $file ) ) {
        warn "Could not open $file for writing: $!\n";
        return 0;
    }
    print {$fh} render_record_xml($record);

    # Buffered write errors only surface at close time.
    unless ( close $fh ) {
        warn "Could not finish writing $file: $!\n";
        return 0;
    }
    return 1;
}