version 1.2, 2003/07/28 20:14:17
|
version 1.7, 2003/10/21 15:58:26
|
Line 12 use strict;
|
Line 12 use strict;
|
use LWP::UserAgent; |
use LWP::UserAgent; |
use Getopt::Std; |
use Getopt::Std; |
use Digest::MD5 qw(md5_hex); |
use Digest::MD5 qw(md5_hex); |
|
use IO::File; |
|
|
|
my $basepath='/home/httpd/cgi-bin/OAI-XMLFile/XMLFile/nsdlexport/data'; |
|
|
my $pub_month; |
my $pub_month; |
my $pub_year; |
my $pub_year; |
Line 25 my $content_regex = 'File Not Found';
|
Line 28 my $content_regex = 'File Not Found';
|
# Configuration |
# Configuration |
|
|
my $debug = 0; |
my $debug = 0; |
my $url = 'http://s10.lite.msu.edu/cgi-bin/metadata_harvest.pl'; |
|
# The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab |
# The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab |
my @servers = ( 'newscience.westshore.cc.mi.us', 's10.lite.msu.edu', 's12.lite.msu.edu', 'lon-capa.chem.sunysb.edu', 'schubert.tmcc.edu', 'dalton.chem.sfu.ca', 'capa2.phy.ohiou.edu', 'pollux.physics.fsu.edu', 'loncapa.physics.sc.edu', 'loncapa.math.ucf.edu', 'zappa.ags.udel.edu', 'loncapa.gwu.edu'); |
my @servers = ( |
|
'newscience.westshore.cc.mi.us', |
|
's10.lite.msu.edu', |
|
's12.lite.msu.edu', |
|
'lon-capa.chem.sunysb.edu', |
|
'schubert.tmcc.edu', |
|
'dalton.chem.sfu.ca', |
|
'capa2.phy.ohiou.edu', |
|
'pollux.physics.fsu.edu', |
|
'loncapa.physics.sc.edu', |
|
'loncapa.math.ucf.edu', |
|
'zappa.ags.udel.edu', |
|
'loncapa.gwu.edu', |
|
'neptune.physics.ndsu.nodak.edu', |
|
'capa1.uwsp.edu', |
|
'natasha.it.fit.edu', |
|
'loncapa.Mines.EDU', |
|
'loncapa.chm.nau.edu'); |
|
|
|
foreach (@servers) { |
|
my $url='http://'.$_.'/cgi-bin/metadata_harvest.pl'; |
# End Configuration |
# End Configuration |
|
|
my $ua = new LWP::UserAgent; |
my $ua = new LWP::UserAgent; |
Line 40 $request->authorization_basic('reaper',
|
Line 62 $request->authorization_basic('reaper',
|
my $response = $ua->request( $request ); |
my $response = $ua->request( $request ); |
|
|
if ( $response->is_success ) { |
if ( $response->is_success ) { |
|
print 'SUCCESS: ' . $response->message.' for '.$url."\n\n"; |
$content = $response->content; |
$content = $response->content; |
# Delete all blank lines |
# Delete all blank lines |
$content =~ s/(?<!.)\n//g; |
$content =~ s/(?<!.)\n//g; |
Line 48 if ( $response->is_success ) {
|
Line 71 if ( $response->is_success ) {
|
# Push the content into an array |
# Push the content into an array |
@loncapa = split /\n/, $content; |
@loncapa = split /\n/, $content; |
} else { |
} else { |
die 'LON-CAPA request failed: ' . $response->message; |
print 'LON-CAPA request failed: ' . $response->message.' for '.$url."\n\n"; |
|
next; |
} |
} |
|
|
#@loncapa=undef; |
#@loncapa=undef; |
Line 60 if ( $response->is_success ) {
|
Line 84 if ( $response->is_success ) {
|
#} |
#} |
|
|
my %records = ();; |
my %records = ();; |
|
|
foreach my $metadata (@loncapa) { |
foreach my $metadata (@loncapa) { |
chomp $metadata; |
chomp $metadata; |
$metadata=~s/[^\w\d\s\.\;\:\,\|\/]/ /gs; |
$metadata=~s/[^\w\d\s\.\;\:\,\|\/]/ /gs; |
Line 82 foreach my $metadata (@loncapa) {
|
Line 107 foreach my $metadata (@loncapa) {
|
next if ( ($subject eq 'Sample') || ($subject eq 'Something') ); |
next if ( ($subject eq 'Sample') || ($subject eq 'Something') ); |
my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3]; |
my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3]; |
my $baseid=$tkline[3]; |
my $baseid=$tkline[3]; |
|
my ($adom,$auname)=($baseid=~/^\/res\/(\w+)\/(\w+)\//); |
$baseid=~s/\W/\_/g; |
$baseid=~s/\W/\_/g; |
$baseid=~s/^\_res\_//g; |
$baseid=~s/^\_res\_//g; |
|
my $fileid=md5_hex($baseid); |
|
|
next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ ); |
next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ ); |
my $keywords = $tkline[4]; |
my $keywords = $tkline[4]; |
my $version = $tkline[5]; |
my $version = $tkline[5]; |
my $notes = $tkline[6]; |
my $notes = $tkline[6]; |
my $abstract = $tkline[7]; |
my $abstract = $tkline[7]; |
next if ($abstract eq ''); |
unless ($abstract) { $abstract=$subject; } |
|
unless ($abstract) { $abstract=$title; } |
|
unless ($abstract) { $abstract=$keywords; } |
my $type = $tkline[8]; |
my $type = $tkline[8]; |
my $learning_resource_type; |
my $learning_resource_type; |
if ( $type eq 'problem' ) { |
if ( $type eq 'problem' ) { |
Line 128 foreach my $metadata (@loncapa) {
|
Line 157 foreach my $metadata (@loncapa) {
|
next if ( $language ne 'seniso'); |
next if ( $language ne 'seniso'); |
my $primary_language='en-US'; |
my $primary_language='en-US'; |
my $creation_date = $tkline[10]; |
my $creation_date = $tkline[10]; |
my ($pub_year,$pub_month,$pub_day) = ( $creation_date =~ /^(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); |
my ($pub_year,$pub_month,$pub_day) = ( $creation_date =~ /^(\d{4}) (\d{2}) (\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); |
my $revision_date = $tkline[11]; |
my $revision_date = $tkline[11]; |
|
my ($rev_year,$rev_month,$rev_day) = ( $revision_date =~ /^(\d{4}) (\d{2}) (\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); |
my $owner = $tkline[12]; |
my $owner = $tkline[12]; |
my $rights_description; |
my $rights_description; |
my $copyright = $tkline[13]; # public,domain,default,private (skip if private and domain) |
my $copyright = $tkline[13]; # public,domain,default,private (skip if private and domain) |
Line 146 foreach my $metadata (@loncapa) {
|
Line 176 foreach my $metadata (@loncapa) {
|
# Defaults mean access open to any registered LON-CAPA user |
# Defaults mean access open to any registered LON-CAPA user |
# Private means open only to author of material |
# Private means open only to author of material |
next if ( $copyright eq 'private'); |
next if ( $copyright eq 'private'); |
|
next if ( $copyright eq 'domain'); |
my $platform = "5"; # HTML Browser (not specified but construed from metadata) |
my $platform = "5"; # HTML Browser (not specified but construed from metadata) |
print (<<ENDMETA); |
# |
<rdf about="lon-capa.nsdl.collections/$baseid"> |
# Create path |
<dc:title>$title</dc:title> |
# |
<dc:creator>$author_fname $author_lname</dc:creator> |
unless (-e $basepath.'/'.$adom) { mkdir($basepath.'/'.$adom); } |
<dc:subject>$keywords</dc:subject> |
unless (-e $basepath.'/'.$adom.'/'.$auname) { |
<dc:subject>$subject</dc:subject> |
mkdir($basepath.'/'.$adom.'/'.$auname) || die 'Could not create '.$basepath.'/'.$adom.'/'.$auname; |
<dc:identifier scheme="URI">$resourceurl</dc:identifier> |
} |
<dc:language>$primary_language</dc:language> |
open(XML,'>'.$basepath.'/'.$adom.'/'.$auname.'/'.$baseid.'.xml'); |
<dc:description>$abstract<dc:description> |
print XML (<<ENDMETA); |
<dc:date>$revision_date</dc:date> |
<?xml version="1.0" encoding="UTF-8"?> |
</rdf> |
|
|
<oaidc:dc xmlns="http://purl.org/dc/elements/1.1/" |
|
xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/" |
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ |
|
http://www.openarchives.org/OAI/2.0/oai_dc.xsd" |
|
> |
|
<title>$title</title> |
|
<creator>$author_fname $author_lname</creator> |
|
<identifier>$resourceurl</identifier> |
|
<subject>$keywords</subject> |
|
<subject>$subject</subject> |
|
<language>$primary_language</language> |
|
<description>$abstract</description> |
|
<date>$rev_year-$rev_month-$rev_day</date> |
|
</oaidc:dc> |
ENDMETA |
ENDMETA |
|
close (XML); |
|
} |
} |
} |
|
|