--- nsdl/nsdlloncapaorg/harvester.pl 2003/07/28 20:14:17 1.2 +++ nsdl/nsdlloncapaorg/harvester.pl 2003/07/29 14:37:51 1.4 @@ -12,6 +12,9 @@ use strict; use LWP::UserAgent; use Getopt::Std; use Digest::MD5 qw(md5_hex); +use IO::File; + +my $basepath='/home/httpd/cgi-bin/OAI-XMLFile/XMLFile/nsdlexport/data'; my $pub_month; my $pub_year; @@ -60,6 +63,7 @@ if ( $response->is_success ) { #} my %records = ();; + foreach my $metadata (@loncapa) { chomp $metadata; $metadata=~s/[^\w\d\s\.\;\:\,\|\/]/ /gs; @@ -82,8 +86,10 @@ foreach my $metadata (@loncapa) { next if ( ($subject eq 'Sample') || ($subject eq 'Something') ); my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3]; my $baseid=$tkline[3]; + my ($adom,$auname)=($baseid=~/^\/res\/(\w+)\/(\w+)\//); $baseid=~s/\W/\_/g; $baseid=~s/^\_res\_//g; + my $fileid=md5_hex($baseid); next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ ); my $keywords = $tkline[4]; @@ -128,8 +134,9 @@ foreach my $metadata (@loncapa) { next if ( $language ne 'seniso'); my $primary_language='en-US'; my $creation_date = $tkline[10]; - my ($pub_year,$pub_month,$pub_day) = ( $creation_date =~ /^(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); + my ($pub_year,$pub_month,$pub_day) = ( $creation_date =~ /^(\d{4}) (\d{2}) (\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); my $revision_date = $tkline[11]; + my ($rev_year,$rev_month,$rev_day) = ( $revision_date =~ /^(\d{4}) (\d{2}) (\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); my $owner = $tkline[12]; my $rights_description; my $copyright = $tkline[13]; # public,domain,default,private (skip if private and domain) @@ -147,18 +154,32 @@ foreach my $metadata (@loncapa) { # Private means open only to author of material next if ( $copyright eq 'private'); my $platform = "5"; # HTML Browser (not specified but construed from metadata) - print (< - $title - $author_fname $author_lname - $keywords - $subject - $resourceurl - $primary_language - $abstract - $revision_date - - +# +# Create path +# + unless (-e $basepath.'/'.$adom) { mkdir($basepath.'/'.$adom); } + unless (-e $basepath.'/'.$adom.'/'.$auname) { + mkdir($basepath.'/'.$adom.'/'.$auname) || die 'Could not create '.$basepath.'/'.$adom.'/'.$auname; + } + open(XML,'>'.$basepath.'/'.$adom.'/'.$auname.'/'.$baseid.'.xml'); + print XML (< + + + $title + $author_fname $author_lname + $resourceurl + $keywords + $subject + $primary_language + $abstract + $rev_year-$rev_month-$rev_day + ENDMETA + close (XML); } -