--- loncom/metadata_database/searchcat.pl 2004/04/08 15:57:32 1.55 +++ loncom/metadata_database/searchcat.pl 2005/03/21 20:36:11 1.63 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.55 2004/04/08 15:57:32 matthew Exp $ +# $Id: searchcat.pl,v 1.63 2005/03/21 20:36:11 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -68,49 +68,90 @@ use strict; use DBI; use lib '/home/httpd/lib/perl/'; -use LONCAPA::Configuration; use LONCAPA::lonmetadata; +use Getopt::Long; use IO::File; use HTML::TokeParser; use GDBM_File; use POSIX qw(strftime mktime); + +use Apache::lonnet(); + use File::Find; +# +# Set up configuration options +my ($simulate,$oneuser,$help,$verbose,$logfile,$debug); +GetOptions ( + 'help' => \$help, + 'simulate' => \$simulate, + 'only=s' => \$oneuser, + 'verbose=s' => \$verbose, + 'debug' => \$debug, + ); + +if ($help) { + print <<"ENDHELP"; +$0 +Rebuild and update the LON-CAPA metadata database. +Options: + -help Print this help + -simulate Do not modify the database. + -only=user Only compute for the given user. Implies -simulate + -verbose=val Sets logging level, val must be a number + -debug Turns on debugging output +ENDHELP + exit 0; +} + +if (! defined($debug)) { + $debug = 0; +} + +if (! defined($verbose)) { + $verbose = 0; +} + +if (defined($oneuser)) { + $simulate=1; +} + ## ## Use variables for table names so we can test this routine a little easier my $oldname = 'metadata'; -my $newname = 'newmetadata'; +my $newname = 'newmetadata'.$$; # append pid to have unique temporary table # -# Read loncapa_apache.conf and loncapa.conf -my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); -my %perlvar=%{$perlvarref}; -undef $perlvarref; -delete $perlvar{'lonReceipt'}; # remove since sensitive (really?) & not needed -# # Only run if machine is a library server -exit if ($perlvar{'lonRole'} ne 'library'); +exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library'); # # Make sure this process is running from user=www my $wwwid=getpwnam('www'); if ($wwwid!=$<) { - my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}"; + my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch"; system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ - mailto $emailto -s '$subj' > /dev/null"); + mail -s '$subj' $emailto > /dev/null"); exit 1; } # # Let people know we are running -open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); -print LOG '==== Searchcat Run '.localtime()."====\n"; +open(LOG,'>>'.$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log'); +&log(0,'==== Searchcat Run '.localtime()."===="); + + +if ($debug) { + &log(0,'simulating') if ($simulate); + &log(0,'only processing user '.$oneuser) if ($oneuser); + &log(0,'verbosity level = '.$verbose); +} # # Connect to database my $dbh; -if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'}, +if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'}, { RaiseError =>0,PrintError=>0}))) { - print LOG "Cannot connect to database!\n"; + &log(0,"Cannot connect to database!"); die "MySQL Error: Cannot connect to database!\n"; } # This can return an error and still be okay, so we do not bother checking. @@ -122,48 +163,62 @@ my $request = &LONCAPA::lonmetadata::cre $dbh->do($request); if ($dbh->err) { $dbh->disconnect(); - print LOG "\nMySQL Error Create: ".$dbh->errstr."\n"; + &log(0,"MySQL Error Create: ".$dbh->errstr); die $dbh->errstr; } # # find out which users we need to examine -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); -my @homeusers = - grep { - &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_"); - } grep { - !/^\.\.?$/; - } readdir(RESOURCES); -closedir RESOURCES; -# -# Loop through the users -foreach my $user (@homeusers) { - print LOG "=== User: ".$user."\n"; - my $prodir=&propath($perlvar{'lonDefDomain'},$user); - # - # Use File::Find to get the files we need to read/modify - find( - {preprocess => \&only_meta_files, -# wanted => \&print_filename, -# wanted => \&log_metadata, - wanted => \&process_meta_file, - }, - "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); +my @domains = sort(&Apache::lonnet::current_machine_domains()); +&log(9,'domains ="'.join('","',@domains).'"'); + +foreach my $dom (@domains) { + &log(9,'domain = '.$dom); + opendir(RESOURCES,"$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom"); + my @homeusers = + grep { + &ishome("$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom/$_"); + } grep { + !/^\.\.?$/; + } readdir(RESOURCES); + closedir RESOURCES; + &log(5,'users = '.$dom.':'.join(',',@homeusers)); + # + if ($oneuser) { + @homeusers=($oneuser); + } + # + # Loop through the users + foreach my $user (@homeusers) { + &log(0,"=== User: ".$user); + &process_dynamic_metadata($user,$dom); + # + # Use File::Find to get the files we need to read/modify + find( + {preprocess => \&only_meta_files, + #wanted => \&print_filename, + #wanted => \&log_metadata, + wanted => \&process_meta_file, + }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) ); + } } # # Rename the table -$dbh->do('DROP TABLE IF EXISTS '.$oldname); -if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { - print LOG "MySQL Error Rename: ".$dbh->errstr."\n"; - die $dbh->errstr; +if (! $simulate) { + $dbh->do('DROP TABLE IF EXISTS '.$oldname); + if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { + &log(0,"MySQL Error Rename: ".$dbh->errstr); + die $dbh->errstr; + } else { + &log(1,"MySQL table rename successful."); + } } if (! $dbh->disconnect) { - print LOG "MySQL Error Disconnect: ".$dbh->errstr."\n"; + &log(0,"MySQL Error Disconnect: ".$dbh->errstr); die $dbh->errstr; } ## ## Finished! -print LOG "==== Searchcat completed ".localtime()." ====\n"; +&log(0,"==== Searchcat completed ".localtime()." ===="); close(LOG); &write_type_count(); @@ -171,6 +226,23 @@ close(LOG); exit 0; +## +## Status logging routine. Inputs: $level, $message +## +## $level 0 should be used for normal output and error messages +## +## $message does not need to end with \n. In the case of errors +## the message should contain as much information as possible to +## help in diagnosing the problem. +## +sub log { + my ($level,$message)=@_; + $level = 0 if (! defined($level)); + if ($verbose >= $level) { + print LOG $message.$/; + } +} + ######################################################## ######################################################## ### ### @@ -205,10 +277,12 @@ sub only_meta_files { sub print_filename { my ($file) = $_; my $fullfilename = $File::Find::name; - if (-d $file) { - print LOG " Got directory ".$fullfilename."\n"; - } else { - print LOG " Got file ".$fullfilename."\n"; + if ($debug) { + if (-d $file) { + &log(5," Got directory ".$fullfilename); + } else { + &log(5," Got file ".$fullfilename); + } } $_=$file; } @@ -217,46 +291,48 @@ sub log_metadata { my ($file) = $_; my $fullfilename = $File::Find::name; return if (-d $fullfilename); # No need to do anything here for directories - print LOG $fullfilename."\n"; - my $ref=&metadata($fullfilename); - if (! defined($ref)) { - print LOG " No data\n"; - return; - } - while (my($key,$value) = each(%$ref)) { - print LOG " ".$key." => ".$value."\n"; + if ($debug) { + &log(6,$fullfilename); + my $ref=&metadata($fullfilename); + if (! defined($ref)) { + &log(6," No data"); + return; + } + while (my($key,$value) = each(%$ref)) { + &log(6," ".$key." => ".$value); + } + &count_copyright($ref->{'copyright'}); } - &count_copyright($ref->{'copyright'}); $_=$file; } - ## ## process_meta_file ## Called by File::Find. ## Only input is the filename in $_. sub process_meta_file { my ($file) = $_; - my $filename = $File::Find::name; + my $filename = $File::Find::name; # full filename return if (-d $filename); # No need to do anything here for directories # - print LOG $filename."\n"; + &log(3,$filename) if ($debug); # my $ref=&metadata($filename); # # $url is the original file url, not the metadata file - my $url='/res/'.&declutter($filename); - $url=~s/\.meta$//; - print LOG " ".$url."\n"; + my $target = $filename; + $target =~ s/\.meta$//; + my $url='/res/'.&declutter($target); + &log(3," ".$url) if ($debug); # # Ignore some files based on their metadata if ($ref->{'obsolete'}) { - print LOG "obsolete\n"; + &log(3,"obsolete") if ($debug); return; } &count_copyright($ref->{'copyright'}); if ($ref->{'copyright'} eq 'private') { - print LOG "private\n"; + &log(3,"private") if ($debug); return; } # @@ -264,11 +340,21 @@ sub process_meta_file { my %dyn; if ($url=~ m:/default$:) { $url=~ s:/default$:/:; + &log(3,"Skipping dynamic data") if ($debug); } else { - # %dyn=&dynamicmeta($url); + &log(3,"Retrieving dynamic data") if ($debug); + %dyn=&get_dynamic_metadata($url); &count_type($url); } # + if (! defined($ref->{'creationdate'}) || + $ref->{'creationdate'} =~ /^\s*$/) { + $ref->{'creationdate'} = (stat($target))[9]; + } + if (! defined($ref->{'lastrevisiondate'}) || + $ref->{'lastrevisiondate'} =~ /^\s*$/) { + $ref->{'lastrevisiondate'} = (stat($target))[9]; + } $ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); my %Data = ( @@ -276,17 +362,15 @@ sub process_meta_file { %dyn, 'url'=>$url, 'version'=>'current'); - my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, - \%Data); - if ($err) { - print LOG "\nMySQL Error Insert: ".$err."\n"; - die $err; - } - if ($count < 1) { - print LOG "Unable to insert record into MySQL database for $url\n"; - die "Unable to insert record into MySQl database for $url"; - } else { - print LOG "Count = ".$count."\n"; + if (! $simulate) { + my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, + \%Data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL database for $url"); + } } # # Reset $_ before leaving @@ -311,7 +395,7 @@ sub metadata { if ($filename !~ /\.meta$/) { $filename.='.meta'; } - my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); + my $metastring=&getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename); return undef if (! defined($metastring)); my $parser=HTML::TokeParser->new(\$metastring); my $token; @@ -366,122 +450,105 @@ sub getfile { ### ### ######################################################## ######################################################## -sub dynamicmeta { - my $url = &declutter(shift()); - $url =~ s/\.meta$//; - my %data = ('count' => 0, - 'course' => 0, - 'course_list' => '', - 'avetries' => 'NULL', - 'avetries_list' => '', - 'stdno' => 0, - 'stdno_list' => '', - 'usage' => 0, - 'usage_list' => '', - 'goto' => 0, - 'goto_list' => '', - 'comefrom' => 0, - 'comefrom_list' => '', - 'difficulty' => 'NULL', - 'difficulty_list' => '', - 'sequsage' => '0', - 'sequsage_list' => '', - 'clear' => 'NULL', - 'technical' => 'NULL', - 'correct' => 'NULL', - 'helpful' => 'NULL', - 'depth' => 'NULL', - 'comments' => '', - ); - my ($dom,$auth)=($url=~/^(\w+)\/(\w+)\//); - my $prodir=&propath($dom,$auth); +## +## Dynamic metadata description (incomplete) +## +## For a full description of all fields, +## see LONCAPA::lonmetadata +## +## Field Type +##----------------------------------------------------------- +## count integer +## course integer +## course_list comma separated list of course ids +## avetries real +## avetries_list comma separated list of real numbers +## stdno real +## stdno_list comma separated list of real numbers +## usage integer +## usage_list comma separated list of resources +## goto scalar +## goto_list comma separated list of resources +## comefrom scalar +## comefrom_list comma separated list of resources +## difficulty real +## difficulty_list comma separated list of real numbers +## sequsage scalar +## sequsage_list comma separated list of resources +## clear real +## technical real +## correct real +## helpful real +## depth real +## comments html of all the comments made +## +{ + +my %DynamicData; +my %Counts; + +sub process_dynamic_metadata { + my ($user,$dom) = @_; + undef(%DynamicData); + undef(%Counts); # - # Get metadata except counts + my $prodir = &propath($dom,$user); + # + # Read in the dynamic metadata my %evaldata; if (! tie(%evaldata,'GDBM_File', $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { - return (undef); - } - my %sum=(); - my %count=(); - my %concat=(); - my %listitems=( - 'course' => 'add', - 'goto' => 'add', - 'comefrom' => 'add', - 'avetries' => 'average', - 'stdno' => 'add', - 'difficulty' => 'average', - 'clear' => 'average', - 'technical' => 'average', - 'helpful' => 'average', - 'correct' => 'average', - 'depth' => 'average', - 'comments' => 'append', - 'usage' => 'count' - ); - # - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - while (my ($esckey,$value)=each %evaldata) { - my $key=&unescape($esckey); - if ($key=~/$regexp/) { - my ($item,$purl,$cat)=split(/___/,$key); - $count{$cat}++; - if ($listitems{$cat} ne 'append') { - if (defined($sum{$cat})) { - $sum{$cat}+=&unescape($value); - $concat{$cat}.=','.$item; - } else { - $sum{$cat}=&unescape($value); - $concat{$cat}=$item; - } - } else { - if (defined($sum{$cat})) { - if ($evaldata{$esckey}=~/\w/) { - $sum{$cat}.='