--- loncom/metadata_database/searchcat.pl 2004/04/08 15:57:32 1.55 +++ loncom/metadata_database/searchcat.pl 2005/03/11 03:25:18 1.62 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.55 2004/04/08 15:57:32 matthew Exp $ +# $Id: searchcat.pl,v 1.62 2005/03/11 03:25:18 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -71,16 +71,59 @@ use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; use LONCAPA::lonmetadata; +use Getopt::Long; use IO::File; use HTML::TokeParser; use GDBM_File; use POSIX qw(strftime mktime); + +use Sys::Hostname; + use File::Find; +# +# Set up configuration options +my ($simulate,$oneuser,$help,$verbose,$logfile,$debug,$multidom); +GetOptions ( + 'help' => \$help, + 'simulate' => \$simulate, + 'only=s' => \$oneuser, + 'verbose=s' => \$verbose, + 'debug' => \$debug, + 'multi_domain' => \$multidom, + ); + +if ($help) { + print <<"ENDHELP"; +$0 +Rebuild and update the LON-CAPA metadata database. +Options: + -help Print this help + -simulate Do not modify the database. + -only=user Only compute for the given user. Implies -simulate + -verbose=val Sets logging level, val must be a number + -debug Turns on debugging output + -multi_domain Parse the hosts.tab file domain(s) to use. +ENDHELP + exit 0; +} + +if (! defined($debug)) { + $debug = 0; +} + +if (! 
defined($verbose)) { + $verbose = 0; +} + +if (defined($oneuser)) { + $simulate=1; +} + ## ## Use variables for table names so we can test this routine a little easier my $oldname = 'metadata'; -my $newname = 'newmetadata'; +my $newname = 'newmetadata'.$$; # append pid to have unique temporary table # # Read loncapa_apache.conf and loncapa.conf @@ -103,14 +146,21 @@ if ($wwwid!=$<) { } # # Let people know we are running -open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); -print LOG '==== Searchcat Run '.localtime()."====\n"; +open(LOG,'>>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); +&log(0,'==== Searchcat Run '.localtime()."===="); + + +if ($debug) { + &log(0,'simulating') if ($simulate); + &log(0,'only processing user '.$oneuser) if ($oneuser); + &log(0,'verbosity level = '.$verbose); +} # # Connect to database my $dbh; if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'}, { RaiseError =>0,PrintError=>0}))) { - print LOG "Cannot connect to database!\n"; + &log(0,"Cannot connect to database!"); die "MySQL Error: Cannot connect to database!\n"; } # This can return an error and still be okay, so we do not bother checking. 
@@ -122,48 +172,84 @@ my $request = &LONCAPA::lonmetadata::cre $dbh->do($request); if ($dbh->err) { $dbh->disconnect(); - print LOG "\nMySQL Error Create: ".$dbh->errstr."\n"; + &log(0,"MySQL Error Create: ".$dbh->errstr); die $dbh->errstr; } # # find out which users we need to examine -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); -my @homeusers = - grep { - &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_"); - } grep { - !/^\.\.?$/; - } readdir(RESOURCES); -closedir RESOURCES; -# -# Loop through the users -foreach my $user (@homeusers) { - print LOG "=== User: ".$user."\n"; - my $prodir=&propath($perlvar{'lonDefDomain'},$user); - # - # Use File::Find to get the files we need to read/modify - find( - {preprocess => \&only_meta_files, -# wanted => \&print_filename, -# wanted => \&log_metadata, - wanted => \&process_meta_file, - }, - "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); +my @domains; +if (defined($multidom)) { + &log(1,'====multi domain setup===='); + # Peek into the hosts.tab and look for matches of our hostname + my $host = hostname(); + &log(9,'hostname = "'.$host.'"'); + open(HOSTFILE,$perlvar{'lonTabDir'}.'/hosts.tab') || + die ("Unable to determine domain(s) of multi-domain server"); + my %domains; + while (<HOSTFILE>) { + next if (/^\#/); + next if (!/:\Q$host\E/); + &log(9,$_); + $domains{(split(':',$_))[1]}++; + } + close HOSTFILE; + @domains = sort(keys(%domains)); + &log(9,join(',',@domains)); + if (! 
scalar(@domains)) { + die ("Unable to find any domains in the hosts.tab that match ".$host); + } +} else { + push(@domains,$perlvar{'lonDefDomain'}); +} + +foreach my $dom (@domains) { + &log(9,'domain = '.$dom); + opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$dom"); + my @homeusers = + grep { + &ishome("$perlvar{'lonDocRoot'}/res/$dom/$_"); + } grep { + !/^\.\.?$/; + } readdir(RESOURCES); + closedir RESOURCES; + &log(5,'users = '.$dom.':'.join(',',@homeusers)); + # + if ($oneuser) { + @homeusers=($oneuser); + } + # + # Loop through the users + foreach my $user (@homeusers) { + &log(0,"=== User: ".$user); + &process_dynamic_metadata($user,$dom); + # + # Use File::Find to get the files we need to read/modify + find( + {preprocess => \&only_meta_files, + #wanted => \&print_filename, + #wanted => \&log_metadata, + wanted => \&process_meta_file, + }, join('/',($perlvar{'lonDocRoot'},'res',$dom,$user)) ); + } } # # Rename the table -$dbh->do('DROP TABLE IF EXISTS '.$oldname); -if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { - print LOG "MySQL Error Rename: ".$dbh->errstr."\n"; - die $dbh->errstr; +if (! $simulate) { + $dbh->do('DROP TABLE IF EXISTS '.$oldname); + if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { + &log(0,"MySQL Error Rename: ".$dbh->errstr); + die $dbh->errstr; + } else { + &log(1,"MySQL table rename successful."); + } } if (! $dbh->disconnect) { - print LOG "MySQL Error Disconnect: ".$dbh->errstr."\n"; + &log(0,"MySQL Error Disconnect: ".$dbh->errstr); die $dbh->errstr; } ## ## Finished! -print LOG "==== Searchcat completed ".localtime()." ====\n"; +&log(0,"==== Searchcat completed ".localtime()." ===="); close(LOG); &write_type_count(); @@ -171,6 +257,23 @@ close(LOG); exit 0; +## +## Status logging routine. Inputs: $level, $message +## +## $level 0 should be used for normal output and error messages +## +## $message does not need to end with \n. 
In the case of errors +## the message should contain as much information as possible to +## help in diagnosing the problem. +## +sub log { + my ($level,$message)=@_; + $level = 0 if (! defined($level)); + if ($verbose >= $level) { + print LOG $message.$/; + } +} + ######################################################## ######################################################## ### ### @@ -205,10 +308,12 @@ sub only_meta_files { sub print_filename { my ($file) = $_; my $fullfilename = $File::Find::name; - if (-d $file) { - print LOG " Got directory ".$fullfilename."\n"; - } else { - print LOG " Got file ".$fullfilename."\n"; + if ($debug) { + if (-d $file) { + &log(5," Got directory ".$fullfilename); + } else { + &log(5," Got file ".$fullfilename); + } } $_=$file; } @@ -217,46 +322,48 @@ sub log_metadata { my ($file) = $_; my $fullfilename = $File::Find::name; return if (-d $fullfilename); # No need to do anything here for directories - print LOG $fullfilename."\n"; - my $ref=&metadata($fullfilename); - if (! defined($ref)) { - print LOG " No data\n"; - return; - } - while (my($key,$value) = each(%$ref)) { - print LOG " ".$key." => ".$value."\n"; + if ($debug) { + &log(6,$fullfilename); + my $ref=&metadata($fullfilename); + if (! defined($ref)) { + &log(6," No data"); + return; + } + while (my($key,$value) = each(%$ref)) { + &log(6," ".$key." => ".$value); + } + &count_copyright($ref->{'copyright'}); } - &count_copyright($ref->{'copyright'}); $_=$file; } - ## ## process_meta_file ## Called by File::Find. ## Only input is the filename in $_. 
sub process_meta_file { my ($file) = $_; - my $filename = $File::Find::name; + my $filename = $File::Find::name; # full filename return if (-d $filename); # No need to do anything here for directories # - print LOG $filename."\n"; + &log(3,$filename) if ($debug); # my $ref=&metadata($filename); # # $url is the original file url, not the metadata file - my $url='/res/'.&declutter($filename); - $url=~s/\.meta$//; - print LOG " ".$url."\n"; + my $target = $filename; + $target =~ s/\.meta$//; + my $url='/res/'.&declutter($target); + &log(3," ".$url) if ($debug); # # Ignore some files based on their metadata if ($ref->{'obsolete'}) { - print LOG "obsolete\n"; + &log(3,"obsolete") if ($debug); return; } &count_copyright($ref->{'copyright'}); if ($ref->{'copyright'} eq 'private') { - print LOG "private\n"; + &log(3,"private") if ($debug); return; } # @@ -264,11 +371,21 @@ sub process_meta_file { my %dyn; if ($url=~ m:/default$:) { $url=~ s:/default$:/:; + &log(3,"Skipping dynamic data") if ($debug); } else { - # %dyn=&dynamicmeta($url); + &log(3,"Retrieving dynamic data") if ($debug); + %dyn=&get_dynamic_metadata($url); &count_type($url); } # + if (! defined($ref->{'creationdate'}) || + $ref->{'creationdate'} =~ /^\s*$/) { + $ref->{'creationdate'} = (stat($target))[9]; + } + if (! 
defined($ref->{'lastrevisiondate'}) || + $ref->{'lastrevisiondate'} =~ /^\s*$/) { + $ref->{'lastrevisiondate'} = (stat($target))[9]; + } $ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); my %Data = ( @@ -276,17 +393,15 @@ sub process_meta_file { %dyn, 'url'=>$url, 'version'=>'current'); - my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, - \%Data); - if ($err) { - print LOG "\nMySQL Error Insert: ".$err."\n"; - die $err; - } - if ($count < 1) { - print LOG "Unable to insert record into MySQL database for $url\n"; - die "Unable to insert record into MySQl database for $url"; - } else { - print LOG "Count = ".$count."\n"; + if (! $simulate) { + my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, + \%Data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL database for $url"); + } } # # Reset $_ before leaving @@ -366,122 +481,105 @@ sub getfile { ### ### ######################################################## ######################################################## -sub dynamicmeta { - my $url = &declutter(shift()); - $url =~ s/\.meta$//; - my %data = ('count' => 0, - 'course' => 0, - 'course_list' => '', - 'avetries' => 'NULL', - 'avetries_list' => '', - 'stdno' => 0, - 'stdno_list' => '', - 'usage' => 0, - 'usage_list' => '', - 'goto' => 0, - 'goto_list' => '', - 'comefrom' => 0, - 'comefrom_list' => '', - 'difficulty' => 'NULL', - 'difficulty_list' => '', - 'sequsage' => '0', - 'sequsage_list' => '', - 'clear' => 'NULL', - 'technical' => 'NULL', - 'correct' => 'NULL', - 'helpful' => 'NULL', - 'depth' => 'NULL', - 'comments' => '', - ); - my ($dom,$auth)=($url=~/^(\w+)\/(\w+)\//); - my $prodir=&propath($dom,$auth); +## +## Dynamic metadata description (incomplete) +## +## For a full description of all fields, +## see LONCAPA::lonmetadata +## +## Field Type 
+##----------------------------------------------------------- +## count integer +## course integer +## course_list comma separated list of course ids +## avetries real +## avetries_list comma separated list of real numbers +## stdno real +## stdno_list comma separated list of real numbers +## usage integer +## usage_list comma separated list of resources +## goto scalar +## goto_list comma separated list of resources +## comefrom scalar +## comefrom_list comma separated list of resources +## difficulty real +## difficulty_list comma separated list of real numbers +## sequsage scalar +## sequsage_list comma separated list of resources +## clear real +## technical real +## correct real +## helpful real +## depth real +## comments html of all the comments made +## +{ + +my %DynamicData; +my %Counts; + +sub process_dynamic_metadata { + my ($user,$dom) = @_; + undef(%DynamicData); + undef(%Counts); # - # Get metadata except counts + my $prodir = &propath($dom,$user); + # + # Read in the dynamic metadata my %evaldata; if (! 
tie(%evaldata,'GDBM_File', $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { - return (undef); - } - my %sum=(); - my %count=(); - my %concat=(); - my %listitems=( - 'course' => 'add', - 'goto' => 'add', - 'comefrom' => 'add', - 'avetries' => 'average', - 'stdno' => 'add', - 'difficulty' => 'average', - 'clear' => 'average', - 'technical' => 'average', - 'helpful' => 'average', - 'correct' => 'average', - 'depth' => 'average', - 'comments' => 'append', - 'usage' => 'count' - ); - # - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - while (my ($esckey,$value)=each %evaldata) { - my $key=&unescape($esckey); - if ($key=~/$regexp/) { - my ($item,$purl,$cat)=split(/___/,$key); - $count{$cat}++; - if ($listitems{$cat} ne 'append') { - if (defined($sum{$cat})) { - $sum{$cat}+=&unescape($value); - $concat{$cat}.=','.$item; - } else { - $sum{$cat}=&unescape($value); - $concat{$cat}=$item; - } - } else { - if (defined($sum{$cat})) { - if ($evaldata{$esckey}=~/\w/) { - $sum{$cat}.='<hr />'.&unescape($evaldata{$esckey});
- } - } else { - $sum{$cat}=''.&unescape($evaldata{$esckey}); - } - } - } + return 0; } + # + %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata); untie(%evaldata); - # transfer gathered data to returnhash, calculate averages where applicable - my %returnhash; - while (my $cat=each(%count)) { - if ($count{$cat} eq 'nan') { next; } - if ($sum{$cat} eq 'nan') { next; } - if ($listitems{$cat} eq 'average') { - if ($count{$cat}) { - $returnhash{$cat}=int(($sum{$cat}/$count{$cat})*100.0+0.5)/100.0; - } else { - $returnhash{$cat}='NULL'; - } - } elsif ($listitems{$cat} eq 'count') { - $returnhash{$cat}=$count{$cat}; - } else { - $returnhash{$cat}=$sum{$cat}; - } - $returnhash{$cat.'_list'}=$concat{$cat}; - } + $DynamicData{'domain'} = $dom; + print('user = '.$user.' domain = '.$dom.$/); # - # get count - if (tie(my %evaldata,'GDBM_File', - $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { - my $escurl=&escape($url); - if (! exists($evaldata{$escurl})) { - $returnhash{'count'}=0; - } else { - $returnhash{'count'}=$evaldata{$escurl}; - } - untie %evaldata; + # Read in the access count data + &log(7,'Reading access count data') if ($debug); + my %countdata; + if (! tie(%countdata,'GDBM_File', + $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { + return 0; } - return %returnhash; + while (my ($key,$count) = each(%countdata)) { + next if ($key !~ /^$dom/); + $key = &unescape($key); + &log(8,' Count '.$key.' = '.$count) if ($debug); + $Counts{$key}=$count; + } + untie(%countdata); + if ($debug) { + &log(7,scalar(keys(%Counts)). + " Counts read for ".$user."@".$dom); + &log(7,scalar(keys(%DynamicData)). 
+ " Dynamic metadata read for ".$user."@".$dom); + } + # + return 1; +} + +sub get_dynamic_metadata { + my ($url) = @_; + $url =~ s:^/res/::; + my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url, + \%DynamicData); + # find the count + $data{'count'} = $Counts{$url}; + # + # Log the dynamic metadata + if ($debug) { + while (my($k,$v)=each(%data)) { + &log(8," ".$k." => ".$v); + } + } + return %data; } +} # End of %DynamicData and %Counts scope + ######################################################## ######################################################## ### ### @@ -593,8 +691,10 @@ sub sqltime { $TimeData[5]+=1900; $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', @TimeData[5,4,3,2,1,0]); + } elsif (! defined($time) || $time == 0) { + $mysqltime = 0; } else { - print LOG " Unable to decode time ".$time."\n"; + &log(0," sqltime:Unable to decode time ".$time); $mysqltime = 0; } return $mysqltime;