--- loncom/metadata_database/searchcat.pl 2004/04/09 22:04:53 1.56 +++ loncom/metadata_database/searchcat.pl 2007/01/02 09:12:51 1.74 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.56 2004/04/09 22:04:53 matthew Exp $ +# $Id: searchcat.pl,v 1.74 2007/01/02 09:12:51 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -65,10 +65,8 @@ and correct user experience. =cut use strict; - use DBI; use lib '/home/httpd/lib/perl/'; -use LONCAPA::Configuration; use LONCAPA::lonmetadata; use Getopt::Long; @@ -77,6 +75,8 @@ use HTML::TokeParser; use GDBM_File; use POSIX qw(strftime mktime); +use Apache::lonnet(); + use File::Find; # @@ -118,32 +118,38 @@ if (defined($oneuser)) { ## ## Use variables for table names so we can test this routine a little easier -my $oldname = 'metadata'; -my $newname = 'newmetadata'; +my %oldnames = ( + 'metadata' => 'metadata', + 'portfolio' => 'portfolio_metadata', + 'access' => 'portfolio_access', + 'addedfields' => 'portfolio_addedfields', + ); + +my %newnames; +# new table names - append pid to have unique temporary tables +foreach my $key (keys(%oldnames)) { + $newnames{$key} = 'new'.$oldnames{$key}.$$; +} # -# Read loncapa_apache.conf and loncapa.conf -my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); -my %perlvar=%{$perlvarref}; -undef $perlvarref; -delete $perlvar{'lonReceipt'}; # remove since sensitive (really?) & not needed -# # Only run if machine is a library server -exit if ($perlvar{'lonRole'} ne 'library'); +exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library'); # # Make sure this process is running from user=www my $wwwid=getpwnam('www'); if ($wwwid!=$<) { - my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}"; + my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch"; system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ - mailto $emailto -s '$subj' > /dev/null"); + mail -s '$subj' $emailto > /dev/null"); exit 1; } # # Let people know we are running -open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); +open(LOG,'>>'.$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log'); &log(0,'==== Searchcat Run '.localtime()."===="); + + if ($debug) { &log(0,'simulating') if ($simulate); &log(0,'only processing user '.$oneuser) if ($oneuser); @@ -152,65 +158,119 @@ if ($debug) { # # Connect to database my $dbh; -if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'}, +if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'}, { RaiseError =>0,PrintError=>0}))) { &log(0,"Cannot connect to database!"); die "MySQL Error: Cannot connect to database!\n"; } # This can return an error and still be okay, so we do not bother checking. # (perhaps it should be more robust and check for specific errors) -$dbh->do('DROP TABLE IF EXISTS '.$newname); +foreach my $key (keys(%newnames)) { + if ($newnames{$key} ne '') { + $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key}); + } +} + # -# Create the new table -my $request = &LONCAPA::lonmetadata::create_metadata_storage($newname); -$dbh->do($request); -if ($dbh->err) { - $dbh->disconnect(); - &log(0,"MySQL Error Create: ".$dbh->errstr); - die $dbh->errstr; +# Create the new metadata and portfolio tables +foreach my $key (keys(%newnames)) { + if ($newnames{$key} ne '') { + my $request = + &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key}); + $dbh->do($request); + if ($dbh->err) { + $dbh->disconnect(); + &log(0,"MySQL Error Create: ".$dbh->errstr); + die $dbh->errstr; + } + } } + # # find out which users we need to examine -my $dom = $perlvar{'lonDefDomain'}; -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$dom"); -my @homeusers = - grep { - &ishome("$perlvar{'lonDocRoot'}/res/$dom/$_"); - } grep { - !/^\.\.?$/; - } readdir(RESOURCES); -closedir RESOURCES; -# -if ($oneuser) { - @homeusers=($oneuser); -} -# -# Loop through the users -foreach my $user (@homeusers) { - &log(0,"=== User: ".$user); - &process_dynamic_metadata($user,$dom); - # - # Use File::Find to get the files we need to read/modify - find( - {preprocess => \&only_meta_files, -# wanted => \&print_filename, -# wanted => \&log_metadata, - wanted => \&process_meta_file, - }, - "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); +my @domains = sort(&Apache::lonnet::current_machine_domains()); +&log(9,'domains ="'.join('","',@domains).'"'); + +foreach my $dom (@domains) { + &log(9,'domain = '.$dom); + opendir(RESOURCES,"$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom"); + my @homeusers = + grep { + &ishome("$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom/$_"); + } grep { + !/^\.\.?$/; + } readdir(RESOURCES); + closedir RESOURCES; + &log(5,'users = '.$dom.':'.join(',',@homeusers)); + # + if ($oneuser) { + @homeusers=($oneuser); + } + # + # Loop through the users + foreach my $user (@homeusers) { + &log(0,"=== User: ".$user); + &process_dynamic_metadata($user,$dom); + # + # Use File::Find to get the files we need to read/modify + find( + {preprocess => \&only_meta_files, + #wanted => \&print_filename, + #wanted => \&log_metadata, + wanted => \&process_meta_file, + no_chdir => 1, + }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) ); + } + # Search for public portfolio files + my %portusers; + if ($oneuser) { + %portusers = ( + $oneuser => '', + ); + } else { + my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom; + &descend_tree($dir,0,\%portusers); + } + foreach my $uname (keys(%portusers)) { + my $urlstart = '/uploaded/'.$dom.'/'.$uname; + my $pathstart = &propath($dom,$uname).'/userfiles'; + my $is_course = &Apache::lonnet::is_course($dom,$uname); + my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname); + my %access = &Apache::lonnet::get_access_controls($curr_perm); + foreach my $file (keys(%access)) { + my ($group,$url,$fullpath); + if ($is_course) { + ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/); + $fullpath = $pathstart.'/groups/'.$group.'/portfolio'.$path; + $url = $urlstart.'/groups/'.$group.'/portfolio'.$path; + } else { + $fullpath = $pathstart.'/portfolio'.$file; + $url = $urlstart.'/portfolio'.$file; + } + if (ref($access{$file}) eq 'HASH') { + &process_portfolio_access_data($url,$access{$file}); + } + &process_portfolio_metadata($url,$fullpath,$is_course,$dom, + $uname,$group); + } + } } + # -# Rename the table +# Rename the tables if (! $simulate) { - $dbh->do('DROP TABLE IF EXISTS '.$oldname); - if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { - &log(0,"MySQL Error Rename: ".$dbh->errstr); - die $dbh->errstr; - } else { - &log(1,"MySQL table rename successful."); + foreach my $key (keys(%oldnames)) { + if (($oldnames{$key} ne '') && ($newnames{$key} ne '')) { + $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key}); + if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) { + &log(0,"MySQL Error Rename: ".$dbh->errstr); + die $dbh->errstr; + } else { + &log(1,"MySQL table rename successful for $key."); + } + } } } - if (! $dbh->disconnect) { &log(0,"MySQL Error Disconnect: ".$dbh->errstr); die $dbh->errstr; @@ -242,6 +302,109 @@ sub log { } } +sub descend_tree { + my ($dir,$depth,$alldomusers) = @_; + if (-d $dir) { + opendir(DIR,$dir); + my @contents = grep(!/^\./,readdir(DIR)); + closedir(DIR); + $depth ++; + foreach my $item (@contents) { + if ($depth < 4) { + &descend_tree($dir.'/'.$item,$depth,$alldomusers); + } else { + if (-e $dir.'/'.$item.'/file_permissions.db') { + + $$alldomusers{$item} = ''; + } + } + } + } +} + +sub process_portfolio_access_data { + my ($url,$access_hash) = @_; + foreach my $key (keys(%{$access_hash})) { + my $acc_data; + $acc_data->{url} = $url; + $acc_data->{keynum} = $key; + my ($num,$scope,$end,$start) = + ($key =~ /^([^:]+):([a-z]+)_(\d*)_?(\d*)$/); + next if (($scope ne 'public') && ($scope ne 'guest')); + $acc_data->{scope} = $scope; + if ($end != 0) { + $acc_data->{end} = &LONCAPA::lonmetadata::sqltime($end); + } + $acc_data->{start} = &LONCAPA::lonmetadata::sqltime($start); + if (! $simulate) { + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh, + $newnames{'access'}, + 'portfolio_access',$acc_data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL database for $url"); + } + } + } +} + +sub process_portfolio_metadata { + my ($url,$fullpath,$is_course,$dom,$uname,$group) = @_; + my ($ref,$crs,$addedfields) = &portfolio_metadata($fullpath,$dom,$uname, + $group); + &getfiledates($ref,$fullpath); + if ($is_course) { + $ref->{'groupname'} = $group; + } + my %Data; + if (ref($ref) eq 'HASH') { + %Data = %{$ref}; + } + %Data = ( + %Data, + 'url'=>$url, + 'version'=>'current', + ); + if (! $simulate) { + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh, + $newnames{'portfolio'}, + 'portfolio_metadata',\%Data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL portfolio_metadata database table for $url"); + } + if (ref($addedfields) eq 'HASH') { + if (keys(%{$addedfields}) > 0) { + foreach my $key (keys(%{$addedfields})) { + my $added_data = { + 'url' => $url, + 'field' => $key, + 'value' => $addedfields->{$key}, + 'courserestricted' => $crs, + }; + ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh, + $newnames{'addedfields'}, + 'portfolio_addedfields', + $added_data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL portfolio_addedfields database table for url = $url and field = $key"); + } + } + } + } + } + return; +} + ######################################################## ######################################################## ### ### @@ -261,7 +424,7 @@ sub only_meta_files { foreach my $file (@PossibleFiles) { if ( ($file =~ /\.meta$/ && # Ends in meta $file !~ /\.\d+\.[^\.]+\.meta$/ # is not for a prior version - ) || (-d $file )) { # directories are okay + ) || (-d $File::Find::dir."/".$file )) { # directories are okay # but we do not want /. or /.. push(@ChosenFiles,$file); } @@ -292,7 +455,7 @@ sub log_metadata { return if (-d $fullfilename); # No need to do anything here for directories if ($debug) { &log(6,$fullfilename); - my $ref=&metadata($fullfilename); + my $ref = &metadata($fullfilename); if (! defined($ref)) { &log(6," No data"); return; @@ -305,7 +468,6 @@ sub log_metadata { $_=$file; } - ## ## process_meta_file ## Called by File::Find. @@ -317,11 +479,12 @@ sub process_meta_file { # &log(3,$filename) if ($debug); # - my $ref=&metadata($filename); + my $ref = &metadata($filename); # # $url is the original file url, not the metadata file - my $url='/res/'.&declutter($filename); - $url=~s/\.meta$//; + my $target = $filename; + $target =~ s/\.meta$//; + my $url='/res/'.&declutter($target); &log(3," ".$url) if ($debug); # # Ignore some files based on their metadata @@ -345,24 +508,22 @@ sub process_meta_file { %dyn=&get_dynamic_metadata($url); &count_type($url); } + &getfiledates($ref,$target); # - $ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); - $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); my %Data = ( %$ref, %dyn, 'url'=>$url, 'version'=>'current'); if (! $simulate) { - my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, - \%Data); + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'}, + 'metadata',\%Data); if ($err) { &log(0,"MySQL Error Insert: ".$err); - die $err; } if ($count < 1) { &log(0,"Unable to insert record into MySQL database for $url"); - die "Unable to insert record into MySQl database for $url"; } } # @@ -379,7 +540,7 @@ sub process_meta_file { ######################################################## ######################################################## sub metadata { - my ($uri)=@_; + my ($uri) = @_; my %metacache=(); $uri=&declutter($uri); my $filename=$uri; @@ -388,7 +549,7 @@ sub metadata { if ($filename !~ /\.meta$/) { $filename.='.meta'; } - my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); + my $metastring=&getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename); return undef if (! defined($metastring)); my $parser=HTML::TokeParser->new(\$metastring); my $token; @@ -409,7 +570,7 @@ sub metadata { } foreach ( @{$token->[3]}) { $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; - } + } if (! ($metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry))){ $metacache{$uri.''.$unikey} = $metacache{$uri.''.$unikey.'.default'}; @@ -419,6 +580,88 @@ sub metadata { return \%metacache; } +############################################################### +############################################################### +### ### +### &portfolio_metadata($filepath,$dom,$uname,$group) ### +### Retrieve metadata for the given file ### +### Returns array - ### +### contains reference to metadatahash and ### +### optional reference to addedfields hash ### +### ### +############################################################### +############################################################### +sub portfolio_metadata { + my ($fullpath,$dom,$uname,$group)=@_; + my ($mime) = ( $fullpath=~/\.(\w+)$/ ); + my %metacache=(); + if ($fullpath !~ /\.meta$/) { + $fullpath .= '.meta'; + } + my (@standard_fields,%addedfields); + my $colsref = + $LONCAPA::lonmetadata::Portfolio_metadata_table_description; + if (ref($colsref) eq 'ARRAY') { + my @columns = @{$colsref}; + foreach my $coldata (@columns) { + push(@standard_fields,$coldata->{'name'}); + } + } + my $metastring=&getfile($fullpath); + if (! defined($metastring)) { + $metacache{'keys'}= 'owner,domain,mime'; + $metacache{'owner'} = $uname.':'.$dom; + $metacache{'domain'} = $dom; + $metacache{'mime'} = $mime; + if ($group ne '') { + $metacache{'keys'} .= ',courserestricted'; + $metacache{'courserestricted'} = 'course.'.$dom.'_'.$uname; + } + } else { + my $parser=HTML::TokeParser->new(\$metastring); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $entry=$token->[1]; + if ($metacache{'keys'}) { + $metacache{'keys'}.=','.$entry; + } else { + $metacache{'keys'}=$entry; + } + my $value = $parser->get_text('/'.$entry); + if (!grep(/^\Q$entry\E$/,@standard_fields)) { + my $clean_value = lc($value); + $clean_value =~ s/\s/_/g; + if ($clean_value ne $entry) { + if (defined($addedfields{$entry})) { + $addedfields{$entry} .=','.$value; + } else { + $addedfields{$entry} = $value; + } + } + } else { + $metacache{$entry} = $value; + } + } + } # End of ($token->[0] eq 'S') + } + if (keys(%addedfields) > 0) { + foreach my $key (sort keys(%addedfields)) { + $metacache{'addedfieldnames'} .= $key.','; + $metacache{'addedfieldvalues'} .= $addedfields{$key}.'&&&'; + } + $metacache{'addedfieldnames'} =~ s/,$//; + $metacache{'addedfieldvalues'} =~ s/\&\&\&$//; + if ($metacache{'keys'}) { + $metacache{'keys'}.=',addedfieldnames'; + } else { + $metacache{'keys'}='addedfieldnames'; + } + $metacache{'keys'}.=',addedfieldvalues'; + } + return (\%metacache,$metacache{'courserestricted'},\%addedfields); +} + ## ## &getfile($filename) ## Slurps up an entire file into a scalar. @@ -436,6 +679,26 @@ sub getfile { return $contents; } +## +## &getfiledates() +## Converts creationdate and modifieddates to SQL format +## Applies stat() to file to retrieve dates if missing +sub getfiledates { + my ($ref,$target) = @_; + if (! defined($ref->{'creationdate'}) || + $ref->{'creationdate'} =~ /^\s*$/) { + $ref->{'creationdate'} = (stat($target))[9]; + } + if (! defined($ref->{'lastrevisiondate'}) || + $ref->{'lastrevisiondate'} =~ /^\s*$/) { + $ref->{'lastrevisiondate'} = (stat($target))[9]; + } + $ref->{'creationdate'} = + &LONCAPA::lonmetadata::sqltime($ref->{'creationdate'}); + $ref->{'lastrevisiondate'} = + &LONCAPA::lonmetadata::sqltime($ref->{'lastrevisiondate'}); +} + ######################################################## ######################################################## ### ### @@ -444,27 +707,30 @@ sub getfile { ######################################################## ######################################################## ## -## Dynamic metadata description +## Dynamic metadata description (incomplete) +## +## For a full description of all fields, +## see LONCAPA::lonmetadata ## ## Field Type ##----------------------------------------------------------- ## count integer ## course integer -## course_list comma seperated list of course ids +## course_list comma separated list of course ids ## avetries real -## avetries_list comma seperated list of real numbers +## avetries_list comma separated list of real numbers ## stdno real -## stdno_list comma seperated list of real numbers +## stdno_list comma separated list of real numbers ## usage integer -## usage_list comma seperated list of resources +## usage_list comma separated list of resources ## goto scalar -## goto_list comma seperated list of resources +## goto_list comma separated list of resources ## comefrom scalar -## comefrom_list comma seperated list of resources +## comefrom_list comma separated list of resources ## difficulty real -## difficulty_list comma seperated list of real numbers +## difficulty_list comma separated list of real numbers ## sequsage scalar -## sequsage_list comma seperated list of resources +## sequsage_list comma separated list of resources ## clear real ## technical real ## correct real @@ -491,34 +757,10 @@ sub process_dynamic_metadata { return 0; } # - # Process every stored element - while (my ($storedkey,$value) = each(%evaldata)) { - my ($source,$file,$type) = split('___',$storedkey); - $source = &unescape($source); - $file = &unescape($file); - $value = &unescape($value); - " got ".$file."\n ".$type." ".$source."\n"; - if ($type =~ /^(avetries|count|difficulty|stdno|timestamp)$/) { - # - # Statistics: $source is course id - $DynamicData{$file}->{'statistics'}->{$source}->{$type}=$value; - } elsif ($type =~ /^(clear|comments|depth|technical|helpful)$/){ - # - # Evaluation $source is username, check if they evaluated it - # more than once. If so, pad the entry with a space. - while(exists($DynamicData{$file}->{'evaluation'}->{$type}->{$source})) { - $source .= ' '; - } - $DynamicData{$file}->{'evaluation'}->{$type}->{$source}=$value; - } elsif ($type =~ /^(course|comefrom|goto|usage)$/) { - # - # Context $source is course id or resource - push(@{$DynamicData{$file}->{$type}},&unescape($source)); - } else { - &log(0," ".$user."@".$dom.":Process metadata: Unable to decode ".$type); - } - } + %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata); untie(%evaldata); + $DynamicData{'domain'} = $dom; + #print('user = '.$user.' domain = '.$dom.$/); # # Read in the access count data &log(7,'Reading access count data') if ($debug); @@ -547,75 +789,17 @@ sub process_dynamic_metadata { sub get_dynamic_metadata { my ($url) = @_; $url =~ s:^/res/::; - if (! exists($DynamicData{$url})) { - &log(7,' No dynamic data for '.$url) if ($debug); - return (); - } - my %data; - my $resdata = $DynamicData{$url}; - # - # Get the statistical data - foreach my $type (qw/avetries difficulty stdno/) { - my $count; - my $sum; - my @Values; - foreach my $coursedata (values(%{$resdata->{'statistics'}})) { - if (ref($coursedata) eq 'HASH' && exists($coursedata->{$type})) { - $count++; - $sum += $coursedata->{$type}; - push(@Values,$coursedata->{$type}); - } - } - if ($count) { - $data{$type} = $sum/$count; - $data{$type.'_list'} = join(',',@Values); - } - } + my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url, + \%DynamicData); # find the count $data{'count'} = $Counts{$url}; # - # Get the context data - foreach my $type (qw/course goto comefrom/) { - if (defined($resdata->{$type}) && - ref($resdata->{$type}) eq 'ARRAY') { - $data{$type} = scalar(@{$resdata->{$type}}); - $data{$type.'_list'} = join(',',@{$resdata->{$type}}); - } - } - if (defined($resdata->{'usage'}) && - ref($resdata->{'usage'}) eq 'ARRAY') { - $data{'sequsage'} = scalar(@{$resdata->{'usage'}}); - $data{'sequsage_list'} = join(',',@{$resdata->{'usage'}}); - } - # - # Get the evaluation data - foreach my $type (qw/clear technical correct helpful depth/) { - my $count; - my $sum; - foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{$type}})){ - $sum += $resdata->{'evaluation'}->{$type}->{$evaluator}; - $count++; - } - if ($count > 0) { - $data{$type}=$sum/$count; - } - } - # - # put together comments - my $comments = '
'; - } - $comments .= '