--- loncom/metadata_database/searchcat.pl 2003/12/26 15:13:48 1.50 +++ loncom/metadata_database/searchcat.pl 2007/01/03 03:59:48 1.70.2.2 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.50 2003/12/26 15:13:48 www Exp $ +# $Id: searchcat.pl,v 1.70.2.2 2007/01/03 03:59:48 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -65,449 +65,803 @@ and correct user experience. =cut use strict; - +use DBI; use lib '/home/httpd/lib/perl/'; -use LONCAPA::Configuration; +use LONCAPA::lonmetadata; +use Getopt::Long; use IO::File; use HTML::TokeParser; -use DBI; use GDBM_File; use POSIX qw(strftime mktime); -require "find.pl"; +use Apache::lonnet(); -my @metalist; +use File::Find; -my $simplestatus=''; -my %countext=(); +# +# Set up configuration options +my ($simulate,$oneuser,$help,$verbose,$logfile,$debug); +GetOptions ( + 'help' => \$help, + 'simulate' => \$simulate, + 'only=s' => \$oneuser, + 'verbose=s' => \$verbose, + 'debug' => \$debug, + ); -# ----------------------------------------------------- write out simple status -sub writesimple { - open(SMP,'>/home/httpd/html/lon-status/mysql.txt'); - print SMP $simplestatus."\n"; - close(SMP); +if ($help) { + print <<"ENDHELP"; +$0 +Rebuild and update the LON-CAPA metadata database. +Options: + -help Print this help + -simulate Do not modify the database. + -only=user Only compute for the given user. Implies -simulate + -verbose=val Sets logging level, val must be a number + -debug Turns on debugging output +ENDHELP + exit 0; } -sub writecount { - open(RSMP,'>/home/httpd/html/lon-status/rescount.txt'); - foreach (keys %countext) { - print RSMP $_.'='.$countext{$_}.'&'; - } - print RSMP 'time='.time."\n"; - close(RSMP); +if (! defined($debug)) { + $debug = 0; } -# -------------------------------------- counts files with different extensions -sub count { - my $file=shift; - $file=~/\.(\w+)$/; - my $ext=lc($1); - if (defined($countext{$ext})) { - $countext{$ext}++; - } else { - $countext{$ext}=1; - } +if (! defined($verbose)) { + $verbose = 0; } -# ----------------------------------------------------- Un-Escape Special Chars -sub unescape { - my $str=shift; - $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; - return $str; +if (defined($oneuser)) { + $simulate=1; } -# -------------------------------------------------------- Escape Special Chars +## +## Use variables for table names so we can test this routine a little easier +my %oldnames = ( + 'metadata' => 'metadata', + 'portfolio' => 'portfolio_metadata', + 'access' => 'portfolio_access', + 'addedfields' => 'portfolio_addedfields', + ); -sub escape { - my $str=shift; - $str =~ s/(\W)/"%".unpack('H2',$1)/eg; - return $str; +my %newnames; +# new table names - append pid to have unique temporary tables +foreach my $key (keys(%oldnames)) { + $newnames{$key} = 'new'.$oldnames{$key}.$$; } -# ------------------------------------------- Code to evaluate dynamic metadata - -sub dynamicmeta { - my $url=&declutter(shift); - $url=~s/\.meta$//; - my %returnhash=( - 'count' => 0, - 'course' => 0, - 'course_list' => '', - 'avetries' => 'NULL', - 'avetries_list' => '', - 'stdno' => 0, - 'stdno_list' => '', - 'usage' => 0, - 'usage_list' => '', - 'goto' => 0, - 'goto_list' => '', - 'comefrom' => 0, - 'comefrom_list' => '', - 'difficulty' => 'NULL', - 'difficulty_list' => '', - 'clear' => 'NULL', - 'technical' => 'NULL', - 'correct' => 'NULL', - 'helpful' => 'NULL', - 'depth' => 'NULL', - 'comments' => '' - ); - my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); - my $prodir=&propath($adomain,$aauthor); - -# Get metadata except counts - if (tie(my %evaldata,'GDBM_File', - $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { - my %sum=(); - my %cnt=(); - my %concat=(); - my %listitems=( - 'course' => 'add', - 'goto' => 'add', - 'comefrom' => 'add', - 'avetries' => 'avg', - 'stdno' => 'add', - 'difficulty' => 'avg', - 'clear' => 'avg', - 'technical' => 'avg', - 'helpful' => 'avg', - 'correct' => 'avg', - 'depth' => 'avg', - 'comments' => 'app', - 'usage' => 'cnt' - ); - - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - while (my ($esckey,$value)=each %evaldata) { - my $key=&unescape($esckey); - if ($key=~/$regexp/) { - my ($item,$purl,$cat)=split(/___/,$key); - if (defined($cnt{$cat})) { $cnt{$cat}++; } else { $cnt{$cat}=1; } - unless ($listitems{$cat} eq 'app') { - if (defined($sum{$cat})) { - $sum{$cat}+=&unescape($evaldata{$esckey}); - $concat{$cat}.=','.$item; - } else { - $sum{$cat}=&unescape($evaldata{$esckey}); - $concat{$cat}=$item; - } - } else { - if (defined($sum{$cat})) { - if ($evaldata{$esckey}=~/\w/) { - $sum{$cat}.='
'.&unescape($evaldata{$esckey}); - } - } else { - $sum{$cat}=''.&unescape($evaldata{$esckey}); - } - } - } - } - untie(%evaldata); -# transfer gathered data to returnhash, calculate averages where applicable - while (my $cat=each(%cnt)) { - if ($cnt{$cat} eq 'nan') { next; } - if ($sum{$cat} eq 'nan') { next; } - if ($listitems{$cat} eq 'avg') { - if ($cnt{$cat}) { - $returnhash{$cat}=int(($sum{$cat}/$cnt{$cat})*100.0+0.5)/100.0; - } else { - $returnhash{$cat}='NULL'; - } - } elsif ($listitems{$cat} eq 'cnt') { - $returnhash{$cat}=$cnt{$cat}; - } else { - $returnhash{$cat}=$sum{$cat}; - } - $returnhash{$cat.'_list'}=$concat{$cat}; - } - } -# get count - if (tie(my %evaldata,'GDBM_File', - $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { - my $escurl=&escape($url); - if (! exists($evaldata{$escurl})) { - $returnhash{'count'}=0; - } else { - $returnhash{'count'}=$evaldata{$escurl}; - } - untie %evaldata; - } - return %returnhash; -} - -# --------------- Read loncapa_apache.conf and loncapa.conf and get variables -my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); -my %perlvar=%{$perlvarref}; -undef $perlvarref; -delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed - -# ------------------------------------- Only run if machine is a library server -exit unless $perlvar{'lonRole'} eq 'library'; - -# ----------------------------- Make sure this process is running from user=www - +# +# Only run if machine is a library server +exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library'); +# +# Make sure this process is running from user=www my $wwwid=getpwnam('www'); if ($wwwid!=$<) { - my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}"; + my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch"; system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ - mailto $emailto -s '$subj' > /dev/null"); + mail -s '$subj' $emailto > /dev/null"); exit 1; } +# +# Let people know we are running +open(LOG,'>>'.$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log'); +&log(0,'==== Searchcat Run '.localtime()."===="); -# ---------------------------------------------------------- We are in business - -open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); -print LOG '==== Searchcat Run '.localtime()."====\n\n"; -$simplestatus='time='.time.'&'; +if ($debug) { + &log(0,'simulating') if ($simulate); + &log(0,'only processing user '.$oneuser) if ($oneuser); + &log(0,'verbosity level = '.$verbose); +} +# +# Connect to database my $dbh; -# ------------------------------------- Make sure that database can be accessed -{ - unless ( - $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0}) - ) { - print LOG "Cannot connect to database!\n"; - $simplestatus.='mysql=defunct'; - &writesimple(); - exit; - } - -# Make temporary table - $dbh->do("DROP TABLE IF EXISTS newmetadata"); - my $make_metadata_table = "CREATE TABLE IF NOT EXISTS newmetadata (". - "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ". - "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ". - "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ". - "copyright TEXT, dependencies TEXT, ". - "count INTEGER UNSIGNED, ". - "course INTEGER UNSIGNED, course_list TEXT, ". - "goto INTEGER UNSIGNED, goto_list TEXT, ". - "comefrom INTEGER UNSIGNED, comefrom_list TEXT, ". - "sequsage INTEGER UNSIGNED, sequsage_list TEXT, ". - "stdno INTEGER UNSIGNED, stdno_list TEXT, ". - "avetries FLOAT, avetries_list TEXT, ". - "difficulty FLOAT, difficulty_list TEXT, ". - "clear FLOAT, technical FLOAT, correct FLOAT, helpful FLOAT, depth FLOAT, ". - "comments TEXT, ". - "FULLTEXT idx_title (title), ". - "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ". - "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ". - "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ". - "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ". - "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ". - "FULLTEXT idx_copyright (copyright)) ". - "TYPE=MyISAM"; - # It would sure be nice to have some logging mechanism. - unless ($dbh->do($make_metadata_table)) { - print LOG "\nMySQL Error Create: ".$dbh->errstr."\n"; - die $dbh->errstr; - } -} - -# ------------------------------------------------------------- get .meta files -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); -my @homeusers = grep { - &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_") - } grep {!/^\.\.?$/} readdir(RESOURCES); -closedir RESOURCES; - -# -# Create the statement handlers we need - -my $insert_sth = $dbh->prepare - ("INSERT INTO newmetadata VALUES (". - "?,". # title - "?,". # author - "?,". # subject - "?,". # declutter url - "?,". # version - "?,". # current - "?,". # notes - "?,". # abstract - "?,". # mime - "?,". # language - "?,". # creationdate - "?,". # revisiondate - "?,". # owner - "?,". # copyright - "?,". # dependencies - "?,". # count - "?,". # course - "?,". # course_list - "?,". # goto - "?,". # goto_list - "?,". # comefrom - "?,". # comefrom_list - "?,". # usage - "?,". # usage_list - "?,". # stdno - "?,". # stdno_list - "?,". # avetries - "?,". # avetries_list - "?,". # difficulty - "?,". # difficulty_list - "?,". # clear - "?,". # technical - "?,". # correct - "?,". # helpful - "?,". # depth - "?". # comments - ")" - ); - -foreach my $user (@homeusers) { - print LOG "\n=== User: ".$user."\n\n"; - - my $prodir=&propath($perlvar{'lonDefDomain'},$user); - # Use find.pl - undef @metalist; - @metalist=(); - &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); - # -- process each file to get metadata and put into search catalog SQL - # database. Also, check to see if already there. - # I could just delete (without searching first), but this works for now. - foreach my $m (@metalist) { - print LOG "- ".$m."\n"; - my $ref=&metadata($m); - my $m2='/res/'.&declutter($m); - $m2=~s/\.meta$//; - if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; } - if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; } - my %dyn=&dynamicmeta($m2); - &count($m2); - unless ($insert_sth->execute( - $ref->{'title'}, - $ref->{'author'}, - $ref->{'subject'}, - $m2, - $ref->{'keywords'}, - 'current', - $ref->{'notes'}, - $ref->{'abstract'}, - $ref->{'mime'}, - $ref->{'language'}, - sqltime($ref->{'creationdate'}), - sqltime($ref->{'lastrevisiondate'}), - $ref->{'owner'}, - $ref->{'copyright'}, - $ref->{'dependencies'}, - $dyn{'count'}, - $dyn{'course'}, - $dyn{'course_list'}, - $dyn{'goto'}, - $dyn{'goto_list'}, - $dyn{'comefrom'}, - $dyn{'comefrom_list'}, - $dyn{'usage'}, - $dyn{'usage_list'}, - $dyn{'stdno'}, - $dyn{'stdno_list'}, - $dyn{'avetries'}, - $dyn{'avetries_list'}, - $dyn{'difficulty'}, - $dyn{'difficulty_list'}, - $dyn{'clear'}, - $dyn{'technical'}, - $dyn{'correct'}, - $dyn{'helpful'}, - $dyn{'depth'}, - $dyn{'comments'} - )) { - print LOG "\nMySQL Error Insert: ".$dbh->errstr."\n"; - die $dbh->errstr; - } - $ref = undef; - } -} -# --------------------------------------------------- Close database connection -$dbh->do("DROP TABLE IF EXISTS metadata"); -unless ($dbh->do("RENAME TABLE newmetadata TO metadata")) { - print LOG "\nMySQL Error Rename: ".$dbh->errstr."\n"; - die $dbh->errstr; +if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'}, + { RaiseError =>0,PrintError=>0}))) { + &log(0,"Cannot connect to database!"); + die "MySQL Error: Cannot connect to database!\n"; +} +# This can return an error and still be okay, so we do not bother checking. +# (perhaps it should be more robust and check for specific errors) +foreach my $key (keys(%newnames)) { + if ($newnames{$key} ne '') { + $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key}); + } +} + +# +# Create the new metadata and portfolio tables +foreach my $key (keys(%newnames)) { + if ($newnames{$key} ne '') { + my $request = + &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key}); + $dbh->do($request); + if ($dbh->err) { + $dbh->disconnect(); + &log(0,"MySQL Error Create: ".$dbh->errstr); + die $dbh->errstr; + } + } +} + +# +# find out which users we need to examine +my @domains = sort(&Apache::lonnet::current_machine_domains()); +&log(9,'domains ="'.join('","',@domains).'"'); + +foreach my $dom (@domains) { + &log(9,'domain = '.$dom); + opendir(RESOURCES,"$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom"); + my @homeusers = + grep { + &ishome("$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom/$_"); + } grep { + !/^\.\.?$/; + } readdir(RESOURCES); + closedir RESOURCES; + &log(5,'users = '.$dom.':'.join(',',@homeusers)); + # + if ($oneuser) { + @homeusers=($oneuser); + } + # + # Loop through the users + foreach my $user (@homeusers) { + &log(0,"=== User: ".$user); + &process_dynamic_metadata($user,$dom); + # + # Use File::Find to get the files we need to read/modify + find( + {preprocess => \&only_meta_files, + #wanted => \&print_filename, + #wanted => \&log_metadata, + wanted => \&process_meta_file, + no_chdir => 1, + }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) ); + } + # Search for public portfolio files + my %portusers; + if ($oneuser) { + %portusers = ( + $oneuser => '', + ); + } else { + my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom; + &descend_tree($dir,0,\%portusers); + } + foreach my $uname (keys(%portusers)) { + my $urlstart = '/uploaded/'.$dom.'/'.$uname; + my $pathstart = &propath($dom,$uname).'/userfiles'; + my $is_course = &Apache::lonnet::is_course($dom,$uname); + my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname); + my %access = &Apache::lonnet::get_access_controls($curr_perm); + foreach my $file (keys(%access)) { + my ($group,$url,$fullpath); + if ($is_course) { + ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/); + $fullpath = $pathstart.'/groups/'.$group.'/portfolio'.$path; + $url = $urlstart.'/groups/'.$group.'/portfolio'.$path; + } else { + $fullpath = $pathstart.'/portfolio'.$file; + $url = $urlstart.'/portfolio'.$file; + } + if (ref($access{$file}) eq 'HASH') { + &process_portfolio_access_data($url,$access{$file}); + } + &process_portfolio_metadata($url,$fullpath,$is_course,$dom, + $uname,$group); + } + } } -unless ($dbh->disconnect) { - print LOG "\nMySQL Error Disconnect: ".$dbh->errstr."\n"; + +# +# Rename the tables +if (! $simulate) { + foreach my $key (keys(%oldnames)) { + if (($oldnames{$key} ne '') && ($newnames{$key} ne '')) { + $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key}); + if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) { + &log(0,"MySQL Error Rename: ".$dbh->errstr); + die $dbh->errstr; + } else { + &log(1,"MySQL table rename successful for $key."); + } + } + } +} +if (! $dbh->disconnect) { + &log(0,"MySQL Error Disconnect: ".$dbh->errstr); die $dbh->errstr; } -print LOG "\n==== Searchcat completed ".localtime()." ====\n"; +## +## Finished! +&log(0,"==== Searchcat completed ".localtime()." ===="); close(LOG); -&writesimple(); -&writecount(); + +&write_type_count(); +&write_copyright_count(); + exit 0; +## +## Status logging routine. Inputs: $level, $message +## +## $level 0 should be used for normal output and error messages +## +## $message does not need to end with \n. In the case of errors +## the message should contain as much information as possible to +## help in diagnosing the problem. +## +sub log { + my ($level,$message)=@_; + $level = 0 if (! defined($level)); + if ($verbose >= $level) { + print LOG $message.$/; + } +} + +sub descend_tree { + my ($dir,$depth,$alldomusers) = @_; + if (-d $dir) { + opendir(DIR,$dir); + my @contents = grep(!/^\./,readdir(DIR)); + closedir(DIR); + $depth ++; + foreach my $item (@contents) { + if ($depth < 4) { + &descend_tree($dir.'/'.$item,$depth,$alldomusers); + } else { + if (-e $dir.'/'.$item.'/file_permissions.db') { + + $$alldomusers{$item} = ''; + } + } + } + } +} +sub process_portfolio_access_data { + my ($url,$access_hash) = @_; + foreach my $key (keys(%{$access_hash})) { + my $acc_data; + $acc_data->{url} = $url; + $acc_data->{keynum} = $key; + my ($num,$scope,$end,$start) = + ($key =~ /^([^:]+):([a-z]+)_(\d*)_?(\d*)$/); + next if (($scope ne 'public') && ($scope ne 'guest')); + $acc_data->{scope} = $scope; + if ($end != 0) { + $acc_data->{end} = &sqltime($end); + } + $acc_data->{start} = &sqltime($start); + if (! $simulate) { + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh, + $newnames{'access'}, + 'portfolio_access',$acc_data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL database for $url"); + } + } + } +} -# ============================================================================= +sub process_portfolio_metadata { + my ($url,$fullpath,$is_course,$dom,$uname,$group) = @_; + my ($ref,$crs,$addedfields) = &portfolio_metadata($fullpath,$dom,$uname, + $group); + &getfiledates($ref,$fullpath); + if ($is_course) { + $ref->{'groupname'} = $group; + } + my %Data; + if (ref($ref) eq 'HASH') { + %Data = %{$ref}; + } + %Data = ( + %Data, + 'url'=>$url, + 'version'=>'current', + ); + if (! $simulate) { + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh, + $newnames{'portfolio'}, + 'portfolio_metadata',\%Data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL portfolio_metadata database table for $url"); + } + if (ref($addedfields) eq 'HASH') { + if (keys(%{$addedfields}) > 0) { + foreach my $key (keys(%{$addedfields})) { + my $added_data = { + 'url' => $url, + 'field' => $key, + 'value' => $addedfields->{$key}, + 'courserestricted' => $crs, + }; + ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh, + $newnames{'addedfields'}, + 'portfolio_addedfields', + $added_data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL portfolio_addedfields database table for url = $url and field = $key"); + } + } + } + } + } + return; +} + +######################################################## +######################################################## +### ### +### File::Find support routines ### +### ### +######################################################## +######################################################## +## +## &only_meta_files +## +## Called by File::Find. +## Takes a list of files/directories in and returns a list of files/directories +## to search. +sub only_meta_files { + my @PossibleFiles = @_; + my @ChosenFiles; + foreach my $file (@PossibleFiles) { + if ( ($file =~ /\.meta$/ && # Ends in meta + $file !~ /\.\d+\.[^\.]+\.meta$/ # is not for a prior version + ) || (-d $File::Find::dir."/".$file )) { # directories are okay + # but we do not want /. or /.. + push(@ChosenFiles,$file); + } + } + return @ChosenFiles; +} + +## +## +## Debugging routines, use these for 'wanted' in the File::Find call +## +sub print_filename { + my ($file) = $_; + my $fullfilename = $File::Find::name; + if ($debug) { + if (-d $file) { + &log(5," Got directory ".$fullfilename); + } else { + &log(5," Got file ".$fullfilename); + } + } + $_=$file; +} + +sub log_metadata { + my ($file) = $_; + my $fullfilename = $File::Find::name; + return if (-d $fullfilename); # No need to do anything here for directories + if ($debug) { + &log(6,$fullfilename); + my $ref = &metadata($fullfilename); + if (! defined($ref)) { + &log(6," No data"); + return; + } + while (my($key,$value) = each(%$ref)) { + &log(6," ".$key." => ".$value); + } + &count_copyright($ref->{'copyright'}); + } + $_=$file; +} + +## +## process_meta_file +## Called by File::Find. +## Only input is the filename in $_. +sub process_meta_file { + my ($file) = $_; + my $filename = $File::Find::name; # full filename + return if (-d $filename); # No need to do anything here for directories + # + &log(3,$filename) if ($debug); + # + my $ref = &metadata($filename); + # + # $url is the original file url, not the metadata file + my $target = $filename; + $target =~ s/\.meta$//; + my $url='/res/'.&declutter($target); + &log(3," ".$url) if ($debug); + # + # Ignore some files based on their metadata + if ($ref->{'obsolete'}) { + &log(3,"obsolete") if ($debug); + return; + } + &count_copyright($ref->{'copyright'}); + if ($ref->{'copyright'} eq 'private') { + &log(3,"private") if ($debug); + return; + } + # + # Find the dynamic metadata + my %dyn; + if ($url=~ m:/default$:) { + $url=~ s:/default$:/:; + &log(3,"Skipping dynamic data") if ($debug); + } else { + &log(3,"Retrieving dynamic data") if ($debug); + %dyn=&get_dynamic_metadata($url); + &count_type($url); + } + &getfiledates($ref,$target); + # + my %Data = ( + %$ref, + %dyn, + 'url'=>$url, + 'version'=>'current'); + if (! $simulate) { + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'}, + 'metadata',\%Data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL database for $url"); + } + } + # + # Reset $_ before leaving + $_ = $file; +} -# ---------------------------------------------------------------- Get metadata -# significantly altered from subroutine present in lonnet +######################################################## +######################################################## +### ### +### &metadata($uri) ### +### Retrieve metadata for the given file ### +### ### +######################################################## +######################################################## sub metadata { - my ($uri,$what)=@_; + my ($uri) = @_; my %metacache=(); $uri=&declutter($uri); my $filename=$uri; $uri=~s/\.meta$//; $uri=''; - unless ($metacache{$uri.'keys'}) { - unless ($filename=~/\.meta$/) { $filename.='.meta'; } - my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); + if ($filename !~ /\.meta$/) { + $filename.='.meta'; + } + my $metastring=&getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename); + return undef if (! defined($metastring)); + my $parser=HTML::TokeParser->new(\$metastring); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $entry=$token->[1]; + my $unikey=$entry; + if (defined($token->[2]->{'part'})) { + $unikey.='_'.$token->[2]->{'part'}; + } + if (defined($token->[2]->{'name'})) { + $unikey.='_'.$token->[2]->{'name'}; + } + if ($metacache{$uri.'keys'}) { + $metacache{$uri.'keys'}.=','.$unikey; + } else { + $metacache{$uri.'keys'}=$unikey; + } + foreach ( @{$token->[3]}) { + $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; + } + if (! ($metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry))){ + $metacache{$uri.''.$unikey} = + $metacache{$uri.''.$unikey.'.default'}; + } + } # End of ($token->[0] eq 'S') + } + return \%metacache; +} + +############################################################### +############################################################### +### ### +### &portfolio_metadata($filepath,$dom,$uname,$group) ### +### Retrieve metadata for the given file ### +### Returns array - ### +### contains reference to metadatahash and ### +### optional reference to addedfields hash ### +### ### +############################################################### +############################################################### +sub portfolio_metadata { + my ($fullpath,$dom,$uname,$group)=@_; + my ($mime) = ( $fullpath=~/\.(\w+)$/ ); + my %metacache=(); + if ($fullpath !~ /\.meta$/) { + $fullpath .= '.meta'; + } + my (@standard_fields,%addedfields); + my $colsref = + $LONCAPA::lonmetadata::Portfolio_metadata_table_description; + if (ref($colsref) eq 'ARRAY') { + my @columns = @{$colsref}; + foreach my $coldata (@columns) { + push(@standard_fields,$coldata->{'name'}); + } + } + my $metastring=&getfile($fullpath); + if (! defined($metastring)) { + $metacache{'keys'}= 'owner,domain,mime'; + $metacache{'owner'} = $uname.':'.$dom; + $metacache{'domain'} = $dom; + $metacache{'mime'} = $mime; + if (defined($group)) { + $metacache{'keys'} .= ',courserestricted'; + $metacache{'courserestricted'} = 'course.'.$dom.'_'.$uname; + } + } else { my $parser=HTML::TokeParser->new(\$metastring); my $token; while ($token=$parser->get_token) { if ($token->[0] eq 'S') { my $entry=$token->[1]; - my $unikey=$entry; - if (defined($token->[2]->{'part'})) { - $unikey.='_'.$token->[2]->{'part'}; - } - if (defined($token->[2]->{'name'})) { - $unikey.='_'.$token->[2]->{'name'}; - } - if ($metacache{$uri.'keys'}) { - $metacache{$uri.'keys'}.=','.$unikey; + if ($metacache{'keys'}) { + $metacache{'keys'}.=','.$entry; } else { - $metacache{$uri.'keys'}=$unikey; + $metacache{'keys'}=$entry; } - map { - $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; - } @{$token->[3]}; - unless ( - $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry) - ) { $metacache{$uri.''.$unikey}= - $metacache{$uri.''.$unikey.'.default'}; + my $value = $parser->get_text('/'.$entry); + if (!grep(/^\Q$entry\E$/,@standard_fields)) { + my $clean_value = lc($value); + $clean_value =~ s/\s/_/g; + if ($clean_value ne $entry) { + if (defined($addedfields{$entry})) { + $addedfields{$entry} .=','.$value; + } else { + $addedfields{$entry} = $value; } + } + } else { + $metacache{$entry} = $value; + } } + } # End of ($token->[0] eq 'S') + } + if (keys(%addedfields) > 0) { + foreach my $key (sort keys(%addedfields)) { + $metacache{'addedfieldnames'} .= $key.','; + $metacache{'addedfieldvalues'} .= $addedfields{$key}.'&&&'; } + $metacache{'addedfieldnames'} =~ s/,$//; + $metacache{'addedfieldvalues'} =~ s/\&\&\&$//; + if ($metacache{'keys'}) { + $metacache{'keys'}.=',addedfieldnames'; + } else { + $metacache{'keys'}='addedfieldnames'; + } + $metacache{'keys'}.=',addedfieldvalues'; } - return \%metacache; + return (\%metacache,$metacache{'courserestricted'},\%addedfields); } -# ------------------------------------------------------------ Serves up a file -# returns either the contents of the file or a -1 +## +## &getfile($filename) +## Slurps up an entire file into a scalar. +## Returns undef if the file does not exist sub getfile { - my $file=shift; - if (! -e $file ) { return -1; }; + my $file = shift(); + if (! -e $file ) { + return undef; + } my $fh=IO::File->new($file); - my $a=''; - while (<$fh>) { $a .=$_; } - return $a; + my $contents = ''; + while (<$fh>) { + $contents .= $_; + } + return $contents; } -# ------------------------------------------------------------- Declutters URLs -sub declutter { - my $thisfn=shift; - $thisfn=~s/^$perlvar{'lonDocRoot'}//; - $thisfn=~s/^\///; - $thisfn=~s/^res\///; - return $thisfn; +## +## &getfiledates() +## Converts creationdate and modifieddates to SQL format +## Applies stat() to file to retrieve dates if missing +sub getfiledates { + my ($ref,$target) = @_; + if (! defined($ref->{'creationdate'}) || + $ref->{'creationdate'} =~ /^\s*$/) { + $ref->{'creationdate'} = (stat($target))[9]; + } + if (! defined($ref->{'lastrevisiondate'}) || + $ref->{'lastrevisiondate'} =~ /^\s*$/) { + $ref->{'lastrevisiondate'} = (stat($target))[9]; + } + $ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); + $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); +} + +######################################################## +######################################################## +### ### +### Dynamic Metadata ### +### ### +######################################################## +######################################################## +## +## Dynamic metadata description (incomplete) +## +## For a full description of all fields, +## see LONCAPA::lonmetadata +## +## Field Type +##----------------------------------------------------------- +## count integer +## course integer +## course_list comma separated list of course ids +## avetries real +## avetries_list comma separated list of real numbers +## stdno real +## stdno_list comma separated list of real numbers +## usage integer +## usage_list comma separated list of resources +## goto scalar +## goto_list comma separated list of resources +## comefrom scalar +## comefrom_list comma separated list of resources +## difficulty real +## difficulty_list comma separated list of real numbers +## sequsage scalar +## sequsage_list comma separated list of resources +## clear real +## technical real +## correct real +## helpful real +## depth real +## comments html of all the comments made +## +{ + +my %DynamicData; +my %Counts; + +sub process_dynamic_metadata { + my ($user,$dom) = @_; + undef(%DynamicData); + undef(%Counts); + # + my $prodir = &propath($dom,$user); + # + # Read in the dynamic metadata + my %evaldata; + if (! tie(%evaldata,'GDBM_File', + $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { + return 0; + } + # + %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata); + untie(%evaldata); + $DynamicData{'domain'} = $dom; + #print('user = '.$user.' domain = '.$dom.$/); + # + # Read in the access count data + &log(7,'Reading access count data') if ($debug); + my %countdata; + if (! tie(%countdata,'GDBM_File', + $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { + return 0; + } + while (my ($key,$count) = each(%countdata)) { + next if ($key !~ /^$dom/); + $key = &unescape($key); + &log(8,' Count '.$key.' = '.$count) if ($debug); + $Counts{$key}=$count; + } + untie(%countdata); + if ($debug) { + &log(7,scalar(keys(%Counts)). + " Counts read for ".$user."@".$dom); + &log(7,scalar(keys(%DynamicData)). + " Dynamic metadata read for ".$user."@".$dom); + } + # + return 1; } -# --------------------------------------- Is this the home server of an author? -# (copied from lond, modification of the return value) +sub get_dynamic_metadata { + my ($url) = @_; + $url =~ s:^/res/::; + my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url, + \%DynamicData); + # find the count + $data{'count'} = $Counts{$url}; + # + # Log the dynamic metadata + if ($debug) { + while (my($k,$v)=each(%data)) { + &log(8," ".$k." => ".$v); + } + } + return %data; +} + +} # End of %DynamicData and %Counts scope + +######################################################## +######################################################## +### ### +### Counts ### +### ### +######################################################## +######################################################## +{ + +my %countext; + +sub count_type { + my $file=shift; + $file=~/\.(\w+)$/; + my $ext=lc($1); + $countext{$ext}++; +} + +sub write_type_count { + open(RESCOUNT,'>/home/httpd/html/lon-status/rescount.txt'); + while (my ($extension,$count) = each(%countext)) { + print RESCOUNT $extension.'='.$count.'&'; + } + print RESCOUNT 'time='.time."\n"; + close(RESCOUNT); +} + +} # end of scope for %countext + +{ + +my %copyrights; + +sub count_copyright { + $copyrights{@_[0]}++; +} + +sub write_copyright_count { + open(COPYCOUNT,'>/home/httpd/html/lon-status/copyrightcount.txt'); + while (my ($copyright,$count) = each(%copyrights)) { + print COPYCOUNT $copyright.'='.$count.'&'; + } + print COPYCOUNT 'time='.time."\n"; + close(COPYCOUNT); +} + +} # end of scope for %copyrights + +######################################################## +######################################################## +### ### +### Miscellanous Utility Routines ### +### ### +######################################################## +######################################################## +## +## &ishome($username) +## Returns 1 if $username is a LON-CAPA author, 0 otherwise +## (copied from lond, modification of the return value) sub ishome { my $author=shift; $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; @@ -520,54 +874,76 @@ sub ishome { } } -# -------------------------------------------- Return path to profile directory -# (copied from lond) +## +## &propath($udom,$uname) +## Returns the path to the users LON-CAPA directory +## (copied from lond) sub propath { my ($udom,$uname)=@_; $udom=~s/\W//g; $uname=~s/\W//g; my $subdir=$uname.'__'; $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; - my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; + my $proname="$Apache::lonnet::perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; return $proname; } -# ---------------------------- convert 'time' format into a datetime sql format +## +## &sqltime($timestamp) +## +## Convert perl $timestamp to MySQL time. MySQL expects YYYY-MM-DD HH:MM:SS +## sub sqltime { - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime(&unsqltime(@_[0])); - $mon++; $year+=1900; - return "$year-$mon-$mday $hour:$min:$sec"; + my ($time) = @_; + my $mysqltime; + if ($time =~ + /(\d+)-(\d+)-(\d+) # YYYY-MM-DD + \s # a space + (\d+):(\d+):(\d+) # HH:MM::SS + /x ) { + # Some of the .meta files have the time in mysql + # format already, so just make sure they are 0 padded and + # pass them back. + $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', + $1,$2,$3,$4,$5,$6); + } elsif ($time =~ /^\d+$/) { + my @TimeData = gmtime($time); + # Alter the month to be 1-12 instead of 0-11 + $TimeData[4]++; + # Alter the year to be from 0 instead of from 1900 + $TimeData[5]+=1900; + $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', + @TimeData[5,4,3,2,1,0]); + } elsif (! defined($time) || $time == 0) { + $mysqltime = 0; + } else { + &log(0," sqltime:Unable to decode time ".$time); + $mysqltime = 0; + } + return $mysqltime; } -sub maketime { - my %th=@_; - return POSIX::mktime(($th{'seconds'},$th{'minutes'},$th{'hours'}, - $th{'day'},$th{'month'}-1, - $th{'year'}-1900,0,0,$th{'dlsav'})); +## +## &declutter($filename) +## Given a filename, returns a url for the filename. +sub declutter { + my $thisfn=shift; + $thisfn=~s/^$Apache::lonnet::perlvar{'lonDocRoot'}//; + $thisfn=~s/^\///; + $thisfn=~s/^res\///; + return $thisfn; } - -######################################### -# -# Retro-fixing of un-backward-compatible time format - -sub unsqltime { - my $timestamp=shift; - if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) { - $timestamp=&maketime('year'=>$1,'month'=>$2,'day'=>$3, - 'hours'=>$4,'minutes'=>$5,'seconds'=>$6); - } - return $timestamp; +## +## Escape / Unescape special characters +sub unescape { + my $str=shift; + $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; + return $str; } -# ----------------- Code to enable 'find' subroutine listing of the .meta files - -no strict "vars"; - -sub wanted { - (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && - -f _ && - /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && - push(@metalist,"$dir/$_"); +sub escape { + my $str=shift; + $str =~ s/(\W)/"%".unpack('H2',$1)/eg; + return $str; }