version 1.42, 2003/10/08 14:15:03
|
version 1.54, 2004/01/05 15:54:22
|
Line 56 This script also does general database m
|
Line 56 This script also does general database m
|
the C<loncapa:metadata> table if it is deprecated. |
the C<loncapa:metadata> table if it is deprecated. |
|
|
This script evaluates dynamic metadata from the authors' |
This script evaluates dynamic metadata from the authors' |
F<nohist_resevaldata.db> database file in order to store it in MySQL, as |
F<nohist_resevaldata.db> database file in order to store it in MySQL. |
well as to compress the filesize (add up all "count"-type metadata). |
|
|
|
This script is playing an increasingly important role for a loncapa |
This script is playing an increasingly important role for a loncapa |
library server. The proper operation of this script is critical for a smooth |
library server. The proper operation of this script is critical for a smooth |
Line 65 and correct user experience.
|
Line 64 and correct user experience.
|
|
|
=cut |
=cut |
|
|
|
use strict; |
|
|
use lib '/home/httpd/lib/perl/'; |
use lib '/home/httpd/lib/perl/'; |
use LONCAPA::Configuration; |
use LONCAPA::Configuration; |
|
|
Line 74 use DBI;
|
Line 75 use DBI;
|
use GDBM_File; |
use GDBM_File; |
use POSIX qw(strftime mktime); |
use POSIX qw(strftime mktime); |
|
|
|
require "find.pl"; |
|
|
my @metalist; |
my @metalist; |
|
|
$simplestatus=''; |
my $simplestatus=''; |
my %countext=(); |
my %countext=(); |
|
|
|
# ----------------------------------------------------- write out simple status |
sub writesimple { |
sub writesimple { |
open(SMP,'>/home/httpd/html/lon-status/mysql.txt'); |
open(SMP,'>/home/httpd/html/lon-status/mysql.txt'); |
print SMP $simplestatus."\n"; |
print SMP $simplestatus."\n"; |
Line 94 sub writecount {
|
Line 98 sub writecount {
|
close(RSMP); |
close(RSMP); |
} |
} |
|
|
|
# -------------------------------------- counts files with different extensions |
sub count { |
sub count { |
my $file=shift; |
my $file=shift; |
$file=~/\.(\w+)$/; |
$file=~/\.(\w+)$/; |
Line 120 sub escape {
|
Line 125 sub escape {
|
return $str; |
return $str; |
} |
} |
|
|
|
|
# ------------------------------------------- Code to evaluate dynamic metadata |
# ------------------------------------------- Code to evaluate dynamic metadata |
|
|
sub dynamicmeta { |
sub dynamicmeta { |
|
|
my $url=&declutter(shift); |
my $url=&declutter(shift); |
$url=~s/\.meta$//; |
$url=~s/\.meta$//; |
my %returnhash=(); |
my %returnhash=( |
|
'count' => 0, |
|
'course' => 0, |
|
'course_list' => '', |
|
'avetries' => 'NULL', |
|
'avetries_list' => '', |
|
'stdno' => 0, |
|
'stdno_list' => '', |
|
'usage' => 0, |
|
'usage_list' => '', |
|
'goto' => 0, |
|
'goto_list' => '', |
|
'comefrom' => 0, |
|
'comefrom_list' => '', |
|
'difficulty' => 'NULL', |
|
'difficulty_list' => '', |
|
'clear' => 'NULL', |
|
'technical' => 'NULL', |
|
'correct' => 'NULL', |
|
'helpful' => 'NULL', |
|
'depth' => 'NULL', |
|
'comments' => '' |
|
); |
my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); |
my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); |
my $prodir=&propath($adomain,$aauthor); |
my $prodir=&propath($adomain,$aauthor); |
if ((tie(%evaldata,'GDBM_File', |
|
$prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) && |
# Get metadata except counts |
(tie(%newevaldata,'GDBM_File', |
if (tie(my %evaldata,'GDBM_File', |
$prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { |
$prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { |
my %sum=(); |
my %sum=(); |
my %cnt=(); |
my %cnt=(); |
my %listitems=('count' => 'add', |
my %concat=(); |
'course' => 'add', |
my %listitems=( |
'goto' => 'add', |
'course' => 'add', |
'comefrom' => 'add', |
'goto' => 'add', |
'avetries' => 'avg', |
'comefrom' => 'add', |
'stdno' => 'add', |
'avetries' => 'avg', |
'difficulty' => 'avg', |
'stdno' => 'add', |
'clear' => 'avg', |
'difficulty' => 'avg', |
'technical' => 'avg', |
'clear' => 'avg', |
'helpful' => 'avg', |
'technical' => 'avg', |
'correct' => 'avg', |
'helpful' => 'avg', |
'depth' => 'avg', |
'correct' => 'avg', |
'comments' => 'app', |
'depth' => 'avg', |
'usage' => 'cnt' |
'comments' => 'app', |
); |
'usage' => 'cnt' |
my $regexp=$url; |
); |
$regexp=~s/(\W)/\\$1/g; |
|
$regexp='___'.$regexp.'___([a-z]+)$'; |
my $regexp=$url; |
while (my ($key,$value)=each %evaldata) { |
$regexp=~s/(\W)/\\$1/g; |
$key=&unescape($key); |
$regexp='___'.$regexp.'___([a-z]+)$'; |
if ($key=~/$regexp/) { |
while (my ($esckey,$value)=each %evaldata) { |
my $ctype=$1; |
my $key=&unescape($esckey); |
if (defined($cnt{$ctype})) { |
if ($key=~/$regexp/) { |
$cnt{$ctype}++; |
my ($item,$purl,$cat)=split(/___/,$key); |
} else { |
if (defined($cnt{$cat})) { $cnt{$cat}++; } else { $cnt{$cat}=1; } |
$cnt{$ctype}=1; |
unless ($listitems{$cat} eq 'app') { |
} |
if (defined($sum{$cat})) { |
unless ($listitems{$ctype} eq 'app') { |
$sum{$cat}+=&unescape($evaldata{$esckey}); |
if (defined($sum{$ctype})) { |
$concat{$cat}.=','.$item; |
$sum{$ctype}+=$value; |
} else { |
} else { |
$sum{$cat}=&unescape($evaldata{$esckey}); |
$sum{$ctype}=$value; |
$concat{$cat}=$item; |
} |
} |
} else { |
} else { |
if (defined($sum{$ctype})) { |
if (defined($sum{$cat})) { |
if ($value) { |
if ($evaldata{$esckey}=~/\w/) { |
$sum{$ctype}.='<hr>'.$value; |
$sum{$cat}.='<hr />'.&unescape($evaldata{$esckey}); |
} |
} |
} else { |
} else { |
$sum{$ctype}=''.$value; |
$sum{$cat}=''.&unescape($evaldata{$esckey}); |
} |
} |
|
} |
} |
} |
if ($ctype ne 'count') { |
} |
$newevaldata{$_}=$value; |
untie(%evaldata); |
} |
# transfer gathered data to returnhash, calculate averages where applicable |
} |
while (my $cat=each(%cnt)) { |
} |
if ($cnt{$cat} eq 'nan') { next; } |
foreach (keys %cnt) { |
if ($sum{$cat} eq 'nan') { next; } |
if ($listitems{$_} eq 'avg') { |
if ($listitems{$cat} eq 'avg') { |
$returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; |
if ($cnt{$cat}) { |
} elsif ($listitems{$_} eq 'cnt') { |
$returnhash{$cat}=int(($sum{$cat}/$cnt{$cat})*100.0+0.5)/100.0; |
$returnhash{$_}=$cnt{$_}; |
} else { |
} else { |
$returnhash{$cat}='NULL'; |
$returnhash{$_}=$sum{$_}; |
} |
} |
} elsif ($listitems{$cat} eq 'cnt') { |
} |
$returnhash{$cat}=$cnt{$cat}; |
if ($returnhash{'count'}) { |
} else { |
my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; |
$returnhash{$cat}=$sum{$cat}; |
$newevaldata{$newkey}=$returnhash{'count'}; |
} |
} |
$returnhash{$cat.'_list'}=$concat{$cat}; |
untie(%evaldata); |
} |
untie(%newevaldata); |
} |
} |
# get count |
return %returnhash; |
if (tie(my %evaldata,'GDBM_File', |
|
$prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { |
|
my $escurl=&escape($url); |
|
if (! exists($evaldata{$escurl})) { |
|
$returnhash{'count'}=0; |
|
} else { |
|
$returnhash{'count'}=$evaldata{$escurl}; |
|
} |
|
untie %evaldata; |
|
} |
|
return %returnhash; |
} |
} |
|
|
# ----------------- Code to enable 'find' subroutine listing of the .meta files |
|
require "find.pl"; |
|
sub wanted { |
|
(($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && |
|
-f _ && |
|
/^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && |
|
push(@metalist,"$dir/$_"); |
|
} |
|
|
|
# --------------- Read loncapa_apache.conf and loncapa.conf and get variables |
# --------------- Read loncapa_apache.conf and loncapa.conf and get variables |
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); |
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); |
my %perlvar=%{$perlvarref}; |
my %perlvar=%{$perlvarref}; |
undef $perlvarref; # remove since sensitive and not needed |
undef $perlvarref; |
delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed |
delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed |
|
|
# ------------------------------------- Only run if machine is a library server |
# ------------------------------------- Only run if machine is a library server |
Line 224 exit unless $perlvar{'lonRole'} eq 'libr
|
Line 251 exit unless $perlvar{'lonRole'} eq 'libr
|
|
|
my $wwwid=getpwnam('www'); |
my $wwwid=getpwnam('www'); |
if ($wwwid!=$<) { |
if ($wwwid!=$<) { |
$emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; |
my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; |
$subj="LON: $perlvar{'lonHostID'} User ID mismatch"; |
my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; |
system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ |
system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ |
mailto $emailto -s '$subj' > /dev/null"); |
mailto $emailto -s '$subj' > /dev/null"); |
exit 1; |
exit 1; |
Line 249 my $dbh;
|
Line 276 my $dbh;
|
exit; |
exit; |
} |
} |
|
|
my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (". |
# Make temporary table |
|
$dbh->do("DROP TABLE IF EXISTS newmetadata"); |
|
my $make_metadata_table = "CREATE TABLE IF NOT EXISTS newmetadata (". |
"title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ". |
"title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ". |
"version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ". |
"version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ". |
"creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ". |
"creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ". |
"copyright TEXT, FULLTEXT idx_title (title), ". |
"copyright TEXT, dependencies TEXT, ". |
|
"modifyinguser TEXT, authorspace TEXT, ". |
|
"lowestgradelevel INTEGER UNSIGNED, highestgradelevel INTEGER UNSIGNED, ". |
|
"standards TEXT, ". |
|
"count INTEGER UNSIGNED, ". |
|
"course INTEGER UNSIGNED, course_list TEXT, ". |
|
"goto INTEGER UNSIGNED, goto_list TEXT, ". |
|
"comefrom INTEGER UNSIGNED, comefrom_list TEXT, ". |
|
"sequsage INTEGER UNSIGNED, sequsage_list TEXT, ". |
|
"stdno INTEGER UNSIGNED, stdno_list TEXT, ". |
|
"avetries FLOAT, avetries_list TEXT, ". |
|
"difficulty FLOAT, difficulty_list TEXT, ". |
|
"clear FLOAT, technical FLOAT, correct FLOAT, helpful FLOAT, depth FLOAT, ". |
|
"comments TEXT, ". |
|
# For backward compatibility, only insert new fields below |
|
# ... |
|
# For backward compatibility, end new fields above |
|
"FULLTEXT idx_title (title), ". |
"FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ". |
"FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ". |
"FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ". |
"FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ". |
"FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ". |
"FULLTEXT idx_notes (notes), ". |
"FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ". |
"FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ". |
"FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ". |
"FULLTEXT idx_owner (owner), ". |
"FULLTEXT idx_copyright (copyright)) TYPE=MYISAM"; |
"FULLTEXT idx_standards (standards))". |
|
"TYPE=MyISAM"; |
# It would sure be nice to have some logging mechanism. |
# It would sure be nice to have some logging mechanism. |
$dbh->do($make_metadata_table); |
unless ($dbh->do($make_metadata_table)) { |
|
print LOG "\nMySQL Error Create: ".$dbh->errstr."\n"; |
|
die $dbh->errstr; |
|
} |
} |
} |
|
|
# ------------------------------------------------------------- get .meta files |
# ------------------------------------------------------------- get .meta files |
Line 273 closedir RESOURCES;
|
Line 323 closedir RESOURCES;
|
|
|
# |
# |
# Create the statement handlers we need |
# Create the statement handlers we need |
my $delete_sth = $dbh->prepare |
|
("DELETE FROM metadata WHERE url LIKE BINARY ?"); |
|
|
|
my $insert_sth = $dbh->prepare |
my $insert_sth = $dbh->prepare |
("INSERT INTO metadata VALUES (". |
("INSERT INTO newmetadata VALUES (". |
"?,". # title |
"?,". # title |
"?,". # author |
"?,". # author |
"?,". # subject |
"?,". # subject |
"?,". # m2??? |
"?,". # declutter url |
"?,". # version |
"?,". # version |
"?,". # current |
"?,". # current |
"?,". # notes |
"?,". # notes |
Line 291 my $insert_sth = $dbh->prepare
|
Line 339 my $insert_sth = $dbh->prepare
|
"?,". # creationdate |
"?,". # creationdate |
"?,". # revisiondate |
"?,". # revisiondate |
"?,". # owner |
"?,". # owner |
"?)" # copyright |
"?,". # copyright |
|
"?,". # dependencies |
|
"?,". # modifyinguser |
|
"?,". # authorspace |
|
"?,". # lowestgradelevel |
|
"?,". # highestgradelevel |
|
"?,". # standards |
|
"?,". # count |
|
"?,". # course |
|
"?,". # course_list |
|
"?,". # goto |
|
"?,". # goto_list |
|
"?,". # comefrom |
|
"?,". # comefrom_list |
|
"?,". # usage |
|
"?,". # usage_list |
|
"?,". # stdno |
|
"?,". # stdno_list |
|
"?,". # avetries |
|
"?,". # avetries_list |
|
"?,". # difficulty |
|
"?,". # difficulty_list |
|
"?,". # clear |
|
"?,". # technical |
|
"?,". # correct |
|
"?,". # helpful |
|
"?,". # depth |
|
"?". # comments |
|
")" |
); |
); |
|
|
foreach my $user (@homeusers) { |
foreach my $user (@homeusers) { |
print LOG "\n=== User: ".$user."\n\n"; |
print LOG "\n=== User: ".$user."\n\n"; |
# Remove left-over db-files from potentially crashed searchcat run |
|
my $prodir=&propath($perlvar{'lonDefDomain'},$user); |
my $prodir=&propath($perlvar{'lonDefDomain'},$user); |
unlink($prodir.'/nohist_new_resevaldata.db'); |
|
# Use find.pl |
# Use find.pl |
undef @metalist; |
undef @metalist; |
@metalist=(); |
@metalist=(); |
Line 311 foreach my $user (@homeusers) {
|
Line 386 foreach my $user (@homeusers) {
|
my $ref=&metadata($m); |
my $ref=&metadata($m); |
my $m2='/res/'.&declutter($m); |
my $m2='/res/'.&declutter($m); |
$m2=~s/\.meta$//; |
$m2=~s/\.meta$//; |
&dynamicmeta($m2); |
|
if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; } |
if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; } |
if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; } |
if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; } |
&count($m2); |
my %dyn=(); |
$delete_sth->execute($m2); |
if ($m2=~/\/default$/) { |
$insert_sth->execute($ref->{'title'}, |
$m2=~s/\/default$/\//; |
|
} else { |
|
%dyn=&dynamicmeta($m2); |
|
&count($m2); |
|
} |
|
unless ($insert_sth->execute( |
|
$ref->{'title'}, |
$ref->{'author'}, |
$ref->{'author'}, |
$ref->{'subject'}, |
$ref->{'subject'}, |
$m2, |
$m2, |
Line 329 foreach my $user (@homeusers) {
|
Line 409 foreach my $user (@homeusers) {
|
sqltime($ref->{'creationdate'}), |
sqltime($ref->{'creationdate'}), |
sqltime($ref->{'lastrevisiondate'}), |
sqltime($ref->{'lastrevisiondate'}), |
$ref->{'owner'}, |
$ref->{'owner'}, |
$ref->{'copyright'}); |
$ref->{'copyright'}, |
# if ($dbh->err()) { |
$ref->{'dependencies'}, |
# print STDERR "Error:".$dbh->errstr()."\n"; |
$ref->{'modifyinguser'}, |
# } |
$ref->{'authorspace'}, |
|
$ref->{'lowestgradelevel'}, |
|
$ref->{'highestgradelevel'}, |
|
$ref->{'standards'}, |
|
$dyn{'count'}, |
|
$dyn{'course'}, |
|
$dyn{'course_list'}, |
|
$dyn{'goto'}, |
|
$dyn{'goto_list'}, |
|
$dyn{'comefrom'}, |
|
$dyn{'comefrom_list'}, |
|
$dyn{'usage'}, |
|
$dyn{'usage_list'}, |
|
$dyn{'stdno'}, |
|
$dyn{'stdno_list'}, |
|
$dyn{'avetries'}, |
|
$dyn{'avetries_list'}, |
|
$dyn{'difficulty'}, |
|
$dyn{'difficulty_list'}, |
|
$dyn{'clear'}, |
|
$dyn{'technical'}, |
|
$dyn{'correct'}, |
|
$dyn{'helpful'}, |
|
$dyn{'depth'}, |
|
$dyn{'comments'} |
|
)) { |
|
print LOG "\nMySQL Error Insert: ".$dbh->errstr."\n"; |
|
die $dbh->errstr; |
|
} |
$ref = undef; |
$ref = undef; |
} |
} |
|
|
# --------------------------------------------------- Clean up database |
|
# Need to, perhaps, remove stale SQL database records. |
|
# ... not yet implemented |
|
|
|
# ------------------------------------------- Copy over the new db-files |
|
# |
|
|
|
system('mv '.$prodir.'/nohist_new_resevaldata.db '. |
|
$prodir.'/nohist_resevaldata.db'); |
|
|
|
} |
} |
# --------------------------------------------------- Close database connection |
# --------------------------------------------------- Close database connection |
$dbh->disconnect; |
$dbh->do("DROP TABLE IF EXISTS metadata"); |
|
unless ($dbh->do("RENAME TABLE newmetadata TO metadata")) { |
|
print LOG "\nMySQL Error Rename: ".$dbh->errstr."\n"; |
|
die $dbh->errstr; |
|
} |
|
unless ($dbh->disconnect) { |
|
print LOG "\nMySQL Error Disconnect: ".$dbh->errstr."\n"; |
|
die $dbh->errstr; |
|
} |
print LOG "\n==== Searchcat completed ".localtime()." ====\n"; |
print LOG "\n==== Searchcat completed ".localtime()." ====\n"; |
close(LOG); |
close(LOG); |
&writesimple(); |
&writesimple(); |
Line 363 exit 0;
|
Line 468 exit 0;
|
# significantly altered from subroutine present in lonnet |
# significantly altered from subroutine present in lonnet |
sub metadata { |
sub metadata { |
my ($uri,$what)=@_; |
my ($uri,$what)=@_; |
my %metacache; |
my %metacache=(); |
$uri=&declutter($uri); |
$uri=&declutter($uri); |
my $filename=$uri; |
my $filename=$uri; |
$uri=~s/\.meta$//; |
$uri=~s/\.meta$//; |
Line 450 sub propath {
|
Line 555 sub propath {
|
|
|
# ---------------------------- convert 'time' format into a datetime sql format |
# ---------------------------- convert 'time' format into a datetime sql format |
sub sqltime { |
sub sqltime { |
|
my $time=&unsqltime(@_[0]); |
|
unless ($time) { return 'NULL'; } |
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = |
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = |
localtime(&unsqltime(@_[0])); |
localtime($time); |
$mon++; $year+=1900; |
$mon++; $year+=1900; |
return "$year-$mon-$mday $hour:$min:$sec"; |
return "$year-$mon-$mday $hour:$min:$sec"; |
} |
} |
Line 477 sub unsqltime {
|
Line 584 sub unsqltime {
|
return $timestamp; |
return $timestamp; |
} |
} |
|
|
|
# ----------------- Code to enable 'find' subroutine listing of the .meta files |
|
|
|
no strict "vars"; |
|
|
|
sub wanted { |
|
(($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && |
|
-f _ && |
|
/^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && |
|
push(@metalist,"$dir/$_"); |
|
} |