version 1.55, 2004/04/08 15:57:32
|
version 1.62, 2005/03/11 03:25:18
|
Line 71 use lib '/home/httpd/lib/perl/';
|
Line 71 use lib '/home/httpd/lib/perl/';
|
use LONCAPA::Configuration; |
use LONCAPA::Configuration; |
use LONCAPA::lonmetadata; |
use LONCAPA::lonmetadata; |
|
|
|
use Getopt::Long; |
use IO::File; |
use IO::File; |
use HTML::TokeParser; |
use HTML::TokeParser; |
use GDBM_File; |
use GDBM_File; |
use POSIX qw(strftime mktime); |
use POSIX qw(strftime mktime); |
|
|
|
use Sys::Hostname; |
|
|
use File::Find; |
use File::Find; |
|
|
|
# |
|
# Set up configuration options
#
# $logfile is declared here but no command line option sets it in this
# section.
my ($simulate,$oneuser,$help,$verbose,$logfile,$debug,$multidom);
#
# -verbose is parsed as an integer ("=i"): the help text requires a number
# and the value is compared numerically against the message level when
# logging.
#
# A failed parse (unknown option, non-numeric -verbose) stops the run.
# Otherwise a mistyped -simulate would be ignored and the database would be
# modified.
GetOptions (
            'help'         => \$help,
            'simulate'     => \$simulate,
            'only=s'       => \$oneuser,
            'verbose=i'    => \$verbose,
            'debug'        => \$debug,
            'multi_domain' => \$multidom,
            ) or die "Invalid command line options; run '$0 -help' for usage.\n";

if ($help) {
    print <<"ENDHELP";
$0
Rebuild and update the LON-CAPA metadata database.
Options:
-help Print this help
-simulate Do not modify the database.
-only=user Only compute for the given user. Implies -simulate
-verbose=val Sets logging level, val must be a number
-debug Turns on debugging output
-multi_domain Parse the hosts.tab file domain(s) to use.
ENDHELP
    exit 0;
}

# Options that were not given default to "off" / least verbose.
$debug   = 0 if (! defined($debug));
$verbose = 0 if (! defined($verbose));

# Restricting the run to one user never touches the real table.
if (defined($oneuser)) {
    $simulate=1;
}
|
|
## |
## |
## Use variables for table names so we can test this routine a little easier |
## Use variables for table names so we can test this routine a little easier |
my $oldname = 'metadata'; |
my $oldname = 'metadata'; |
my $newname = 'newmetadata'; |
my $newname = 'newmetadata'.$$; # append pid to have unique temporary table |
|
|
# |
# |
# Read loncapa_apache.conf and loncapa.conf |
# Read loncapa_apache.conf and loncapa.conf |
Line 103 if ($wwwid!=$<) {
|
Line 146 if ($wwwid!=$<) {
|
} |
} |
# |
# |
# Let people know we are running |
# Let people know we are running |
open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); |
open(LOG,'>>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); |
print LOG '==== Searchcat Run '.localtime()."====\n"; |
&log(0,'==== Searchcat Run '.localtime()."===="); |
|
|
|
|
|
if ($debug) { |
|
&log(0,'simulating') if ($simulate); |
|
&log(0,'only processing user '.$oneuser) if ($oneuser); |
|
&log(0,'verbosity level = '.$verbose); |
|
} |
# |
# |
# Connect to database |
# Connect to database |
my $dbh; |
my $dbh; |
if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'}, |
if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'}, |
{ RaiseError =>0,PrintError=>0}))) { |
{ RaiseError =>0,PrintError=>0}))) { |
print LOG "Cannot connect to database!\n"; |
&log(0,"Cannot connect to database!"); |
die "MySQL Error: Cannot connect to database!\n"; |
die "MySQL Error: Cannot connect to database!\n"; |
} |
} |
# This can return an error and still be okay, so we do not bother checking. |
# This can return an error and still be okay, so we do not bother checking. |
Line 122 my $request = &LONCAPA::lonmetadata::cre
|
Line 172 my $request = &LONCAPA::lonmetadata::cre
|
$dbh->do($request); |
$dbh->do($request); |
if ($dbh->err) { |
if ($dbh->err) { |
$dbh->disconnect(); |
$dbh->disconnect(); |
print LOG "\nMySQL Error Create: ".$dbh->errstr."\n"; |
&log(0,"MySQL Error Create: ".$dbh->errstr); |
die $dbh->errstr; |
die $dbh->errstr; |
} |
} |
# |
# |
# find out which users we need to examine |
# find out which users we need to examine |
opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); |
my @domains; |
my @homeusers = |
if (defined($multidom)) { |
grep { |
&log(1,'====multi domain setup===='); |
&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_"); |
# Peek into the hosts.tab and look for matches of our hostname |
} grep { |
my $host = hostname(); |
!/^\.\.?$/; |
&log(9,'hostname = "'.$host.'"'); |
} readdir(RESOURCES); |
open(HOSTFILE,$perlvar{'lonTabDir'}.'/hosts.tab') || |
closedir RESOURCES; |
die ("Unable to determine domain(s) of multi-domain server"); |
# |
my %domains; |
# Loop through the users |
while (<HOSTFILE>) { |
foreach my $user (@homeusers) { |
next if (/^\#/); |
print LOG "=== User: ".$user."\n"; |
next if (!/:\Q$host\E/); |
my $prodir=&propath($perlvar{'lonDefDomain'},$user); |
&log(9,$_); |
# |
$domains{(split(':',$_))[1]}++; |
# Use File::Find to get the files we need to read/modify |
} |
find( |
close HOSTFILE; |
{preprocess => \&only_meta_files, |
@domains = sort(keys(%domains)); |
# wanted => \&print_filename, |
&log(9,join(',',@domains)); |
# wanted => \&log_metadata, |
if (! scalar(@domains)) { |
wanted => \&process_meta_file, |
die ("Unable to find any domains in the hosts.tab that match ".$host); |
}, |
} |
"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); |
} else { |
|
push(@domains,$perlvar{'lonDefDomain'}); |
|
} |
|
|
|
#
# For each domain in @domains, collect the home users found under
# <lonDocRoot>/res/<domain> and run File::Find over each user's directory.
foreach my $dom (@domains) {
    &log(9,'domain = '.$dom);
    my $resdir = "$perlvar{'lonDocRoot'}/res/$dom";
    my @homeusers;
    if (opendir(RESOURCES,$resdir)) {
        # Drop . and .., then keep only the entries &ishome accepts.
        @homeusers =
            grep {
                &ishome("$resdir/$_");
            } grep {
                !/^\.\.?$/;
            } readdir(RESOURCES);
        closedir RESOURCES;
    } else {
        # Report the failure instead of calling readdir/closedir on a
        # handle that was never opened; the user list stays empty.
        &log(0,'Unable to read directory '.$resdir.': '.$!);
    }
    &log(5,'users = '.$dom.':'.join(',',@homeusers));
    #
    # -only replaces whatever was found on disk with the single given user.
    if ($oneuser) {
        @homeusers=($oneuser);
    }
    #
    # Loop through the users
    foreach my $user (@homeusers) {
        &log(0,"=== User: ".$user);
        # Load this user's dynamic metadata and access counts before the
        # per-file callbacks run.
        &process_dynamic_metadata($user,$dom);
        #
        # Use File::Find to get the files we need to read/modify
        find(
             {preprocess => \&only_meta_files,
              #wanted     => \&print_filename,
              #wanted     => \&log_metadata,
              wanted     => \&process_meta_file,
              }, join('/',($perlvar{'lonDocRoot'},'res',$dom,$user)) );
    }
}
} |
# |
# |
# Rename the table |
# Rename the table |
$dbh->do('DROP TABLE IF EXISTS '.$oldname); |
if (! $simulate) { |
if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { |
$dbh->do('DROP TABLE IF EXISTS '.$oldname); |
print LOG "MySQL Error Rename: ".$dbh->errstr."\n"; |
if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { |
die $dbh->errstr; |
&log(0,"MySQL Error Rename: ".$dbh->errstr); |
|
die $dbh->errstr; |
|
} else { |
|
&log(1,"MySQL table rename successful."); |
|
} |
} |
} |
if (! $dbh->disconnect) { |
if (! $dbh->disconnect) { |
print LOG "MySQL Error Disconnect: ".$dbh->errstr."\n"; |
&log(0,"MySQL Error Disconnect: ".$dbh->errstr); |
die $dbh->errstr; |
die $dbh->errstr; |
} |
} |
## |
## |
## Finished! |
## Finished! |
print LOG "==== Searchcat completed ".localtime()." ====\n"; |
&log(0,"==== Searchcat completed ".localtime()." ===="); |
close(LOG); |
close(LOG); |
|
|
&write_type_count(); |
&write_type_count(); |
Line 171 close(LOG);
|
Line 257 close(LOG);
|
|
|
exit 0; |
exit 0; |
|
|
|
## |
|
## Status logging routine.  Inputs: $level, $message
##
## $level 0 should be used for normal output and error messages.
## A message is written only when the file-level $verbose setting is
## greater than or equal to $level; an undefined $level counts as 0.
##
## $message does not need to end with \n; the current value of $/ is
## appended.  In the case of errors the message should contain as much
## information as possible to help in diagnosing the problem.
##
sub log {
    my $level   = shift;
    my $message = shift;
    $level = 0 unless (defined($level));
    print LOG $message.$/ if ($verbose >= $level);
}
|
|
######################################################## |
######################################################## |
######################################################## |
######################################################## |
### ### |
### ### |
Line 205 sub only_meta_files {
|
Line 308 sub only_meta_files {
|
sub print_filename { |
sub print_filename { |
my ($file) = $_; |
my ($file) = $_; |
my $fullfilename = $File::Find::name; |
my $fullfilename = $File::Find::name; |
if (-d $file) { |
if ($debug) { |
print LOG " Got directory ".$fullfilename."\n"; |
if (-d $file) { |
} else { |
&log(5," Got directory ".$fullfilename); |
print LOG " Got file ".$fullfilename."\n"; |
} else { |
|
&log(5," Got file ".$fullfilename); |
|
} |
} |
} |
$_=$file; |
$_=$file; |
} |
} |
Line 217 sub log_metadata {
|
Line 322 sub log_metadata {
|
my ($file) = $_; |
my ($file) = $_; |
my $fullfilename = $File::Find::name; |
my $fullfilename = $File::Find::name; |
return if (-d $fullfilename); # No need to do anything here for directories |
return if (-d $fullfilename); # No need to do anything here for directories |
print LOG $fullfilename."\n"; |
if ($debug) { |
my $ref=&metadata($fullfilename); |
&log(6,$fullfilename); |
if (! defined($ref)) { |
my $ref=&metadata($fullfilename); |
print LOG " No data\n"; |
if (! defined($ref)) { |
return; |
&log(6," No data"); |
} |
return; |
while (my($key,$value) = each(%$ref)) { |
} |
print LOG " ".$key." => ".$value."\n"; |
while (my($key,$value) = each(%$ref)) { |
|
&log(6," ".$key." => ".$value); |
|
} |
|
&count_copyright($ref->{'copyright'}); |
} |
} |
&count_copyright($ref->{'copyright'}); |
|
$_=$file; |
$_=$file; |
} |
} |
|
|
|
|
## |
## |
## process_meta_file |
## process_meta_file |
## Called by File::Find. |
## Called by File::Find. |
## Only input is the filename in $_. |
## Only input is the filename in $_. |
sub process_meta_file { |
sub process_meta_file { |
my ($file) = $_; |
my ($file) = $_; |
my $filename = $File::Find::name; |
my $filename = $File::Find::name; # full filename |
return if (-d $filename); # No need to do anything here for directories |
return if (-d $filename); # No need to do anything here for directories |
# |
# |
print LOG $filename."\n"; |
&log(3,$filename) if ($debug); |
# |
# |
my $ref=&metadata($filename); |
my $ref=&metadata($filename); |
# |
# |
# $url is the original file url, not the metadata file |
# $url is the original file url, not the metadata file |
my $url='/res/'.&declutter($filename); |
my $target = $filename; |
$url=~s/\.meta$//; |
$target =~ s/\.meta$//; |
print LOG " ".$url."\n"; |
my $url='/res/'.&declutter($target); |
|
&log(3," ".$url) if ($debug); |
# |
# |
# Ignore some files based on their metadata |
# Ignore some files based on their metadata |
if ($ref->{'obsolete'}) { |
if ($ref->{'obsolete'}) { |
print LOG "obsolete\n"; |
&log(3,"obsolete") if ($debug); |
return; |
return; |
} |
} |
&count_copyright($ref->{'copyright'}); |
&count_copyright($ref->{'copyright'}); |
if ($ref->{'copyright'} eq 'private') { |
if ($ref->{'copyright'} eq 'private') { |
print LOG "private\n"; |
&log(3,"private") if ($debug); |
return; |
return; |
} |
} |
# |
# |
Line 264 sub process_meta_file {
|
Line 371 sub process_meta_file {
|
my %dyn; |
my %dyn; |
if ($url=~ m:/default$:) { |
if ($url=~ m:/default$:) { |
$url=~ s:/default$:/:; |
$url=~ s:/default$:/:; |
|
&log(3,"Skipping dynamic data") if ($debug); |
} else { |
} else { |
# %dyn=&dynamicmeta($url); |
&log(3,"Retrieving dynamic data") if ($debug); |
|
%dyn=&get_dynamic_metadata($url); |
&count_type($url); |
&count_type($url); |
} |
} |
# |
# |
|
if (! defined($ref->{'creationdate'}) || |
|
$ref->{'creationdate'} =~ /^\s*$/) { |
|
$ref->{'creationdate'} = (stat($target))[9]; |
|
} |
|
if (! defined($ref->{'lastrevisiondate'}) || |
|
$ref->{'lastrevisiondate'} =~ /^\s*$/) { |
|
$ref->{'lastrevisiondate'} = (stat($target))[9]; |
|
} |
$ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); |
$ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); |
$ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); |
$ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); |
my %Data = ( |
my %Data = ( |
Line 276 sub process_meta_file {
|
Line 393 sub process_meta_file {
|
%dyn, |
%dyn, |
'url'=>$url, |
'url'=>$url, |
'version'=>'current'); |
'version'=>'current'); |
my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, |
if (! $simulate) { |
\%Data); |
my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, |
if ($err) { |
\%Data); |
print LOG "\nMySQL Error Insert: ".$err."\n"; |
if ($err) { |
die $err; |
&log(0,"MySQL Error Insert: ".$err); |
} |
} |
if ($count < 1) { |
if ($count < 1) { |
print LOG "Unable to insert record into MySQL database for $url\n"; |
&log(0,"Unable to insert record into MySQL database for $url"); |
die "Unable to insert record into MySQl database for $url"; |
} |
} else { |
|
print LOG "Count = ".$count."\n"; |
|
} |
} |
# |
# |
# Reset $_ before leaving |
# Reset $_ before leaving |
Line 366 sub getfile {
|
Line 481 sub getfile {
|
### ### |
### ### |
######################################################## |
######################################################## |
######################################################## |
######################################################## |
sub dynamicmeta { |
## |
my $url = &declutter(shift()); |
## Dynamic metadata description (incomplete) |
$url =~ s/\.meta$//; |
## |
my %data = ('count' => 0, |
## For a full description of all fields, |
'course' => 0, |
## see LONCAPA::lonmetadata |
'course_list' => '', |
## |
'avetries' => 'NULL', |
## Field Type |
'avetries_list' => '', |
##----------------------------------------------------------- |
'stdno' => 0, |
## count integer |
'stdno_list' => '', |
## course integer |
'usage' => 0, |
## course_list comma separated list of course ids |
'usage_list' => '', |
## avetries real |
'goto' => 0, |
## avetries_list comma separated list of real numbers |
'goto_list' => '', |
## stdno real |
'comefrom' => 0, |
## stdno_list comma separated list of real numbers |
'comefrom_list' => '', |
## usage integer |
'difficulty' => 'NULL', |
## usage_list comma separated list of resources |
'difficulty_list' => '', |
## goto scalar |
'sequsage' => '0', |
## goto_list comma separated list of resources |
'sequsage_list' => '', |
## comefrom scalar |
'clear' => 'NULL', |
## comefrom_list comma separated list of resources |
'technical' => 'NULL', |
## difficulty real |
'correct' => 'NULL', |
## difficulty_list comma separated list of real numbers |
'helpful' => 'NULL', |
## sequsage scalar |
'depth' => 'NULL', |
## sequsage_list comma separated list of resources |
'comments' => '', |
## clear real |
); |
## technical real |
my ($dom,$auth)=($url=~/^(\w+)\/(\w+)\//); |
## correct real |
my $prodir=&propath($dom,$auth); |
## helpful real |
|
## depth real |
|
## comments html of all the comments made |
|
## |
|
{ |
|
|
|
my %DynamicData; |
|
my %Counts; |
|
|
|
sub process_dynamic_metadata { |
|
my ($user,$dom) = @_; |
|
undef(%DynamicData); |
|
undef(%Counts); |
# |
# |
# Get metadata except counts |
my $prodir = &propath($dom,$user); |
|
# |
|
# Read in the dynamic metadata |
my %evaldata; |
my %evaldata; |
if (! tie(%evaldata,'GDBM_File', |
if (! tie(%evaldata,'GDBM_File', |
$prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { |
$prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { |
return (undef); |
return 0; |
} |
|
my %sum=(); |
|
my %count=(); |
|
my %concat=(); |
|
my %listitems=( |
|
'course' => 'add', |
|
'goto' => 'add', |
|
'comefrom' => 'add', |
|
'avetries' => 'average', |
|
'stdno' => 'add', |
|
'difficulty' => 'average', |
|
'clear' => 'average', |
|
'technical' => 'average', |
|
'helpful' => 'average', |
|
'correct' => 'average', |
|
'depth' => 'average', |
|
'comments' => 'append', |
|
'usage' => 'count' |
|
); |
|
# |
|
my $regexp=$url; |
|
$regexp=~s/(\W)/\\$1/g; |
|
$regexp='___'.$regexp.'___([a-z]+)$'; |
|
while (my ($esckey,$value)=each %evaldata) { |
|
my $key=&unescape($esckey); |
|
if ($key=~/$regexp/) { |
|
my ($item,$purl,$cat)=split(/___/,$key); |
|
$count{$cat}++; |
|
if ($listitems{$cat} ne 'append') { |
|
if (defined($sum{$cat})) { |
|
$sum{$cat}+=&unescape($value); |
|
$concat{$cat}.=','.$item; |
|
} else { |
|
$sum{$cat}=&unescape($value); |
|
$concat{$cat}=$item; |
|
} |
|
} else { |
|
if (defined($sum{$cat})) { |
|
if ($evaldata{$esckey}=~/\w/) { |
|
$sum{$cat}.='<hr />'.&unescape($evaldata{$esckey}); |
|
} |
|
} else { |
|
$sum{$cat}=''.&unescape($evaldata{$esckey}); |
|
} |
|
} |
|
} |
|
} |
} |
|
# |
|
%DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata); |
untie(%evaldata); |
untie(%evaldata); |
# transfer gathered data to returnhash, calculate averages where applicable |
$DynamicData{'domain'} = $dom; |
my %returnhash; |
print('user = '.$user.' domain = '.$dom.$/); |
while (my $cat=each(%count)) { |
|
if ($count{$cat} eq 'nan') { next; } |
|
if ($sum{$cat} eq 'nan') { next; } |
|
if ($listitems{$cat} eq 'average') { |
|
if ($count{$cat}) { |
|
$returnhash{$cat}=int(($sum{$cat}/$count{$cat})*100.0+0.5)/100.0; |
|
} else { |
|
$returnhash{$cat}='NULL'; |
|
} |
|
} elsif ($listitems{$cat} eq 'count') { |
|
$returnhash{$cat}=$count{$cat}; |
|
} else { |
|
$returnhash{$cat}=$sum{$cat}; |
|
} |
|
$returnhash{$cat.'_list'}=$concat{$cat}; |
|
} |
|
# |
# |
# get count |
# Read in the access count data |
if (tie(my %evaldata,'GDBM_File', |
&log(7,'Reading access count data') if ($debug); |
$prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { |
my %countdata; |
my $escurl=&escape($url); |
if (! tie(%countdata,'GDBM_File', |
if (! exists($evaldata{$escurl})) { |
$prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { |
$returnhash{'count'}=0; |
return 0; |
} else { |
|
$returnhash{'count'}=$evaldata{$escurl}; |
|
} |
|
untie %evaldata; |
|
} |
} |
return %returnhash; |
while (my ($key,$count) = each(%countdata)) { |
|
next if ($key !~ /^$dom/); |
|
$key = &unescape($key); |
|
&log(8,' Count '.$key.' = '.$count) if ($debug); |
|
$Counts{$key}=$count; |
|
} |
|
untie(%countdata); |
|
if ($debug) { |
|
&log(7,scalar(keys(%Counts)). |
|
" Counts read for ".$user."@".$dom); |
|
&log(7,scalar(keys(%DynamicData)). |
|
" Dynamic metadata read for ".$user."@".$dom); |
|
} |
|
# |
|
return 1; |
|
} |
|
|
|
##
## get_dynamic_metadata
## Input:   $url, the resource url; a leading /res/ is removed before lookup.
## Returns: the hash produced by
##          &LONCAPA::lonmetadata::process_dynamic_metadata for $url from
##          %DynamicData, with 'count' set from %Counts.
## Uses %DynamicData and %Counts as filled in by process_dynamic_metadata.
##
sub get_dynamic_metadata {
    my ($url) = @_;
    $url =~ s:^/res/::;
    my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                                                               \%DynamicData);
    #
    # find the count.  A resource with no entry in %Counts gets 0 instead
    # of undef, so the caller always receives a numeric count.
    $data{'count'} = defined($Counts{$url}) ? $Counts{$url} : 0;
    #
    # Log the dynamic metadata
    if ($debug) {
        while (my($k,$v)=each(%data)) {
            # Undefined values are logged as empty strings.
            &log(8,"    ".$k." => ".(defined($v) ? $v : ''));
        }
    }
    return %data;
}
} |
|
|
|
} # End of %DynamicData and %Counts scope |
|
|
######################################################## |
######################################################## |
######################################################## |
######################################################## |
### ### |
### ### |
Line 593 sub sqltime {
|
Line 691 sub sqltime {
|
$TimeData[5]+=1900; |
$TimeData[5]+=1900; |
$mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', |
$mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', |
@TimeData[5,4,3,2,1,0]); |
@TimeData[5,4,3,2,1,0]); |
|
} elsif (! defined($time) || $time == 0) { |
|
$mysqltime = 0; |
} else { |
} else { |
print LOG " Unable to decode time ".$time."\n"; |
&log(0," sqltime:Unable to decode time ".$time); |
$mysqltime = 0; |
$mysqltime = 0; |
} |
} |
return $mysqltime; |
return $mysqltime; |