--- loncom/metadata_database/searchcat.pl 2002/03/04 05:06:18 1.16 +++ loncom/metadata_database/searchcat.pl 2002/10/18 13:54:31 1.24 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.16 2002/03/04 05:06:18 harris41 Exp $ +# $Id: searchcat.pl,v 1.24 2002/10/18 13:54:31 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -27,20 +27,130 @@ # http://www.lon-capa.org/ # # YEAR=2001 -# # 04/14/2001, 04/16/2001 Scott Harrison # +# YEAR=2002 +# 05/11/2002 Scott Harrison +# ### # This script goes through a LON-CAPA resource # directory and gathers metadata. # The metadata is entered into a SQL database. +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration; + use IO::File; use HTML::TokeParser; use DBI; +use GDBM_File; +use POSIX qw(strftime mktime); my @metalist; + + +# ----------------------------------------------------- Un-Escape Special Chars + +sub unescape { + my $str=shift; + $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; + return $str; +} + +# -------------------------------------------------------- Escape Special Chars + +sub escape { + my $str=shift; + $str =~ s/(\W)/"%".unpack('H2',$1)/eg; + return $str; +} + + +# ------------------------------------------- Code to evaluate dynamic metadata + +sub dynamicmeta { +# +# +# Do nothing for now ... +# +# + return; +# +# ..., but stuff below already works +# + my $url=&declutter(shift); + $url=~s/\.meta$//; + my %returnhash=(); + my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); + my $prodir=&propath($adomain,$aauthor); + if (tie(%evaldata,'GDBM_File', + $prodir.'/nohist_resevaldata.db',&GDBM_WRCREAT(),0640)) { + my %sum=(); + my %cnt=(); + my %listitems=('count' => 'add', + 'course' => 'add', + 'avetries' => 'avg', + 'stdno' => 'add', + 'difficulty' => 'avg', + 'clear' => 'avg', + 'technical' => 'avg', + 'helpful' => 'avg', + 'correct' => 'avg', + 'depth' => 'avg', + 'comments' => 'app', + 'usage' => 'cnt' + ); + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; + foreach (keys %evaldata) { + my $key=&unescape($_); + if ($key=~/$regexp/) { + my $ctype=$1; + if (defined($cnt{$ctype})) { + $cnt{$ctype}++; + } else { + $cnt{$ctype}=1; + } + unless ($listitems{$ctype} eq 'app') { + if (defined($sum{$ctype})) { + $sum{$ctype}+=$evaldata{$_}; + } else { + $sum{$ctype}=$evaldata{$_}; + } + } else { + if (defined($sum{$ctype})) { + if ($evaldata{$_}) { + $sum{$ctype}.='
'.$evaldata{$_}; + } + } else { + $sum{$ctype}=''.$evaldata{$_}; + } + } + if ($ctype eq 'count') { + delete($evaldata{$_}); + } + } + } + foreach (keys %cnt) { + if ($listitems{$_} eq 'avg') { + $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; + } elsif ($listitems{$_} eq 'cnt') { + $returnhash{$_}=$cnt{$_}; + } else { + $returnhash{$_}=$sum{$_}; + } + } + if ($returnhash{'count'}) { + my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; + $evaldata{$newkey}=$returnhash{'count'}; + } + untie(%evaldata); + } + return %returnhash; +} + # ----------------- Code to enable 'find' subroutine listing of the .meta files require "find.pl"; sub wanted { @@ -50,17 +160,11 @@ sub wanted { push(@metalist,"$dir/$_"); } -# ------------------------------------ Read httpd access.conf and get variables -open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; - -while ($configline=) { - if ($configline =~ /PerlSetVar/) { - my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); - chomp($varvalue); - $perlvar{$varname}=$varvalue; - } -} -close(CONFIG); +# --------------- Read loncapa_apache.conf and loncapa.conf and get variables +my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); +my %perlvar=%{$perlvarref}; +undef $perlvarref; # remove since sensitive and not needed +delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed # ------------------------------------- Only run if machine is a library server exit unless $perlvar{'lonRole'} eq 'library'; @@ -74,6 +178,19 @@ my $dbh; print "Cannot connect to database!\n"; exit; } + my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (". + "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ". + "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ". + "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ". + "copyright TEXT, FULLTEXT idx_title (title), ". + "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ". + "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ". + "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ". + "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ". + "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ". + "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM"; + # It would sure be nice to have some logging mechanism. + $dbh->do($make_metadata_table); } # ------------------------------------------------------------- get .meta files @@ -93,6 +210,7 @@ foreach my $m (@metalist) { my $ref=&metadata($m); my $m2='/res/'.&declutter($m); $m2=~s/\.meta$//; + &dynamicmeta($m2); my $q2="select * from metadata where url like binary '$m2'"; my $sth = $dbh->prepare($q2); $sth->execute(); @@ -218,7 +336,30 @@ sub propath { # ---------------------------- convert 'time' format into a datetime sql format sub sqltime { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime(@_[0]); + localtime(&unsqltime(@_[0])); $mon++; $year+=1900; return "$year-$mon-$mday $hour:$min:$sec"; } + +sub maketime { + my %th=@_; + return POSIX::mktime( + ($th{'seconds'},$th{'minutes'},$th{'hours'}, + $th{'day'},$th{'month'}-1,$th{'year'}-1900,0,0,$th{'dlsav'})); +} + + +######################################### +# +# Retro-fixing of un-backward-compatible time format + +sub unsqltime { + my $timestamp=shift; + if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) { + $timestamp=&maketime( + 'year'=>$1,'month'=>$2,'day'=>$3, + 'hours'=>$4,'minutes'=>$5,'seconds'=>$6); + } + return $timestamp; +} +