--- loncom/metadata_database/searchcat.pl 2001/04/16 12:04:41 1.2
+++ loncom/metadata_database/searchcat.pl 2002/10/08 18:45:33 1.23
@@ -1,49 +1,195 @@
#!/usr/bin/perl
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
-
-# 04/14/2001 Scott Harrison
+#
+# $Id: searchcat.pl,v 1.23 2002/10/08 18:45:33 albertel Exp $
+#
+# Copyright Michigan State University Board of Trustees
+#
+# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
+#
+# LON-CAPA is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# LON-CAPA is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with LON-CAPA; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# /home/httpd/html/adm/gpl.txt
+#
+# http://www.lon-capa.org/
+#
+# YEAR=2001
+# 04/14/2001, 04/16/2001 Scott Harrison
+#
+# YEAR=2002
+# 05/11/2002 Scott Harrison
+#
+###
# This script goes through a LON-CAPA resource
# directory and gathers metadata.
# The metadata is entered into a SQL database.
-use strict;
+use lib '/home/httpd/lib/perl/';
+use LONCAPA::Configuration;
use IO::File;
use HTML::TokeParser;
+use DBI;
+use GDBM_File;
my @metalist;
+
+
+# ----------------------------------------------------- Un-Escape Special Chars
+
+sub unescape {
+ my $str=shift;
+ $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
+ return $str;
+}
+
+# -------------------------------------------------------- Escape Special Chars
+
+sub escape {
+ my $str=shift;
+ $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
+ return $str;
+}
+
+
+# ------------------------------------------- Code to evaluate dynamic metadata
+
+sub dynamicmeta {
+#
+#
+# Do nothing for now ...
+#
+#
+ return;
+#
+# ..., but stuff below already works
+#
+ my $url=&declutter(shift);
+ $url=~s/\.meta$//;
+ my %returnhash=();
+ my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
+ my $prodir=&propath($adomain,$aauthor);
+ if (tie(%evaldata,'GDBM_File',
+ $prodir.'/nohist_resevaldata.db',&GDBM_WRCREAT(),0640)) {
+ my %sum=();
+ my %cnt=();
+ my %listitems=('count' => 'add',
+ 'course' => 'add',
+ 'avetries' => 'avg',
+ 'stdno' => 'add',
+ 'difficulty' => 'avg',
+ 'clear' => 'avg',
+ 'technical' => 'avg',
+ 'helpful' => 'avg',
+ 'correct' => 'avg',
+ 'depth' => 'avg',
+ 'comments' => 'app',
+ 'usage' => 'cnt'
+ );
+ my $regexp=$url;
+ $regexp=~s/(\W)/\\$1/g;
+ $regexp='___'.$regexp.'___([a-z]+)$';
+ foreach (keys %evaldata) {
+ my $key=&unescape($_);
+ if ($key=~/$regexp/) {
+ my $ctype=$1;
+ if (defined($cnt{$ctype})) {
+ $cnt{$ctype}++;
+ } else {
+ $cnt{$ctype}=1;
+ }
+ unless ($listitems{$ctype} eq 'app') {
+ if (defined($sum{$ctype})) {
+ $sum{$ctype}+=$evaldata{$_};
+ } else {
+ $sum{$ctype}=$evaldata{$_};
+ }
+ } else {
+ if (defined($sum{$ctype})) {
+ if ($evaldata{$_}) {
+ $sum{$ctype}.='
'.$evaldata{$_};
+ }
+ } else {
+ $sum{$ctype}=''.$evaldata{$_};
+ }
+ }
+ if ($ctype eq 'count') {
+ delete($evaldata{$_});
+ }
+ }
+ }
+ foreach (keys %cnt) {
+ if ($listitems{$_} eq 'avg') {
+ $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
+ } elsif ($listitems{$_} eq 'cnt') {
+ $returnhash{$_}=$cnt{$_};
+ } else {
+ $returnhash{$_}=$sum{$_};
+ }
+ }
+ if ($returnhash{'count'}) {
+ my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
+ $evaldata{$newkey}=$returnhash{'count'};
+ }
+ untie(%evaldata);
+ }
+ return %returnhash;
+}
+
# ----------------- Code to enable 'find' subroutine listing of the .meta files
require "find.pl";
sub wanted {
(($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
-f _ &&
- /^.*\.meta$/ &&
+ /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
push(@metalist,"$dir/$_");
}
-# ------------------------------------ Read httpd access.conf and get variables
-open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";
+# --------------- Read loncapa_apache.conf and loncapa.conf and get variables
+my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
+my %perlvar=%{$perlvarref};
+undef $perlvarref; # remove since sensitive and not needed
+delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
-while ($configline=) {
- if ($configline =~ /PerlSetVar/) {
- my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
- chomp($varvalue);
- $perlvar{$varname}=$varvalue;
- }
-}
-close(CONFIG);
+# ------------------------------------- Only run if machine is a library server
+exit unless $perlvar{'lonRole'} eq 'library';
+my $dbh;
# ------------------------------------- Make sure that database can be accessed
{
- my $dbh;
unless (
$dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
) {
print "Cannot connect to database!\n";
exit;
}
+ my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
+ "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".
+ "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".
+ "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ".
+ "copyright TEXT, FULLTEXT idx_title (title), ".
+ "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ".
+ "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ".
+ "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ".
+ "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ".
+ "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ".
+ "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM";
+ # It would sure be nice to have some logging mechanism.
+ $dbh->do($make_metadata_table);
}
# ------------------------------------------------------------- get .meta files
@@ -57,24 +203,36 @@ foreach my $user (@homeusers) {
}
# -- process each file to get metadata and put into search catalog SQL database
+# Also, check to see if already there.
+# I could just delete (without searching first), but this works for now.
foreach my $m (@metalist) {
my $ref=&metadata($m);
- my $sth=$dbh->prepare('insert into metadata values ('.
- delete($ref->{'title'}),
- delete($ref->{'author'}).','.
- delete($ref->{'subject'}).','.
- delete($ref->{'url'}).','.
- delete($ref->{'keywords'}).','.
- delete($ref->{'version'}).','.
- delete($ref->{'notes'}).','.
- delete($ref->{'abstract'}).','.
- delete($ref->{'mime'}).','.
- delete($ref->{'language'}).','.
- delete($ref->{'creationdate'}).','.
- delete($ref->{'lastrevisiondate'}).','.
- delete($ref->{'owner'}).','.
- delete($ref->{'copyright'}).
- ')';
+ my $m2='/res/'.&declutter($m);
+ $m2=~s/\.meta$//;
+ &dynamicmeta($m2);
+ my $q2="select * from metadata where url like binary '$m2'";
+ my $sth = $dbh->prepare($q2);
+ $sth->execute();
+ my $r1=$sth->fetchall_arrayref;
+ if (@$r1) {
+ $sth=$dbh->prepare("delete from metadata where url like binary '$m2'");
+ $sth->execute();
+ }
+ $sth=$dbh->prepare('insert into metadata values ('.
+ '"'.delete($ref->{'title'}).'"'.','.
+ '"'.delete($ref->{'author'}).'"'.','.
+ '"'.delete($ref->{'subject'}).'"'.','.
+ '"'.$m2.'"'.','.
+ '"'.delete($ref->{'keywords'}).'"'.','.
+ '"'.'current'.'"'.','.
+ '"'.delete($ref->{'notes'}).'"'.','.
+ '"'.delete($ref->{'abstract'}).'"'.','.
+ '"'.delete($ref->{'mime'}).'"'.','.
+ '"'.delete($ref->{'language'}).'"'.','.
+ '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.
+ '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.
+ '"'.delete($ref->{'owner'}).'"'.','.
+ '"'.delete($ref->{'copyright'}).'"'.')');
$sth->execute();
}
@@ -173,3 +331,11 @@ sub propath {
my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
return $proname;
}
+
+# ---------------------------- convert 'time' format into a datetime sql format
+sub sqltime {
+ my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
+ localtime(@_[0]);
+ $mon++; $year+=1900;
+ return "$year-$mon-$mday $hour:$min:$sec";
+}