version 1.3, 2001/04/16 12:07:36
|
version 1.23, 2002/10/08 18:45:33
|
Line 1
|
Line 1
|
#!/usr/bin/perl |
#!/usr/bin/perl |
# The LearningOnline Network |
# The LearningOnline Network |
# searchcat.pl "Search Catalog" batch script |
# searchcat.pl "Search Catalog" batch script |
|
# |
# 04/14/2001 Scott Harrison |
# $Id$ |
|
# |
|
# Copyright Michigan State University Board of Trustees |
|
# |
|
# This file is part of the LearningOnline Network with CAPA (LON-CAPA). |
|
# |
|
# LON-CAPA is free software; you can redistribute it and/or modify |
|
# it under the terms of the GNU General Public License as published by |
|
# the Free Software Foundation; either version 2 of the License, or |
|
# (at your option) any later version. |
|
# |
|
# LON-CAPA is distributed in the hope that it will be useful, |
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
# GNU General Public License for more details. |
|
# |
|
# You should have received a copy of the GNU General Public License |
|
# along with LON-CAPA; if not, write to the Free Software |
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
# |
|
# /home/httpd/html/adm/gpl.txt |
|
# |
|
# http://www.lon-capa.org/ |
|
# |
|
# YEAR=2001 |
|
# 04/14/2001, 04/16/2001 Scott Harrison |
|
# |
|
# YEAR=2002 |
|
# 05/11/2002 Scott Harrison |
|
# |
|
### |
|
|
# This script goes through a LON-CAPA resource |
# This script goes through a LON-CAPA resource |
# directory and gathers metadata. |
# directory and gathers metadata. |
# The metadata is entered into a SQL database. |
# The metadata is entered into a SQL database. |
|
|
|
use lib '/home/httpd/lib/perl/'; |
|
use LONCAPA::Configuration; |
|
|
use IO::File; |
use IO::File; |
use HTML::TokeParser; |
use HTML::TokeParser; |
|
use DBI; |
|
use GDBM_File; |
|
|
my @metalist; |
my @metalist; |
|
|
|
|
|
# ----------------------------------------------------- Un-Escape Special Chars |
|
|
|
# Decode %XX escape sequences (two hex digits, either case) back into the
# single characters they stand for.
#
# Parameter: the escaped string.
# Returns:   the decoded string; an undefined argument is returned unchanged.
sub unescape {
    my $str = shift;
    # Nothing to decode; also avoids running s/// on an undefined value.
    return $str unless defined $str;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $str;
}
|
|
|
# -------------------------------------------------------- Escape Special Chars |
|
|
|
# Replace every non-word character (anything matched by \W) with a percent
# sign followed by its two-digit lowercase hexadecimal code, as produced by
# unpack('H2', ...).
#
# Parameter: the raw string.
# Returns:   the escaped string; an undefined argument is returned unchanged.
sub escape {
    my $str = shift;
    # Nothing to encode; also avoids running s/// on an undefined value.
    return $str unless defined $str;
    $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
    return $str;
}
|
|
|
|
|
# ------------------------------------------- Code to evaluate dynamic metadata |
|
|
|
# Evaluate the dynamic (usage/evaluation) metadata of one resource.
#
# Parameter: the resource URL, optionally ending in ".meta".
# Returns:   currently nothing -- the routine is deliberately switched off by
#            the early return below.  With that return removed it would
#            return a hash mapping each category found in the author's
#            nohist_resevaldata.db to its aggregated value.
#
# The dormant code relies on &declutter and &propath, which are defined
# outside this block.
sub dynamicmeta {
    #
    # Do nothing for now ...
    #
    return;
    #
    # ..., but stuff below already works
    #
    my $url = &declutter(shift);
    $url =~ s/\.meta$//;
    my %returnhash = ();
    my ($adomain, $aauthor) = ($url =~ /^(\w+)\/(\w+)\//);
    my $prodir = &propath($adomain, $aauthor);

    # Lexical, so the tied database handle cannot leak into package scope.
    my %evaldata;
    if (tie(%evaldata, 'GDBM_File',
            $prodir.'/nohist_resevaldata.db', &GDBM_WRCREAT(), 0640)) {
        my %sum = ();
        my %cnt = ();
        # How the entries of each category are combined:
        #   add - summed            avg - arithmetic mean (2 decimals)
        #   app - joined by <hr>    cnt - number of entries
        my %listitems = ('count'      => 'add',
                         'course'     => 'add',
                         'avetries'   => 'avg',
                         'stdno'      => 'add',
                         'difficulty' => 'avg',
                         'clear'      => 'avg',
                         'technical'  => 'avg',
                         'helpful'    => 'avg',
                         'correct'    => 'avg',
                         'depth'      => 'avg',
                         'comments'   => 'app',
                         'usage'      => 'cnt'
                         );

        # Matches unescaped keys that end in ___<url>___<category>.
        my $regexp = '___'.quotemeta($url).'___([a-z]+)$';

        foreach (keys %evaldata) {
            my $key = &unescape($_);
            if ($key =~ /$regexp/) {
                my $ctype = $1;
                $cnt{$ctype}++;
                unless ($listitems{$ctype} eq 'app') {
                    # Numeric categories: accumulate a running total.
                    if (defined($sum{$ctype})) {
                        $sum{$ctype} += $evaldata{$_};
                    } else {
                        $sum{$ctype} = $evaldata{$_};
                    }
                } else {
                    # Text categories: append non-empty entries, separated
                    # by a horizontal rule.
                    if (defined($sum{$ctype})) {
                        if ($evaldata{$_}) {
                            $sum{$ctype} .= '<hr>'.$evaldata{$_};
                        }
                    } else {
                        $sum{$ctype} = ''.$evaldata{$_};
                    }
                }
                # Individual 'count' records are removed here; their total
                # is written back as a single record further down.
                if ($ctype eq 'count') {
                    delete($evaldata{$_});
                }
            }
        }

        foreach (keys %cnt) {
            if ($listitems{$_} eq 'avg') {
                # Mean, rounded to two decimal places.
                $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
            } elsif ($listitems{$_} eq 'cnt') {
                $returnhash{$_} = $cnt{$_};
            } else {
                $returnhash{$_} = $sum{$_};
            }
        }

        # Store the consolidated count under a fresh key built from the
        # process id and the current time.
        if ($returnhash{'count'}) {
            my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
            $evaldata{$newkey} = $returnhash{'count'};
        }
        untie(%evaldata);
    }
    return %returnhash;
}
|
|
# ----------------- Code to enable 'find' subroutine listing of the .meta files |
# ----------------- Code to enable 'find' subroutine listing of the .meta files |
require "find.pl"; |
require "find.pl"; |
# Callback handed to find() from find.pl.  Appends "$dir/$_" to @metalist
# when the current entry $_
#   - can be lstat'ed and is a plain file,
#   - has a name ending in ".meta", and
#   - does not look like <name>.<digits>.<ext>.meta (presumably the metadata
#     of archived, numbered versions -- TODO confirm).
# $dir is not set in this block; presumably find.pl provides it as the
# directory currently being traversed.
sub wanted {
    # The list assignment is false when lstat fails; it also fills the
    # `_` stat cache that the -f test below reuses.
    return unless (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_));
    return unless -f _;
    return unless /^.*\.meta$/;
    return if /^.+\.\d+\.[^\.]+\.meta$/;
    push(@metalist,"$dir/$_");
}
|
|
# ------------------------------------ Read httpd access.conf and get variables |
# --------------- Read loncapa_apache.conf and loncapa.conf and get variables |
open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; |
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); |
|
my %perlvar=%{$perlvarref}; |
|
undef $perlvarref; # remove since sensitive and not needed |
|
delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed |
|
|
while ($configline=<CONFIG>) { |
# ------------------------------------- Only run if machine is a library server |
if ($configline =~ /PerlSetVar/) { |
exit unless $perlvar{'lonRole'} eq 'library'; |
my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); |
|
chomp($varvalue); |
|
$perlvar{$varname}=$varvalue; |
|
} |
|
} |
|
close(CONFIG); |
|
|
|
my $dbh; |
my $dbh; |
# ------------------------------------- Make sure that database can be accessed |
# ------------------------------------- Make sure that database can be accessed |
Line 42 my $dbh;
|
Line 177 my $dbh;
|
print "Cannot connect to database!\n"; |
print "Cannot connect to database!\n"; |
exit; |
exit; |
} |
} |
|
my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (". |
|
"title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ". |
|
"version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ". |
|
"creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ". |
|
"copyright TEXT, FULLTEXT idx_title (title), ". |
|
"FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ". |
|
"FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ". |
|
"FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ". |
|
"FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ". |
|
"FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ". |
|
"FULLTEXT idx_copyright (copyright)) TYPE=MYISAM"; |
|
# It would sure be nice to have some logging mechanism. |
|
$dbh->do($make_metadata_table); |
} |
} |
|
|
# ------------------------------------------------------------- get .meta files |
# ------------------------------------------------------------- get .meta files |
Line 55 foreach my $user (@homeusers) {
|
Line 203 foreach my $user (@homeusers) {
|
} |
} |
|
|
# -- process each file to get metadata and put into search catalog SQL database |
# -- process each file to get metadata and put into search catalog SQL database |
|
# Also, check to see if already there. |
|
# I could just delete (without searching first), but this works for now. |
# Statements are prepared once and reused for every file.  Bound
# placeholders keep quotes or backslashes in the metadata from breaking (or
# injecting into) the SQL.
#
# The delete uses an exact, case-sensitive comparison.  A LIKE pattern would
# treat every "_" and "%" in the URL as a wildcard and could remove rows
# that belong to other resources.
my $delete_sth = $dbh->prepare('delete from metadata where binary url = ?');
my $insert_sth = $dbh->prepare('insert into metadata values ('.
                               join(',', ('?') x 14).')');

foreach my $m (@metalist) {
    my $ref = &metadata($m);

    # Catalogue URL of the resource: /res/<decluttered path> without .meta
    my $m2 = '/res/'.&declutter($m);
    $m2 =~ s/\.meta$//;
    &dynamicmeta($m2);

    # Drop any existing entry for this URL; a delete that matches nothing
    # is harmless, so no prior select is needed.
    $delete_sth->execute($m2);

    # Column order follows the metadata table definition.
    my @row = (
        delete($ref->{'title'}),
        delete($ref->{'author'}),
        delete($ref->{'subject'}),
        $m2,
        delete($ref->{'keywords'}),
        'current',
        delete($ref->{'notes'}),
        delete($ref->{'abstract'}),
        delete($ref->{'mime'}),
        delete($ref->{'language'}),
        sqltime(delete($ref->{'creationdate'})),
        sqltime(delete($ref->{'lastrevisiondate'})),
        delete($ref->{'owner'}),
        delete($ref->{'copyright'}),
    );

    # Missing fields are stored as empty strings, not NULL.
    $insert_sth->execute(map { defined($_) ? $_ : '' } @row);
}
|
|
Line 171 sub propath {
|
Line 331 sub propath {
|
my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; |
my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; |
return $proname; |
return $proname; |
} |
} |
|
|
|
# ---------------------------- convert 'time' format into a datetime sql format |
|
# Convert an epoch time (seconds, as returned by time()) into an SQL
# DATETIME string "YYYY-MM-DD HH:MM:SS" in the local time zone.
#
# Parameter: epoch seconds; an undefined value is treated as 0.
# Returns:   the formatted, zero-padded date/time string.
sub sqltime {
    my $epoch = defined($_[0]) ? $_[0] : 0;
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($epoch);
    # localtime reports the month as 0-11 and the year as an offset from 1900.
    return sprintf('%04d-%02d-%02d %02d:%02d:%02d',
                   $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}