Diff for /loncom/metadata_database/searchcat.pl between versions 1.3 and 1.23

version 1.3, 2001/04/16 12:07:36 version 1.23, 2002/10/08 18:45:33
Line 1 Line 1
 #!/usr/bin/perl  #!/usr/bin/perl
 # The LearningOnline Network  # The LearningOnline Network
 # searchcat.pl "Search Catalog" batch script  # searchcat.pl "Search Catalog" batch script
   #
 # 04/14/2001 Scott Harrison  # $Id$
   #
   # Copyright Michigan State University Board of Trustees
   #
   # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   #
   # LON-CAPA is free software; you can redistribute it and/or modify
   # it under the terms of the GNU General Public License as published by
   # the Free Software Foundation; either version 2 of the License, or
   # (at your option) any later version.
   #
   # LON-CAPA is distributed in the hope that it will be useful,
   # but WITHOUT ANY WARRANTY; without even the implied warranty of
   # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   # GNU General Public License for more details.
   #
   # You should have received a copy of the GNU General Public License
   # along with LON-CAPA; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
   # /home/httpd/html/adm/gpl.txt
   #
   # http://www.lon-capa.org/
   #
   # YEAR=2001
   # 04/14/2001, 04/16/2001 Scott Harrison
   #
   # YEAR=2002
   # 05/11/2002 Scott Harrison
   #
   ###
   
 # This script goes through a LON-CAPA resource  # This script goes through a LON-CAPA resource
 # directory and gathers metadata.  # directory and gathers metadata.
 # The metadata is entered into a SQL database.  # The metadata is entered into a SQL database.
   
   use lib '/home/httpd/lib/perl/';
   use LONCAPA::Configuration;
   
 use IO::File;  use IO::File;
 use HTML::TokeParser;  use HTML::TokeParser;
   use DBI;
   use GDBM_File;
   
 my @metalist;  my @metalist;
   
   
   # ----------------------------------------------------- Un-Escape Special Chars
   
   sub unescape {
       my $str=shift;
       $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
       return $str;
   }
   
   # -------------------------------------------------------- Escape Special Chars
   
   sub escape {
       my $str=shift;
       $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
       return $str;
   }
   
   
   # ------------------------------------------- Code to evaluate dynamic metadata
   
   sub dynamicmeta {
   # Summarize the evaluation/usage records stored for one resource.
   #
   # Argument: a resource path; it is passed through &declutter (defined
   #           elsewhere in this file) and a trailing ".meta" is removed.
   # Returns:  as written today, nothing -- the bare return just below
   #           exits before any work is done.  The parked code underneath
   #           would return a hash mapping each record type to its summary.
   #
   # Do nothing for now ...
   #
       return;
   #
   # ..., but stuff below already works
   #
       my $url=&declutter(shift);
       $url=~s/\.meta$//;
       my %returnhash=();
       # The first two path components are taken as domain and author and
       # handed to &propath to locate the directory holding the database.
       my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
       my $prodir=&propath($adomain,$aauthor);
       # NOTE(review): %evaldata is not declared with "my" in this sub, so it
       # is a package variable unless it is declared elsewhere -- confirm.
       if (tie(%evaldata,'GDBM_File',
               $prodir.'/nohist_resevaldata.db',&GDBM_WRCREAT(),0640)) {
          # %sum accumulates values per record type, %cnt counts the records
          # seen per record type.
          my %sum=();
          my %cnt=();
          # How each record type is summarized:
          #   add - running total          avg - mean of the values
          #   app - values joined by <hr>  cnt - number of records
          my %listitems=('count'        => 'add',
                         'course'       => 'add',
                         'avetries'     => 'avg',
                         'stdno'        => 'add',
                         'difficulty'   => 'avg',
                         'clear'        => 'avg',
                         'technical'    => 'avg',
                         'helpful'      => 'avg',
                         'correct'      => 'avg',
                         'depth'        => 'avg',
                         'comments'     => 'app',
                         'usage'        => 'cnt'
                         );
          # Backslash-escape every non-word character so the URL is matched
          # literally, then match keys ending in ___<url>___<type>, where
          # <type> is lowercase letters and is captured as the record type.
          my $regexp=$url;
          $regexp=~s/(\W)/\\$1/g;
          $regexp='___'.$regexp.'___([a-z]+)$';
          foreach (keys %evaldata) {
    # Keys are compared in unescaped form; $_ remains the stored key.
    my $key=&unescape($_);
    if ($key=~/$regexp/) {
       my $ctype=$1;
               if (defined($cnt{$ctype})) { 
                  $cnt{$ctype}++; 
               } else { 
                  $cnt{$ctype}=1; 
               }
               # Everything except 'app' types is totalled numerically.
               unless ($listitems{$ctype} eq 'app') {
                  if (defined($sum{$ctype})) {
                     $sum{$ctype}+=$evaldata{$_};
             } else {
                     $sum{$ctype}=$evaldata{$_};
          }
               } else {
                  # 'app' types: append true values, separated by <hr>.
                  if (defined($sum{$ctype})) {
                     if ($evaldata{$_}) {
                        $sum{$ctype}.='<hr>'.$evaldata{$_};
             }
           } else {
                $sum{$ctype}=''.$evaldata{$_};
          }
       }
       # Individual 'count' records are removed from the database once they
       # have been added to the total; one consolidated record is written
       # back further down.
       if ($ctype eq 'count') {
          delete($evaldata{$_});
               }
    }
         }
         # Turn the accumulated totals into the values to return.
         foreach (keys %cnt) {
            if ($listitems{$_} eq 'avg') {
        # Mean, rounded to two decimals (add 0.5 then truncate with int).
        $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
            } elsif ($listitems{$_} eq 'cnt') {
                $returnhash{$_}=$cnt{$_};
            } else {
                $returnhash{$_}=$sum{$_};
            }
        }
        # Store the combined count under a fresh key built from the process
        # id, the current time, the literal "searchcat" and the escaped URL.
        if ($returnhash{'count'}) {
            my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
            $evaldata{$newkey}=$returnhash{'count'};
        }
        untie(%evaldata);
      }
      return %returnhash;
   }
     
 # ----------------- Code to enable 'find' subroutine listing of the .meta files  # ----------------- Code to enable 'find' subroutine listing of the .meta files
 require "find.pl";  require "find.pl";
 sub wanted {  sub wanted {
     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&      (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
     -f _ &&      -f _ &&
     /^.*\.meta$/ &&      /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
     push(@metalist,"$dir/$_");      push(@metalist,"$dir/$_");
 }  }
   
 # ------------------------------------ Read httpd access.conf and get variables  # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables
 open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";  my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
   my %perlvar=%{$perlvarref};
   undef $perlvarref; # remove since sensitive and not needed
   delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
   
 while ($configline=<CONFIG>) {  # ------------------------------------- Only run if machine is a library server
     if ($configline =~ /PerlSetVar/) {  exit unless $perlvar{'lonRole'} eq 'library';
  my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);  
         chomp($varvalue);  
         $perlvar{$varname}=$varvalue;  
     }  
 }  
 close(CONFIG);  
   
 my $dbh;  my $dbh;
 # ------------------------------------- Make sure that database can be accessed  # ------------------------------------- Make sure that database can be accessed
Line 42  my $dbh; Line 177  my $dbh;
  print "Cannot connect to database!\n";   print "Cannot connect to database!\n";
  exit;   exit;
     }      }
       my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
           "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".
           "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".
           "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ".
           "copyright TEXT, FULLTEXT idx_title (title), ".
           "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ".
           "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ".
           "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ".
           "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ".
           "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ".
           "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM";
       # It would sure be nice to have some logging mechanism.
       $dbh->do($make_metadata_table);
 }  }
   
 # ------------------------------------------------------------- get .meta files  # ------------------------------------------------------------- get .meta files
Line 55  foreach my $user (@homeusers) { Line 203  foreach my $user (@homeusers) {
 }  }
   
 # -- process each file to get metadata and put into search catalog SQL database  # -- process each file to get metadata and put into search catalog SQL database
   # Also, check to see if already there.
   # I could just delete (without searching first), but this works for now.
 foreach my $m (@metalist) {  foreach my $m (@metalist) {
     my $ref=&metadata($m);      my $ref=&metadata($m);
     my $sth=$dbh->prepare('insert into metadata values ('.      my $m2='/res/'.&declutter($m);
   delete($ref->{'title'}),      $m2=~s/\.meta$//;
   delete($ref->{'author'}).','.      &dynamicmeta($m2);
   delete($ref->{'subject'}).','.      my $q2="select * from metadata where url like binary '$m2'";
   delete($ref->{'url'}).','.      my $sth = $dbh->prepare($q2);
   delete($ref->{'keywords'}).','.      $sth->execute();
   delete($ref->{'version'}).','.      my $r1=$sth->fetchall_arrayref;
   delete($ref->{'notes'}).','.      if (@$r1) {
   delete($ref->{'abstract'}).','.   $sth=$dbh->prepare("delete from metadata where url like binary '$m2'");
   delete($ref->{'mime'}).','.          $sth->execute();
   delete($ref->{'language'}).','.      }
   delete($ref->{'creationdate'}).','.      $sth=$dbh->prepare('insert into metadata values ('.
   delete($ref->{'lastrevisiondate'}).','.    '"'.delete($ref->{'title'}).'"'.','.
   delete($ref->{'owner'}).','.    '"'.delete($ref->{'author'}).'"'.','.
   delete($ref->{'copyright'}).    '"'.delete($ref->{'subject'}).'"'.','.
   ')';    '"'.$m2.'"'.','.
     '"'.delete($ref->{'keywords'}).'"'.','.
     '"'.'current'.'"'.','.
     '"'.delete($ref->{'notes'}).'"'.','.
     '"'.delete($ref->{'abstract'}).'"'.','.
     '"'.delete($ref->{'mime'}).'"'.','.
     '"'.delete($ref->{'language'}).'"'.','.
     '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.
     '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.
     '"'.delete($ref->{'owner'}).'"'.','.
     '"'.delete($ref->{'copyright'}).'"'.')');
     $sth->execute();      $sth->execute();
 }  }
   
Line 171  sub propath { Line 331  sub propath {
     my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";      my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
     return $proname;      return $proname;
 }   } 
   
   # ---------------------------- convert 'time' format into a datetime sql format
   sub sqltime {
       my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
    localtime(@_[0]);
       $mon++; $year+=1900;
       return "$year-$mon-$mday $hour:$min:$sec";
   }

Removed from v.1.3  
changed lines
  Added in v.1.23


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>