File:  [LON-CAPA] / loncom / metadata_database / searchcat.pl
Revision 1.1: download - view: text, annotated - select for diffs
Sat Apr 14 18:24:54 2001 UTC (23 years, 1 month ago) by harris41
Branches: MAIN
CVS tags: HEAD
a batch script for synchronizing the contents of a library server with
a SQL metadata database -Scott

    1: #!/usr/bin/perl
    2: # The LearningOnline Network
    3: # searchcat.pl "Search Catalog" batch script
    4: 
    5: # 04/14/2001 Scott Harrison
    6: 
    7: # This script goes through a LON-CAPA resource
    8: # directory and gathers metadata.
    9: # The metadata is entered into a SQL database.
   10: 
   11: use strict;
   12: 
   13: use IO::File;
   14: use HTML::TokeParser;
   15: 
   16: my @metalist;
   17: # ----------------- Code to enable 'find' subroutine listing of the .meta files
   18: require "find.pl";
   19: sub wanted {
   20:     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
   21:     -f _ &&
   22:     /^.*\.meta$/ &&
   23:     push(@metalist,"$dir/$_");
   24: }
   25: 
   26: # ------------------------------------ Read httpd access.conf and get variables
   27: open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";
   28: 
   29: while ($configline=<CONFIG>) {
   30:     if ($configline =~ /PerlSetVar/) {
   31: 	my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
   32:         chomp($varvalue);
   33:         $perlvar{$varname}=$varvalue;
   34:     }
   35: }
   36: close(CONFIG);
   37: 
   38: # ------------------------------------- Make sure that database can be accessed
   39: {
   40:     my $dbh;
   41:     unless (
   42: 	    $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
   43: 	    ) { 
   44: 	print "Cannot connect to database!\n";
   45: 	exit;
   46:     }
   47: }
   48: 
   49: # ------------------------------------------------------------- get .meta files
   50: # need to actually loop over existing users here.. will fix soon
   51: &find("$perlvar{'lonDocRoot'}/res");
   52: 
   53: # -- process each file to get metadata and put into search catalog SQL database
   54: foreach my $m (@metalist) {
   55:     my $ref=&metadata($m);
   56:     my $sth=$dbh->prepare('insert into metadata values ('.
   57: 			  delete($ref->{'title'}),
   58: 			  delete($ref->{'author'}).','.
   59: 			  delete($ref->{'subject'}).','.
   60: 			  delete($ref->{'url'}).','.
   61: 			  delete($ref->{'keywords'}).','.
   62: 			  delete($ref->{'version'}).','.
   63: 			  delete($ref->{'notes'}).','.
   64: 			  delete($ref->{'abstract'}).','.
   65: 			  delete($ref->{'mime'}).','.
   66: 			  delete($ref->{'language'}).','.
   67: 			  delete($ref->{'creationdate'}).','.
   68: 			  delete($ref->{'lastrevisiondate'}).','.
   69: 			  delete($ref->{'owner'}).','.
   70: 			  delete($ref->{'copyright'}).
   71: 			  ')';
   72:     $sth->execute();
   73: }
   74: 
   75: # ----------------------------------------------------------- Clean up database
   76: # Need to, perhaps, remove stale SQL database records.
   77: # ... not yet implemented
   78: 
   79: # --------------------------------------------------- Close database connection
   80: $dbh->disconnect;
   81: 
   82: # ---------------------------------------------------------------- Get metadata
   83: # significantly altered from subroutine present in lonnet
   84: sub metadata {
   85:     my ($uri,$what)=@_;
   86:     my %metacache;
   87:     $uri=&declutter($uri);
   88:     my $filename=$uri;
   89:     $uri=~s/\.meta$//;
   90:     $uri='';
   91:     unless ($metacache{$uri.'keys'}) {
   92:         unless ($filename=~/\.meta$/) { $filename.='.meta'; }
   93: 	my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
   94:         my $parser=HTML::TokeParser->new(\$metastring);
   95:         my $token;
   96:         while ($token=$parser->get_token) {
   97:            if ($token->[0] eq 'S') {
   98: 	      my $entry=$token->[1];
   99:               my $unikey=$entry;
  100:               if (defined($token->[2]->{'part'})) { 
  101:                  $unikey.='_'.$token->[2]->{'part'}; 
  102: 	      }
  103:               if (defined($token->[2]->{'name'})) { 
  104:                  $unikey.='_'.$token->[2]->{'name'}; 
  105: 	      }
  106:               if ($metacache{$uri.'keys'}) {
  107:                  $metacache{$uri.'keys'}.=','.$unikey;
  108:               } else {
  109:                  $metacache{$uri.'keys'}=$unikey;
  110: 	      }
  111:               map {
  112: 		  $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
  113:               } @{$token->[3]};
  114:               unless (
  115:                  $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
  116: 		      ) { $metacache{$uri.''.$unikey}=
  117: 			      $metacache{$uri.''.$unikey.'.default'};
  118: 		      }
  119:           }
  120:        }
  121:     }
  122:     return \%metacache;
  123: }
  124: 
  125: # ------------------------------------------------------------ Serves up a file
  126: # returns either the contents of the file or a -1
  127: sub getfile {
  128:   my $file=shift;
  129:   if (! -e $file ) { return -1; };
  130:   my $fh=IO::File->new($file);
  131:   my $a='';
  132:   while (<$fh>) { $a .=$_; }
  133:   return $a
  134: }
  135: 
  136: # ------------------------------------------------------------- Declutters URLs
  137: sub declutter {
  138:     my $thisfn=shift;
  139:     $thisfn=~s/^$perlvar{'lonDocRoot'}//;
  140:     $thisfn=~s/^\///;
  141:     $thisfn=~s/^res\///;
  142:     return $thisfn;
  143: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>