Diff for /loncom/metadata_database/searchcat.pl between versions 1.7 and 1.29

version 1.7 (2001/04/16 12:26:36) to version 1.29 (2003/02/03 13:42:16)
 #!/usr/bin/perl
 # The LearningOnline Network
 # searchcat.pl "Search Catalog" batch script
   #
   # $Id$
   #
   # Copyright Michigan State University Board of Trustees
   #
   # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   #
   # LON-CAPA is free software; you can redistribute it and/or modify
   # it under the terms of the GNU General Public License as published by
   # the Free Software Foundation; either version 2 of the License, or
   # (at your option) any later version.
   #
   # LON-CAPA is distributed in the hope that it will be useful,
   # but WITHOUT ANY WARRANTY; without even the implied warranty of
   # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   # GNU General Public License for more details.
   #
   # You should have received a copy of the GNU General Public License
   # along with LON-CAPA; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
   # /home/httpd/html/adm/gpl.txt
   #
   # http://www.lon-capa.org/
   #
   # YEAR=2001
   # 04/14/2001, 04/16/2001 Scott Harrison
   #
   # YEAR=2002
   # 05/11/2002 Scott Harrison
   #
   # YEAR=2003
   # Scott Harrison
   #
   ###
   
   =pod
   
   =head1 NAME
   
   B<searchcat.pl> - put authoritative filesystem data into sql database.
   
   =head1 SYNOPSIS
   
   Ordinarily this script is to be called from a loncapa cron job
   (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
   filesystem installation location: F</etc/cron.d/loncapa>).
   
   Here is the cron job entry.
   
   C<# Repopulate and refresh the metadata database used for the search catalog.>
   
   C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
   
   This script only allows itself to be run as the user C<www>.
   
   =head1 DESCRIPTION
   
   This script goes through a loncapa resource directory and gathers metadata.
   The metadata is entered into a SQL database.
   
   This script also does general database maintenance such as reformatting
   the C<loncapa:metadata> table if it is deprecated.
   
   This script also builds dynamic temporal metadata and stores this inside
   a F<nohist_resevaldata.db> database file.
   
   This script is playing an increasingly important role for a loncapa
   library server.  The proper operation of this script is critical for a smooth
   and correct user experience.
   
=cut

--- Removed from v.1.7 (superseded by the v.1.29 code that follows) ---

# 04/14/2001 Scott Harrison

# This script goes through a LON-CAPA resource
# directory and gathers metadata.
# The metadata is entered into a SQL database.

use IO::File;
use HTML::TokeParser;
use DBI;

my @metalist;
# ----------------- Code to enable 'find' subroutine listing of the .meta files
require "find.pl";
sub wanted {
    (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
    -f _ &&
    /^.*\.meta$/ &&
    push(@metalist,"$dir/$_");
}

# ------------------------------------ Read httpd access.conf and get variables
open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";

while ($configline=<CONFIG>) {
    if ($configline =~ /PerlSetVar/) {
        my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
        chomp($varvalue);
        $perlvar{$varname}=$varvalue;
    }
}
close(CONFIG);

my $dbh;
# ------------------------------------- Make sure that database can be accessed
{
    unless (
        $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
    ) {
        print "Cannot connect to database!\n";
        exit;
    }
}

# ------------------------------------------------------------- get .meta files
opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
my @homeusers=grep
          {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")}
          grep {!/^\.\.?$/} readdir(RESOURCES);
closedir RESOURCES;
foreach my $user (@homeusers) {
    &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
}

# -- process each file to get metadata and put into search catalog SQL database
foreach my $m (@metalist) {
    my $ref=&metadata($m);
    my $sth=$dbh->prepare('insert into metadata values ('.
        delete($ref->{'title'}).','.
        delete($ref->{'author'}).','.
        delete($ref->{'subject'}).','.
        delete($ref->{'url'}).','.
        delete($ref->{'keywords'}).','.
        delete($ref->{'version'}).','.
        delete($ref->{'notes'}).','.
        delete($ref->{'abstract'}).','.
        delete($ref->{'mime'}).','.
        delete($ref->{'language'}).','.
        delete($ref->{'creationdate'}).','.
        delete($ref->{'lastrevisiondate'}).','.
        delete($ref->{'owner'}).','.
        delete($ref->{'copyright'}));
    $sth->execute();
}

# ----------------------------------------------------------- Clean up database
# Need to, perhaps, remove stale SQL database records.
# ... not yet implemented

--- Added in v.1.29 ---

# ========================================================== Setting things up.

# ------------------------------------------------------  Use external modules.

use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;

use IO::File;
use HTML::TokeParser;
use DBI;
use GDBM_File;
use POSIX qw(strftime mktime);

# ----------------- Code to enable 'find' subroutine listing of the .meta files
use File::Find;

# List of .meta files (used on a per-user basis).
my @metalist;

# ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.
my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar = %{$perlvarref};
undef($perlvarref); # Remove since sensitive and not needed.
delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.

# ------------------------------------- Only run if machine is a library server
if ($perlvar{'lonRole'} ne 'library')
  {
    exit(0);
  }

# ------------------------------ Make sure this process is running as user=www.
my $wwwid = getpwnam('www');
if ($wwwid != $<)
  {
    $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
           "mailto $emailto -s '$subj' > /dev/null");
    exit(1);
  }

# ------------------------------------------------------ Initialize log output.
open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");

my $dbh; # Database object reference handle.

# ----------------------------- Verify connection to loncapa:metadata database.
unless (
        $dbh = DBI->connect('DBI:mysql:loncapa','www',
                            $perlvar{'lonSqlAccess'},
                            { RaiseError => 0,PrintError => 0})
        )
  {
    print(LOG '**** ERROR **** Cannot connect to database!'."\n");
    exit(0);
  }

# ------------------------------ Create loncapa:metadata table if non-existent.
my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
    'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
    'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
    'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
    'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
    'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
    'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
    'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
    'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
    'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
    'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';

$dbh->do($make_metadata_table); # Generate the table.

# ----------------------------- Verify format of the loncapa:metadata database.
#                               (delete and recreate database if necessary).

# Make a positive control for verifying table structure.
my $make_metadata_table_CONTROL = $make_metadata_table;
   $make_metadata_table_CONTROL =~
       s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
   
   $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
   $dbh->do($make_metadata_table_CONTROL);
   
   my $table_description; # selectall reference to the table description.
   
   my $CONTROL_table_string; # What the table description should look like.
   my $table_string; # What the table description does look like.
   
   # Calculate the CONTROL table description (what it should be).
   $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
   foreach my $table_row (@{$table_description})
     {
       $CONTROL_table_string .= join(',',@{$table_row})."\n";
     }
   
   # Calculate the current table description (what it currently looks like).
   $table_description = $dbh->selectall_arrayref('describe metadata');
   foreach my $table_row (@{$table_description})
     {
       $table_string .= join(',',@{$table_row})."\n";
     }
   
   if ($table_string ne $CONTROL_table_string)
     {
       # Log this incident.
       print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
     '.'."\n");
       # Delete the table.
       $dbh->do('DROP TABLE IF EXISTS metadata');
       # Generate the table.
       $dbh->do($make_metadata_table);
     }
   
   $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.
   
   # ----------------------------------------------- Set utilitysemaphore to zero.
   $dbh->do('UPDATE metadata SET utilitysemaphore = 0');
   
   # ========================================================= Main functionality.
   
   # - Determine home authors on this server based on resources dir and user tree.
   
   # RESOURCES: the resources directory (subdirs correspond to author usernames).
   opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
       (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
        and exit(0));
   
   # query_home_server_status will look for user home directories on this machine.
   my @homeusers =
       grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
       $perlvar{'lonDefDomain'}.'/'.$_)
     } grep {!/^\.\.?$/} readdir(RESOURCES);
   closedir(RESOURCES);
   
   unless (@homeusers)
     {
       print(LOG '=== No home users found on this server.'."\n");
     }
   
   # Consider each author individually.
   foreach my $user (@homeusers)
     {
       # Make a log entry.
       print(LOG "\n".'=== User: '.$user."\n\n");
   
       # Get filesystem path to this user's directory.
       my $user_directory =
    &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
   
       # Remove left-over db-files from a potentially crashed searchcat run.
       unlink($user_directory.'/nohist_new_resevaldata.db');
   
       # Cleanup the metalist array.
       undef(@metalist);
       @metalist = ();
   
       # This will add entries to the @metalist array.
       &File::Find::find(\&wanted,
         $perlvar{'lonDocRoot'}.'/res/'.
         $perlvar{'lonDefDomain'}.'/'.$user);
   
       # -- process file to get metadata and put into search catalog SQL database
       # Also, build and store dynamic metadata.
       # Also, delete record entries before refreshing.
       foreach my $m (@metalist)
         {
    # Log this action.
    print(LOG "- ".$m."\n");
   
    # Get metadata from the file.
    my $ref = get_metadata_from_file($m);
   
    # Make a datarecord identifier for this resource.
    my $m2 = '/res/'.declutter($m);
    $m2 =~ s/\.meta$//;
   
    # Build and store dynamic metadata inside nohist_resevaldata.db.
    build_on_the_fly_dynamic_metadata($m2);
   
    # Delete record if it already exists.
    my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
    my $sth = $dbh->prepare($q2);
    $sth->execute();
    my $r1 = $sth->fetchall_arrayref;
    if (@$r1)
     {
       $sth = 
    $dbh->prepare('delete from metadata where url like binary '.
         "'".$m2."'");
       $sth->execute();
     }
   
    # Add new/replacement record into the loncapa:metadata table.
    $sth = $dbh->prepare('insert into metadata values ('.
        '"'.delete($ref->{'title'}).'"'.','.
        '"'.delete($ref->{'author'}).'"'.','.
        '"'.delete($ref->{'subject'}).'"'.','.
        '"'.$m2.'"'.','.
        '"'.delete($ref->{'keywords'}).'"'.','.
        '"'.'current'.'"'.','.
        '"'.delete($ref->{'notes'}).'"'.','.
        '"'.delete($ref->{'abstract'}).'"'.','.
        '"'.delete($ref->{'mime'}).'"'.','.
        '"'.delete($ref->{'language'}).'"'.','.
        '"'.sql_formatted_time(
          delete($ref->{'creationdate'})).'"'.','.
        '"'.sql_formatted_time(
      delete($ref->{'lastrevisiondate'})).'"'.','.
        '"'.delete($ref->{'owner'}).'"'.','.
        '"'.delete($ref->{'copyright'}).'"'.','.
        '1'.')');
    $sth->execute();
         }
   
   # ----------------------- Clean up database, remove stale SQL database records.
       $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
   
   # -------------------------------------------------- Copy over the new db-files
       system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
            $user_directory.'/nohist_resevaldata.db');
     }
   
# --------------------------------------------------- Close database connection
$dbh->disconnect;
   print LOG "\n==== Searchcat completed ".localtime()." ====\n";
   close(LOG);
   exit(0);
   
   # ================================================================ Subroutines.
   
   =pod
   
   =head1 SUBROUTINES
   
   =cut
   
   =pod
   
B<unescape> - translate unstrange escaped syntax back to strange characters.
   
   =over 4
   
   Parameters:
   
   =item I<$str> - string with unweird characters.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - string with potentially weird characters.
   
   =back
   
   =cut
   
   sub unescape ($)
     {
       my $str = shift(@_);
       $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
       return($str);
     }
   
   =pod
   
   B<escape> - translate strange characters to unstrange escaped syntax.
   
   =over 4
   
   Parameters:
   
   =item I<$str> - string with potentially weird characters to unweird-ify.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - unweird-ified string.
   
   =back
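
A minimal round-trip sketch (the sample string below is hypothetical, not taken
from the script itself):

    my $escaped  = &escape('msu/smith/problem1.problem');
    # $escaped now holds 'msu%2fsmith%2fproblem1%2eproblem'
    my $original = &unescape($escaped); # back to 'msu/smith/problem1.problem'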
   
   =cut
   
   sub escape ($)
     {
       my $str = shift(@_);
       $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
       return($str);
     }
   
   =pod
   
   B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
   
   Dynamic metadata is stored in a nohist_resevaldata GDBM database.
This subroutine adjusts the 'count' value inside F<nohist_new_resevaldata.db>
and also updates F<nohist_new_resevaldata.db> with information from
F<nohist_resevaldata.db>.

It may need optimization, but since it only gets called once a week. . .

   =over 4
   
   Parameters:
   
   =item I<$url> - the filesystem path (url may be a misnomer...)
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<hash> - key-value table of dynamically evaluated metadata.
   
   =back
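
A hedged usage sketch (the resource path below is hypothetical); entries in the
GDBM file are keyed on an escaped string ending in the resource url, C<___>,
and a category type, which this subroutine matches and accumulates:

    my %dynamic = &build_on_the_fly_dynamic_metadata('/res/msu/smith/problem1.problem');
    print($dynamic{'count'}) if (exists($dynamic{'count'})); # accumulated access count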
   
   =cut
   
   sub build_on_the_fly_dynamic_metadata ($)
     {
    # Some elements in here may be non-obvious.
   
       # Need to compute the user's directory.
       my $url = &declutter(shift(@_));
       $url =~ s/\.meta$//;
       my %returnhash = ();
       my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
       my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
   
       # Attempt a GDBM database instantiation inside users directory and proceed.
       if ((tie(%evaldata,'GDBM_File',
               $user_directory.
        '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
           (tie(%newevaldata,'GDBM_File',
        $user_directory.
        '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
         {
    # For different variables, track the running sum and counts.
    my %sum = ();
    my %cnt = ();
   
    # Define computed items as a sum (add) or an average (avg) or a raw
    # count (cnt) or 'app'?
    my %listitems=('count'        => 'add',
          'course'       => 'add',
          'avetries'     => 'avg',
          'stdno'        => 'add',
          'difficulty'   => 'avg',
          'clear'        => 'avg',
          'technical'    => 'avg',
          'helpful'      => 'avg',
          'correct'      => 'avg',
          'depth'        => 'avg',
          'comments'     => 'app',
          'usage'        => 'cnt'
          );
   
    # Untaint the url and use as part of a regular expression.
    my $regexp = $url;
    $regexp =~ s/(\W)/\\$1/g;
    $regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs
   
    # Check existing nohist database for this url.
        # This is modifying the 'count' entries
        # and copying all other entries over.
    foreach (keys %evaldata)
     {
       my $key = &unescape($_);
       if ($key =~ /$regexp/) # If url-based entry exists.
         {
    my $ctype = $1; # Set to specific category type.
   
                # Do an increment for this category type.
                if (defined($cnt{$ctype}))
                  {
                    $cnt{$ctype}++;
                  }
                else
                  {
                    $cnt{$ctype} = 1;
                  }
                unless ($listitems{$ctype} eq 'app') # 'app' means append values.
                  {
                    # Increment the sum based on the evaluated data in the db.
                    if (defined($sum{$ctype}))
                      {
                        $sum{$ctype} += $evaldata{$_};
                      }
                    else
                      {
                        $sum{$ctype} = $evaldata{$_};
                      }
                  }
                else # 'app' mode, means to use '<hr />' as a separator
                  {
                    if (defined($sum{$ctype}))
                      {
                        if ($evaldata{$_})
                          {
                            $sum{$ctype} .= '<hr />'.$evaldata{$_};
                          }
                      }
                    else
                      {
                        $sum{$ctype} = ''.$evaldata{$_};
                      }
                  }
                if ($ctype ne 'count')
                  {
                    # This copies all data except 'count' attributes.
                    $newevaldata{$_} = $evaldata{$_};
                  }
              }
          }

        # The only other time this loop is useful is for the 'count' hash
        # element.
        foreach (keys %cnt)
          {
            if ($listitems{$_} eq 'avg')
              {
                $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
              }
            elsif ($listitems{$_} eq 'cnt')
              {
                $returnhash{$_} = $cnt{$_};
              }
            else
              {
                $returnhash{$_} = $sum{$_};
              }
          }

        # Store the accumulated 'count' under a fresh key in the new database.
        if ($returnhash{'count'})
          {
            my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
            $newevaldata{$newkey} = $returnhash{'count'};
          }

        untie(%evaldata); # Close/release the original nohist database.
        untie(%newevaldata); # Close/release the new nohist database.
      }
    return(%returnhash);
  }

--- Removed from v.1.7 (these subroutines were replaced by the v.1.29 subroutines below) ---

# ---------------------------------------------------------------- Get metadata
# significantly altered from subroutine present in lonnet
sub metadata {
    my ($uri,$what)=@_;
    my %metacache;
    $uri=&declutter($uri);
    my $filename=$uri;
    $uri=~s/\.meta$//;
    $uri='';
    unless ($metacache{$uri.'keys'}) {
        unless ($filename=~/\.meta$/) { $filename.='.meta'; }
        my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
        my $parser=HTML::TokeParser->new(\$metastring);
        my $token;
        while ($token=$parser->get_token) {
           if ($token->[0] eq 'S') {
              my $entry=$token->[1];
              my $unikey=$entry;
              if (defined($token->[2]->{'part'})) {
                 $unikey.='_'.$token->[2]->{'part'};
              }
              if (defined($token->[2]->{'name'})) {
                 $unikey.='_'.$token->[2]->{'name'};
              }
              if ($metacache{$uri.'keys'}) {
                 $metacache{$uri.'keys'}.=','.$unikey;
              } else {
                 $metacache{$uri.'keys'}=$unikey;
              }
              map {
                  $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
              } @{$token->[3]};
              unless (
                 $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
              ) {
                 $metacache{$uri.''.$unikey}=
                     $metacache{$uri.''.$unikey.'.default'};
              }
           }
        }
    }
    return \%metacache;
}

# ------------------------------------------------------------ Serves up a file
# returns either the contents of the file or a -1
sub getfile {
  my $file=shift;
  if (! -e $file ) { return -1; };
  my $fh=IO::File->new($file);
  my $a='';
  while (<$fh>) { $a .=$_; }
  return $a
}

# ------------------------------------------------------------- Declutters URLs
sub declutter {
    my $thisfn=shift;
    $thisfn=~s/^$perlvar{'lonDocRoot'}//;
    $thisfn=~s/^\///;
    $thisfn=~s/^res\///;
    return $thisfn;
}

# --------------------------------------- Is this the home server of an author?
# (copied from lond, modification of the return value)
sub ishome {
    my $author=shift;
    $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
    my ($udom,$uname)=split(/\//,$author);
    my $proname=propath($udom,$uname);
    if (-e $proname) {
        return 1;
    } else {
        return 0;
    }
}

# -------------------------------------------- Return path to profile directory
# (copied from lond)
sub propath {
    my ($udom,$uname)=@_;
    $udom=~s/\W//g;
    $uname=~s/\W//g;
    my $subdir=$uname.'__';
    $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
    my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
    return $proname;
}

--- Added in v.1.29 (continued) ---
   
   =pod
   
   B<wanted> - used by B<File::Find::find> subroutine.
   
   This evaluates whether a file is wanted, and pushes it onto the
   I<@metalist> array.  This subroutine was, for the most part, auto-generated
   by the B<find2perl> command.
   
   =over 4
   
   Parameters:
   
=item I<$_> - the name of the file currently being examined (set by B<File::Find>).
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<boolean> - true or false based on logical statement.
   
   =back
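
For example (hypothetical paths), F<problem1.problem.meta> is pushed onto
I<@metalist>, while an old versioned copy such as F<problem1.2.problem.meta>
is skipped:

    &File::Find::find(\&wanted,'/home/httpd/html/res/msu/smith'); # fills @metalist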
   
   =cut
   
   sub wanted ($)
     {
       (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
       -f $_ &&
       /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
       push(@metalist,$File::Find::dir.'/'.$_);
     }
   
   =pod
   
   B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
   
   I<Note that this is significantly altered from a subroutine present in lonnet.>
   
   =over 4
   
   Parameters:
   
=item I<$file> - a path to the file.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<hash reference> - a hash array (keys and values).
   
   =back
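
A usage sketch (the file path below is hypothetical):

    my $ref = &get_metadata_from_file('msu/smith/problem1.problem.meta');
    print($ref->{'title'}); # contents of the <title> tag, if present
    print($ref->{'keys'});  # comma-separated list of all parsed tag keys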
   
   =cut
   
   sub get_metadata_from_file ($)
     {
       my ($filename) = @_;
       my %metatable; # Used to store return value of hash-tabled metadata.
       $filename = &declutter($filename); # Remove non-identifying filesystem info
       my $uri = ''; # The URI is not relevant in this scenario.
       unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
         {
    $filename .= '.meta'; # Append a .meta suffix.
         }
       # Get the file contents.
       my $metadata_string =
    &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
   
       # Parse the file based on its XML tags.
       my $parser = HTML::TokeParser->new(\$metadata_string);
       my $token;
       while ($token = $parser->get_token) # Loop through tokens.
         {
    if ($token->[0] eq 'S') # If it is a start token.
     {
       my $entry = $token->[1];
       my $unikey = $entry; # A unique identifier for this xml tag key.
       if (defined($token->[2]->{'part'}))
         { 
    $unikey .= '_'.$token->[2]->{'part'}; 
         }
       if (defined($token->[2]->{'name'}))
         { 
    $unikey .= '_'.$token->[2]->{'name'}; 
         }
       # Append $unikey to metatable's keys entry.
       if ($metatable{$uri.'keys'})
         {
    $metatable{$uri.'keys'} .= ','.$unikey;
         }
       else
         {
    $metatable{$uri.'keys'} = $unikey;
         }
       # Insert contents into metatable entry for the unikey.
       foreach my $t3 (@{$token->[3]})
         {
            $metatable{$uri.''.$unikey.'.'.$t3} = $token->[2]->{$t3};
         }
       # If there was no text contained inside the tags, set = default.
       unless
         (
           $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
         )
         {
    $metatable{$uri.''.$unikey} =
       $metatable{$uri.''.$unikey.'.default'};
         }
     }
         }
       # Return with a key-value table of XML tags and their tag contents.
       return(\%metatable);
     }
   
   =pod
   
   B<get_file_contents> - returns either the contents of the file or a -1.
   
   =over 4
   
   Parameters:
   
=item I<$file> - a complete filesystem path to the file.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - file contents or a -1.
   
   =back
   
   =cut
   
   sub get_file_contents ($)
     {
       my $file = shift(@_);
   
       # If file does not exist, then return a -1 value.
       unless (-e $file)
         {
    return(-1);
         }
   
       # Read in file contents.
       my $file_handle = IO::File->new($file);
       my $file_contents = '';
       while (<$file_handle>)
         {
    $file_contents .= $_;
         }
   
       # Return file contents.
       return($file_contents);
     }
   
   =pod
   
   B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
   
   =over 4
   
   Parameters:
   
   =item I<$filesystem_path> - a complete filesystem path.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - remnants of the filesystem path (beginning portion removed).
   
   =back
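
For example (the resource path is hypothetical):

    my $decluttered =
        &declutter($perlvar{'lonDocRoot'}.'/res/msu/smith/problem1.problem.meta');
    # $decluttered is now 'msu/smith/problem1.problem.meta'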
   
   =cut
   
   sub declutter
     {
       my $filesystem_path = shift(@_);
   
       # Remove beginning portions of the filesystem path.
       $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
       $filesystem_path =~ s!^/!!;
       $filesystem_path =~ s!^res/!!;
   
       # Return what is remaining for the filesystem path.
       return($filesystem_path);
     }
   
   =pod
   
   B<query_home_server_status> - Is this the home server of an author's directory?
   
   =over 4
   
   Parameters:
   
   =item I<$author_filesystem_path> - directory path for a user.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<boolean> - 1 if true; 0 if false.
   
   =back
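
A usage sketch (domain and username are hypothetical):

    if (&query_home_server_status('/home/httpd/html/res/msu/smith'))
      {
        # user 'smith' in domain 'msu' has a home directory on this machine
      }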
   
   =cut
   
   sub query_home_server_status ($)
     {
       my $author_filesystem_path = shift(@_);
   
       # Remove beginning portion of this filesystem path.
       $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
   
       # Construct path to the author's ordinary user directory.
       my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
       my $user_directory_path = construct_path_to_user_directory($user_domain,
          $username);
   
       # Return status of whether the user directory path is defined.
       if (-e $user_directory_path)
         {
    return(1); # True.
         }
       else
         {
           return(0); # False.
         }
     }
   
   =pod
   
   B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
   
   =over 4
   
   Parameters:
   
   =item I<$user_domain> - the loncapa domain of the user.
   
   =item I<$username> - the unique username (user id) of the user.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - representing the path on the filesystem.
   
   =back
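
For example (domain and username are hypothetical):

    my $path = &construct_path_to_user_directory('msu','smith');
    # yields something like "$perlvar{'lonUsersDir'}/msu/s/m/i/smith"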
   
   =cut
   
   sub construct_path_to_user_directory ($$)
     {
       my ($user_domain,$username) = @_;
   
       # Untaint.
       $user_domain =~ s/\W//g;
       $username =~ s/\W//g;
   
       # Create three levels of sub-directoried filesystem path
       # based on the first three characters of the username.
       my $sub_filesystem_path = $username.'__';
       $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;
   
       # Use the sub-directoried levels and other variables to generate
       # the complete filesystem path.
       my $complete_filesystem_path =
    join('/',($perlvar{'lonUsersDir'},
     $user_domain,
     $sub_filesystem_path,
     $username));
   
       # Return the complete filesystem path.
       return($complete_filesystem_path);
     }
   
   =pod
   
   B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
   
   =over 4
   
   Parameters:
   
   =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - datetime sql formatted string.
   
   =back
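
A usage sketch (the resulting string follows the server's local timezone):

    my $sql_time = &sql_formatted_time(1044147435);
    # e.g. '2003-2-2 0:57:15' when the local timezone is UTC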
   
   =cut
   
   sub sql_formatted_time ($)
     {
       # Sanitize the time argument and convert to localtime array.
       my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
    localtime(&sanitize_time(shift(@_)));
   
       # Convert month from (0..11) to (1..12).
       $mon += 1;
   
       # Make the year compatible with A.D. specification.
       $year += 1900;
   
       # Return a date which is compatible with MySQL's "DATETIME" format.
       return(join('-',($year,$mon,$mday)).
      ' '.
      join(':',($hour,$min,$sec))
      );
     }
   
   
   # ==================================== The following two subroutines are needed
   #                 for accommodating incorrect time formats inside the metadata.
   
   =pod
   
   B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
   
   =over 4
   
   Parameters:
   
   =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<integer> - seconds since epoch.
   
   =back
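
This is how B<sanitize_time> calls it, for example:

    my $epochtime = &make_seconds_since_epoch('year' => 2001, 'month' => 4,
                                              'day' => 1, 'hours' => 12,
                                              'minutes' => 0, 'seconds' => 0);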
   
   =cut
   
   sub make_seconds_since_epoch (@)
     {
       # Keytable of time metadata.
       my %time_metadata = @_;
   
       # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
       return(POSIX::mktime(
    ($time_metadata{'seconds'},
     $time_metadata{'minutes'},
     $time_metadata{'hours'},
     $time_metadata{'day'},
     $time_metadata{'month'}-1,
     $time_metadata{'year'}-1900,
     0,
     0,
     $time_metadata{'dlsav'})
    )
      );
     }
   
   =pod
   
   B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
   
   Somebody described this subroutine as
   "retro-fixing of un-backward-compatible time format".
   
What this means is that part of this code expects to get UTC seconds
   since the epoch (beginning of 1970).  Yet, some of the .meta files have
   sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
   integers (e.g. 1044147435).  These time strings do not encode the timezone
   and, in this sense, can be considered "un-backwards-compatible".
   
   =over 4
   
   Parameters:
   
   =item I<$potentially_badformat_string> - string to "retro-fix".
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<integer> - seconds since epoch.
   
   =back
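
For example:

    my $t1 = &sanitize_time('2001-04-01 12:00:00'); # converted to seconds since epoch
    my $t2 = &sanitize_time(1044147435);            # already epoch seconds, returned as-is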
   
   =cut
   
   sub sanitize_time ($)
     {
       my $timestamp = shift(@_);
       # If timestamp is in this unexpected format....
       if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
         {
    # then convert into seconds since epoch (the expected format).
    $timestamp = &make_seconds_since_epoch(
          'year' => $1,
          'month' => $2,
          'day' => $3,
          'hours' => $4,
          'minutes' => $5,
          'seconds' => $6
          );
         }
       # Otherwise we assume timestamp to be as expected.
       return($timestamp);
     }
   
   =pod
   
   =head1 AUTHOR
   
   Written to help the loncapa project.
   
   Scott Harrison, sharrison@users.sourceforge.net
   
   This is distributed under the same terms as loncapa (i.e. "freeware").
   
   =cut
