File:  [LON-CAPA] / nsdl / lonsql
Revision 1.4: download - view: text, annotated - select for diffs
Thu Nov 17 22:51:59 2005 UTC (18 years, 4 months ago) by www
Branches: MAIN
CVS tags: HEAD
Starting to parse output

#!/usr/bin/perl

# The LearningOnline Network
# lonsql - LON TCP-NSDL Query Handler.
#
# $Id: lonsql,v 1.4 2005/11/17 22:51:59 www Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#

=pod

=head1 NAME

lonsql - LON TCP-MySQL-Server Daemon for handling database requests.

=head1 SYNOPSIS

This script should be run as user=www.  
Note that a lonsql.pid file contains the pid of the parent process.

=head1 OVERVIEW

=head2 Purpose within LON-CAPA

LON-CAPA is meant to distribute A LOT of educational content to A LOT
of people. It is ineffective to directly rely on contents within the
ext2 filesystem to be speedily scanned for on-the-fly searches of
content descriptions. (Simply put, it takes a cumbersome amount of
time to open, read, analyze, and close thousands of files.)

The solution is to index various data fields that are descriptive of
the educational resources on a LON-CAPA server machine in a
database. Descriptive data fields are referred to as "metadata". The
question then arises as to how this metadata is handled in terms of
the rest of the LON-CAPA network without burdening client and daemon
processes.

The obvious solution, using lonc to send a query to a lond process,
doesn't work so well in general as you can see in the following
example:

    lonc= loncapa client process    A-lonc= a lonc process on Server A
    lond= loncapa daemon process

                 database command
    A-lonc  --------TCP/IP----------------> B-lond

The problem emerges that A-lonc and B-lond are kept waiting for the
MySQL server to "do its stuff", or in other words, perform the
conceivably sophisticated, data-intensive, time-sucking database
transaction.  By tying up a lonc and lond process, this significantly
cripples the capabilities of LON-CAPA servers.

The solution is to offload the work onto another process, and use
lonc and lond just for requests and notifications of completed
processing:

                database command

  A-lonc  ---------TCP/IP-----------------> B-lond =====> B-lonsql
         <---------------------------------/                |
           "ok, I'll get back to you..."                    |
                                                            |
                                                            /
  A-lond  <-------------------------------  B-lonc   <======
           "Guess what? I have the result!"

Of course, depending on success or failure, the messages may vary, but
the principle remains the same where a separate pool of children
processes (lonsql's) handle the MySQL database manipulations.

Thus, lonc and lond spend effectively no time waiting on results from
the database.

=head1 Internals

=over 4

=cut

use strict;

use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;
use LONCAPA::lonmetadata();

use IO::Socket;
use Symbol;
use POSIX;
use IO::Select;
use IO::File;
use Socket;
use Fcntl;
use Tie::RefHash;
use HTML::LCParser();
use LWP::UserAgent();
use HTTP::Headers;
use HTTP::Date;
use File::Find;
use localenroll;

########################################################
########################################################

=pod 

=item Variables required for forking

=over 4

=item $MAX_CLIENTS_PER_CHILD

The number of clients each child should process.

=item %children 

The keys to %children  are the current child process IDs

=item $children

The current number of children

=back

=cut 

########################################################
########################################################
my $MAX_CLIENTS_PER_CHILD  = 5;   # number of clients each child should process
my %children               = ();  # keys are current child process IDs
my $children               = 0;   # current number of children
                               
###################################################################
###################################################################

=pod

=item Main body of code.

=over 4

=item Read data from loncapa_apache.conf and loncapa.conf.

=item Ensure we can access the database.

=item Determine if there are other instances of lonsql running.

=item Read the hosts file.

=item Create a socket for lonsql.

=item Fork once and dissociate from parent.

=item Write PID to disk.

=item Prefork children and maintain the population of children.

=back

=cut

###################################################################
###################################################################
my $childmaxattempts=10;
my $run =0;              # running counter to generate the query-id
#
# Read loncapa_apache.conf and loncapa.conf
#
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar=%{$perlvarref};
#
# Write the /home/www/.my.cnf file 
my $conf_file = '/home/www/.my.cnf';
if (! -e $conf_file) {
    if (open MYCNF, ">$conf_file") {
        print MYCNF <<"ENDMYCNF";
[client]
user=www
password=$perlvar{'lonSqlAccess'}
ENDMYCNF
        close MYCNF;
    } else {
        warn "Unable to write $conf_file, continuing";
    }
}


#
# Check if other instance running
#
my $pidfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
if (-e $pidfile) {
   my $lfh=IO::File->new("$pidfile");
   my $pide=<$lfh>;
   chomp($pide);
   if (kill 0 => $pide) { die "already running"; }
}

#
# Read hosts file
#
my $thisserver;
my $PREFORK=4; # number of children to maintain, at least four spare
open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file";
while (my $configline=<CONFIG>) {
    my ($id,$domain,$role,$name)=split(/:/,$configline);
    $name=~s/\s//g;
    $thisserver=$name if ($id eq $perlvar{'lonHostID'});
    #$PREFORK++;
}
close(CONFIG);
#
#$PREFORK=int($PREFORK/4);

#
# Create a socket to talk to lond
#
my $unixsock = "mysqlsock";
my $localfile="$perlvar{'lonSockDir'}/$unixsock";
my $server;
unlink ($localfile);
unless ($server=IO::Socket::UNIX->new(Local    =>"$localfile",
                                      Type    => SOCK_STREAM,
                                      Listen => 10)) {
    print "in socket error:$@\n";
}

#
# Fork once and dissociate
#
my $fpid=fork;
exit if $fpid;
die "Couldn't fork: $!" unless defined ($fpid);
POSIX::setsid() or die "Can't start new session: $!";

#
# Write our PID on disk
my $execdir=$perlvar{'lonDaemons'};
open (PIDSAVE,">$execdir/logs/lonsql.pid");
print PIDSAVE "$$\n";
close(PIDSAVE);
&logthis("<font color='red'>CRITICAL: ---------- Starting ----------</font>");

#
# Ignore signals generated during initial startup
$SIG{HUP}=$SIG{USR1}='IGNORE';
# Now we are on our own    
#    Fork off our children.
for (1 .. $PREFORK) {
    make_new_child();
}

#
# Install signal handlers.
$SIG{CHLD} = \&REAPER;
$SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
$SIG{HUP}  = \&HUPSMAN;

#
# And maintain the population.
while (1) {
    sleep;                          # wait for a signal (i.e., child's death)
    for (my $i = $children; $i < $PREFORK; $i++) {
        make_new_child();           # top up the child pool
    }
}

########################################################
########################################################

=pod

=item &make_new_child

Inputs: None

Returns: None

=cut

########################################################
########################################################
sub make_new_child {
    my $pid;
    my $sigset;
    #
    # block signal for fork
    $sigset = POSIX::SigSet->new(SIGINT);
    sigprocmask(SIG_BLOCK, $sigset)
        or die "Can't block SIGINT for fork: $!\n";
    #
    die "fork: $!" unless defined ($pid = fork);
    #
    if ($pid) {
        # Parent records the child's birth and returns.
        sigprocmask(SIG_UNBLOCK, $sigset)
            or die "Can't unblock SIGINT for fork: $!\n";
        $children{$pid} = 1;
        $children++;
        return;
    } else {
        # Child can *not* return from this subroutine.
        $SIG{INT} = 'DEFAULT';      # make SIGINT kill us as it did before
        # unblock signals
        sigprocmask(SIG_UNBLOCK, $sigset)
            or die "Can't unblock SIGINT for fork: $!\n";

	$SIG{TERM}=$SIG{INT}=$SIG{QUIT}=$SIG{__DIE__}=\&DISCONNECT;
        # handle connections until we've reached $MAX_CLIENTS_PER_CHILD
        for (my $i=0; $i < $MAX_CLIENTS_PER_CHILD; $i++) {
            my $client = $server->accept() or last;
            # do something with the connection
	    $run = $run+1;
	    my $userinput = <$client>;
	    chomp($userinput);
            #
	    my ($conserver,$query,
		$arg1,$arg2,$arg3)=split(/&/,$userinput);
	    my $query=unescape($query);
            #
            #send query id which is pid_unixdatetime_runningcounter
	    my $queryid = $thisserver;
	    $queryid .="_".($$)."_";
	    $queryid .= time."_";
	    $queryid .= $run;
	    print $client "$queryid\n";
	    #
	    # &logthis("QUERY: $query - $arg1 - $arg2 - $arg3");
	    sleep 1;
            #
            my $result='';
            #
            # At this point, query is received, query-ID assigned and sent 
            # back, $query eq 'logquery' will mean that this is a query 
            # against log-files
            if (($query eq 'userlog') || ($query eq 'courselog')) {
                # beginning of log query
                my $udom    = &unescape($arg1);
                my $uname   = &unescape($arg2);
                my $command = &unescape($arg3);
                my $path    = &propath($udom,$uname);
                if (-e "$path/activity.log") {
                    if ($query eq 'userlog') {
                        $result=&userlog($path,$command);
                    } else {
                        $result=&courselog($path,$command);
                    }
                } else {
                    &logthis('Unable to do log query: '.$uname.'@'.$udom);
                    $result='no_such_file';
                }
                # end of log query
            } elsif ($query eq 'fetchenrollment') {
                # retrieve institutional class lists
                my $dom = &unescape($arg1);
                my %affiliates = ();
                my %replies = ();
                my $locresult = '';
                my $querystr = &unescape($arg3);
                foreach (split/%%/,$querystr) {
                    if (/^([^=]+)=([^=]+)$/) {
                        @{$affiliates{$1}} = split/,/,$2;
                    }
                }
                $locresult = &localenroll::fetch_enrollment($dom,\%affiliates,\%replies);
                $result = &escape($locresult.':');
                if ($locresult) {
                    $result .= &escape(join(':',map{$_.'='.$replies{$_}} keys %replies));
                }
            } elsif ($query eq 'prepare activity log') {
                my ($cid,$domain) = map {&unescape($_);} ($arg1,$arg2);
                &logthis('preparing activity log tables for '.$cid);
                my $command = 
                    qq{$perlvar{'lonDaemons'}/parse_activity_log.pl -course=$cid -domain=$domain};
                system($command);
                &logthis($command);
                my $returnvalue = $?>>8;
                if ($returnvalue) {
                    $result = 'error: parse_activity_log.pl returned '.
                        $returnvalue;
                } else {
                    $result = 'success';
                }
            } else {
                # Do an sql query
                $result = &do_sql_query($query,$arg1,$arg2);
            }
            # result does not need to be escaped because it has already been
            # escaped.
            #$result=&escape($result);
            &reply("queryreply:$queryid:$result",$conserver);
        }
        # tidy up gracefully and finish
        #

        # this exit is VERY important, otherwise the child will become
        # a producer of more and more children, forking yourself into
        # process death.
        exit;
    }
}

########################################################
########################################################

=pod

=item &do_sql_query

Runs an sql metadata table query.

Inputs: $query, $custom, $customshow

Returns: A string containing escaped results.

=cut

########################################################
########################################################
{
    my @metalist;

sub process_file {
    if ( -e $_ &&  # file exists
         -f $_ &&  # and is a normal file
         /\.meta$/ &&  # ends in meta
         ! /^.+\.\d+\.[^\.]+\.meta$/  # is not a previous version
         ) {
        push(@metalist,$File::Find::name);
    }
}

sub do_sql_query {
    my ($query) = @_;
    &logthis('doing query '.$query);
 
    my @results = ();
 
    #
    if ($query) {
        #prepare and execute the query
	my $aref=&nsdl_query($query);
	foreach my $row (@$aref) {
	    my @b=map { &escape($_); } @$row;
	    push @results,join(",", @b);
	}
        
    }
    return join("&",@results);
} # End of &do_sql_query

} # End of scoping curly braces for &process_file and &do_sql_query
########################################################
########################################################

=pod

=item &logthis

Inputs: $message, the message to log

Returns: nothing

Writes $message to the logfile.

=cut

########################################################
########################################################
sub logthis {
    my $message=shift;
    my $execdir=$perlvar{'lonDaemons'};
    my $fh=IO::File->new(">>$execdir/logs/lonsql.log");
    my $now=time;
    my $local=localtime($now);
    print $fh "$local ($$): $message\n";
}

# -------------------------------------------------- Non-critical communication

########################################################
########################################################

=pod

=item &subreply

Sends a command to a server.  Called only by &reply.

Inputs: $cmd,$server

Returns: The results of the message or 'con_lost' on error.

=cut

########################################################
########################################################
sub subreply {
    my ($cmd,$server)=@_;
    my $peerfile="$perlvar{'lonSockDir'}/$server";
    my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",
                                      Type    => SOCK_STREAM,
                                      Timeout => 10)
       or return "con_lost";
    print $sclient "$cmd\n";
    my $answer=<$sclient>;
    chomp($answer);
    $answer="con_lost" if (!$answer);
    return $answer;
}

########################################################
########################################################

=pod

=item &reply

Sends a command to a server.

Inputs: $cmd,$server

Returns: The results of the message or 'con_lost' on error.

=cut

########################################################
########################################################
sub reply {
  my ($cmd,$server)=@_;
  my $answer;
  if ($server ne $perlvar{'lonHostID'}) { 
    $answer=subreply($cmd,$server);
    if ($answer eq 'con_lost') {
	$answer=subreply("ping",$server);
        $answer=subreply($cmd,$server);
    }
  } else {
    $answer='self_reply';
    $answer=subreply($cmd,$server);
  } 
  return $answer;
}

########################################################
########################################################

=pod

=item &escape

Escape special characters in a string.

Inputs: string to escape

Returns: The input string with special characters escaped.

=cut

########################################################
########################################################
sub escape {
    my $str=shift;
    $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
    return $str;
}

########################################################
########################################################

=pod

=item &unescape

Unescape special characters in a string.

Inputs: string to unescape

Returns: The input string with special characters unescaped.

=cut

########################################################
########################################################
sub unescape {
    my $str=shift;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $str;
}

########################################################
########################################################

=pod

=item &ishome

Determine if the current machine is the home server for a user.
The determination is made by checking the filesystem for the users information.

Inputs: $author

Returns: 0 - this is not the authors home server, 1 - this is.

=cut

########################################################
########################################################
sub ishome {
    my $author=shift;
    $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
    my ($udom,$uname)=split(/\//,$author);
    my $proname=propath($udom,$uname);
    if (-e $proname) {
	return 1;
    } else {
        return 0;
    }
}

########################################################
########################################################

=pod

=item &propath

Inputs: user name, user domain

Returns: The full path to the users directory.

=cut

########################################################
########################################################
sub propath {
    my ($udom,$uname)=@_;
    $udom=~s/\W//g;
    $uname=~s/\W//g;
    my $subdir=$uname.'__';
    $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
    my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
    return $proname;
} 

########################################################
########################################################

=pod

=item &courselog

Inputs: $path, $command

Returns: unescaped string of values.

=cut

########################################################
########################################################
sub courselog {
    my ($path,$command)=@_;
    my %filters=();
    foreach (split(/\:/,&unescape($command))) {
	my ($name,$value)=split(/\=/,$_);
        $filters{$name}=$value;
    }
    my @results=();
    open(IN,$path.'/activity.log') or return ('file_error');
    while (my $line=<IN>) {
        chomp($line);
        my ($timestamp,$host,$log)=split(/\:/,$line);
#
# $log has the actual log entries; currently still escaped, and
# %26(timestamp)%3a(url)%3a(user)%3a(domain)
# then additionally
# %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
# or
# %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
#
# get delimiter between timestamped entries to be &&&
        $log=~s/\%26(\d+)\%3a/\&\&\&$1\%3a/g;
# now go over all log entries 
        foreach (split(/\&\&\&/,&unescape($log))) {
	    my ($time,$res,$uname,$udom,$action,@values)=split(/\:/,$_);
            my $values=&unescape(join(':',@values));
            $values=~s/\&/\:/g;
            $res=&unescape($res);
            my $include=1;
            if (($filters{'username'}) && ($uname ne $filters{'username'})) 
                                                               { $include=0; }
            if (($filters{'domain'}) && ($udom ne $filters{'domain'})) 
                                                               { $include=0; }
            if (($filters{'url'}) && ($res!~/$filters{'url'}/)) 
                                                               { $include=0; }
            if (($filters{'start'}) && ($time<$filters{'start'})) 
                                                               { $include=0; }
            if (($filters{'end'}) && ($time>$filters{'end'})) 
                                                               { $include=0; }
            if (($filters{'action'} eq 'view') && ($action)) 
                                                               { $include=0; }
            if (($filters{'action'} eq 'submit') && ($action ne 'POST')) 
                                                               { $include=0; }
            if (($filters{'action'} eq 'grade') && ($action ne 'CSTORE')) 
                                                               { $include=0; }
            if ($include) {
	       push(@results,($time<1000000000?'0':'').$time.':'.$res.':'.
                                            $uname.':'.$udom.':'.
                                            $action.':'.$values);
            }
       }
    }
    close IN;
    return join('&',sort(@results));
}

########################################################
########################################################

=pod

=item &userlog

Inputs: $path, $command

Returns: unescaped string of values.

=cut

########################################################
########################################################
sub userlog {
    my ($path,$command)=@_;
    my %filters=();
    foreach (split(/\:/,&unescape($command))) {
	my ($name,$value)=split(/\=/,$_);
        $filters{$name}=$value;
    }
    my @results=();
    open(IN,$path.'/activity.log') or return ('file_error');
    while (my $line=<IN>) {
        chomp($line);
        my ($timestamp,$host,$log)=split(/\:/,$line);
        $log=&unescape($log);
        my $include=1;
        if (($filters{'start'}) && ($timestamp<$filters{'start'})) 
                                                             { $include=0; }
        if (($filters{'end'}) && ($timestamp>$filters{'end'})) 
                                                             { $include=0; }
        if (($filters{'action'} eq 'log') && ($log!~/^Log/)) { $include=0; }
        if (($filters{'action'} eq 'check') && ($log!~/^Check/)) 
                                                             { $include=0; }
        if ($include) {
	   push(@results,$timestamp.':'.$log);
        }
    }
    close IN;
    return join('&',sort(@results));
}

########################################################
########################################################

=pod

=item Functions required for forking

=over 4

=item REAPER

REAPER takes care of dead children.

=item HUNTSMAN

Signal handler for SIGINT.

=item HUPSMAN

Signal handler for SIGHUP

=item DISCONNECT

Disconnects from database.

=back

=cut

########################################################
########################################################
sub REAPER {                   # takes care of dead children
    $SIG{CHLD} = \&REAPER;
    my $pid = wait;
    $children --;
    &logthis("Child $pid died");
    delete $children{$pid};
}

sub HUNTSMAN {                      # signal handler for SIGINT
    local($SIG{CHLD}) = 'IGNORE';   # we're going to kill our children
    kill 'INT' => keys %children;
    my $execdir=$perlvar{'lonDaemons'};
    unlink("$execdir/logs/lonsql.pid");
    &logthis("<font color='red'>CRITICAL: Shutting down</font>");
    $unixsock = "mysqlsock";
    my $port="$perlvar{'lonSockDir'}/$unixsock";
    unlink($port);
    exit;                           # clean up with dignity
}

sub HUPSMAN {                      # signal handler for SIGHUP
    local($SIG{CHLD}) = 'IGNORE';  # we're going to kill our children
    kill 'INT' => keys %children;
    close($server);                # free up socket
    &logthis("<font color='red'>CRITICAL: Restarting</font>");
    my $execdir=$perlvar{'lonDaemons'};
    $unixsock = "mysqlsock";
    my $port="$perlvar{'lonSockDir'}/$unixsock";
    unlink($port);
    exec("$execdir/lonsql");         # here we go again
}

#
# Takes SQL query
# sends it to NSDL
# has to return array reference
#

sub nsdl_query {
    my $query=shift;
    my ($keyword)=($query=~/\"\%([^\%]+)\%\"/);
    $keyword=&escape($keyword);
    &logthis('Doing '.$keyword);
    my $url='http://search.nsdl.org?verb=Search&s=0&n=500&q='.$keyword;
    my $ua=new LWP::UserAgent;
    my $response=$ua->get($url);
    my $parser=HTML::LCParser->new(\$response->content);
    my %result=();
    my $is=();
    my $cont='';
    my $array=[];
    my $token;
    while ($token=$parser->get_token) {
	if ($token->[0] eq 'T') {
	    $cont.=$token->[1];
	} elsif ($token->[0] eq 'S') {
	    if ($token->[1] eq 'record') {
		%result=();
	    } elsif ($token->[1]=/^dc\:/) {
		$is=$token->[1];
		$cont='';
	    }
	} elsif ($token->[0] eq 'E') {
	    if ($token->[1] eq 'record') {
#
# Now store it away
#
	    } elsif ($token->[1]=/^dc\:/) {
		$result{$is}=$cont;
	    }
	}
    }
    return $array;
}

=pod

=back

=cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>