File:  [LON-CAPA] / loncom / cgi / clusterstatus.pl
Revision 1.14: download - view: text, annotated - select for diffs
Fri Aug 1 19:20:26 2003 UTC (20 years, 10 months ago) by www
Branches: MAIN
CVS tags: HEAD
* Making timeout even smaller. If 10 secs are not enough, there is some
trouble - which is the thing we want to detect
* Always deal with 5 connections - makes no difference in the beginning,
but makes sure that the final obnoxious holes get filed up towards the
end
* Make diagonal (connections to self) better visible in connections matrix

#!/usr/bin/perl
$|=1;
# The LearningOnline Network with CAPA
# Cluster Status
#
# $Id: clusterstatus.pl,v 1.14 2003/08/01 19:20:26 www Exp $

use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;

use LWP::UserAgent();
use HTTP::Headers;
use IO::File;

my %host=();
my $oneday=60*60*24;

my %connectionstatus=();
my %perlvar=();

my $mode;
my $concount=0;

sub select_form {
    my ($def,$name,%hash) = @_;
    my $selectform = "<select name=\"$name\" size=\"1\">\n";
    foreach (sort keys %hash) {
        $selectform.="<option value=\"$_\" ".
            ($_ eq $def ? 'selected' : '').
                ">".$hash{$_}."</option>\n";
    }
    $selectform.="</select>";
    return $selectform;
}


sub key {
    my ($local,$url)=@_;
    my $key=$local.'_'.$url;
    $key=~s/\W/\_/gs;
    return $key;
}

sub hidden {
    my ($name,$value)=@_;
    print "\n<input type='hidden' name='$name' value='$value' />";
}

sub request {
    my ($local,$url,$cachetime)=@_;
    $cachetime*=(0.5+rand);
    my $key=&key($local,$url);
    my $reply='';
    if ($FORM{$key.'_time'}) {
	if ((time-$FORM{$key.'_time'})<$cachetime) {
	    $reply=$FORM{$key};
	    &hidden($key.'_time',$FORM{$key.'_time'});
	    &hidden($key.'_fromcache',1);
	}
    }
    unless ($reply) {
	unless ($hostname{$local}) { 
	    $reply='local_unknown'; 
	} else {

	    my $ua=new LWP::UserAgent(timeout => 10);
    
	    my $request=new HTTP::Request('GET',
					  "http://".$hostname{$local}.$url);
	    $request->authorization_basic('lonadm','litelite');

	    my $response=$ua->request($request);

	    unless ($response->is_success) { 
		$reply='local_error'; 
	    } else {
		$reply=$response->content;
		chomp($reply);
	    }
	}
	&hidden($key.'_time',time);
    }
    &hidden($key,$reply);
    return $reply;
}

# ============================================= Are local and remote connected?
sub connected {
    my ($local,$remote)=@_;
    $local=~s/\W//g;
    $remote=~s/\W//g;

    unless ($hostname{$remote}) { return 'remote_unknown'; }
    my $url='/cgi-bin/ping.pl?'.$remote;
#
# Slowly phase this in: if not cached, only do 5 percent of the cases,
# but always do the first five. 
#
    unless ($FORM{&key($local,$url)}) {
	unless (($concount<=5) || (rand>0.95)) { 
	    return 'not_yet'; 
	} else {
	    $concount++;
	}
    }
#
# Actually do the query
#
    &statuslist($local,'connecting '.$remote);
    my $reply=&request($local,$url,3600);
    $reply=(split("\n",$reply))[0];
    $reply=~s/\W//g;
    if ($reply ne $remote) { return $reply; }
    return 'ok';
}
# ============================================================ Get a reply hash

sub replyhash {
    my %returnhash=();
    foreach (split(/\&/,&request(@_))) {
	my ($name,$value)=split(/\=/,$_);
	if ($name) {
	    unless ($value) { $value=''; }
	    $returnhash{$name}=$value;
	}
    }
    return %returnhash;
}

# ================================================================ Link to host

sub otherwindow {
    my ($local,$url,$label)=@_;
    return
  " <a href='http://$hostname{$local}$url' target='newwin$local'>$label</a> ";
}

sub login {
    my $local=shift;
    print &otherwindow($local,'/adm/login?domain='.$perlvar{'lonDefDomain'},
		       'Login');
}

sub runloncron {
    my $local=shift;
    print &otherwindow($local,'/cgi-bin/loncron.pl','Run loncron');
}

sub loncron {
    my $local=shift;
    print &otherwindow($local,'/lon-status','loncron');
}

sub lonc {
    my $local=shift;
    print &otherwindow($local,'/lon-status/loncstatus.txt','lonc');
}

sub lond {
    my $local=shift;
    print &otherwindow($local,'/lon-status/londstatus.txt','lond');
}

sub users {
    my $local=shift;
    print &otherwindow($local,'/cgi-bin/userstatus.pl','Users');
}

sub versions {
    my $local=shift;
    print &otherwindow($local,'/cgi-bin/lonversions.pl','Versions');
}

sub server {
    my $local=shift;
    print &otherwindow($local,'/server-status','Server Status');
}

# ========================================================= Produce a green bar
sub bar {
    my $parm=shift;
    my $number=int($parm+0.5);
    print "<table><tr><td bgcolor='#225522'><font color='#225522'>";
    for (my $i=0;$i<$number;$i++) {
	print "+";
    }
    print "</font></table>";
}

# ========================================================== Show server status

sub serverstatus {
    my ($local,$trouble)=@_;
    print (<<ENDHEADER);
<a name="$local" />
<table width="100%" bgcolor="#225522" cellspacing="2" cellpadding="2" border="0">
<tr><td bgcolor="#BBDDBB"><font color="#225522" face="arial"><b>
$local $hostdom{$local}</b> <tt>($hostname{$local}; $hostrole{$local})</tt>
<br />$domaindescription{$hostdom{$local}}
</font></th></tr><tr><td bgcolor="#DDDDBB"><font color="#225522">
ENDHEADER
    &login($local);&server($local);&users($local);&versions($local);
    &loncron($local);&lond($local);&lonc($local);&runloncron($local);
    print "</font></td></tr>";
    if ($trouble) {
	print ("<tr><td bgcolor='#DDBBBB'><font color='#552222' size='+2'>$trouble</font></td></tr>");
    }
    print "<tr><td bgcolor='#BBBBBB'>";
# load
    if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) {
	print "<br />Load: ".$host{$local.'_load'}
    }
# users
    if (($host{$local.'_users_doomed'}>10) || ($mode eq 'users_doomed')) {
	print "<br />Active Users: ".$host{$local.'_users'}
    }

# checkrpms
    if ($host{$local.'_checkrpms'}) {
	print "<br />RPMs: ".$host{$local.'_checkrpms'}
    }
# mysql
    if ($host{$local.'_mysql'}) {
	print "<br />MySQL Database: ".$host{$local.'_mysql'}
    }
# connections
    if ($host{$local.'_notconnected'}) {
	print "<br />Not connected: ";
	foreach (split(/ /,$host{$local.'_notconnected'})) {
	    if ($_) {
		print " <a href='#$_'>$_</a>";
	    }
	}
    }
# errors
    if ($host{$local.'_errors'}) {
	print "<br />loncron errors: ".$host{$local.'_errors'};
    }
    print "</td></tr></table><br />";
}

# =========================================================== Doomedness sorted

sub doomedness {
    my $crit=shift;
    my %alldoomed=();
    my @allhosts=();
    foreach (keys %host) {
	if ($_=~/^(\w+)\_$crit$/) {
	    if ($host{$_}) {
		push (@allhosts,$1);
		$alldoomed{$1}=$host{$_};
	    }
	}
    }
    return sort { $alldoomed{$b} <=> $alldoomed{$a} } @allhosts;
}

# ====================================================================== Status
sub statuslist {
    my ($local,$what)=@_;
    print 
"<script>document.prgstat.progress.value='Testing $local ($hostname{$local}): $what';</script>\n";
}

#
# Main program
#
# ========================================================= Get form parameters
my $buffer;

read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
my @pairs=split(/&/,$buffer);
my $pair; my $name; my $value;
undef %FORM;
%FORM=();
foreach $pair (@pairs) {
    ($name,$value) = split(/=/,$pair);
    $value =~ tr/+/ /;
    $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    $FORM{$name}=$value;
} 

$buffer=$ENV{'QUERY_STRING'};
@pairs=split(/&/,$buffer);
foreach $pair (@pairs) {
    ($name,$value) = split(/=/,$pair);
    $value =~ tr/+/ /;
    $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    $FORM{$name}=$value;
} 

# ====================================================== Determine refresh rate

my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:120);
if ($refresh<30) { $refresh=30; }
my $starttime=time;

# ============================================================== Determine mode

my %modes=('trouble' => 'Trouble',
	   'users_doomed' => 'Doomed: Users',
	   'loncron_doomed' => 'Doomed: General (loncron)',
	   'mysql_doomed' => 'Doomed: Database (mysql)',
	   'notconnected_doomed' => 'Doomed: Connections',
	   'checkrpms_doomed' => 'Doomed: RPMs',
	   'load_doomed' => 'Doomed: Load',
	   'unresponsive_doomed' => 'Doomed: Status could not be determined',
	   'users' => 'User Report',
	   'load' => 'Load Report',
	   'connections' => 'Connections Matrix');

$mode=$FORM{'mode'};
unless ($modes{$mode}) { $mode='trouble'; }
# ================================================================ Send Headers
print "Content-type: text/html\n\n".
    "<html><body bgcolor='#FFFFFF'>\n";
# -------------------- Read loncapa.conf (and by default, loncapa_apache.conf).
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
%perlvar=%{$perlvarref};
undef $perlvarref; # remove since sensitive and not needed
delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed

# ------------------------------------------------------------- Read hosts file
{
    my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");

    $total=0;
    while (my $configline=<$config>) {
       $configline=~s/#.*$//;
       unless ($configline=~/\w/) { next; } 
       my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
       $hostname{$id}=$name;
       $hostdom{$id}=$domain;
       $hostrole{$id}=$role;
       $hostip{$id}=$ip;
       $total++;
       if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
	   $libserv{$id}=$name;
       }
    }
}
# ------------------------------------------------------------ Read domain file
{
    my $fh=IO::File->new($perlvar{'lonTabDir'}.'/domain.tab');
    %domaindescription = ();
    %domain_auth_def = ();
    %domain_auth_arg_def = ();
    if ($fh) {
       while (<$fh>) {
           next if (/^(\#|\s*$)/);
           chomp;
           my ($domain, $domain_description, $def_auth, $def_auth_arg)
               = split(/:/,$_,4);
           $domain_auth_def{$domain}=$def_auth;
           $domain_auth_arg_def{$domain}=$def_auth_arg;
           $domaindescription{$domain}=$domain_description;
       }
    }
}

print "<img src='/adm/lonIcons/lonlogos.gif' align='right' /><h1>LON-CAPA Cluster Status ".localtime()."</h1>";
print "<form name='prgstat'>\n".
"<input type='text' name='progress' value='Starting ...' size='100' /><br />".
"</form>\n";;
print "<form name='status' method='post'>\n";
print 'Choose next report: '.&select_form($mode,'mode',%modes).'<hr />';
&hidden('refresh',$refresh);

# ==================================================== Main Loop over all Hosts

my $maxusers=0;
my $maxload=0;
my $totalusers=0;

foreach $local (sort keys %hostname) {
    $host{$local.'_unresponsive_doomed'}=0;
# -- Check general status
    &statuslist($local,'General');
    my %loncron=&replyhash($local,'/lon-status/loncron_simple.txt',1200);
    if (defined($loncron{'local_error'})) {
	$host{$local.'_loncron'}='Could not determine.';
	$host{$local.'_unresponsive_doomed'}++;
    } else {
	if ((time-$loncron{'time'})>$oneday) {
	    $host{$local.'_loncron'}='Stale.';
	    $host{$local.'_unresponsive_doomed'}++;
	} else {
	    $host{$local.'_loncron_doomed'}=$loncron{'notices'}
	                                 +4*$loncron{'warnings'}
	                               +100*$loncron{'errors'};
	    $host{$local.'_errors'}=$loncron{'errors'};
	}
    }
# -- Check user status
    &statuslist($local,'Users');
    my %userstatus=&replyhash($local,'/cgi-bin/userstatus.pl?simple',600);
    if (defined($userstatus{'local_error'})) {
	$host{$local.'_userstatus'}='Could not determine.';
	$host{$local.'_unresponsive_doomed'}++;
    } else {
	$host{$local.'_users_doomed'}=$userstatus{'Active'};
	$host{$local.'_users'}=$userstatus{'Active'};
	unless ($host{$local.'_users'}) { $host{$local.'_users'}=0; }
	if ($host{$local.'_users'}>$maxusers) { 
	    $maxusers=$host{$local.'_users'};
	}
	$totalusers+=$host{$local.'_users'};
	my ($sload,$mload,$lload)=split(/ /,$userstatus{'loadavg'});
	$host{$local.'_load_doomed'}=$mload;
	if ($mload>$maxload) { 
	    $maxload=$mload;
	}
	$host{$local.'_load'}=$userstatus{'loadavg'};
    }
# -- Check mysql status
    &statuslist($local,'Database');
    my %mysql=&replyhash($local,'/lon-status/mysql.txt',3600);
    if (defined($mysql{'local_error'})) {
	$host{$local.'_mysql'}='Could not determine.';
	$host{$local.'_unresponsive_doomed'}++;
    } else {
	if ((time-$mysql{'time'})>(7*$oneday)) {
	    if ($hostrole{$local} eq 'library') {
		$host{$local.'_mysql'}='Stale.';
		$host{$local.'_mysql_doomed'}=1;
	    }
	    if ($mysql{'mysql'} eq 'defunct') {
		$host{$local.'_mysql'}='Defunct (maybe stale).';
		$host{$local.'_mysql_doomed'}=2;
	    }
	} elsif ($mysql{'mysql'} eq 'defunct') {
	    $host{$local.'_mysql'}='Defunct.';
	    $host{$local.'_mysql_doomed'}=3;
	}
    }
# -- Check rpm status
    &statuslist($local,'RPMs');
    my %checkrpms=&replyhash($local,'/lon-status/checkrpms.txt',7200);
    if (defined($checkrpms{'local_error'})) {
	$host{$local.'_checkrpms'}='Could not determine.';
	$host{$local.'_unresponsive_doomed'}++;
    } else {
	if ((time-$checkrpms{'time'})>(4*$oneday)) {
	    $host{$local.'_checkrpms'}='Stale.';
	    $host{$local.'_checkrpms_doomed'}=50;
	    $host{$local.'_unresponsive_doomed'}++;
	} elsif ($checkrpms{'status'} eq 'fail') {
	    $host{$local.'_checkrpms'}='Could not checked RPMs.';
	    $host{$local.'_checkrpms_doomed'}=100;
	} elsif ($checkrpms{'rpmcount'}) {
	    $host{$local.'_checkrpms'}='Outdated RPMs: '.
		$checkrpms{'rpmcount'};
	    $host{$local.'_checkrpms_doomed'}=$checkrpms{'rpmcount'};
	}
    }
# -- Check connections
    &statuslist($local,'Connections');
    $host{$local.'_notconnected'}='';
    $host{$local.'_notconnected_doomed'}=0;
    foreach $remote (sort keys %hostname) {
	my $status=&connected($local,$remote);
	$connectionstatus{$local.'_TO_'.$remote}=$status;
	unless (($status eq 'ok') || ($status eq 'not_yet')) {
	    $host{$local.'_notconnected'}.=' '.$remote;
	    $host{$local.'_notconnected_doomed'}++;
	}
    }
# =============================================================== End Mail Loop
}
&statuslist('Done.');
# ====================================================================== Output
    if ($mode=~/\_doomed$/) {
# Output by doomedness
	foreach (&doomedness($mode)) {
	    &serverstatus($_);
	}
    } elsif ($mode eq 'connections') {
	print 
       "<table cellspacing='3' cellpadding='3' border='0' bgcolor='#225522'>".
       "<tr><td bgcolor='#225522'>&nbsp;</td>";
	foreach my $remote (sort keys %hostname) {
	    print '<th bgcolor="#DDDDBB">'.$remote.'</th>';
	}
	print "</tr>\n";
# connection matrix
	foreach my $local (sort keys %hostname) {
	    print '<tr><th bgcolor="#DDDDBB">'.$local.'</th>';
	    foreach my $remote (sort keys %hostname) {
		if ($connectionstatus{$local.'_TO_'.$remote} eq 'not_yet') {
		    my $cellcolor='#FFFFFF';
		    if ($local eq $remote) { $cellcolor='#DDDDDD'; }
		    print '<td bgcolor="'.$cellcolor.'"><font color="#555522" size="-2">not yet tested</font></td>';
		} elsif ($connectionstatus{$local.'_TO_'.$remote} eq 'ok') {
		    my $cellcolor='#BBDDBB';
		    if ($local eq $remote) { $cellcolor='#99DD99'; }
		    print 
'<td bgcolor="'.$cellcolor.'"><font color="#225522" face="arial"><b>ok</b></td>';
		} else {
		    my $cellcolor='#DDBBBB';
		    if ($connectionstatus{$local.'_TO_'.$remote} eq 'local_error') {
			if ($local eq $remote) { 
			    $cellcolor='#DD88AA'; 
			} else {
			    $cellcolor='#DDAACC';
			}
		    } else {
			if ($local eq $remote) { $cellcolor='#DD9999'; }
		    }
		    print 
		  '<td bgcolor="'.$cellcolor.'"><font color="#552222" size="-2">'.
		  $connectionstatus{$local.'_TO_'.$remote}.'<br />';
		    &lonc($local); &lond($remote);
		    print '</td>';
		}
	    }
	    print "</tr>\n";
	}
	print "</table>";
    } elsif ($mode eq 'users') {
# Users
	if ($maxusers) {
	    my $factor=50/$maxusers;
	    print "<h3>Total active user(s): $totalusers</h3>". 
       "<table cellspacing='3' cellpadding='3' border='0' bgcolor='#225522'>";

	    foreach $local (sort keys %hostname) {
		if (defined($host{$local.'_users'})) {
		    print 
'<tr><th bgcolor="#BBDDBB"><font face="arial" color="#225522" size="+1">'.$local.
			'</font></th><td bgcolor="#DDDDBB">';
		    &users($local);
		    print 
	      '</td><td bgcolor="#DDDDBB"><font face="arial" color="#225522">'.
	      $host{$local.'_users'}.'</font></td><td bgcolor="#DDDDBB"';
		    &bar($factor*$host{$local.'_users'});
		    print "</td></tr>\n";
		}
	    }
	    print "</table>";
	} else {
	    print "No active users logged in.";
	}
    } elsif ($mode eq 'load') {
# Load
	if ($maxload) {
	    my $factor=50/$maxload; 
	    print
       "<table cellspacing='3' cellpadding='3' border='0' bgcolor='#225522'>";
	    foreach $local (sort keys %hostname) {
		if (defined($host{$local.'_load_doomed'})) {
		    print 
'<tr><th bgcolor="#BBDDBB"><font face="arial" color="#225522" size="+1">'.
                        $local.
			'</font></th><td bgcolor="#DDDDBB">';
		    &server($local);
		    print 
	      '</td><td bgcolor="#DDDDBB"><font face="arial" color="#225522">'.
	      $host{$local.'_load_doomed'}.'</font></td><td bgcolor="#DDDDBB"';
		    &bar($factor*$host{$local.'_load_doomed'});
		    print "</td></tr>\n";
		}
	    }
	    print "</table>";
	} else {
	    print "No workload.";
	}
    } elsif ($mode eq 'trouble') {
	my $count=0;
	foreach $local (sort keys %hostname) {
	    my $trouble='';
	    if ($host{$local.'_errors'}) {
		$trouble='Has loncron errors.<br />';
	    } elsif ($host{$local.'_loncron_doomed'}>600) {
		$trouble='High loncron count.<br />';
	    }
	    if ($host{$local.'_load_doomed'}>5) {
		$trouble='High load.<br />';
	    }
	    if ($host{$local.'_users_doomed'}>200) {
		$trouble='High user volume.<br />';
	    }
	    if ($host{$local.'_mysql_doomed'}>1) {
		$trouble='MySQL database apparently offline.<br />';
	    }
	    if ($host{$local.'_checkrpms_doomed'}>100) {
		$trouble='RPMs outdated.<br />';
	    }
	    if ($trouble) { $count++; &serverstatus($local,$trouble); }
	}
	unless ($count) { print "No mayor trouble."; }
    }
# ============================================================== Close, refresh
print "</form><script>";
$runtime=time-$starttime;
if (($refresh-$runtime)<30) {
    print "setTimeout('document.status.submit()',30000);\n".
          "document.prgstat.progress.value='Will automatically refresh.'";
} else {
    $refreshtime=int(1000*($refresh-$runtime));
    print "setTimeout('document.status.submit()',$refreshtime);\n".
          "document.prgstat.progress.value='Will automatically refresh ($refresh secs refresh cycle)'";
}
print "</script></body></html>";
exit 0;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>
500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.