--- loncom/cgi/clusterstatus.pl 2003/07/31 15:35:02 1.9 +++ loncom/cgi/clusterstatus.pl 2003/08/01 19:20:26 1.14 @@ -3,7 +3,7 @@ $|=1; # The LearningOnline Network with CAPA # Cluster Status # -# $Id: clusterstatus.pl,v 1.9 2003/07/31 15:35:02 www Exp $ +# $Id: clusterstatus.pl,v 1.14 2003/08/01 19:20:26 www Exp $ use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; @@ -19,6 +19,7 @@ my %connectionstatus=(); my %perlvar=(); my $mode; +my $concount=0; sub select_form { my ($def,$name,%hash) = @_; @@ -47,6 +48,7 @@ sub hidden { sub request { my ($local,$url,$cachetime)=@_; + $cachetime*=(0.5+rand); my $key=&key($local,$url); my $reply=''; if ($FORM{$key.'_time'}) { @@ -61,7 +63,7 @@ sub request { $reply='local_unknown'; } else { - my $ua=new LWP::UserAgent(timeout => 20); + my $ua=new LWP::UserAgent(timeout => 10); my $request=new HTTP::Request('GET', "http://".$hostname{$local}.$url); @@ -91,10 +93,15 @@ sub connected { unless ($hostname{$remote}) { return 'remote_unknown'; } my $url='/cgi-bin/ping.pl?'.$remote; # -# Slowly phase this in: if not cached, only do 10 percent of the cases +# Slowly phase this in: if not cached, only do 5 percent of the cases, +# but always do the first five. # unless ($FORM{&key($local,$url)}) { - unless (rand>0.9) { return 'not_yet'; } + unless (($concount<=5) || (rand>0.95)) { + return 'not_yet'; + } else { + $concount++; + } } # # Actually do the query @@ -169,20 +176,36 @@ sub server { print &otherwindow($local,'/server-status','Server Status'); } +# ========================================================= Produce a green bar +sub bar { + my $parm=shift; + my $number=int($parm+0.5); + print "
"; + for (my $i=0;$i<$number;$i++) { + print "+"; + } + print "
"; +} + # ========================================================== Show server status sub serverstatus { - my $local=shift; + my ($local,$trouble)=@_; print (< "; + if ($trouble) { + print (""); + } + print "
$local $hostdom{$local} ($hostname{$local}; $hostrole{$local})
$domaindescription{$hostdom{$local}} -
+
ENDHEADER &login($local);&server($local);&users($local);&versions($local); &loncron($local);&lond($local);&lonc($local);&runloncron($local); - print "
"; + print "
$trouble
"; # load if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) { print "
Load: ".$host{$local.'_load'} @@ -200,6 +223,19 @@ ENDHEADER if ($host{$local.'_mysql'}) { print "
MySQL Database: ".$host{$local.'_mysql'} } +# connections + if ($host{$local.'_notconnected'}) { + print "
Not connected: "; + foreach (split(/ /,$host{$local.'_notconnected'})) { + if ($_) { + print " $_"; + } + } + } +# errors + if ($host{$local.'_errors'}) { + print "
loncron errors: ".$host{$local.'_errors'}; + } print "

"; } @@ -256,7 +292,7 @@ foreach $pair (@pairs) { # ====================================================== Determine refresh rate -my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:60); +my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:120); if ($refresh<30) { $refresh=30; } my $starttime=time; @@ -271,13 +307,14 @@ my %modes=('trouble' => 'Trouble', 'load_doomed' => 'Doomed: Load', 'unresponsive_doomed' => 'Doomed: Status could not be determined', 'users' => 'User Report', + 'load' => 'Load Report', 'connections' => 'Connections Matrix'); $mode=$FORM{'mode'}; unless ($modes{$mode}) { $mode='trouble'; } # ================================================================ Send Headers print "Content-type: text/html\n\n". - "\n"; + "\n"; # -------------------- Read loncapa.conf (and by default, loncapa_apache.conf). my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); %perlvar=%{$perlvarref}; @@ -323,8 +360,7 @@ delete $perlvar{'lonSqlAccess'}; # remov } } - -print "

Cluster Status ".localtime()."

"; +print "

LON-CAPA Cluster Status ".localtime()."

"; print "
\n". "
". "
\n";; @@ -334,6 +370,10 @@ print 'Choose next report: '.&select_for # ==================================================== Main Loop over all Hosts +my $maxusers=0; +my $maxload=0; +my $totalusers=0; + foreach $local (sort keys %hostname) { $host{$local.'_unresponsive_doomed'}=0; # -- Check general status @@ -347,6 +387,10 @@ foreach $local (sort keys %hostname) { $host{$local.'_loncron'}='Stale.'; $host{$local.'_unresponsive_doomed'}++; } else { + $host{$local.'_loncron_doomed'}=$loncron{'notices'} + +4*$loncron{'warnings'} + +100*$loncron{'errors'}; + $host{$local.'_errors'}=$loncron{'errors'}; } } # -- Check user status @@ -358,8 +402,16 @@ foreach $local (sort keys %hostname) { } else { $host{$local.'_users_doomed'}=$userstatus{'Active'}; $host{$local.'_users'}=$userstatus{'Active'}; + unless ($host{$local.'_users'}) { $host{$local.'_users'}=0; } + if ($host{$local.'_users'}>$maxusers) { + $maxusers=$host{$local.'_users'}; + } + $totalusers+=$host{$local.'_users'}; my ($sload,$mload,$lload)=split(/ /,$userstatus{'loadavg'}); $host{$local.'_load_doomed'}=$mload; + if ($mload>$maxload) { + $maxload=$mload; + } $host{$local.'_load'}=$userstatus{'loadavg'}; } # -- Check mysql status @@ -424,15 +476,131 @@ foreach $local (sort keys %hostname) { foreach (&doomedness($mode)) { &serverstatus($_); } + } elsif ($mode eq 'connections') { + print + "". + ""; + foreach my $remote (sort keys %hostname) { + print ''; + } + print "\n"; +# connection matrix + foreach my $local (sort keys %hostname) { + print ''; + foreach my $remote (sort keys %hostname) { + if ($connectionstatus{$local.'_TO_'.$remote} eq 'not_yet') { + my $cellcolor='#FFFFFF'; + if ($local eq $remote) { $cellcolor='#DDDDDD'; } + print ''; + } elsif ($connectionstatus{$local.'_TO_'.$remote} eq 'ok') { + my $cellcolor='#BBDDBB'; + if ($local eq $remote) { $cellcolor='#99DD99'; } + print +''; + } else { + my $cellcolor='#DDBBBB'; + if ($connectionstatus{$local.'_TO_'.$remote} eq 'local_error') { + if ($local eq $remote) { + $cellcolor='#DD88AA'; + } else { + $cellcolor='#DDAACC'; + } + } else { + if ($local eq $remote) { $cellcolor='#DD9999'; } + } + print + ''; + } + } + print "\n"; + } + print "
 '.$remote.'
'.$local.'not yet testedok'. + $connectionstatus{$local.'_TO_'.$remote}.'
'; + &lonc($local); &lond($remote); + print '
"; + } elsif ($mode eq 'users') { +# Users + if ($maxusers) { + my $factor=50/$maxusers; + print "

Total active user(s): $totalusers

". + ""; + + foreach $local (sort keys %hostname) { + if (defined($host{$local.'_users'})) { + print +'\n"; + } + } + print "
'.$local. + ''; + &users($local); + print + ''. + $host{$local.'_users'}.'
"; + } else { + print "No active users logged in."; + } + } elsif ($mode eq 'load') { +# Load + if ($maxload) { + my $factor=50/$maxload; + print + ""; + foreach $local (sort keys %hostname) { + if (defined($host{$local.'_load_doomed'})) { + print +'\n"; + } + } + print "
'. + $local. + ''; + &server($local); + print + ''. + $host{$local.'_load_doomed'}.'
"; + } else { + print "No workload."; + } + } elsif ($mode eq 'trouble') { + my $count=0; + foreach $local (sort keys %hostname) { + my $trouble=''; + if ($host{$local.'_errors'}) { + $trouble='Has loncron errors.
'; + } elsif ($host{$local.'_loncron_doomed'}>600) { + $trouble='High loncron count.
'; + } + if ($host{$local.'_load_doomed'}>5) { + $trouble='High load.
'; + } + if ($host{$local.'_users_doomed'}>200) { + $trouble='High user volume.
'; + } + if ($host{$local.'_mysql_doomed'}>1) { + $trouble='MySQL database apparently offline.
'; + } + if ($host{$local.'_checkrpms_doomed'}>100) { + $trouble='RPMs outdated.
'; + } + if ($trouble) { $count++; &serverstatus($local,$trouble); } + } + unless ($count) { print "No mayor trouble."; } } # ============================================================== Close, refresh print ""; exit 0; 500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.