--- loncom/cgi/clusterstatus.pl 2003/07/31 15:35:02 1.9
+++ loncom/cgi/clusterstatus.pl 2003/08/01 19:20:26 1.14
@@ -3,7 +3,7 @@ $|=1;
# The LearningOnline Network with CAPA
# Cluster Status
#
-# $Id: clusterstatus.pl,v 1.9 2003/07/31 15:35:02 www Exp $
+# $Id: clusterstatus.pl,v 1.14 2003/08/01 19:20:26 www Exp $
use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;
@@ -19,6 +19,7 @@ my %connectionstatus=();
my %perlvar=();
my $mode;
+my $concount=0;
sub select_form {
my ($def,$name,%hash) = @_;
@@ -47,6 +48,7 @@ sub hidden {
sub request {
my ($local,$url,$cachetime)=@_;
+ $cachetime*=(0.5+rand);
my $key=&key($local,$url);
my $reply='';
if ($FORM{$key.'_time'}) {
@@ -61,7 +63,7 @@ sub request {
$reply='local_unknown';
} else {
- my $ua=new LWP::UserAgent(timeout => 20);
+ my $ua=new LWP::UserAgent(timeout => 10);
my $request=new HTTP::Request('GET',
"http://".$hostname{$local}.$url);
@@ -91,10 +93,15 @@ sub connected {
unless ($hostname{$remote}) { return 'remote_unknown'; }
my $url='/cgi-bin/ping.pl?'.$remote;
#
-# Slowly phase this in: if not cached, only do 10 percent of the cases
+# Slowly phase this in: if not cached, only do 5 percent of the cases,
+# but always do the first five.
#
unless ($FORM{&key($local,$url)}) {
- unless (rand>0.9) { return 'not_yet'; }
+ unless (($concount<5) || (rand>0.95)) { # the first five uncached pings always run (counter starts at 0); later ones only with 5% probability
+ return 'not_yet'; # skipped this time; caller sees the 'not_yet' status
+ } else {
+ $concount++; # count every uncached ping that is actually attempted
+ }
}
#
# Actually do the query
@@ -169,20 +176,36 @@ sub server {
print &otherwindow($local,'/server-status','Server Status');
}
+# ========================================================= Produce a green bar
+sub bar { # prints a bar made of "+" characters, wrapped by a leading and a trailing output string
+ my $parm=shift; # requested bar length (numeric)
+ my $number=int($parm+0.5); # nearest whole number of segments for non-negative input (int truncates toward zero)
+ print "
"; # output emitted before the bar segments
+ for (my $i=0;$i<$number;$i++) { # one "+" per segment; nothing is printed when $number is 0 or less
+ print "+";
+ }
+ print " |
"; # output emitted after the bar segments
+}
+
# ========================================================== Show server status
sub serverstatus {
- my $local=shift;
+ my ($local,$trouble)=@_;
print (<
$local $hostdom{$local} ($hostname{$local}; $hostrole{$local})
$domaindescription{$hostdom{$local}}
- |
+ |
ENDHEADER
&login($local);&server($local);&users($local);&versions($local);
&loncron($local);&lond($local);&lonc($local);&runloncron($local);
- print " |
";
+ print " |
";
+ if ($trouble) {
+ print ("$trouble |
");
+ }
+ print "";
# load
if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) {
print " Load: ".$host{$local.'_load'}
@@ -200,6 +223,19 @@ ENDHEADER
if ($host{$local.'_mysql'}) {
print " MySQL Database: ".$host{$local.'_mysql'}
}
+# connections
+ if ($host{$local.'_notconnected'}) {
+ print " Not connected: ";
+ foreach (split(/ /,$host{$local.'_notconnected'})) {
+ if ($_) {
+ print " $_";
+ }
+ }
+ }
+# errors
+ if ($host{$local.'_errors'}) {
+ print " loncron errors: ".$host{$local.'_errors'};
+ }
print " |
";
}
@@ -256,7 +292,7 @@ foreach $pair (@pairs) {
# ====================================================== Determine refresh rate
-my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:60);
+my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:120);
if ($refresh<30) { $refresh=30; }
my $starttime=time;
@@ -271,13 +307,14 @@ my %modes=('trouble' => 'Trouble',
'load_doomed' => 'Doomed: Load',
'unresponsive_doomed' => 'Doomed: Status could not be determined',
'users' => 'User Report',
+ 'load' => 'Load Report',
'connections' => 'Connections Matrix');
$mode=$FORM{'mode'};
unless ($modes{$mode}) { $mode='trouble'; }
# ================================================================ Send Headers
print "Content-type: text/html\n\n".
- "\n";
+ "\n";
# -------------------- Read loncapa.conf (and by default, loncapa_apache.conf).
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
%perlvar=%{$perlvarref};
@@ -323,8 +360,7 @@ delete $perlvar{'lonSqlAccess'}; # remov
}
}
-
-print "Cluster Status ".localtime()."
";
+print "LON-CAPA Cluster Status ".localtime()."
";
print "\n";;
@@ -334,6 +370,10 @@ print 'Choose next report: '.&select_for
# ==================================================== Main Loop over all Hosts
+my $maxusers=0;
+my $maxload=0;
+my $totalusers=0;
+
foreach $local (sort keys %hostname) {
$host{$local.'_unresponsive_doomed'}=0;
# -- Check general status
@@ -347,6 +387,10 @@ foreach $local (sort keys %hostname) {
$host{$local.'_loncron'}='Stale.';
$host{$local.'_unresponsive_doomed'}++;
} else {
+ $host{$local.'_loncron_doomed'}=$loncron{'notices'}
+ +4*$loncron{'warnings'}
+ +100*$loncron{'errors'};
+ $host{$local.'_errors'}=$loncron{'errors'};
}
}
# -- Check user status
@@ -358,8 +402,16 @@ foreach $local (sort keys %hostname) {
} else {
$host{$local.'_users_doomed'}=$userstatus{'Active'};
$host{$local.'_users'}=$userstatus{'Active'};
+ unless ($host{$local.'_users'}) { $host{$local.'_users'}=0; }
+ if ($host{$local.'_users'}>$maxusers) {
+ $maxusers=$host{$local.'_users'};
+ }
+ $totalusers+=$host{$local.'_users'};
my ($sload,$mload,$lload)=split(/ /,$userstatus{'loadavg'});
$host{$local.'_load_doomed'}=$mload;
+ if ($mload>$maxload) {
+ $maxload=$mload;
+ }
$host{$local.'_load'}=$userstatus{'loadavg'};
}
# -- Check mysql status
@@ -424,15 +476,131 @@ foreach $local (sort keys %hostname) {
foreach (&doomedness($mode)) {
&serverstatus($_);
}
+ } elsif ($mode eq 'connections') {
+ print
+ "".
+ " | ";
+ foreach my $remote (sort keys %hostname) {
+ print ''.$remote.' | ';
+ }
+ print "
\n";
+# connection matrix
+ foreach my $local (sort keys %hostname) {
+ print ''.$local.' | ';
+ foreach my $remote (sort keys %hostname) {
+ if ($connectionstatus{$local.'_TO_'.$remote} eq 'not_yet') {
+ my $cellcolor='#FFFFFF';
+ if ($local eq $remote) { $cellcolor='#DDDDDD'; }
+ print 'not yet tested | ';
+ } elsif ($connectionstatus{$local.'_TO_'.$remote} eq 'ok') {
+ my $cellcolor='#BBDDBB';
+ if ($local eq $remote) { $cellcolor='#99DD99'; }
+ print
+'ok | ';
+ } else {
+ my $cellcolor='#DDBBBB';
+ if ($connectionstatus{$local.'_TO_'.$remote} eq 'local_error') {
+ if ($local eq $remote) {
+ $cellcolor='#DD88AA';
+ } else {
+ $cellcolor='#DDAACC';
+ }
+ } else {
+ if ($local eq $remote) { $cellcolor='#DD9999'; }
+ }
+ print
+ ''.
+ $connectionstatus{$local.'_TO_'.$remote}.' ';
+ &lonc($local); &lond($remote);
+ print ' | ';
+ }
+ }
+ print "
\n";
+ }
+ print "
";
+ } elsif ($mode eq 'users') {
+# Users
+ if ($maxusers) {
+ my $factor=50/$maxusers;
+ print "Total active user(s): $totalusers
".
+ "";
+
+ foreach $local (sort keys %hostname) {
+ if (defined($host{$local.'_users'})) {
+ print
+''.$local.
+ ' | ';
+ &users($local);
+ print
+ ' | '.
+ $host{$local.'_users'}.' | |
\n";
+ }
+ }
+ print "
";
+ } else {
+ print "No active users logged in.";
+ }
+ } elsif ($mode eq 'load') {
+# Load
+ if ($maxload) {
+ my $factor=50/$maxload;
+ print
+ "";
+ foreach $local (sort keys %hostname) {
+ if (defined($host{$local.'_load_doomed'})) {
+ print
+''.
+ $local.
+ ' | ';
+ &server($local);
+ print
+ ' | '.
+ $host{$local.'_load_doomed'}.' | |
\n";
+ }
+ }
+ print "
";
+ } else {
+ print "No workload.";
+ }
+ } elsif ($mode eq 'trouble') {
+ my $count=0; # number of hosts reported as troubled
+ foreach $local (sort keys %hostname) {
+ my $trouble=''; # accumulates every problem found for this host
+ if ($host{$local.'_errors'}) { # loncron errors take precedence over a merely high loncron score
+ $trouble.='Has loncron errors.
';
+ } elsif ($host{$local.'_loncron_doomed'}>600) {
+ $trouble.='High loncron count.
';
+ }
+ if ($host{$local.'_load_doomed'}>5) { # append rather than assign so earlier findings are not lost
+ $trouble.='High load.
';
+ }
+ if ($host{$local.'_users_doomed'}>200) {
+ $trouble.='High user volume.
';
+ }
+ if ($host{$local.'_mysql_doomed'}>1) {
+ $trouble.='MySQL database apparently offline.
';
+ }
+ if ($host{$local.'_checkrpms_doomed'}>100) {
+ $trouble.='RPMs outdated.
';
+ }
+ if ($trouble) { $count++; &serverstatus($local,$trouble); } # show the host together with all of its problems
+ }
+ unless ($count) { print "No major trouble."; } # nothing was flagged on any host
}
# ============================================================== Close, refresh
print "";
exit 0;
500 Internal Server Error
Internal Server Error
The server encountered an internal error or
misconfiguration and was unable to complete
your request.
Please contact the server administrator at
root@localhost to inform them of the time this error occurred,
and the actions you performed just before this error.
More information about this error may be available
in the server error log.