#!/usr/bin/perl $|=1; # The LearningOnline Network with CAPA # Cluster Status # # $Id: clusterstatus.pl,v 1.14 2003/08/01 19:20:26 www Exp $ use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; use LWP::UserAgent(); use HTTP::Headers; use IO::File; my %host=(); my $oneday=60*60*24; my %connectionstatus=(); my %perlvar=(); my $mode; my $concount=0; sub select_form { my ($def,$name,%hash) = @_; my $selectform = ""; return $selectform; } sub key { my ($local,$url)=@_; my $key=$local.'_'.$url; $key=~s/\W/\_/gs; return $key; } sub hidden { my ($name,$value)=@_; print "\n"; } sub request { my ($local,$url,$cachetime)=@_; $cachetime*=(0.5+rand); my $key=&key($local,$url); my $reply=''; if ($FORM{$key.'_time'}) { if ((time-$FORM{$key.'_time'})<$cachetime) { $reply=$FORM{$key}; &hidden($key.'_time',$FORM{$key.'_time'}); &hidden($key.'_fromcache',1); } } unless ($reply) { unless ($hostname{$local}) { $reply='local_unknown'; } else { my $ua=new LWP::UserAgent(timeout => 10); my $request=new HTTP::Request('GET', "http://".$hostname{$local}.$url); $request->authorization_basic('lonadm','litelite'); my $response=$ua->request($request); unless ($response->is_success) { $reply='local_error'; } else { $reply=$response->content; chomp($reply); } } &hidden($key.'_time',time); } &hidden($key,$reply); return $reply; } # ============================================= Are local and remote connected? sub connected { my ($local,$remote)=@_; $local=~s/\W//g; $remote=~s/\W//g; unless ($hostname{$remote}) { return 'remote_unknown'; } my $url='/cgi-bin/ping.pl?'.$remote; # # Slowly phase this in: if not cached, only do 5 percent of the cases, # but always do the first five. # unless ($FORM{&key($local,$url)}) { unless (($concount<=5) || (rand>0.95)) { return 'not_yet'; } else { $concount++; } } # # Actually do the query # &statuslist($local,'connecting '.$remote); my $reply=&request($local,$url,3600); $reply=(split("\n",$reply))[0]; $reply=~s/\W//g; if ($reply ne $remote) { return $reply; } return 'ok'; } # ============================================================ Get a reply hash sub replyhash { my %returnhash=(); foreach (split(/\&/,&request(@_))) { my ($name,$value)=split(/\=/,$_); if ($name) { unless ($value) { $value=''; } $returnhash{$name}=$value; } } return %returnhash; } # ================================================================ Link to host sub otherwindow { my ($local,$url,$label)=@_; return " $label "; } sub login { my $local=shift; print &otherwindow($local,'/adm/login?domain='.$perlvar{'lonDefDomain'}, 'Login'); } sub runloncron { my $local=shift; print &otherwindow($local,'/cgi-bin/loncron.pl','Run loncron'); } sub loncron { my $local=shift; print &otherwindow($local,'/lon-status','loncron'); } sub lonc { my $local=shift; print &otherwindow($local,'/lon-status/loncstatus.txt','lonc'); } sub lond { my $local=shift; print &otherwindow($local,'/lon-status/londstatus.txt','lond'); } sub users { my $local=shift; print &otherwindow($local,'/cgi-bin/userstatus.pl','Users'); } sub versions { my $local=shift; print &otherwindow($local,'/cgi-bin/lonversions.pl','Versions'); } sub server { my $local=shift; print &otherwindow($local,'/server-status','Server Status'); } # ========================================================= Produce a green bar sub bar { my $parm=shift; my $number=int($parm+0.5); print "
"; for (my $i=0;$i<$number;$i++) { print "+"; } print "
"; } # ========================================================== Show server status sub serverstatus { my ($local,$trouble)=@_; print (< "; if ($trouble) { print (""); } print "
$local $hostdom{$local} ($hostname{$local}; $hostrole{$local})
$domaindescription{$hostdom{$local}}
ENDHEADER &login($local);&server($local);&users($local);&versions($local); &loncron($local);&lond($local);&lonc($local);&runloncron($local); print "
$trouble
"; # load if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) { print "
Load: ".$host{$local.'_load'} } # users if (($host{$local.'_users_doomed'}>10) || ($mode eq 'users_doomed')) { print "
Active Users: ".$host{$local.'_users'} } # checkrpms if ($host{$local.'_checkrpms'}) { print "
RPMs: ".$host{$local.'_checkrpms'} } # mysql if ($host{$local.'_mysql'}) { print "
MySQL Database: ".$host{$local.'_mysql'} } # connections if ($host{$local.'_notconnected'}) { print "
Not connected: "; foreach (split(/ /,$host{$local.'_notconnected'})) { if ($_) { print " $_"; } } } # errors if ($host{$local.'_errors'}) { print "
loncron errors: ".$host{$local.'_errors'}; } print "

"; } # =========================================================== Doomedness sorted sub doomedness { my $crit=shift; my %alldoomed=(); my @allhosts=(); foreach (keys %host) { if ($_=~/^(\w+)\_$crit$/) { if ($host{$_}) { push (@allhosts,$1); $alldoomed{$1}=$host{$_}; } } } return sort { $alldoomed{$b} <=> $alldoomed{$a} } @allhosts; } # ====================================================================== Status sub statuslist { my ($local,$what)=@_; print "\n"; } # # Main program # # ========================================================= Get form parameters my $buffer; read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'}); my @pairs=split(/&/,$buffer); my $pair; my $name; my $value; undef %FORM; %FORM=(); foreach $pair (@pairs) { ($name,$value) = split(/=/,$pair); $value =~ tr/+/ /; $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; $FORM{$name}=$value; } $buffer=$ENV{'QUERY_STRING'}; @pairs=split(/&/,$buffer); foreach $pair (@pairs) { ($name,$value) = split(/=/,$pair); $value =~ tr/+/ /; $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; $FORM{$name}=$value; } # ====================================================== Determine refresh rate my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:120); if ($refresh<30) { $refresh=30; } my $starttime=time; # ============================================================== Determine mode my %modes=('trouble' => 'Trouble', 'users_doomed' => 'Doomed: Users', 'loncron_doomed' => 'Doomed: General (loncron)', 'mysql_doomed' => 'Doomed: Database (mysql)', 'notconnected_doomed' => 'Doomed: Connections', 'checkrpms_doomed' => 'Doomed: RPMs', 'load_doomed' => 'Doomed: Load', 'unresponsive_doomed' => 'Doomed: Status could not be determined', 'users' => 'User Report', 'load' => 'Load Report', 'connections' => 'Connections Matrix'); $mode=$FORM{'mode'}; unless ($modes{$mode}) { $mode='trouble'; } # ================================================================ Send Headers print "Content-type: text/html\n\n". "\n"; # -------------------- Read loncapa.conf (and by default, loncapa_apache.conf). my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); %perlvar=%{$perlvarref}; undef $perlvarref; # remove since sensitive and not needed delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed # ------------------------------------------------------------- Read hosts file { my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab"); $total=0; while (my $configline=<$config>) { $configline=~s/#.*$//; unless ($configline=~/\w/) { next; } my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); $hostname{$id}=$name; $hostdom{$id}=$domain; $hostrole{$id}=$role; $hostip{$id}=$ip; $total++; if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) { $libserv{$id}=$name; } } } # ------------------------------------------------------------ Read domain file { my $fh=IO::File->new($perlvar{'lonTabDir'}.'/domain.tab'); %domaindescription = (); %domain_auth_def = (); %domain_auth_arg_def = (); if ($fh) { while (<$fh>) { next if (/^(\#|\s*$)/); chomp; my ($domain, $domain_description, $def_auth, $def_auth_arg) = split(/:/,$_,4); $domain_auth_def{$domain}=$def_auth; $domain_auth_arg_def{$domain}=$def_auth_arg; $domaindescription{$domain}=$domain_description; } } } print "

LON-CAPA Cluster Status ".localtime()."

"; print "
\n". "
". "
\n";; print "
\n"; print 'Choose next report: '.&select_form($mode,'mode',%modes).'
'; &hidden('refresh',$refresh); # ==================================================== Main Loop over all Hosts my $maxusers=0; my $maxload=0; my $totalusers=0; foreach $local (sort keys %hostname) { $host{$local.'_unresponsive_doomed'}=0; # -- Check general status &statuslist($local,'General'); my %loncron=&replyhash($local,'/lon-status/loncron_simple.txt',1200); if (defined($loncron{'local_error'})) { $host{$local.'_loncron'}='Could not determine.'; $host{$local.'_unresponsive_doomed'}++; } else { if ((time-$loncron{'time'})>$oneday) { $host{$local.'_loncron'}='Stale.'; $host{$local.'_unresponsive_doomed'}++; } else { $host{$local.'_loncron_doomed'}=$loncron{'notices'} +4*$loncron{'warnings'} +100*$loncron{'errors'}; $host{$local.'_errors'}=$loncron{'errors'}; } } # -- Check user status &statuslist($local,'Users'); my %userstatus=&replyhash($local,'/cgi-bin/userstatus.pl?simple',600); if (defined($userstatus{'local_error'})) { $host{$local.'_userstatus'}='Could not determine.'; $host{$local.'_unresponsive_doomed'}++; } else { $host{$local.'_users_doomed'}=$userstatus{'Active'}; $host{$local.'_users'}=$userstatus{'Active'}; unless ($host{$local.'_users'}) { $host{$local.'_users'}=0; } if ($host{$local.'_users'}>$maxusers) { $maxusers=$host{$local.'_users'}; } $totalusers+=$host{$local.'_users'}; my ($sload,$mload,$lload)=split(/ /,$userstatus{'loadavg'}); $host{$local.'_load_doomed'}=$mload; if ($mload>$maxload) { $maxload=$mload; } $host{$local.'_load'}=$userstatus{'loadavg'}; } # -- Check mysql status &statuslist($local,'Database'); my %mysql=&replyhash($local,'/lon-status/mysql.txt',3600); if (defined($mysql{'local_error'})) { $host{$local.'_mysql'}='Could not determine.'; $host{$local.'_unresponsive_doomed'}++; } else { if ((time-$mysql{'time'})>(7*$oneday)) { if ($hostrole{$local} eq 'library') { $host{$local.'_mysql'}='Stale.'; $host{$local.'_mysql_doomed'}=1; } if ($mysql{'mysql'} eq 'defunct') { $host{$local.'_mysql'}='Defunct (maybe stale).'; $host{$local.'_mysql_doomed'}=2; } } elsif ($mysql{'mysql'} eq 'defunct') { $host{$local.'_mysql'}='Defunct.'; $host{$local.'_mysql_doomed'}=3; } } # -- Check rpm status &statuslist($local,'RPMs'); my %checkrpms=&replyhash($local,'/lon-status/checkrpms.txt',7200); if (defined($checkrpms{'local_error'})) { $host{$local.'_checkrpms'}='Could not determine.'; $host{$local.'_unresponsive_doomed'}++; } else { if ((time-$checkrpms{'time'})>(4*$oneday)) { $host{$local.'_checkrpms'}='Stale.'; $host{$local.'_checkrpms_doomed'}=50; $host{$local.'_unresponsive_doomed'}++; } elsif ($checkrpms{'status'} eq 'fail') { $host{$local.'_checkrpms'}='Could not checked RPMs.'; $host{$local.'_checkrpms_doomed'}=100; } elsif ($checkrpms{'rpmcount'}) { $host{$local.'_checkrpms'}='Outdated RPMs: '. $checkrpms{'rpmcount'}; $host{$local.'_checkrpms_doomed'}=$checkrpms{'rpmcount'}; } } # -- Check connections &statuslist($local,'Connections'); $host{$local.'_notconnected'}=''; $host{$local.'_notconnected_doomed'}=0; foreach $remote (sort keys %hostname) { my $status=&connected($local,$remote); $connectionstatus{$local.'_TO_'.$remote}=$status; unless (($status eq 'ok') || ($status eq 'not_yet')) { $host{$local.'_notconnected'}.=' '.$remote; $host{$local.'_notconnected_doomed'}++; } } # =============================================================== End Mail Loop } &statuslist('Done.'); # ====================================================================== Output if ($mode=~/\_doomed$/) { # Output by doomedness foreach (&doomedness($mode)) { &serverstatus($_); } } elsif ($mode eq 'connections') { print "". ""; foreach my $remote (sort keys %hostname) { print ''; } print "\n"; # connection matrix foreach my $local (sort keys %hostname) { print ''; foreach my $remote (sort keys %hostname) { if ($connectionstatus{$local.'_TO_'.$remote} eq 'not_yet') { my $cellcolor='#FFFFFF'; if ($local eq $remote) { $cellcolor='#DDDDDD'; } print ''; } elsif ($connectionstatus{$local.'_TO_'.$remote} eq 'ok') { my $cellcolor='#BBDDBB'; if ($local eq $remote) { $cellcolor='#99DD99'; } print ''; } else { my $cellcolor='#DDBBBB'; if ($connectionstatus{$local.'_TO_'.$remote} eq 'local_error') { if ($local eq $remote) { $cellcolor='#DD88AA'; } else { $cellcolor='#DDAACC'; } } else { if ($local eq $remote) { $cellcolor='#DD9999'; } } print ''; } } print "\n"; } print "
 '.$remote.'
'.$local.'not yet testedok'. $connectionstatus{$local.'_TO_'.$remote}.'
'; &lonc($local); &lond($remote); print '
"; } elsif ($mode eq 'users') { # Users if ($maxusers) { my $factor=50/$maxusers; print "

Total active user(s): $totalusers

". ""; foreach $local (sort keys %hostname) { if (defined($host{$local.'_users'})) { print '\n"; } } print "
'.$local. ''; &users($local); print ''. $host{$local.'_users'}.'
"; } else { print "No active users logged in."; } } elsif ($mode eq 'load') { # Load if ($maxload) { my $factor=50/$maxload; print ""; foreach $local (sort keys %hostname) { if (defined($host{$local.'_load_doomed'})) { print '\n"; } } print "
'. $local. ''; &server($local); print ''. $host{$local.'_load_doomed'}.'
"; } else { print "No workload."; } } elsif ($mode eq 'trouble') { my $count=0; foreach $local (sort keys %hostname) { my $trouble=''; if ($host{$local.'_errors'}) { $trouble='Has loncron errors.
'; } elsif ($host{$local.'_loncron_doomed'}>600) { $trouble='High loncron count.
'; } if ($host{$local.'_load_doomed'}>5) { $trouble='High load.
'; } if ($host{$local.'_users_doomed'}>200) { $trouble='High user volume.
'; } if ($host{$local.'_mysql_doomed'}>1) { $trouble='MySQL database apparently offline.
'; } if ($host{$local.'_checkrpms_doomed'}>100) { $trouble='RPMs outdated.
'; } if ($trouble) { $count++; &serverstatus($local,$trouble); } } unless ($count) { print "No mayor trouble."; } } # ============================================================== Close, refresh print "
"; exit 0; 500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.