File:  [LON-CAPA] / loncom / build / filecompare.pl
Revision 1.4: download - view: text, annotated - select for diffs
Fri Nov 16 20:06:08 2001 UTC (22 years, 6 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
fixes to the formatting and documentation -Scott

    1: #!/usr/bin/perl
    2: 
    3: # The LearningOnline Network with CAPA
    4: #
    5: # filecompare.pl - script used to help probe and compare file statistics
    6: #
    7: # YEAR=2001
    8: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
    9: # 11/14 Guy Albertelli
   10: # 11/16 Scott Harrison
   11: #
   12: # $Id: filecompare.pl,v 1.4 2001/11/16 20:06:08 harris41 Exp $
   13: ###
   14: 
   15: # ------------------------------------------------------------------ Invocation
   16: my $invocation=<<END;
   17: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
   18: or
   19: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
   20: 
   21: Restrictions: a list of space separated values (after the file/dir names)
   22: can restrict the comparison.
   23: These values can be: existence, cvstime, age, md5sum, size, lines,
   24: and/or diffs.
   25: 
   26: Options (before file/dir names):
   27: -p show all files that have the same comparison
   28: -n show all files that have different comparisons
   29: -a show all files (with comparisons)
   30: -q only show file names (based on first file/dir)
   31: -v verbose mode (default)
   32: END
   33: unless (@ARGV) {
   34:     print $invocation;
   35:     exit 1;
   36: }
   37: # ----------------------------------------------------------------------- Notes
   38: #
   39: # What are all the different ways to compare two files and how to look
   40: # at the differences?
   41: #
   42: # Ways of comparison:
   43: #   existence similarity
   44: #   cvs time similarity (first argument treated as CVS source)
   45: #   age similarity (modification time)
   46: #   md5sum similarity
   47: #   size similarity (bytes)
   48: #   line count difference
   49: #   number of different lines
   50: #
   51: # Quantities of comparison:
   52: #   existence (no,yes); other values become 'n/a'
   53: #   cvstime in seconds
   54: #   age in seconds
   55: #   md5sum ("same" or "different")
   56: #   size similarity (byte difference)
   57: #   line count difference (integer)
   58: #   number of different lines (integer)
   59: #   
   60: # Text output of comparison:
   61: #   existence VALUE
   62: #   cvstime VALUE
   63: #   age VALUE
   64: #   md5sum VALUE
   65: #   size VALUE
   66: #   lines VALUE
   67: #   diffs VALUE
   68: #
   69: # Output of comparison:
   70: #   exist
   71: #   if md5sum not same, then different
   72: #   if cvstime not 0, then older/newer
   73: #   if age not 0, then older/newer
   74: #   if size not 0, then bigger/smaller
   75: #   if lines not 0, then more lines of code/less lines of code
   76: #   if diffs not 0, then subtracted lines/added lines/changed lines
   77: 
   78: # implementing from unix command line (assuming bash)
   79: # md5sum, diff, wc -l
   80: 
   81: # ---------------------------------------------- Process command line arguments
   82: # Flags (before file/dir names):
   83: # -p show all files the same
   84: # -n show all files different
   85: # -a show all files (with comparisons)
   86: # -q only show file names (based on first file/dir)
   87: # -v verbose mode (default)
   88: # -b build/install mode (returns exitcode)
   89: my $verbose='1';
   90: my $show='all';
   91: my $buildmode=0;
   92: while (@ARGV) {
   93:     my $flag;
   94:     if ($ARGV[0]=~/^\-(\w)/) {
   95: 	$flag=$1;
   96: 	shift @ARGV;
   97:       SWITCH: {
   98: 	  $verbose=0, last SWITCH if $flag eq 'q';
   99: 	  $verbose=1, last SWITCH if $flag eq 'v';
  100: 	  $show='same', last SWITCH if $flag eq 'p';
  101: 	  $show='different', last SWITCH if $flag eq 'n';
  102: 	  $show='all', last SWITCH if $flag eq 'a';
  103: 	  $buildmode=1, last SWITCH if $flag eq 'b';
  104: 	  $buildmode=2, last SWITCH if $flag eq 'B';
  105: 	  $buildmode=3, last SWITCH if $flag eq 'g';
  106: 	  $buildmode=4, last SWITCH if $flag eq 'G';
  107: 	  print($invocation), exit(1);
  108:       }
  109:     }
  110:     else {
  111: 	last;
  112:     }
  113: }
  114: dowarn('Verbose: '.$verbose."\n");
  115: dowarn('Show: '.$show."\n");
  116: 
  117: # FILE1 FILE2 or DIR1 DIR2
  118: my $loc1=shift @ARGV;
  119: my $loc2=shift @ARGV;
  120: my $dirmode='directories';
  121: my @files;
  122: unless ($loc1 and $loc2) {
  123:     print($invocation), exit(1);
  124: }
  125: if (-f $loc1) {
  126:     $dirmode='files';
  127:     @files=($loc1);
  128: }
  129: else {
  130:     if (-e $loc1) {
  131: 	@files=`find $loc1 -type f`;
  132:     }
  133:     else {
  134: 	@files=($loc1);
  135:     }
  136:     map {chomp; s/^$loc1\///; $_} @files;
  137: }
  138: dowarn('Processing for mode: '.$dirmode."\n");
  139: dowarn('Location #1: '.$loc1."\n");
  140: dowarn('Location #2: '.$loc2."\n");
  141: 
  142: # A list of space separated values (after the file/dir names)
  143: # can restrict the comparison.
  144: my %restrict;
  145: while (@ARGV) {
  146:     my $r=shift @ARGV;
  147:     if ($r eq 'existence' or
  148: 	$r eq 'cvstime' or
  149: 	$r eq 'md5sum' or
  150: 	$r eq 'age' or
  151: 	$r eq 'size' or
  152: 	$r eq 'lines' or
  153: 	$r eq 'diffs') {
  154: 	$restrict{$r}=1;
  155:     }
  156:     else {
  157: 	print($invocation), exit(1);
  158:     }
  159: }
  160: if (%restrict) {
  161:     warn('Restricting comparison to: '.
  162: 	 join(' ',keys %restrict)."\n");
  163: }
  164: 
  165: my %OUTPUT=(
  166:          'existence'=>( sub {print 'existence: '.@_[0]; return;}),
  167: 	 'md5sum'=>(sub {print 'md5sum: '.@_[0];return;}),
  168:          'cvstime'=>(sub {print 'cvstime: '.@_[0];return;}),
  169:          'age'=>(sub {print 'age: '.@_[0];return;}),
  170:          'size'=>(sub {print 'size: '.@_[0];return;}),
  171:          'lines'=>(sub {print 'lines: '.@_[0];return;}),
  172:          'diffs'=>(sub {print 'diffs: '.@_[0];return;}),
  173: );
  174: 
  175: my %MEASURE=(
  176: 	 'existence' => ( sub { my ($file1,$file2)=@_;
  177: 		        my $rv1=(-e $file1)?'yes':'no';
  178: 			my $rv2=(-e $file2)?'yes':'no';
  179: 			return ($rv1,$rv2); } ),
  180: 	 'md5sum'=>( sub { my ($file1,$file2)=@_;
  181: 			my ($rv1)=split(/ /,`md5sum $file1`); chop $rv1;
  182: 			my ($rv2)=split(/ /,`md5sum $file2`); chop $rv2;
  183: 			return ($rv1,$rv2); } ),
  184: 	 'cvstime'=>( sub { my ($file1,$file2)=@_;
  185: 			my $rv1=&cvstime($file1);
  186: 			my @a=stat($file2); my $gmt=gmtime($a[9]);
  187: 			my $rv2=&utctime($gmt);
  188: 			return ($rv1,$rv2); } ),
  189:          'age'=>( sub {	my ($file1,$file2)=@_;
  190: 			my @a=stat($file1); my $rv1=$a[9];
  191: 			@a=stat($file2); my $rv2=$a[9];
  192: 			return ($rv1,$rv2); } ),
  193:          'size'=>( sub { my ($file1,$file2)=@_;
  194: 			my @a=stat($file1); my $rv1=$a[7];
  195: 			@a=stat($file2); my $rv2=$a[7];
  196: 			return ($rv1,$rv2); } ),
  197:          'lines'=>( sub { my ($file1,$file2)=@_;
  198: 			my $rv1=`wc -l $file1`; chop $rv1;
  199: 			my $rv2=`wc -l $file2`; chop $rv2;
  200: 			return ($rv1,$rv2); } ),
  201:          'diffs'=>( sub { my ($file1,$file2)=@_;
  202: 			my $rv1=`diff $file1 $file2 | grep '^<' | wc -l`;
  203: 			chop $rv1; $rv1=~s/^\s+//; $rv1=~s/\s+$//;
  204: 			my $rv2=`diff $file1 $file2 | grep '^>' | wc -l`;
  205: 			chop $rv2; $rv2=~s/^\s+//; $rv2=~s/\s+$//;
  206: 			return ($rv1,$rv2); } ),
  207: );
  208: 
  209: FLOP: foreach my $file (@files) {
  210:     my $file1;
  211:     my $file2;
  212:     if ($dirmode eq 'directories') {
  213:         $file1=$loc1.'/'.$file;
  214:         $file2=$loc2.'/'.$file;
  215:     }
  216:     else {
  217:         $file1=$loc1;
  218:         $file2=$loc2;
  219:     }
  220:     my ($existence1,$existence2)=&{$MEASURE{'existence'}}($file1,$file2);
  221:     my $existence=$existence1.':'.$existence2;
  222:     my ($cvstime,$md5sum,$age,$size,$lines,$diffs);
  223:     if ($existence1 eq 'no' or $existence2 eq 'no') {
  224:         $md5sum='n/a';
  225:         $age='n/a';
  226:         $cvstime='n/a';
  227:         $size='n/a';
  228:         $lines='n/a';
  229:         $diffs='n/a';
  230:     }
  231:     else {
  232:         my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2);
  233:         $cvstime=$cvstime1-$cvstime2;
  234:         my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2);
  235:         $age=$age1-$age2;
  236:         my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2);
  237:         if ($md5sum1 eq $md5sum2) {
  238:             $md5sum='same';
  239:             $size=0;
  240:             $lines=0;
  241:             $diffs=0;
  242: 	}
  243:         elsif ($md5sum1 ne $md5sum2) {
  244:             $md5sum='different';
  245:             my ($size1,$size2)=&{$MEASURE{'size'}}($file1,$file2);
  246:             $size=$size1-$size2;
  247:             my ($lines1,$lines2)=&{$MEASURE{'lines'}}($file1,$file2);
  248:             $lines=$lines1-$lines2;
  249:             my ($diffs1,$diffs2)=&{$MEASURE{'diffs'}}($file1,$file2);
  250:             $diffs=$diffs1.':'.$diffs2;
  251:         }
  252:     }
  253:     my $showflag=0;
  254:     if ($show eq 'all') {
  255:         $showflag=1;
  256:     }
  257:     if ($show eq 'different') {
  258:         my @ks=(keys %restrict);
  259:         unless (@ks) {
  260: 	    @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
  261: 	}
  262:         FLOP2: for my $key (@ks) {
  263: 	    if ($key eq 'existence') {
  264: 		if ($existence ne 'yes:yes') {
  265: 		    $showflag=1;
  266: 		}
  267: 	    }
  268: 	    elsif ($key eq 'md5sum') {
  269: 		if ($md5sum ne 'same') {
  270: 		    $showflag=1;
  271: 		}
  272: 	    }
  273: 	    elsif ($key eq 'cvstime') {
  274: 		if ($cvstime!=0) {
  275: 		    $showflag=1;
  276: 		}
  277: 	    }
  278: 	    elsif ($key eq 'age') {
  279: 		if ($age!=0) {
  280: 		    $showflag=1;
  281: 		}
  282: 	    }
  283: 	    elsif ($key eq 'size') {
  284: 		if ($size!=0) {
  285: 		    $showflag=1;
  286: 		}
  287: 	    }
  288: 	    elsif ($key eq 'lines') {
  289: 		if ($lines!=0) {
  290: 		    $showflag=1;
  291: 		}
  292: 	    }
  293: 	    elsif ($key eq 'diffs') {
  294: 		if ($diffs ne '0:0') {
  295: 		    $showflag=1;
  296: 		}
  297: 	    }
  298: 	    if ($showflag) {
  299: 		last FLOP2;
  300: 	    }
  301:         }
  302:     }
  303:     elsif ($show eq 'same') {
  304:         my @ks=(keys %restrict);
  305:         unless (@ks) {
  306: 	    @ks=('existence','md5sum','cvstime','age','size','lines','diffs');
  307: 	}
  308:         my $showcount=length(@ks);
  309:         FLOP3: for my $key (@ks) {
  310: 	    if ($key eq 'existence') {
  311: 		if ($existence ne 'yes:yes') {
  312: 		    $showcount--;
  313: 		}
  314: 	    }
  315: 	    elsif ($key eq 'md5sum') {
  316: 		if ($md5sum ne 'same') {
  317: 		    $showcount--;
  318: 		}
  319: 	    }
  320: 	    elsif ($key eq 'cvstime') {
  321: 		if ($cvstime!=0) {
  322: 		    $showcount--;
  323: 		}
  324: 	    }
  325: 	    elsif ($key eq 'age') {
  326: 		if ($age!=0) {
  327: 		    $showcount--;
  328: 		}
  329: 	    }
  330: 	    elsif ($key eq 'size') {
  331: 		if ($size!=0) {
  332: 		    $showcount--;
  333: 		}
  334: 	    }
  335: 	    elsif ($key eq 'lines') {
  336: 		if ($lines!=0) {
  337: 		    $showcount--;
  338: 		}
  339: 	    }
  340: 	    elsif ($key eq 'diffs') {
  341: 		if ($diffs ne '0:0') {
  342: 		    $showcount--;
  343: 		}
  344: 	    }
  345:         }
  346:         if ($showcount==0) {
  347: 	    $showflag=1;
  348: 	}
  349:     }
  350:     if ($buildmode==1) {
  351:         if ($md5sum eq 'same') {
  352: 	    exit(1);
  353: 	}
  354:         elsif ($cvstime<0) {
  355: 	    exit(2);
  356: 	}
  357:         else {
  358: 	    exit(0);
  359: 	}
  360:     }
  361:     elsif ($buildmode==2) {
  362:         if ($cvstime<0) {
  363: 	    exit(2);
  364: 	}
  365:         else {
  366: 	    exit(0);
  367: 	}
  368:     }
  369:     elsif ($buildmode==3) {
  370:         if ($md5sum eq 'same') {
  371: 	    exit(1);
  372: 	}
  373:         elsif ($age<0) {
  374: 	    exit(2);
  375: 	}
  376:         else {
  377: 	    exit(0);
  378: 	}
  379:     }
  380:     elsif ($buildmode==4) {
  381:         if ($cvstime>0) {
  382: 	    exit(2);
  383: 	}
  384:         else {
  385: 	    exit(0);
  386: 	}
  387:     }
  388:     print "$file";
  389:     if ($verbose==1) {
  390:         print "\t";
  391: 	print &{$OUTPUT{'existence'}}($existence);
  392:         print "\t";
  393: 	print &{$OUTPUT{'cvstime'}}($cvstime);
  394:         print "\t";
  395: 	print &{$OUTPUT{'age'}}($age);
  396:         print "\t";
  397: 	print &{$OUTPUT{'md5sum'}}($md5sum);
  398:         print "\t";
  399: 	print &{$OUTPUT{'size'}}($size);
  400:         print "\t";
  401: 	print &{$OUTPUT{'lines'}}($lines);
  402:         print "\t";
  403: 	print &{$OUTPUT{'diffs'}}($diffs);
  404:     }
  405:     print "\n";
  406: }
  407: 
  408: sub cvstime {
  409:     my ($f)=@_;
  410:     my $path; my $file;
  411:     if ($f=~/^(.*\/)(.*?)$/) {
  412: 	$f=~/^(.*\/)(.*?)$/;
  413: 	($path,$file)=($1,$2);
  414:     }
  415:     else {
  416: 	$file=$f; $path='';
  417:     }
  418:     my $cvstime;
  419:     if ($buildmode!=3) {
  420: 	my $entry=`grep '^/$file/' ${path}CVS/Entries` or
  421: 	    die('*** ERROR *** cannot grep against '.${path}.
  422: 		'CVS/Entries for ' .$file . "\n");
  423:         my @fields=split(/\//,$entry);
  424:         $cvstime=`date -d '$fields[3] UTC' --utc +"%s"`;
  425:         chomp $cvstime;
  426:     }
  427:     else {
  428: 	$cvstime='n/a';
  429:     }
  430:     return $cvstime;
  431: }
  432: 
  433: sub utctime {
  434:     my ($f)=@_;
  435:     my $utctime=`date -d '$f UTC' --utc +"%s"`;
  436:     chomp $utctime;
  437:     return $utctime;
  438: }
  439: 
  440: sub dowarn {
  441:     my ($msg)=@_;
  442:     warn($msg) unless $buildmode;
  443: }
  444: 
  445: =head1 NAME
  446: 
  447: filecompare.pl - script used to help probe and compare file statistics
  448: 
  449: =head1 SYNOPSIS
  450: 
  451: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
  452: 
  453: or
  454: 
  455: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
  456: 
  457: Restrictions: a list of space separated values (after the file/dir names)
  458: can restrict the comparison.
  459: These values can be: existence, cvstime, age, md5sum, size, lines,
  460: and/or diffs.
  461: 
  462: Options (before file/dir names):
  463: 
  464:  -p show all files that have the same comparison
  465: 
  466:  -n show all files that have different comparisons
  467: 
  468:  -a show all files (with comparisons)
  469: 
  470:  -q only show file names (based on first file/dir)
  471: 
  472:  -v verbose mode (default)
  473: 
  474: =head1 DESCRIPTION
  475: 
  476: filecompare.pl can work in two modes: file comparison mode, or directory
  477: comparison mode.
  478: 
  479: Comparisons can be a function of:
  480: * existence similarity
  481: * cvs time similarity (first argument treated as CVS source)
  482: * age similarity (modification time)
  483: * md5sum similarity
  484: * size similarity (bytes)
  485: * line count difference
  486: * number of different lines
  487: 
  488: filecompare.pl integrates smoothly with the LPML installation language
  489: (linux packaging markup language).  filecompare.pl is a tool that can
  490: be used for safe CVS source-to-target installations.
  491: 
  492: =head1 README
  493: 
  494: filecompare.pl integrates smoothly with the LPML installation language
  495: (linux packaging markup language).  filecompare.pl is a tool that can
  496: be used for safe CVS source-to-target installations.
  497: 
  498: The unique identifier is considered to be the file name(s) independent
  499: of the directory path.
  500: 
  501: =head1 PREREQUISITES
  502: 
  503: =head1 COREQUISITES
  504: 
  505: =head1 OSNAMES
  506: 
  507: linux
  508: 
  509: =head1 SCRIPT CATEGORIES
  510: 
  511: Packaging/Administrative
  512: 
  513: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>