--- loncom/metadata_database/searchcat.pl 2007/01/03 03:58:34 1.70.2.1 +++ loncom/metadata_database/searchcat.pl 2013/08/22 09:30:21 1.81 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.70.2.1 2007/01/03 03:58:34 albertel Exp $ +# $Id: searchcat.pl,v 1.81 2013/08/22 09:30:21 bisitz Exp $ # # Copyright Michigan State University Board of Trustees # @@ -68,12 +68,14 @@ use strict; use DBI; use lib '/home/httpd/lib/perl/'; use LONCAPA::lonmetadata; - +use LONCAPA; use Getopt::Long; use IO::File; use HTML::TokeParser; use GDBM_File; use POSIX qw(strftime mktime); +use Mail::Send; +use Apache::loncommon(); use Apache::lonnet(); @@ -123,6 +125,7 @@ my %oldnames = ( 'portfolio' => 'portfolio_metadata', 'access' => 'portfolio_access', 'addedfields' => 'portfolio_addedfields', + 'allusers' => 'allusers', ); my %newnames; @@ -134,6 +137,8 @@ foreach my $key (keys(%oldnames)) { # # Only run if machine is a library server exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library'); +my $hostid = $Apache::lonnet::perlvar{'lonHostID'}; + # # Make sure this process is running from user=www my $wwwid=getpwnam('www'); @@ -172,7 +177,7 @@ foreach my $key (keys(%newnames)) { } # -# Create the new metadata and portfolio tables +# Create the new metadata, portfolio and allusers tables foreach my $key (keys(%newnames)) { if ($newnames{$key} ne '') { my $request = @@ -206,6 +211,7 @@ foreach my $dom (@domains) { if ($oneuser) { @homeusers=($oneuser); } + # # Loop through the users foreach my $user (@homeusers) { @@ -221,23 +227,33 @@ foreach my $dom (@domains) { no_chdir => 1, }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) ); } - # Search for public portfolio files - my %portusers; + # Search for all users and public portfolio files + my (%allusers,%portusers,%courses); if ($oneuser) { %portusers = ( $oneuser => '', ); + %allusers = ( + $oneuser => '', + ); + %courses = &courseiddump($dom,'.',1,'.','.',$oneuser,undef, + undef,'.'); } else { + # get courseIDs for domain on current machine + %courses=&Apache::lonnet::courseiddump($dom,'.',1,'.','.','.',1,[$hostid],'.'); my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom; - &descend_tree($dir,0,\%portusers); + &descend_tree($dom,$dir,0,\%portusers,\%allusers); } foreach my $uname (keys(%portusers)) { my $urlstart = '/uploaded/'.$dom.'/'.$uname; my $pathstart = &propath($dom,$uname).'/userfiles'; - my $is_course = &Apache::lonnet::is_course($dom,$uname); + my $is_course = ''; + if (exists($courses{$dom.'_'.$uname})) { + $is_course = 1; + } my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname); my %access = &Apache::lonnet::get_access_controls($curr_perm); - foreach my $file (keys(%access)) { + foreach my $file (keys(%access)) { my ($group,$url,$fullpath); if ($is_course) { ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/); @@ -248,10 +264,133 @@ foreach my $dom (@domains) { $url = $urlstart.'/portfolio'.$file; } if (ref($access{$file}) eq 'HASH') { - &process_portfolio_access_data($url,$access{$file}); + my %portaccesslog = + &LONCAPA::lonmetadata::process_portfolio_access_data($dbh, + $simulate,\%newnames,$url,$fullpath,$access{$file}); + &portfolio_logging(%portaccesslog); + } + my %portmetalog = &LONCAPA::lonmetadata::process_portfolio_metadata($dbh,$simulate,\%newnames,$url,$fullpath,$is_course,$dom,$uname,$group); + &portfolio_logging(%portmetalog); + } + } + my (%names_by_id,,%ids_by_name,%idstodelete,%idstoadd,%duplicates); + unless ($simulate || $oneuser) { + my $idshashref; + $idshashref = &tie_domain_hash($dom, "ids", &GDBM_WRCREAT()); + if (ref($idshashref) eq 'HASH') { + %names_by_id = %{$idshashref}; + while (my ($id,$uname) = each(%{$idshashref}) ) { + $id = &unescape($id); + $uname = &unescape($uname); + $names_by_id{$id} = $uname; + push(@{$ids_by_name{$uname}},$id); + } + &untie_domain_hash($idshashref); + } + } + # Update allusers + foreach my $uname (keys(%allusers)) { + next if (exists($courses{$dom.'_'.$uname})); + my %userdata = + &Apache::lonnet::get('environment',['firstname','lastname', + 'middlename','generation','id','permanentemail'],$dom,$uname); + unless ($simulate || $oneuser) { + my $addid; + if ($userdata{'id'} ne '') { + $addid = $userdata{'id'}; + $addid=~tr/A-Z/a-z/; + } + if (exists($ids_by_name{$uname})) { + if (ref($ids_by_name{$uname}) eq 'ARRAY') { + if (scalar(@{$ids_by_name{$uname}}) > 1) { + &log(0,"Multiple employee/student IDs found in ids.db for $uname:$dom -- ".join(', ',@{$ids_by_name{$uname}})); + } + foreach my $id (@{$ids_by_name{$uname}}) { + if ($id eq $userdata{'id'}) { + undef($addid); + } else { + $idstodelete{$id} = $uname; + } + } + } + } + if ($addid ne '') { + if (exists($idstoadd{$addid})) { + push(@{$duplicates{$addid}},$uname); + } else { + $idstoadd{$addid} = $uname; + } + } + } + + $userdata{'username'} = $uname; + $userdata{'domain'} = $dom; + my %alluserslog = + &LONCAPA::lonmetadata::process_allusers_data($dbh,$simulate, + \%newnames,$uname,$dom,\%userdata); + foreach my $item (keys(%alluserslog)) { + &log(0,$alluserslog{$item}); + } + } + unless ($simulate || $oneuser) { + if (keys(%idstodelete) > 0) { + my %resulthash = &Apache::lonnet::iddel($dom,\%idstodelete,$hostid); + if ($resulthash{$hostid} eq 'ok') { + foreach my $id (sort(keys(%idstodelete))) { + &log(0,"Record deleted from ids.db for $dom -- $id => ".$idstodelete{$id}); + } + } else { + &log(0,"Error: '$resulthash{$hostid}' occurred when attempting to delete records from ids.db for $dom"); + } + } + if (keys(%idstoadd) > 0) { + my $idmessage = ''; + my %newids; + foreach my $addid (sort(keys(%idstoadd))) { + if ((exists($names_by_id{$addid})) && ($names_by_id{$addid} ne $idstoadd{$addid}) && !($idstodelete{$addid})) { + &log(0,"Two usernames associated with a single ID $addid in domain: $dom: $names_by_id{$addid} (current) and $idstoadd{$addid}\n"); + $idmessage .= "$addid,$names_by_id{$addid},$idstoadd{$addid}\n"; + } else { + $newids{$addid} = $idstoadd{$addid}; + } + } + if (keys(%newids) > 0) { + my $putresult = &Apache::lonnet::put_dom('ids',\%idstoadd,$dom,$hostid); + if ($putresult eq 'ok') { + foreach my $id (sort(keys(%idstoadd))) { + &log(0,"Record added to ids.db for $dom -- $id => ".$idstoadd{$id}); + } + } else { + &log(0,"Error: '$putresult' occurred when attempting to add records to ids.db for $dom"); + } + } + if ($idmessage) { + my $to = &Apache::loncommon::build_recipient_list(undef,'idconflictsmail',$dom); + if ($to ne '') { + my $msg = new Mail::Send; + $msg->to($to); + $msg->subject('LON-CAPA studentIDs conflict'); + my $lonhost = $Apache::lonnet::perlvar{'lonHostID'}; + my $hostname = &Apache::lonnet::hostname($lonhost); + my $replytoaddress = 'do-not-reply@'.$hostname; + $msg->add('Reply-to',$replytoaddress); + $msg->add('From',"www@$hostname"); + $msg->add('Content-type','text/plain; charset=UTF-8'); + if (my $fh = $msg->open()) { + print $fh + 'The following IDs are used for more than one user in your domain:'."\n". + 'Each row contains: Student/Employee ID, Current username in ids.db file, '. + 'Additional username'."\n\n". + $idmessage; + $fh->close; + } + } + } + } + if (keys(%duplicates) > 0) { + foreach my $id (sort(keys(%duplicates))) { + &log(0,"Duplicate IDs found for entries to add to ids.db in $dom -- $id => $idstodelete{$id}"); } - &process_portfolio_metadata($url,$fullpath,$is_course,$dom, - $uname,$group); } } } @@ -302,8 +441,19 @@ sub log { } } +sub portfolio_logging { + my (%portlog) = @_; + foreach my $key (keys(%portlog)) { + if (ref($portlog{$key}) eq 'HASH') { + foreach my $item (keys(%{$portlog{$key}})) { + &log(0,$portlog{$key}{$item}); + } + } + } +} + sub descend_tree { - my ($dir,$depth,$alldomusers) = @_; + my ($dom,$dir,$depth,$allportusers,$alldomusers) = @_; if (-d $dir) { opendir(DIR,$dir); my @contents = grep(!/^\./,readdir(DIR)); @@ -311,10 +461,12 @@ sub descend_tree { $depth ++; foreach my $item (@contents) { if ($depth < 4) { - &descend_tree($dir.'/'.$item,$depth,$alldomusers); + &descend_tree($dom,$dir.'/'.$item,$depth,$allportusers,$alldomusers); } else { if (-e $dir.'/'.$item.'/file_permissions.db') { - + $$allportusers{$item} = ''; + } + if (-e $dir.'/'.$item.'/passwd') { $$alldomusers{$item} = ''; } } @@ -322,88 +474,6 @@ sub descend_tree { } } -sub process_portfolio_access_data { - my ($url,$access_hash) = @_; - foreach my $key (keys(%{$access_hash})) { - my $acc_data; - $acc_data->{url} = $url; - $acc_data->{keynum} = $key; - my ($num,$scope,$end,$start) = - ($key =~ /^([^:]+):([a-z]+)_(\d*)_?(\d*)$/); - $acc_data->{scope} = $scope; - if ($end != 0) { - $acc_data->{end} = &sqltime($end); - } - $acc_data->{start} = &sqltime($start); - if (! $simulate) { - my ($count,$err) = - &LONCAPA::lonmetadata::store_metadata($dbh, - $newnames{'access'}, - 'portfolio_access',$acc_data); - if ($err) { - &log(0,"MySQL Error Insert: ".$err); - } - if ($count < 1) { - &log(0,"Unable to insert record into MySQL database for $url"); - } - } - } -} - -sub process_portfolio_metadata { - my ($url,$fullpath,$is_course,$dom,$uname,$group) = @_; - my ($ref,$crs,$addedfields) = &portfolio_metadata($fullpath,$dom,$uname, - $group); - &getfiledates($ref,$fullpath); - if ($is_course) { - $ref->{'groupname'} = $group; - } - my %Data; - if (ref($ref) eq 'HASH') { - %Data = %{$ref}; - } - %Data = ( - %Data, - 'url'=>$url, - 'version'=>'current', - ); - if (! $simulate) { - my ($count,$err) = - &LONCAPA::lonmetadata::store_metadata($dbh, - $newnames{'portfolio'}, - 'portfolio_metadata',\%Data); - if ($err) { - &log(0,"MySQL Error Insert: ".$err); - } - if ($count < 1) { - &log(0,"Unable to insert record into MySQL portfolio_metadata database table for $url"); - } - if (ref($addedfields) eq 'HASH') { - if (keys(%{$addedfields}) > 0) { - foreach my $key (keys(%{$addedfields})) { - my $added_data = { - 'url' => $url, - 'field' => $key, - 'value' => $addedfields->{$key}, - 'courserestricted' => $crs, - }; - ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh, - $newnames{'addedfields'}, - 'portfolio_addedfields', - $added_data); - if ($err) { - &log(0,"MySQL Error Insert: ".$err); - } - if ($count < 1) { - &log(0,"Unable to insert record into MySQL portfolio_addedfields database table for url = $url and field = $key"); - } - } - } - } - } - return; -} - ######################################################## ######################################################## ### ### @@ -507,7 +577,7 @@ sub process_meta_file { %dyn=&get_dynamic_metadata($url); &count_type($url); } - &getfiledates($ref,$target); + &LONCAPA::lonmetadata::getfiledates($ref,$target); # my %Data = ( %$ref, @@ -548,7 +618,8 @@ sub metadata { if ($filename !~ /\.meta$/) { $filename.='.meta'; } - my $metastring=&getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename); + my $metastring = + &LONCAPA::lonmetadata::getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename); return undef if (! defined($metastring)); my $parser=HTML::TokeParser->new(\$metastring); my $token; @@ -579,123 +650,6 @@ sub metadata { return \%metacache; } -############################################################### -############################################################### -### ### -### &portfolio_metadata($filepath,$dom,$uname,$group) ### -### Retrieve metadata for the given file ### -### Returns array - ### -### contains reference to metadatahash and ### -### optional reference to addedfields hash ### -### ### -############################################################### -############################################################### -sub portfolio_metadata { - my ($fullpath,$dom,$uname,$group)=@_; - my ($mime) = ( $fullpath=~/\.(\w+)$/ ); - my %metacache=(); - if ($fullpath !~ /\.meta$/) { - $fullpath .= '.meta'; - } - my (@standard_fields,%addedfields); - my $colsref = - $LONCAPA::lonmetadata::Portfolio_metadata_table_description; - if (ref($colsref) eq 'ARRAY') { - my @columns = @{$colsref}; - foreach my $coldata (@columns) { - push(@standard_fields,$coldata->{'name'}); - } - } - my $metastring=&getfile($fullpath); - if (! defined($metastring)) { - $metacache{'keys'}= 'owner,domain,mime'; - $metacache{'owner'} = $uname.':'.$dom; - $metacache{'domain'} = $dom; - $metacache{'mime'} = $mime; - if (defined($group)) { - $metacache{'keys'} .= ',courserestricted'; - $metacache{'courserestricted'} = 'course.'.$dom.'_'.$uname; - } - } else { - my $parser=HTML::TokeParser->new(\$metastring); - my $token; - while ($token=$parser->get_token) { - if ($token->[0] eq 'S') { - my $entry=$token->[1]; - if ($metacache{'keys'}) { - $metacache{'keys'}.=','.$entry; - } else { - $metacache{'keys'}=$entry; - } - my $value = $parser->get_text('/'.$entry); - if (!grep(/^\Q$entry\E$/,@standard_fields)) { - my $clean_value = lc($value); - $clean_value =~ s/\s/_/g; - if ($clean_value ne $entry) { - if (defined($addedfields{$entry})) { - $addedfields{$entry} .=','.$value; - } else { - $addedfields{$entry} = $value; - } - } - } else { - $metacache{$entry} = $value; - } - } - } # End of ($token->[0] eq 'S') - } - if (keys(%addedfields) > 0) { - foreach my $key (sort keys(%addedfields)) { - $metacache{'addedfieldnames'} .= $key.','; - $metacache{'addedfieldvalues'} .= $addedfields{$key}.'&&&'; - } - $metacache{'addedfieldnames'} =~ s/,$//; - $metacache{'addedfieldvalues'} =~ s/\&\&\&$//; - if ($metacache{'keys'}) { - $metacache{'keys'}.=',addedfieldnames'; - } else { - $metacache{'keys'}='addedfieldnames'; - } - $metacache{'keys'}.=',addedfieldvalues'; - } - return (\%metacache,$metacache{'courserestricted'},\%addedfields); -} - -## -## &getfile($filename) -## Slurps up an entire file into a scalar. -## Returns undef if the file does not exist -sub getfile { - my $file = shift(); - if (! -e $file ) { - return undef; - } - my $fh=IO::File->new($file); - my $contents = ''; - while (<$fh>) { - $contents .= $_; - } - return $contents; -} - -## -## &getfiledates() -## Converts creationdate and modifieddates to SQL format -## Applies stat() to file to retrieve dates if missing -sub getfiledates { - my ($ref,$target) = @_; - if (! defined($ref->{'creationdate'}) || - $ref->{'creationdate'} =~ /^\s*$/) { - $ref->{'creationdate'} = (stat($target))[9]; - } - if (! defined($ref->{'lastrevisiondate'}) || - $ref->{'lastrevisiondate'} =~ /^\s*$/) { - $ref->{'lastrevisiondate'} = (stat($target))[9]; - } - $ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); - $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); -} - ######################################################## ######################################################## ### ### @@ -863,7 +817,7 @@ sub write_copyright_count { ## (copied from lond, modification of the return value) sub ishome { my $author=shift; - $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; + $author=~s{/home/httpd/html/res/([^/]*)/([^/]*).*}{$1/$2}; my ($udom,$uname)=split(/\//,$author); my $proname=propath($udom,$uname); if (-e $proname) { @@ -874,55 +828,6 @@ sub ishome { } ## -## &propath($udom,$uname) -## Returns the path to the users LON-CAPA directory -## (copied from lond) -sub propath { - my ($udom,$uname)=@_; - $udom=~s/\W//g; - $uname=~s/\W//g; - my $subdir=$uname.'__'; - $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; - my $proname="$Apache::lonnet::perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; - return $proname; -} - -## -## &sqltime($timestamp) -## -## Convert perl $timestamp to MySQL time. MySQL expects YYYY-MM-DD HH:MM:SS -## -sub sqltime { - my ($time) = @_; - my $mysqltime; - if ($time =~ - /(\d+)-(\d+)-(\d+) # YYYY-MM-DD - \s # a space - (\d+):(\d+):(\d+) # HH:MM::SS - /x ) { - # Some of the .meta files have the time in mysql - # format already, so just make sure they are 0 padded and - # pass them back. - $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', - $1,$2,$3,$4,$5,$6); - } elsif ($time =~ /^\d+$/) { - my @TimeData = gmtime($time); - # Alter the month to be 1-12 instead of 0-11 - $TimeData[4]++; - # Alter the year to be from 0 instead of from 1900 - $TimeData[5]+=1900; - $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', - @TimeData[5,4,3,2,1,0]); - } elsif (! defined($time) || $time == 0) { - $mysqltime = 0; - } else { - &log(0," sqltime:Unable to decode time ".$time); - $mysqltime = 0; - } - return $mysqltime; -} - -## ## &declutter($filename) ## Given a filename, returns a url for the filename. sub declutter { @@ -933,16 +838,3 @@ sub declutter { return $thisfn; } -## -## Escape / Unescape special characters -sub unescape { - my $str=shift; - $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; - return $str; -} - -sub escape { - my $str=shift; - $str =~ s/(\W)/"%".unpack('H2',$1)/eg; - return $str; -}