diff -r 4a01ae9db5c4 -r 49ff641055a3 checker --- a/checker Tue Jul 26 11:54:40 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,244 +0,0 @@ -#! /usr/bin/perl - -use 5.010; -use strict; -use warnings; -use Pod::Usage; -use Hash::Util qw(lock_keys); -use File::Find; -use File::Temp; -use DB_File; -use File::Basename; -use autodie qw(:all); -use Cwd qw(abs_path); - -use Getopt::Long; -sub get_block_list; -sub purge_unused; -sub check_images; - -my %o = ( - yes => undef, - verbose => undef, - check => undef, -); lock_keys(%o); - -MAIN: { - Getopt::Long::Configure qw(Bundling); - GetOptions( - "y|yes!" => \$o{yes}, - "v|verbose!" => \$o{verbose}, - "c|check" => \$o{check}, - "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, - "m|man" => sub { pod2usage(-verbose => 2, -exit 0, - -noperldoc => system("perldoc -V 1>/dev/null - 2>&1"))}, - ) and @ARGV or pod2usage; - my $dir = shift; - my $tmp = File::Temp->new; - - # load the index files, remember the latest - # timestamp we see - #tie %idx, "DB_File" => $tmp->filename; - my %block = get_block_list($dir); - - verbose("# indexed: " - . scalar(@{$block{""}//[]}) . " images with " - . (grep !/^\.idx$/ => keys(%block))." blocks"); - - purge_unused($dir => %block); - check_images($dir => %block); -} - -sub verbose { say @_ if $o{verbose} } - -sub get_file_list { - my ($list) = @_; - my @files = (); - - open(my $fh => $list); - while (<$fh>) { - push @files, (split)[2]; - } - return grep /^[a-z\d.\/]+$/ => @files; -} - -sub get_block_list { - my $dir = shift; - my %block; - find(sub { - (-f) or return; # we need to include the tmp files! - push @{$block{""}}, abs_path $_; - foreach my $f (get_file_list($_)) { - push @{$block{$f}} => $#{$block{""}}; - } - }, "$dir/idx"); - return %block; -} - -sub purge_unused { - my ($dir, %block) = @_; - - my ($total, $done); - verbose("# pass 1 - checking for unused blocks"); - verbose("# pass 1 - estimating file count"); - - # calculate the number of files we expect - find(sub { - -d or return; - opendir(my $dh => $_); - map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh; - closedir($dh); - }, "$dir/data"); - - - # progress - local $SIG{ALRM} = sub { - return alarm 1 if not $done; - my $speed = $done / (time - $^T + 1); - verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", - 100 * ($done/$total), - scalar(localtime($^T + $speed * ($total - $done))), - length($total) => $done, - $total; - alarm 5; - }; - $SIG{ALRM}->(); - - my @unused; - find(sub { - $done++ if -f; - (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks - - # we don't need uncompressed files if an compressed version - # exists - unlink $_ and return if -f "$_.gz"; - - # cut away the first part of the filename and - # some optional extension - (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; - exists $block{$rn} and return; - push @unused, abs_path $File::Find::name; - return; - - }, "$dir/data"); - $SIG{ALRM}->(); - alarm 0; - - return if not @unused; - - say sprintf "found %d (%.1f%%) unused files", - 0+@unused, - 100 * (@unused/$total); - - if ($o{yes}) { - verbose("# deleting ".@unused." files"); - unlink @unused; - return; - } - - if (-t) { - while(1) { - print "delete? [y/N/v]: "; - given () { - when (/^y(?:es)?$/i) { unlink @unused; last } - when (/^v/) { say join "\n", @unused; next } - default { last } - } - } - } - -} - -sub check_images { - my ($dir, %block) = @_; - - my $total = grep { $_ ne "" } keys(%block); - my $done = 0; - - verbose("# pass 2 - checking image completeness"); - - # progress - local $SIG{ALRM} = sub { - return alarm 1 if not $done; - my $speed = $done / (time - $^T + 1); - verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", - 100 * $done/$total, - scalar(localtime($^T + ($total - $done) * $speed)), - length($total) => $done, - $total; - alarm 5; - }; - $SIG{ALRM}->(); - - my %invalid; - foreach my $k (keys %block) { - my $i = $block{$k}; - next if $k eq ""; - ++$done; - - next if -f "$dir/data/$k" - or -f "$dir/data/$k.gz"; - say "missing $k @$i"; - @invalid{@$i} = (); - } - $SIG{ALRM}->(); - alarm 0; - - # invalid now contains the numbers of the idx files beiing - # invalid - my @invalid = sort @{$block{""}}[keys %invalid]; - - return if not @invalid; - - say sprintf "found %d (%.1f%%) invalid images:", - 0+@invalid, - 100 * (@invalid/$total); - - if ($o{yes}) { - unlink @invalid; - return; - } - - while (-t) { - print "delete? [y/N/v] "; - given () { - when (/^y(?:es)?$/i) { unlink @invalid; last } - when (/^v/i) { say join "\n" => @invalid; next } - default { last } - } - } -} -__END__ - -=head1 NAME - - checker - checks the imager data and index files - -=head1 SYNOPSIS - - checker [options] {directory} - -=head1 DESCRIPTION - -This tool loads all the index files from IF, -checks if all mentioned files are existing and optionally purges -unreferenced files. - -=head1 OPTIONS - -=over - -=item B<-y>|B<--yes> - -Assume "yes" for all questions (dangerous!). (default: no) - -=item B<-h>|B<--help> - -=item B<-m>|B<--man> - -The short and longer help. - -=back - -=cut