diff -r 6bc08224c44e -r 70d578b200ba checker --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/checker Mon Jul 25 11:59:05 2011 +0200 @@ -0,0 +1,201 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use Pod::Usage; +use Hash::Util qw(lock_keys); +use File::Find; +use File::Temp; +use DB_File; +use File::Basename; +use autodie qw(:all); +use Cwd qw(abs_path); + +use Getopt::Long; +sub get_block_list; +sub purge_unused; +sub check_images; + +my %o = ( + yes => undef, + verbose => undef, + check => undef, +); lock_keys(%o); + +MAIN: { + Getopt::Long::Configure qw(Bundling); + GetOptions( + "y|yes!" => \$o{yes}, + "v|verbose!" => \$o{verbose}, + "c|check" => \$o{check}, + "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, + "m|man" => sub { pod2usage(-verbose => 2, -exit 0, + -noperldoc => system("perldoc -V 1>/dev/null + 2>&1"))}, + ) and @ARGV or pod2usage; + my $dir = shift; + my $tmp = File::Temp->new; + + # load the index files, remember the latest + # timestamp we see + #tie %idx, "DB_File" => $tmp->filename; + my %block = get_block_list($dir); + + verbose("# indexed: " + . scalar(@{$block{".idx"}//[]}) . " images with " + . (grep /^\.idx/ => keys(%block) - 1)." files"); + + purge_unused($dir => %block); + check_images($dir => %block); +} + +sub verbose { say @_ if $o{verbose} } + +sub get_file_list { + my ($list) = @_; + my @files = (); + + open(my $fh => $list); + while (<$fh>) { + push @files, (split)[2]; + } + return grep /^[a-z\d.\/]+$/ => @files; +} + +sub get_block_list { + my $dir = shift; + my %block; + find(sub { + (-f) and (-M > 0) or return; + #verbose("idx: $File::Find::name"); + push @{$block{".idx"}}, abs_path $_; + foreach my $f (get_file_list($_)) { + push @{$block{$f}} => $#{$block{".idx"}}; + } + }, "$dir/idx"); + return %block; +} + +sub purge_unused { + my ($dir, %block) = @_; + + my ($total, $done); + + # calculate the number of files we expect + find(sub { + -d or return; + opendir(my $dh => $_); + map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh; + closedir($dh); + }, "$dir/data"); + + + # progress + local $SIG{ALRM} = sub { + return alarm 1 if not $done; + my $speed = $done / (time - $^T + 1); + verbose sprintf "# done %5.1f%% | %25s (%*d of %d files)", + 100 * ($done/$total), + scalar(localtime($^T + $speed * ($total - $done))), + length($total) => $done, + $total; + alarm 5; + }; + $SIG{ALRM}->(); + + find(sub { + $done++ if -f; + (-f _) and ((-M _) > 0) or return; + + # cut away the first part of the filename and + # some optional extension + (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; + exists $block{$rn} and return; + + if ($o{yes}) { + verbose("unlinking abs_path $File::Find::name"); + unlink abs_path $File::Find::name; + return; + } + + verbose("unused abs_path $File::Find::name"); + return; + + }, "$dir/data"); + $SIG{ALRM}->(); + alarm 0; + +} + +sub check_images { + my ($dir, %block) = @_; + + my $total = keys(%block) - 1; # .idx + my $done = 0; + + # progress + local $SIG{ALRM} = sub { + return alarm 1 if not $done; + my $speed = $done / (time - $^T + 1); + say sprintf "# done %5.1f%% | %25s (%*d of %d files)", + 100 * $done/$total, + scalar(localtime($^T + ($total - $done) * $speed)), + length($total) => $done, + $total; + alarm 5; + }; + $SIG{ALRM}->(); + + my %invalid; + foreach my $k (keys %block) { + my $i = $block{$k}; + next if $k eq ".idx"; + ++$done; + + next if -f "$dir/data/$k" + or -f "$dir/data/$k.gz"; + say "missing $k"; + @invalid{@{$block{".idx"}}} = (); + } + $SIG{ALRM}->(); + alarm 0; + + return if not %invalid; + + say "invalid images:\n", join "\n", sort keys %invalid; + unlink keys %invalid if $o{yes}; +} +__END__ + +=head1 NAME + + checker - checks the imager data and index files + +=head1 SYNOPSIS + + checker [options] {directory} + +=head1 DESCRIPTION + +This tool loads all the index files from IF, +checks if all mentioned files are existing and optionally purges +unreferenced files. + +=head1 OPTIONS + +=over + +=item B<-y>|B<--yes> + +Assume "yes" for all questions (dangerous!). (default: no) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The short and longer help. + +=back + +=cut