# HG changeset patch # User Heiko Schlittermann (JUMPER) # Date 1311587945 -7200 # Node ID 70d578b200ba43836ad8a6991d7df33f70f95623 # Parent 6bc08224c44e52196d47ff330654bf969ae821d7 checker done diff -r 6bc08224c44e -r 70d578b200ba checker --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/checker Mon Jul 25 11:59:05 2011 +0200 @@ -0,0 +1,201 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use Pod::Usage; +use Hash::Util qw(lock_keys); +use File::Find; +use File::Temp; +use DB_File; +use File::Basename; +use autodie qw(:all); +use Cwd qw(abs_path); + +use Getopt::Long; +sub get_block_list; +sub purge_unused; +sub check_images; + +my %o = ( + yes => undef, + verbose => undef, + check => undef, +); lock_keys(%o); + +MAIN: { + Getopt::Long::Configure qw(Bundling); + GetOptions( + "y|yes!" => \$o{yes}, + "v|verbose!" => \$o{verbose}, + "c|check" => \$o{check}, + "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, + "m|man" => sub { pod2usage(-verbose => 2, -exit 0, + -noperldoc => system("perldoc -V 1>/dev/null + 2>&1"))}, + ) and @ARGV or pod2usage; + my $dir = shift; + my $tmp = File::Temp->new; + + # load the index files, remember the latest + # timestamp we see + #tie %idx, "DB_File" => $tmp->filename; + my %block = get_block_list($dir); + + verbose("# indexed: " + . scalar(@{$block{".idx"}//[]}) . " images with " + . (grep /^\.idx/ => keys(%block) - 1)." files"); + + purge_unused($dir => %block); + check_images($dir => %block); +} + +sub verbose { say @_ if $o{verbose} } + +sub get_file_list { + my ($list) = @_; + my @files = (); + + open(my $fh => $list); + while (<$fh>) { + push @files, (split)[2]; + } + return grep /^[a-z\d.\/]+$/ => @files; +} + +sub get_block_list { + my $dir = shift; + my %block; + find(sub { + (-f) and (-M > 0) or return; + #verbose("idx: $File::Find::name"); + push @{$block{".idx"}}, abs_path $_; + foreach my $f (get_file_list($_)) { + push @{$block{$f}} => $#{$block{".idx"}}; + } + }, "$dir/idx"); + return %block; +} + +sub purge_unused { + my ($dir, %block) = @_; + + my ($total, $done); + + # calculate the number of files we expect + find(sub { + -d or return; + opendir(my $dh => $_); + map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh; + closedir($dh); + }, "$dir/data"); + + + # progress + local $SIG{ALRM} = sub { + return alarm 1 if not $done; + my $speed = $done / (time - $^T + 1); + verbose sprintf "# done %5.1f%% | %25s (%*d of %d files)", + 100 * ($done/$total), + scalar(localtime($^T + $speed * ($total - $done))), + length($total) => $done, + $total; + alarm 5; + }; + $SIG{ALRM}->(); + + find(sub { + $done++ if -f; + (-f _) and ((-M _) > 0) or return; + + # cut away the first part of the filename and + # some optional extension + (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; + exists $block{$rn} and return; + + if ($o{yes}) { + verbose("unlinking abs_path $File::Find::name"); + unlink abs_path $File::Find::name; + return; + } + + verbose("unused abs_path $File::Find::name"); + return; + + }, "$dir/data"); + $SIG{ALRM}->(); + alarm 0; + +} + +sub check_images { + my ($dir, %block) = @_; + + my $total = keys(%block) - 1; # .idx + my $done = 0; + + # progress + local $SIG{ALRM} = sub { + return alarm 1 if not $done; + my $speed = $done / (time - $^T + 1); + say sprintf "# done %5.1f%% | %25s (%*d of %d files)", + 100 * $done/$total, + scalar(localtime($^T + ($total - $done) * $speed)), + length($total) => $done, + $total; + alarm 5; + }; + $SIG{ALRM}->(); + + my %invalid; + foreach my $k (keys %block) { + my $i = $block{$k}; + next if $k eq ".idx"; + ++$done; + + next if -f "$dir/data/$k" + or -f "$dir/data/$k.gz"; + say "missing $k"; + @invalid{@{$block{".idx"}}} = (); + } + $SIG{ALRM}->(); + alarm 0; + + return if not %invalid; + + say "invalid images:\n", join "\n", sort keys %invalid; + unlink keys %invalid if $o{yes}; +} +__END__ + +=head1 NAME + + checker - checks the imager data and index files + +=head1 SYNOPSIS + + checker [options] {directory} + +=head1 DESCRIPTION + +This tool loads all the index files from IF, +checks if all mentioned files are existing and optionally purges +unreferenced files. + +=head1 OPTIONS + +=over + +=item B<-y>|B<--yes> + +Assume "yes" for all questions (dangerous!). (default: no) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The short and longer help. + +=back + +=cut diff -r 6bc08224c44e -r 70d578b200ba cleaner --- a/cleaner Mon Jul 25 08:58:47 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,160 +0,0 @@ -#! /usr/bin/perl - -use 5.010; -use strict; -use warnings; -use Pod::Usage; -use Hash::Util qw(lock_keys); -use File::Find; -use File::Temp; -use DB_File; -use File::Basename; -use autodie qw(:all); -use Cwd qw(abs_path); - -use Getopt::Long; - -my %o = ( - dry => undef, - verbose => undef, - check => undef, -); lock_keys(%o); - -MAIN: { - Getopt::Long::Configure qw(Bundling); - GetOptions( - "n|dry!" => \$o{dry}, - "v|verbose!" => \$o{verbose}, - "c|check" => \$o{check}, - "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, - "m|man" => sub { pod2usage(-verbose => 2, -exit 0, - -noperldoc => system("perldoc -V 1>/dev/null - 2>&1"))}, - ) and @ARGV or pod2usage; - my $dir = shift; - my $tmp = File::Temp->new; - - # load the index files, remember the latest - # timestamp we see - my (%inuse, @idx); - #tie %idx, "DB_File" => $tmp->filename; - - find(sub { - (-f) and (-M > 0) or return; - #verbose("idx: $File::Find::name"); - push @idx, abs_path $_; - foreach my $f (get_file_list($_)) { - push @{$inuse{$f}} => $#idx; - } - }, "$dir/idx"); - - verbose("indexed: ".scalar(keys %inuse)." files"); - - # simple "forward" check: existence of mentioned files - if ($o{check}) { - my $total = scalar keys %inuse; - my $done = 0; - local $SIG{ALRM} = sub { - say sprintf "done %5.1f%% (%*d of $total)" - => 100 * $done/$total, length($total), $done - if $total; - alarm 5; - }; - $SIG{ALRM}->(); - while (my ($f, $i) = each %inuse) { - ++$done; - next if -f "$dir/data/$f" - or -f "$dir/data/$f.gz"; - say "missing $f from\n", - join "-\t" => "", map { "$_\n" } @idx[@$i]; - } - $SIG{ALRM}->(); - alarm 0; - exit 0; - } - - # full check and probably cleaning: all files, not mentioned - # in some index will be purged -# my (%file); -#- find(sub { -#- (-f) and (-M > 0) or return; -#- $File::Find::name =~ s/^$dir\/data\///; -#- $file{$_} = $_; -#- }, "$dir/data"); -#- -#- verbose("file system: ".scalar(keys %file)." files"); -#- exit 0; - - # ok, now go through all the data files and remove - # files not mentioned in some index, but never remove - # files created after the cleaner started - find(sub { - (-f) and (-M > 0) or return; - - # cut away the first part of the filename and - # some optional extension - (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; - exists $inuse{$rn} and return; - - if ($o{dry}) { - verbose("(unlinking) abs_path $File::Find::name"); - return; - } - - verbose("unlinking abs_path $File::Find::name"); - unlink abs_path $File::Find::name; - - }, "$dir/data"); - -} - -sub verbose { say @_ if $o{verbose} } - -sub get_file_list { - my ($list) = @_; - my @files = (); - - open(my $fh => $list); - while (<$fh>) { - push @files, (split)[2]; - } - return grep /^[a-z\d.\/]+$/ => @files; -} - - -__END__ - -=head1 NAME - - cleaner - cleans the imager data directory - -=head1 SYNOPSIS - - cleaner [options] {directory} - -=head1 DESCRIPTION - -This tool loads all the index files from IF -and purges all not mentioned files below IF. - -=head1 OPTIONS - -=over - -=item B<-c>|B<--check> - -Check (and exit) if nothing is missing. - -=item B<-n>|B<--dry> - -Do nothing, just print what should be removed. (default: off) - -=item B<-h>|B<--help> - -=item B<-m>|B<--man> - -The short and longer help. - -=back - -=cut