|         |      1 #! /usr/bin/perl | 
|         |      2  | 
|         |      3 use 5.010; | 
|         |      4 use strict; | 
|         |      5 use warnings; | 
|         |      6 use Pod::Usage; | 
|         |      7 use Hash::Util qw(lock_keys); | 
|         |      8 use File::Find; | 
|         |      9 use File::Temp; | 
|         |     10 use DB_File; | 
|         |     11 use File::Basename; | 
|         |     12 use autodie qw(:all); | 
|         |     13 use Cwd qw(abs_path); | 
|         |     14  | 
|         |     15 use Getopt::Long; | 
|         |     16 sub get_block_list; | 
|         |     17 sub purge_unused; | 
|         |     18 sub check_images; | 
|         |     19  | 
|         |     20 my %o = ( | 
|         |     21     yes => undef, | 
|         |     22     verbose => undef, | 
|         |     23     check => undef, | 
|         |     24 ); lock_keys(%o); | 
|         |     25  | 
|         |     26 MAIN: { | 
|         |     27     Getopt::Long::Configure qw(Bundling); | 
|         |     28     GetOptions( | 
|         |     29 	"y|yes!" => \$o{yes}, | 
|         |     30 	"v|verbose!" => \$o{verbose}, | 
|         |     31 	"c|check" => \$o{check}, | 
|         |     32 	"h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, | 
|         |     33 	"m|man"  => sub { pod2usage(-verbose => 2, -exit 0,  | 
|         |     34 			  -noperldoc => system("perldoc -V 1>/dev/null | 
|         |     35 			  2>&1"))}, | 
|         |     36     ) and @ARGV or pod2usage; | 
|         |     37     my $dir = shift; | 
|         |     38     my $tmp = File::Temp->new; | 
|         |     39  | 
|         |     40     # load the index files, remember the latest | 
|         |     41     # timestamp we see | 
|         |     42     #tie %idx, "DB_File" => $tmp->filename; | 
|         |     43     my %block = get_block_list($dir); | 
|         |     44  | 
|         |     45     verbose("# indexed: " | 
|         |     46 	. scalar(@{$block{""}//[]}) . " images with " | 
|         |     47 	. (grep !/^\.idx$/ => keys(%block))." blocks"); | 
|         |     48  | 
|         |     49     purge_unused($dir => %block); | 
|         |     50     check_images($dir => %block); | 
|         |     51 } | 
|         |     52  | 
|         |     53 sub verbose { say @_ if $o{verbose} } | 
|         |     54  | 
|         |     55 sub get_file_list { | 
|         |     56     my ($list) = @_; | 
|         |     57     my @files = (); | 
|         |     58  | 
|         |     59     open(my $fh => $list); | 
|         |     60     while (<$fh>) { | 
|         |     61 	push @files, (split)[2]; | 
|         |     62     } | 
|         |     63     return grep /^[a-z\d.\/]+$/ => @files; | 
|         |     64 } | 
|         |     65  | 
|         |     66 sub get_block_list { | 
|         |     67     my $dir = shift; | 
|         |     68     my %block; | 
|         |     69     find(sub { | 
|         |     70 	(-f) or return;	# we need to include the tmp files! | 
|         |     71 	push @{$block{""}}, abs_path $_; | 
|         |     72 	foreach my $f (get_file_list($_)) { | 
|         |     73 	    push @{$block{$f}} => $#{$block{""}}; | 
|         |     74 	} | 
|         |     75     }, "$dir/idx"); | 
|         |     76     return %block; | 
|         |     77 } | 
|         |     78  | 
|         |     79 sub purge_unused { | 
|         |     80     my ($dir, %block) = @_; | 
|         |     81  | 
|         |     82     my ($total, $done); | 
|         |     83     verbose("# pass 1 - checking for unused blocks"); | 
|         |     84     verbose("# pass 1 - estimating file count"); | 
|         |     85  | 
|         |     86     # calculate the number of files we expect | 
|         |     87     find(sub { | 
|         |     88 	-d or return; | 
|         |     89 	opendir(my $dh => $_); | 
|         |     90 	map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh; | 
|         |     91 	closedir($dh); | 
|         |     92     }, "$dir/data"); | 
|         |     93  | 
|         |     94  | 
|         |     95     # progress | 
|         |     96     local $SIG{ALRM} = sub { | 
|         |     97 	return alarm 1 if not $done; | 
|         |     98 	my $speed = $done / (time - $^T + 1); | 
|         |     99 	verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", | 
|         |    100 	    100 * ($done/$total), | 
|         |    101 	    scalar(localtime($^T + $speed * ($total - $done))), | 
|         |    102 	    length($total) => $done, | 
|         |    103 	    $total; | 
|         |    104 	alarm 5; | 
|         |    105     }; | 
|         |    106     $SIG{ALRM}->(); | 
|         |    107  | 
|         |    108     my @unused; | 
|         |    109     find(sub { | 
|         |    110 	$done++ if -f; | 
|         |    111 	(-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks | 
|         |    112  | 
|         |    113 	# we don't need uncompressed files if an compressed version | 
|         |    114 	# exists | 
|         |    115 	unlink $_ and return if -f "$_.gz"; | 
|         |    116  | 
|         |    117 	# cut away the first part of the filename and | 
|         |    118 	# some optional extension | 
|         |    119 	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; | 
|         |    120 	exists $block{$rn} and return; | 
|         |    121 	push @unused, abs_path $File::Find::name; | 
|         |    122 	return; | 
|         |    123  | 
|         |    124     }, "$dir/data"); | 
|         |    125     $SIG{ALRM}->(); | 
|         |    126     alarm 0; | 
|         |    127  | 
|         |    128     return if not @unused; | 
|         |    129  | 
|         |    130     say sprintf "found %d (%.1f%%) unused files", | 
|         |    131 	0+@unused, | 
|         |    132 	100 * (@unused/$total); | 
|         |    133  | 
|         |    134     if ($o{yes}) { | 
|         |    135 	verbose("# deleting ".@unused." files"); | 
|         |    136 	unlink @unused; | 
|         |    137 	return; | 
|         |    138     } | 
|         |    139  | 
|         |    140     if (-t) { | 
|         |    141 	while(1) { | 
|         |    142 	    print "delete? [y/N/v]: "; | 
|         |    143 	    given (<STDIN>) { | 
|         |    144 		when (/^y(?:es)?$/i) { unlink @unused; last } | 
|         |    145 		when (/^v/) { say join "\n", @unused; next } | 
|         |    146 		default { last } | 
|         |    147 	    } | 
|         |    148 	} | 
|         |    149     } | 
|         |    150  | 
|         |    151 } | 
|         |    152  | 
|         |    153 sub check_images { | 
|         |    154     my ($dir, %block) = @_; | 
|         |    155  | 
|         |    156     my $total = grep { $_ ne "" } keys(%block); | 
|         |    157     my $done = 0; | 
|         |    158  | 
|         |    159     verbose("# pass 2 - checking image completeness"); | 
|         |    160  | 
|         |    161     # progress | 
|         |    162     local $SIG{ALRM} = sub { | 
|         |    163 	return alarm 1 if not $done; | 
|         |    164 	my $speed = $done / (time - $^T + 1); | 
|         |    165 	verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", | 
|         |    166 	    100 * $done/$total,  | 
|         |    167 	    scalar(localtime($^T + ($total - $done) * $speed)), | 
|         |    168 	    length($total) => $done, | 
|         |    169 	    $total; | 
|         |    170 	    alarm 5; | 
|         |    171     }; | 
|         |    172     $SIG{ALRM}->(); | 
|         |    173  | 
|         |    174     my %invalid; | 
|         |    175     foreach my $k (keys %block) { | 
|         |    176 	my $i = $block{$k}; | 
|         |    177 	next if $k eq ""; | 
|         |    178 	++$done; | 
|         |    179 	 | 
|         |    180 	next if -f "$dir/data/$k" | 
|         |    181 	    or -f "$dir/data/$k.gz"; | 
|         |    182 	say "missing $k @$i"; | 
|         |    183 	@invalid{@$i} = (); | 
|         |    184     } | 
|         |    185     $SIG{ALRM}->(); | 
|         |    186     alarm 0; | 
|         |    187  | 
|         |    188     # invalid now contains the numbers of the idx files beiing | 
|         |    189     # invalid | 
|         |    190     my @invalid = sort @{$block{""}}[keys %invalid]; | 
|         |    191  | 
|         |    192     return if not @invalid; | 
|         |    193  | 
|         |    194     say sprintf "found %d (%.1f%%) invalid images:", | 
|         |    195 	0+@invalid, | 
|         |    196 	100 * (@invalid/$total); | 
|         |    197  | 
|         |    198     if ($o{yes}) { | 
|         |    199 	unlink @invalid; | 
|         |    200 	return; | 
|         |    201     } | 
|         |    202  | 
|         |    203     while (-t) { | 
|         |    204 	print "delete? [y/N/v] "; | 
|         |    205 	given (<STDIN>) { | 
|         |    206 	    when (/^y(?:es)?$/i) { unlink @invalid; last } | 
|         |    207 	    when (/^v/i)	 { say join "\n" => @invalid; next } | 
|         |    208 	default		         { last } | 
|         |    209 	} | 
|         |    210     } | 
|         |    211 } | 
|         |    212 __END__ | 
|         |    213  | 
|         |    214 =head1 NAME | 
|         |    215  | 
|         |    216     checker - checks the imager data and index files | 
|         |    217  | 
|         |    218 =head1 SYNOPSIS | 
|         |    219  | 
|         |    220     checker [options] {directory} | 
|         |    221  | 
|         |    222 =head1 DESCRIPTION | 
|         |    223  | 
|         |    224 This tool loads all the index files from I<directory>F</idx/>, | 
|         |    225 checks if all mentioned files are existing and optionally purges | 
|         |    226 unreferenced files. | 
|         |    227  | 
|         |    228 =head1 OPTIONS | 
|         |    229  | 
|         |    230 =over | 
|         |    231  | 
|         |    232 =item B<-y>|B<--yes> | 
|         |    233  | 
|         |    234 Assume "yes" for all questions (dangerous!). (default: no) | 
|         |    235  | 
|         |    236 =item B<-h>|B<--help> | 
|         |    237  | 
|         |    238 =item B<-m>|B<--man> | 
|         |    239  | 
|         |    240 The short and longer help. | 
|         |    241  | 
|         |    242 =back | 
|         |    243  | 
|         |    244 =cut |