bin/checker
changeset 19 49ff641055a3
parent 17 0e5a8a5f4674
child 21 e0f19213f8b6
equal deleted inserted replaced
18:4a01ae9db5c4 19:49ff641055a3
       
     1 #! /usr/bin/perl
       
     2 
       
     3 use 5.010;
       
     4 use strict;
       
     5 use warnings;
       
     6 use Pod::Usage;
       
     7 use Hash::Util qw(lock_keys);
       
     8 use File::Find;
       
     9 use File::Temp;
       
    10 use DB_File;
       
    11 use File::Basename;
       
    12 use autodie qw(:all);
       
    13 use Cwd qw(abs_path);
       
    14 
       
    15 use Getopt::Long;
       
    16 sub get_block_list;
       
    17 sub purge_unused;
       
    18 sub check_images;
       
    19 
       
    20 my %o = (
       
    21     yes => undef,
       
    22     verbose => undef,
       
    23     check => undef,
       
    24 ); lock_keys(%o);
       
    25 
       
    26 MAIN: {
       
    27     Getopt::Long::Configure qw(Bundling);
       
    28     GetOptions(
       
    29 	"y|yes!" => \$o{yes},
       
    30 	"v|verbose!" => \$o{verbose},
       
    31 	"c|check" => \$o{check},
       
    32 	"h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
       
    33 	"m|man"  => sub { pod2usage(-verbose => 2, -exit 0, 
       
    34 			  -noperldoc => system("perldoc -V 1>/dev/null
       
    35 			  2>&1"))},
       
    36     ) and @ARGV or pod2usage;
       
    37     my $dir = shift;
       
    38     my $tmp = File::Temp->new;
       
    39 
       
    40     # load the index files, remember the latest
       
    41     # timestamp we see
       
    42     #tie %idx, "DB_File" => $tmp->filename;
       
    43     my %block = get_block_list($dir);
       
    44 
       
    45     verbose("# indexed: "
       
    46 	. scalar(@{$block{""}//[]}) . " images with "
       
    47 	. (grep !/^\.idx$/ => keys(%block))." blocks");
       
    48 
       
    49     purge_unused($dir => %block);
       
    50     check_images($dir => %block);
       
    51 }
       
    52 
       
    53 sub verbose { say @_ if $o{verbose} }
       
    54 
       
    55 sub get_file_list {
       
    56     my ($list) = @_;
       
    57     my @files = ();
       
    58 
       
    59     open(my $fh => $list);
       
    60     while (<$fh>) {
       
    61 	push @files, (split)[2];
       
    62     }
       
    63     return grep /^[a-z\d.\/]+$/ => @files;
       
    64 }
       
    65 
       
    66 sub get_block_list {
       
    67     my $dir = shift;
       
    68     my %block;
       
    69     find(sub {
       
    70 	(-f) or return;	# we need to include the tmp files!
       
    71 	push @{$block{""}}, abs_path $_;
       
    72 	foreach my $f (get_file_list($_)) {
       
    73 	    push @{$block{$f}} => $#{$block{""}};
       
    74 	}
       
    75     }, "$dir/idx");
       
    76     return %block;
       
    77 }
       
    78 
       
    79 sub purge_unused {
       
    80     my ($dir, %block) = @_;
       
    81 
       
    82     my ($total, $done);
       
    83     verbose("# pass 1 - checking for unused blocks");
       
    84     verbose("# pass 1 - estimating file count");
       
    85 
       
    86     # calculate the number of files we expect
       
    87     find(sub {
       
    88 	-d or return;
       
    89 	opendir(my $dh => $_);
       
    90 	map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh;
       
    91 	closedir($dh);
       
    92     }, "$dir/data");
       
    93 
       
    94 
       
    95     # progress
       
    96     local $SIG{ALRM} = sub {
       
    97 	return alarm 1 if not $done;
       
    98 	my $speed = $done / (time - $^T + 1);
       
    99 	verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
       
   100 	    100 * ($done/$total),
       
   101 	    scalar(localtime($^T + $speed * ($total - $done))),
       
   102 	    length($total) => $done,
       
   103 	    $total;
       
   104 	alarm 5;
       
   105     };
       
   106     $SIG{ALRM}->();
       
   107 
       
   108     my @unused;
       
   109     find(sub {
       
   110 	$done++ if -f;
       
   111 	(-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks
       
   112 
       
   113 	# we don't need uncompressed files if an compressed version
       
   114 	# exists
       
   115 	unlink $_ and return if -f "$_.gz";
       
   116 
       
   117 	# cut away the first part of the filename and
       
   118 	# some optional extension
       
   119 	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
       
   120 	exists $block{$rn} and return;
       
   121 	push @unused, abs_path $File::Find::name;
       
   122 	return;
       
   123 
       
   124     }, "$dir/data");
       
   125     $SIG{ALRM}->();
       
   126     alarm 0;
       
   127 
       
   128     return if not @unused;
       
   129 
       
   130     say sprintf "found %d (%.1f%%) unused files",
       
   131 	0+@unused,
       
   132 	100 * (@unused/$total);
       
   133 
       
   134     if ($o{yes}) {
       
   135 	verbose("# deleting ".@unused." files");
       
   136 	unlink @unused;
       
   137 	return;
       
   138     }
       
   139 
       
   140     if (-t) {
       
   141 	while(1) {
       
   142 	    print "delete? [y/N/v]: ";
       
   143 	    given (<STDIN>) {
       
   144 		when (/^y(?:es)?$/i) { unlink @unused; last }
       
   145 		when (/^v/) { say join "\n", @unused; next }
       
   146 		default { last }
       
   147 	    }
       
   148 	}
       
   149     }
       
   150 
       
   151 }
       
   152 
       
   153 sub check_images {
       
   154     my ($dir, %block) = @_;
       
   155 
       
   156     my $total = grep { $_ ne "" } keys(%block);
       
   157     my $done = 0;
       
   158 
       
   159     verbose("# pass 2 - checking image completeness");
       
   160 
       
   161     # progress
       
   162     local $SIG{ALRM} = sub {
       
   163 	return alarm 1 if not $done;
       
   164 	my $speed = $done / (time - $^T + 1);
       
   165 	verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
       
   166 	    100 * $done/$total, 
       
   167 	    scalar(localtime($^T + ($total - $done) * $speed)),
       
   168 	    length($total) => $done,
       
   169 	    $total;
       
   170 	    alarm 5;
       
   171     };
       
   172     $SIG{ALRM}->();
       
   173 
       
   174     my %invalid;
       
   175     foreach my $k (keys %block) {
       
   176 	my $i = $block{$k};
       
   177 	next if $k eq "";
       
   178 	++$done;
       
   179 	
       
   180 	next if -f "$dir/data/$k"
       
   181 	    or -f "$dir/data/$k.gz";
       
   182 	say "missing $k @$i";
       
   183 	@invalid{@$i} = ();
       
   184     }
       
   185     $SIG{ALRM}->();
       
   186     alarm 0;
       
   187 
       
   188     # invalid now contains the numbers of the idx files beiing
       
   189     # invalid
       
   190     my @invalid = sort @{$block{""}}[keys %invalid];
       
   191 
       
   192     return if not @invalid;
       
   193 
       
   194     say sprintf "found %d (%.1f%%) invalid images:",
       
   195 	0+@invalid,
       
   196 	100 * (@invalid/$total);
       
   197 
       
   198     if ($o{yes}) {
       
   199 	unlink @invalid;
       
   200 	return;
       
   201     }
       
   202 
       
   203     while (-t) {
       
   204 	print "delete? [y/N/v] ";
       
   205 	given (<STDIN>) {
       
   206 	    when (/^y(?:es)?$/i) { unlink @invalid; last }
       
   207 	    when (/^v/i)	 { say join "\n" => @invalid; next }
       
   208 	default		         { last }
       
   209 	}
       
   210     }
       
   211 }
       
   212 __END__
       
   213 
       
   214 =head1 NAME
       
   215 
       
   216     checker - checks the imager data and index files
       
   217 
       
   218 =head1 SYNOPSIS
       
   219 
       
   220     checker [options] {directory}
       
   221 
       
   222 =head1 DESCRIPTION
       
   223 
       
   224 This tool loads all the index files from I<directory>F</idx/>,
       
   225 checks if all mentioned files are existing and optionally purges
       
   226 unreferenced files.
       
   227 
       
   228 =head1 OPTIONS
       
   229 
       
   230 =over
       
   231 
       
   232 =item B<-y>|B<--yes>
       
   233 
       
   234 Assume "yes" for all questions (dangerous!). (default: no)
       
   235 
       
   236 =item B<-h>|B<--help>
       
   237 
       
   238 =item B<-m>|B<--man>
       
   239 
       
   240 The short and longer help.
       
   241 
       
   242 =back
       
   243 
       
   244 =cut