bin/imager.check
changeset 26 496ee9b0f488
parent 21 e0f19213f8b6
child 27 82c0df89b287
equal deleted inserted replaced
25:94a50c69de28 26:496ee9b0f488
       
     1 #! /usr/bin/perl
       
     2 
       
     3 use 5.010;
       
     4 use strict;
       
     5 use warnings;
       
     6 use Pod::Usage;
       
     7 use Hash::Util qw(lock_keys);
       
     8 use File::Find;
       
     9 use File::Temp;
       
    10 use DB_File;
       
    11 use File::Basename;
       
    12 use autodie qw(:all);
       
    13 use Cwd qw(abs_path);
       
    14 
       
    15 use Getopt::Long;
       
    16 sub get_block_list;
       
    17 sub purge_unused;
       
    18 sub check_images;
       
    19 
       
    20 my %o = (
       
    21     yes     => undef,
       
    22     verbose => undef,
       
    23     check   => undef,
       
    24 );
       
    25 lock_keys(%o);
       
    26 
       
    27 MAIN: {
       
    28     Getopt::Long::Configure qw(Bundling);
       
    29     GetOptions(
       
    30         "y|yes!"     => \$o{yes},
       
    31         "v|verbose!" => \$o{verbose},
       
    32         "c|check"    => \$o{check},
       
    33         "h|help"     => sub { pod2usage(-verbose => 1, -exit 0) },
       
    34         "m|man"      => sub {
       
    35             pod2usage(
       
    36                 -verbose => 2,
       
    37                 -exit 0,
       
    38                 -noperldoc => system(
       
    39                     "perldoc -V 1>/dev/null
       
    40 			  2>&1"
       
    41                 )
       
    42             );
       
    43         },
       
    44       )
       
    45       and @ARGV
       
    46       or pod2usage;
       
    47     my $dir = shift;
       
    48     my $tmp = File::Temp->new;
       
    49 
       
    50     # load the index files, remember the latest
       
    51     # timestamp we see
       
    52     #tie %idx, "DB_File" => $tmp->filename;
       
    53     my %block = get_block_list($dir);
       
    54 
       
    55     verbose("# indexed: "
       
    56           . scalar(@{ $block{""} // [] })
       
    57           . " images with "
       
    58           . (grep !/^\.idx$/ => keys(%block))
       
    59           . " blocks");
       
    60 
       
    61     purge_unused($dir => %block);
       
    62     check_images($dir => %block);
       
    63 }
       
    64 
       
    65 sub verbose { say @_ if $o{verbose} }
       
    66 
       
    67 sub get_file_list {
       
    68     my ($list) = @_;
       
    69     my @files = ();
       
    70 
       
    71     open(my $fh => $list);
       
    72     while (<$fh>) {
       
    73         push @files, (split)[2];
       
    74     }
       
    75     return grep /^[a-z\d.\/]+$/ => @files;
       
    76 }
       
    77 
       
    78 sub get_block_list {
       
    79     my $dir = shift;
       
    80     my %block;
       
    81     find(
       
    82         sub {
       
    83             (-f) or return;    # we need to include the tmp files!
       
    84             push @{ $block{""} }, abs_path $_;
       
    85             foreach my $f (get_file_list($_)) {
       
    86                 push @{ $block{$f} } => $#{ $block{""} };
       
    87             }
       
    88         },
       
    89         "$dir/idx"
       
    90     );
       
    91     return %block;
       
    92 }
       
    93 
       
    94 sub purge_unused {
       
    95     my ($dir, %block) = @_;
       
    96 
       
    97     my ($total, $done);
       
    98     verbose("# pass 1 - checking for unused blocks");
       
    99     verbose("# pass 1 - estimating file count");
       
   100 
       
   101     # calculate the number of files we expect
       
   102     find(
       
   103         sub {
       
   104             -d or return;
       
   105             opendir(my $dh => $_);
       
   106             map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh;
       
   107             closedir($dh);
       
   108         },
       
   109         "$dir/data"
       
   110     );
       
   111 
       
   112     # progress
       
   113     local $SIG{ALRM} = sub {
       
   114         return alarm 1 if not $done;
       
   115         my $speed = $done / (time - $^T + 1);
       
   116         verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
       
   117           100 * ($done / $total),
       
   118           scalar(localtime($^T + $speed * ($total - $done))),
       
   119           length($total) => $done,
       
   120           $total;
       
   121         alarm 5;
       
   122     };
       
   123     $SIG{ALRM}->();
       
   124 
       
   125     my @unused;
       
   126     find(
       
   127         sub {
       
   128             $done++ if -f;
       
   129             (-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks
       
   130 
       
   131             # we don't need uncompressed files if an compressed version
       
   132             # exists
       
   133             unlink $_ and return if -f "$_.gz";
       
   134 
       
   135             # cut away the first part of the filename and
       
   136             # some optional extension
       
   137             (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
       
   138             exists $block{$rn} and return;
       
   139             push @unused, abs_path $File::Find::name;
       
   140             return;
       
   141 
       
   142         },
       
   143         "$dir/data"
       
   144     );
       
   145     $SIG{ALRM}->();
       
   146     alarm 0;
       
   147 
       
   148     return if not @unused;
       
   149 
       
   150     say sprintf "found %d (%.1f%%) unused files",
       
   151       0 + @unused,
       
   152       100 * (@unused / $total);
       
   153 
       
   154     if ($o{yes}) {
       
   155         verbose("# deleting " . @unused . " files");
       
   156         unlink @unused;
       
   157         return;
       
   158     }
       
   159 
       
   160     if (-t) {
       
   161         while (1) {
       
   162             print "delete? [y/N/v]: ";
       
   163             given (<STDIN>) {
       
   164                 when (/^y(?:es)?$/i) { unlink @unused; last }
       
   165                 when (/^v/) { say join "\n", @unused; next }
       
   166                 default { last }
       
   167             }
       
   168         }
       
   169     }
       
   170 
       
   171 }
       
   172 
       
   173 sub check_images {
       
   174     my ($dir, %block) = @_;
       
   175 
       
   176     my $total = grep { $_ ne "" } keys(%block);
       
   177     my $done = 0;
       
   178 
       
   179     verbose("# pass 2 - checking image completeness");
       
   180 
       
   181     # progress
       
   182     local $SIG{ALRM} = sub {
       
   183         return alarm 1 if not $done;
       
   184         my $speed = $done / (time - $^T + 1);
       
   185         verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
       
   186           100 * $done / $total,
       
   187           scalar(localtime($^T + ($total - $done) * $speed)),
       
   188           length($total) => $done,
       
   189           $total;
       
   190         alarm 5;
       
   191     };
       
   192     $SIG{ALRM}->();
       
   193 
       
   194     my %invalid;
       
   195     foreach my $k (keys %block) {
       
   196         my $i = $block{$k};
       
   197         next if $k eq "";
       
   198         ++$done;
       
   199 
       
   200         next
       
   201           if -f "$dir/data/$k"
       
   202               or -f "$dir/data/$k.gz";
       
   203         say "missing $k @$i";
       
   204         @invalid{@$i} = ();
       
   205     }
       
   206     $SIG{ALRM}->();
       
   207     alarm 0;
       
   208 
       
   209     # invalid now contains the numbers of the idx files beiing
       
   210     # invalid
       
   211     my @invalid = sort @{ $block{""} }[keys %invalid];
       
   212 
       
   213     return if not @invalid;
       
   214 
       
   215     say sprintf "found %d (%.1f%%) invalid images:",
       
   216       0 + @invalid,
       
   217       100 * (@invalid / $total);
       
   218 
       
   219     if ($o{yes}) {
       
   220         unlink @invalid;
       
   221         return;
       
   222     }
       
   223 
       
   224     while (-t) {
       
   225         print "delete? [y/N/v] ";
       
   226         given (<STDIN>) {
       
   227             when (/^y(?:es)?$/i) { unlink @invalid; last }
       
   228             when (/^v/i) { say join "\n" => @invalid; next }
       
   229             default { last }
       
   230         }
       
   231     }
       
   232 }
       
   233 __END__
       
   234 
       
   235 =head1 NAME
       
   236 
       
   237     imager.check - checks the imager data and index files
       
   238 
       
   239 =head1 SYNOPSIS
       
   240 
       
   241     imager.check [options] {directory}
       
   242 
       
   243 =head1 DESCRIPTION
       
   244 
       
   245 This tool loads all the index files from I<directory>F</idx/>,
       
   246 checks if all mentioned files are existing and optionally purges
       
   247 unreferenced files.
       
   248 
       
   249 =head1 OPTIONS
       
   250 
       
   251 =over
       
   252 
       
   253 =item B<-y>|B<--yes>
       
   254 
       
   255 Assume "yes" for all questions (dangerous!). (default: no)
       
   256 
       
   257 =item B<-h>|B<--help>
       
   258 
       
   259 =item B<-m>|B<--man>
       
   260 
       
   261 The short and longer help.
       
   262 
       
   263 =back
       
   264 
       
   265 =cut