bin/imager.check
changeset 137 dd11d1262b6c
parent 136 a5d087334439
child 138 790ac145bccc
equal deleted inserted replaced
136:a5d087334439 137:dd11d1262b6c
     1 #! /usr/bin/perl
       
     2 
       
     3 use 5.010;
       
     4 use strict;
       
     5 use warnings;
       
     6 use Pod::Usage;
       
     7 use Hash::Util qw(lock_keys);
       
     8 use File::Find;
       
     9 use File::Temp;
       
    10 use Digest::MD5 qw(md5_hex);
       
    11 use File::Basename;
       
    12 use autodie qw(:all);
       
    13 use Cwd qw(abs_path);
       
    14 use Imager;
       
    15 
       
    16 use Getopt::Long;
       
    17 use constant CIPHER => "aes-128-cbc";
       
    18 sub get_block_list;
       
    19 sub purge_unused;
       
    20 sub check_images;
       
    21 
       
    22 our %o = (
       
    23     yes      => undef,
       
    24     verbose  => 1,
       
    25     checksum => undef,
       
    26     pass     => undef,
       
    27 );
       
    28 lock_keys(%o);
       
    29 
       
    30 MAIN: {
       
    31     Getopt::Long::Configure qw(Bundling);
       
    32     GetOptions(
       
    33         "y|yes!"     => \$o{yes},
       
    34         "v|verbose!" => \$o{verbose},
       
    35         "c|checksum" => \$o{checksum},
       
    36         "p|pass"     => \$o{pass},
       
    37         "h|help"     => sub { pod2usage(-verbose => 1, -exit => 0) },
       
    38         "m|man"      => sub {
       
    39             pod2usage(
       
    40                 -verbose   => 2,
       
    41                 -exit      => 0,
       
    42                 -noperldoc => system(
       
    43                     "perldoc -V 1>/dev/null
       
    44 			  2>&1"
       
    45                 )
       
    46             );
       
    47         },
       
    48       )
       
    49       and @ARGV
       
    50       or pod2usage;
       
    51     my $dir = shift;
       
    52 
       
    53     for (my $pass = 1 ; 1 ; ++$pass) {
       
    54         verbose("# reading index files");
       
    55         my %block = get_block_list($dir);
       
    56         verbose("# indexed: "
       
    57               . scalar(@{ $block{""} // [] })
       
    58               . " images with "
       
    59               . (grep !/^\.idx$/ => keys(%block))
       
    60               . " blocks");
       
    61 
       
    62         my $subpass = 0;
       
    63         purge_unused($pass => ++$subpass, $dir => %block);
       
    64         check_images($pass => ++$subpass, $dir => %block) and last;
       
    65 
       
    66         verbose("# STARTING OVER!");
       
    67     }
       
    68 }
       
    69 
       
    70 sub verbose { say @_ if $o{verbose} }
       
    71 
       
    72 sub get_file_list {
       
    73     my ($list) = @_;
       
    74     my @files = ();
       
    75 
       
    76     open(my $fh => $list);
       
    77     while (<$fh>) {
       
    78         push @files, (split)[2];
       
    79     }
       
    80     return grep /^[a-z\d.\/]+$/ => @files;
       
    81 }
       
    82 
       
    83 sub get_block_list {
       
    84     my $dir = shift;
       
    85     my %block;
       
    86     find(
       
    87         sub {
       
    88             (-f) or return;    # we need to include the tmp files!
       
    89             push @{ $block{""} }, abs_path $_;
       
    90             foreach my $f (get_file_list($_)) {
       
    91                 push @{ $block{$f} } => $#{ $block{""} };
       
    92             }
       
    93         },
       
    94         "$dir/idx"
       
    95     );
       
    96     return %block;
       
    97 }
       
    98 
       
    99 sub purge_unused {
       
   100     my ($pass, $subpass, $dir, %block) = @_;
       
   101     my ($total, $done, $t0);
       
   102 
       
   103     verbose("# pass $pass.$subpass - checking for unused blocks");
       
   104     verbose("#          estimating file count");
       
   105 
       
   106     # calculate the number of files we expect
       
   107     find(
       
   108         sub {
       
   109             -d or return;
       
   110             opendir(my $dh => $_);
       
   111             map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh;
       
   112             closedir($dh);
       
   113             $File::Find::prune =
       
   114               $_ =~ /^[\d[a-f]{3}$/;    # FIXME should be configurable
       
   115         },
       
   116         "$dir/data"
       
   117     );
       
   118     verbose("#          got $total blocks/files");
       
   119 
       
   120     # progress
       
   121     $t0 = time;
       
   122     local $SIG{ALRM} = sub {
       
   123         return alarm 1 if not $done;
       
   124         my $speed = $done / (time - $t0 + 1);
       
   125         verbose sprintf
       
   126           "# pass $pass.$subpass done %5.1f%% | %25s (%*d of %d blocks)",
       
   127           100 * ($done / $total),
       
   128           scalar(localtime $t0 + $total / $speed), length($total) => $done,
       
   129           $total;
       
   130         alarm 5;
       
   131     };
       
   132     $SIG{ALRM}->();
       
   133 
       
   134     my @unused;
       
   135     find(
       
   136         sub {
       
   137             $done++ if -f;
       
   138             (-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks
       
   139 
       
   140             # we don't need uncompressed files if an compressed version
       
   141             # exists
       
   142             unlink $_ and return if -f "$_.gz";
       
   143             unlink "$_.x" and return if -f "$_.x.gz";
       
   144 
       
   145             # the next step we can't do, because it can happen that
       
   146             # the restorer does not know about a password
       
   147             #unlink "$_.gz.x" and return if -f "$_.gz";
       
   148 
       
   149             # cut away the first part of the filename and
       
   150             # some optional extension
       
   151             (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
       
   152             return if exists $block{$rn};
       
   153             push @unused => abs_path $_;
       
   154             return;
       
   155 
       
   156         },
       
   157         "$dir/data"
       
   158     );
       
   159     $SIG{ALRM}->();
       
   160     alarm 0;
       
   161 
       
   162     return if not @unused;
       
   163 
       
   164     say sprintf "found %d (%.1f%%) unused files",
       
   165       0 + @unused,
       
   166       100 * (@unused / $total);
       
   167 
       
   168     if ($o{yes}) {
       
   169         verbose("# deleting " . @unused . " files");
       
   170         unlink @unused;
       
   171         return;
       
   172     }
       
   173 
       
   174     if (-t) {
       
   175         while (1) {
       
   176             print "delete? [y/N/v]: ";
       
   177             given (<STDIN>) {
       
   178                 when (/^y(?:es)?$/i) { unlink @unused; last }
       
   179                 when (/^v/) { say join "\n", @unused; next }
       
   180                 default { last }
       
   181             }
       
   182         }
       
   183     }
       
   184 
       
   185 }
       
   186 
       
   187 sub check_images {
       
   188     my ($pass, $subpass, $dir, %block) = @_;
       
   189 
       
   190     my $total = grep { $_ ne "" } keys(%block);
       
   191     my $done  = 0;
       
   192     my $t0    = time;
       
   193 
       
   194     verbose("# pass $pass.$subpass - checking image completeness");
       
   195 
       
   196     # progress
       
   197     local $SIG{ALRM} = sub {
       
   198         return alarm 1 if not $done;
       
   199         my $speed = $done / (time - $t0 + 1);
       
   200         verbose sprintf
       
   201           "# pass $pass.$subpass done %5.1f%% | %25s (%*d of %d blocks)",
       
   202           100 * $done / $total,
       
   203           scalar(localtime $t0 + $total / $speed), length($total) => $done,
       
   204           $total;
       
   205         alarm 5;
       
   206     };
       
   207     $SIG{ALRM}->();
       
   208 
       
   209     my %invalid;
       
   210     foreach my $k (keys %block) {
       
   211 	state %checked;
       
   212         my $i = $block{$k};
       
   213         next if $k eq "";
       
   214         ++$done;
       
   215 
       
   216         my ($file) =
       
   217           grep { -f }
       
   218           map { "$dir/data/$_" } ($k, "$k.gz", "$k.x", "$k.x.gz", "$k.gz.x");
       
   219 
       
   220         if (not $file) {
       
   221             say "missing $k @$i";
       
   222             @invalid{@$i} = ();
       
   223             next;
       
   224         }
       
   225 
       
   226         next if not $o{checksum};
       
   227 	next if $checked{$file};
       
   228 
       
   229         # checking the checksum
       
   230 	Imager::get_block($file => \my $buffer);
       
   231 
       
   232 	if (md5_hex($buffer) ne basename($file, qw(.gz .x .gz.x))) {
       
   233 	    say "wrong checksum for $file $k @$i\n";
       
   234 	    @invalid{@$i} = ();
       
   235 	    next;
       
   236 	}
       
   237 
       
   238 	$checked{$file} = 1;
       
   239     }
       
   240     $SIG{ALRM}->();
       
   241     alarm 0;
       
   242 
       
   243     # invalid now contains the numbers of the idx files beiing
       
   244     # invalid
       
   245     my @invalid = sort @{ $block{""} }[keys %invalid];
       
   246 
       
   247     return 1 if not @invalid;
       
   248 
       
   249     say sprintf "found %d (%.1f%%) invalid images:",
       
   250       0 + @invalid,
       
   251       100 * (@invalid / $total);
       
   252 
       
   253     if ($o{yes}) {
       
   254         unlink @invalid;
       
   255         return undef;
       
   256     }
       
   257 
       
   258     while (-t) {
       
   259         print "delete? [y/N/v] ";
       
   260         given (<STDIN>) {
       
   261             when (/^y(?:es)?$/i) { unlink @invalid; return undef }
       
   262             when (/^v/i) { say join "\n" => @invalid; next }
       
   263             default { last }
       
   264         }
       
   265     }
       
   266 
       
   267     return 1;
       
   268 }
       
   269 __END__
       
   270 
       
   271 =head1 NAME
       
   272 
       
   273     imager.check - checks the imager data and index files
       
   274 
       
   275 =head1 SYNOPSIS
       
   276 
       
   277     imager.check [options] {directory}
       
   278 
       
   279 =head1 DESCRIPTION
       
   280 
       
   281 This tool loads all the index files from I<directory>F</idx/>,
       
   282 checks if all mentioned files are existing and optionally purges
       
   283 unreferenced files.
       
   284 
       
   285 =head1 OPTIONS
       
   286 
       
   287 =over
       
   288 
       
   289 =item B<-c>|B<--checksum>
       
   290 
       
   291 Read all block files and check their checksum. (default: off)
       
   292 
       
   293 =item B<-p>|B<--pass> I<pass>
       
   294 
       
   295 In case you're using encrypted blocks, the param is passed to
       
   296 C<openssl>s C<-pass> option. (default: unset)
       
   297 
       
   298 =item B<-v>|B<-->[no]B<verbose>
       
   299 
       
   300 Generate more output about what's going on. (default: on)
       
   301 
       
   302 =item B<-y>|B<--yes>
       
   303 
       
   304 Assume "yes" for all questions (dangerous!). (default: no)
       
   305 
       
   306 =item B<-h>|B<--help>
       
   307 
       
   308 =item B<-m>|B<--man>
       
   309 
       
   310 The short and longer help.
       
   311 
       
   312 =back
       
   313 
       
   314 =cut