bin/imager.check
changeset 137 dd11d1262b6c
parent 136 a5d087334439
child 138 790ac145bccc
--- a/bin/imager.check	Sat Jul 25 17:16:13 2015 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,314 +0,0 @@
-#! /usr/bin/perl
-
-use 5.010;
-use strict;
-use warnings;
-use Pod::Usage;
-use Hash::Util qw(lock_keys);
-use File::Find;
-use File::Temp;
-use Digest::MD5 qw(md5_hex);
-use File::Basename;
-use autodie qw(:all);
-use Cwd qw(abs_path);
-use Imager;
-
-use Getopt::Long;
-use constant CIPHER => "aes-128-cbc";
-sub get_block_list;
-sub purge_unused;
-sub check_images;
-
-our %o = (
-    yes      => undef,
-    verbose  => 1,
-    checksum => undef,
-    pass     => undef,
-);
-lock_keys(%o);
-
-MAIN: {
-    Getopt::Long::Configure qw(Bundling);
-    GetOptions(
-        "y|yes!"     => \$o{yes},
-        "v|verbose!" => \$o{verbose},
-        "c|checksum" => \$o{checksum},
-        "p|pass"     => \$o{pass},
-        "h|help"     => sub { pod2usage(-verbose => 1, -exit => 0) },
-        "m|man"      => sub {
-            pod2usage(
-                -verbose   => 2,
-                -exit      => 0,
-                -noperldoc => system(
-                    "perldoc -V 1>/dev/null
-			  2>&1"
-                )
-            );
-        },
-      )
-      and @ARGV
-      or pod2usage;
-    my $dir = shift;
-
-    for (my $pass = 1 ; 1 ; ++$pass) {
-        verbose("# reading index files");
-        my %block = get_block_list($dir);
-        verbose("# indexed: "
-              . scalar(@{ $block{""} // [] })
-              . " images with "
-              . (grep !/^\.idx$/ => keys(%block))
-              . " blocks");
-
-        my $subpass = 0;
-        purge_unused($pass => ++$subpass, $dir => %block);
-        check_images($pass => ++$subpass, $dir => %block) and last;
-
-        verbose("# STARTING OVER!");
-    }
-}
-
-sub verbose { say @_ if $o{verbose} }
-
-sub get_file_list {
-    my ($list) = @_;
-    my @files = ();
-
-    open(my $fh => $list);
-    while (<$fh>) {
-        push @files, (split)[2];
-    }
-    return grep /^[a-z\d.\/]+$/ => @files;
-}
-
-sub get_block_list {
-    my $dir = shift;
-    my %block;
-    find(
-        sub {
-            (-f) or return;    # we need to include the tmp files!
-            push @{ $block{""} }, abs_path $_;
-            foreach my $f (get_file_list($_)) {
-                push @{ $block{$f} } => $#{ $block{""} };
-            }
-        },
-        "$dir/idx"
-    );
-    return %block;
-}
-
-sub purge_unused {
-    my ($pass, $subpass, $dir, %block) = @_;
-    my ($total, $done, $t0);
-
-    verbose("# pass $pass.$subpass - checking for unused blocks");
-    verbose("#          estimating file count");
-
-    # calculate the number of files we expect
-    find(
-        sub {
-            -d or return;
-            opendir(my $dh => $_);
-            map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh;
-            closedir($dh);
-            $File::Find::prune =
-              $_ =~ /^[\d[a-f]{3}$/;    # FIXME should be configurable
-        },
-        "$dir/data"
-    );
-    verbose("#          got $total blocks/files");
-
-    # progress
-    $t0 = time;
-    local $SIG{ALRM} = sub {
-        return alarm 1 if not $done;
-        my $speed = $done / (time - $t0 + 1);
-        verbose sprintf
-          "# pass $pass.$subpass done %5.1f%% | %25s (%*d of %d blocks)",
-          100 * ($done / $total),
-          scalar(localtime $t0 + $total / $speed), length($total) => $done,
-          $total;
-        alarm 5;
-    };
-    $SIG{ALRM}->();
-
-    my @unused;
-    find(
-        sub {
-            $done++ if -f;
-            (-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks
-
-            # we don't need uncompressed files if an compressed version
-            # exists
-            unlink $_ and return if -f "$_.gz";
-            unlink "$_.x" and return if -f "$_.x.gz";
-
-            # the next step we can't do, because it can happen that
-            # the restorer does not know about a password
-            #unlink "$_.gz.x" and return if -f "$_.gz";
-
-            # cut away the first part of the filename and
-            # some optional extension
-            (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
-            return if exists $block{$rn};
-            push @unused => abs_path $_;
-            return;
-
-        },
-        "$dir/data"
-    );
-    $SIG{ALRM}->();
-    alarm 0;
-
-    return if not @unused;
-
-    say sprintf "found %d (%.1f%%) unused files",
-      0 + @unused,
-      100 * (@unused / $total);
-
-    if ($o{yes}) {
-        verbose("# deleting " . @unused . " files");
-        unlink @unused;
-        return;
-    }
-
-    if (-t) {
-        while (1) {
-            print "delete? [y/N/v]: ";
-            given (<STDIN>) {
-                when (/^y(?:es)?$/i) { unlink @unused; last }
-                when (/^v/) { say join "\n", @unused; next }
-                default { last }
-            }
-        }
-    }
-
-}
-
-sub check_images {
-    my ($pass, $subpass, $dir, %block) = @_;
-
-    my $total = grep { $_ ne "" } keys(%block);
-    my $done  = 0;
-    my $t0    = time;
-
-    verbose("# pass $pass.$subpass - checking image completeness");
-
-    # progress
-    local $SIG{ALRM} = sub {
-        return alarm 1 if not $done;
-        my $speed = $done / (time - $t0 + 1);
-        verbose sprintf
-          "# pass $pass.$subpass done %5.1f%% | %25s (%*d of %d blocks)",
-          100 * $done / $total,
-          scalar(localtime $t0 + $total / $speed), length($total) => $done,
-          $total;
-        alarm 5;
-    };
-    $SIG{ALRM}->();
-
-    my %invalid;
-    foreach my $k (keys %block) {
-	state %checked;
-        my $i = $block{$k};
-        next if $k eq "";
-        ++$done;
-
-        my ($file) =
-          grep { -f }
-          map { "$dir/data/$_" } ($k, "$k.gz", "$k.x", "$k.x.gz", "$k.gz.x");
-
-        if (not $file) {
-            say "missing $k @$i";
-            @invalid{@$i} = ();
-            next;
-        }
-
-        next if not $o{checksum};
-	next if $checked{$file};
-
-        # checking the checksum
-	Imager::get_block($file => \my $buffer);
-
-	if (md5_hex($buffer) ne basename($file, qw(.gz .x .gz.x))) {
-	    say "wrong checksum for $file $k @$i\n";
-	    @invalid{@$i} = ();
-	    next;
-	}
-
-	$checked{$file} = 1;
-    }
-    $SIG{ALRM}->();
-    alarm 0;
-
-    # invalid now contains the numbers of the idx files beiing
-    # invalid
-    my @invalid = sort @{ $block{""} }[keys %invalid];
-
-    return 1 if not @invalid;
-
-    say sprintf "found %d (%.1f%%) invalid images:",
-      0 + @invalid,
-      100 * (@invalid / $total);
-
-    if ($o{yes}) {
-        unlink @invalid;
-        return undef;
-    }
-
-    while (-t) {
-        print "delete? [y/N/v] ";
-        given (<STDIN>) {
-            when (/^y(?:es)?$/i) { unlink @invalid; return undef }
-            when (/^v/i) { say join "\n" => @invalid; next }
-            default { last }
-        }
-    }
-
-    return 1;
-}
-__END__
-
-=head1 NAME
-
-    imager.check - checks the imager data and index files
-
-=head1 SYNOPSIS
-
-    imager.check [options] {directory}
-
-=head1 DESCRIPTION
-
-This tool loads all the index files from I<directory>F</idx/>,
-checks if all mentioned files are existing and optionally purges
-unreferenced files.
-
-=head1 OPTIONS
-
-=over
-
-=item B<-c>|B<--checksum>
-
-Read all block files and check their checksum. (default: off)
-
-=item B<-p>|B<--pass> I<pass>
-
-In case you're using encrypted blocks, the param is passed to
-C<openssl>s C<-pass> option. (default: unset)
-
-=item B<-v>|B<-->[no]B<verbose>
-
-Generate more output about what's going on. (default: on)
-
-=item B<-y>|B<--yes>
-
-Assume "yes" for all questions (dangerous!). (default: no)
-
-=item B<-h>|B<--help>
-
-=item B<-m>|B<--man>
-
-The short and longer help.
-
-=back
-
-=cut