bin/imager.check
changeset 26 496ee9b0f488
parent 21 e0f19213f8b6
child 27 82c0df89b287
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/imager.check	Fri Jul 29 10:53:14 2011 +0200
@@ -0,0 +1,265 @@
+#! /usr/bin/perl
+
+use 5.010;
+use strict;
+use warnings;
+use Pod::Usage;
+use Hash::Util qw(lock_keys);
+use File::Find;
+use File::Temp;
+use DB_File;
+use File::Basename;
+use autodie qw(:all);
+use Cwd qw(abs_path);
+
+use Getopt::Long;
+sub get_block_list;
+sub purge_unused;
+sub check_images;
+
+my %o = (
+    yes     => undef,
+    verbose => undef,
+    check   => undef,
+);
+lock_keys(%o);
+
+MAIN: {
+    Getopt::Long::Configure qw(Bundling);
+    GetOptions(
+        "y|yes!"     => \$o{yes},
+        "v|verbose!" => \$o{verbose},
+        "c|check"    => \$o{check},
+        "h|help"     => sub { pod2usage(-verbose => 1, -exit 0) },
+        "m|man"      => sub {
+            pod2usage(
+                -verbose => 2,
+                -exit 0,
+                -noperldoc => system(
+                    "perldoc -V 1>/dev/null
+			  2>&1"
+                )
+            );
+        },
+      )
+      and @ARGV
+      or pod2usage;
+    my $dir = shift;
+    my $tmp = File::Temp->new;
+
+    # load the index files, remember the latest
+    # timestamp we see
+    #tie %idx, "DB_File" => $tmp->filename;
+    my %block = get_block_list($dir);
+
+    verbose("# indexed: "
+          . scalar(@{ $block{""} // [] })
+          . " images with "
+          . (grep !/^\.idx$/ => keys(%block))
+          . " blocks");
+
+    purge_unused($dir => %block);
+    check_images($dir => %block);
+}
+
+sub verbose { say @_ if $o{verbose} }
+
+sub get_file_list {
+    my ($list) = @_;
+    my @files = ();
+
+    open(my $fh => $list);
+    while (<$fh>) {
+        push @files, (split)[2];
+    }
+    return grep /^[a-z\d.\/]+$/ => @files;
+}
+
+sub get_block_list {
+    my $dir = shift;
+    my %block;
+    find(
+        sub {
+            (-f) or return;    # we need to include the tmp files!
+            push @{ $block{""} }, abs_path $_;
+            foreach my $f (get_file_list($_)) {
+                push @{ $block{$f} } => $#{ $block{""} };
+            }
+        },
+        "$dir/idx"
+    );
+    return %block;
+}
+
+sub purge_unused {
+    my ($dir, %block) = @_;
+
+    my ($total, $done);
+    verbose("# pass 1 - checking for unused blocks");
+    verbose("# pass 1 - estimating file count");
+
+    # calculate the number of files we expect
+    find(
+        sub {
+            -d or return;
+            opendir(my $dh => $_);
+            map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh;
+            closedir($dh);
+        },
+        "$dir/data"
+    );
+
+    # progress
+    local $SIG{ALRM} = sub {
+        return alarm 1 if not $done;
+        my $speed = $done / (time - $^T + 1);
+        verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
+          100 * ($done / $total),
+          scalar(localtime($^T + $speed * ($total - $done))),
+          length($total) => $done,
+          $total;
+        alarm 5;
+    };
+    $SIG{ALRM}->();
+
+    my @unused;
+    find(
+        sub {
+            $done++ if -f;
+            (-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks
+
+            # we don't need uncompressed files if an compressed version
+            # exists
+            unlink $_ and return if -f "$_.gz";
+
+            # cut away the first part of the filename and
+            # some optional extension
+            (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
+            exists $block{$rn} and return;
+            push @unused, abs_path $File::Find::name;
+            return;
+
+        },
+        "$dir/data"
+    );
+    $SIG{ALRM}->();
+    alarm 0;
+
+    return if not @unused;
+
+    say sprintf "found %d (%.1f%%) unused files",
+      0 + @unused,
+      100 * (@unused / $total);
+
+    if ($o{yes}) {
+        verbose("# deleting " . @unused . " files");
+        unlink @unused;
+        return;
+    }
+
+    if (-t) {
+        while (1) {
+            print "delete? [y/N/v]: ";
+            given (<STDIN>) {
+                when (/^y(?:es)?$/i) { unlink @unused; last }
+                when (/^v/) { say join "\n", @unused; next }
+                default { last }
+            }
+        }
+    }
+
+}
+
+sub check_images {
+    my ($dir, %block) = @_;
+
+    my $total = grep { $_ ne "" } keys(%block);
+    my $done = 0;
+
+    verbose("# pass 2 - checking image completeness");
+
+    # progress
+    local $SIG{ALRM} = sub {
+        return alarm 1 if not $done;
+        my $speed = $done / (time - $^T + 1);
+        verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
+          100 * $done / $total,
+          scalar(localtime($^T + ($total - $done) * $speed)),
+          length($total) => $done,
+          $total;
+        alarm 5;
+    };
+    $SIG{ALRM}->();
+
+    my %invalid;
+    foreach my $k (keys %block) {
+        my $i = $block{$k};
+        next if $k eq "";
+        ++$done;
+
+        next
+          if -f "$dir/data/$k"
+              or -f "$dir/data/$k.gz";
+        say "missing $k @$i";
+        @invalid{@$i} = ();
+    }
+    $SIG{ALRM}->();
+    alarm 0;
+
+    # invalid now contains the numbers of the idx files beiing
+    # invalid
+    my @invalid = sort @{ $block{""} }[keys %invalid];
+
+    return if not @invalid;
+
+    say sprintf "found %d (%.1f%%) invalid images:",
+      0 + @invalid,
+      100 * (@invalid / $total);
+
+    if ($o{yes}) {
+        unlink @invalid;
+        return;
+    }
+
+    while (-t) {
+        print "delete? [y/N/v] ";
+        given (<STDIN>) {
+            when (/^y(?:es)?$/i) { unlink @invalid; last }
+            when (/^v/i) { say join "\n" => @invalid; next }
+            default { last }
+        }
+    }
+}
+__END__
+
+=head1 NAME
+
+    imager.check - checks the imager data and index files
+
+=head1 SYNOPSIS
+
+    imager.check [options] {directory}
+
+=head1 DESCRIPTION
+
+This tool loads all the index files from I<directory>F</idx/>,
+checks if all mentioned files are existing and optionally purges
+unreferenced files.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-y>|B<--yes>
+
+Assume "yes" for all questions (dangerous!). (default: no)
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+The short and longer help.
+
+=back
+
+=cut