--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/imager.check Fri Jul 29 10:53:14 2011 +0200
@@ -0,0 +1,265 @@
+#! /usr/bin/perl
+
+use 5.010;
+use strict;
+use warnings;
+use Pod::Usage;
+use Hash::Util qw(lock_keys);
+use File::Find;
+use File::Temp;
+use DB_File;
+use File::Basename;
+use autodie qw(:all);
+use Cwd qw(abs_path);
+
+use Getopt::Long;
+sub get_block_list;
+sub purge_unused;
+sub check_images;
+
+my %o = (
+ yes => undef,
+ verbose => undef,
+ check => undef,
+);
+lock_keys(%o);
+
+MAIN: {
+ Getopt::Long::Configure qw(Bundling);
+ GetOptions(
+ "y|yes!" => \$o{yes},
+ "v|verbose!" => \$o{verbose},
+ "c|check" => \$o{check},
+ "h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
+ "m|man" => sub {
+ pod2usage(
+ -verbose => 2,
+ -exit 0,
+ -noperldoc => system(
+ "perldoc -V 1>/dev/null
+ 2>&1"
+ )
+ );
+ },
+ )
+ and @ARGV
+ or pod2usage;
+ my $dir = shift;
+ my $tmp = File::Temp->new;
+
+ # load the index files, remember the latest
+ # timestamp we see
+ #tie %idx, "DB_File" => $tmp->filename;
+ my %block = get_block_list($dir);
+
+ verbose("# indexed: "
+ . scalar(@{ $block{""} // [] })
+ . " images with "
+ . (grep !/^\.idx$/ => keys(%block))
+ . " blocks");
+
+ purge_unused($dir => %block);
+ check_images($dir => %block);
+}
+
+sub verbose { say @_ if $o{verbose} }
+
+sub get_file_list {
+ my ($list) = @_;
+ my @files = ();
+
+ open(my $fh => $list);
+ while (<$fh>) {
+ push @files, (split)[2];
+ }
+ return grep /^[a-z\d.\/]+$/ => @files;
+}
+
+sub get_block_list {
+ my $dir = shift;
+ my %block;
+ find(
+ sub {
+ (-f) or return; # we need to include the tmp files!
+ push @{ $block{""} }, abs_path $_;
+ foreach my $f (get_file_list($_)) {
+ push @{ $block{$f} } => $#{ $block{""} };
+ }
+ },
+ "$dir/idx"
+ );
+ return %block;
+}
+
+sub purge_unused {
+ my ($dir, %block) = @_;
+
+ my ($total, $done);
+ verbose("# pass 1 - checking for unused blocks");
+ verbose("# pass 1 - estimating file count");
+
+ # calculate the number of files we expect
+ find(
+ sub {
+ -d or return;
+ opendir(my $dh => $_);
+ map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh;
+ closedir($dh);
+ },
+ "$dir/data"
+ );
+
+ # progress
+ local $SIG{ALRM} = sub {
+ return alarm 1 if not $done;
+ my $speed = $done / (time - $^T + 1);
+ verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
+ 100 * ($done / $total),
+ scalar(localtime($^T + $speed * ($total - $done))),
+ length($total) => $done,
+ $total;
+ alarm 5;
+ };
+ $SIG{ALRM}->();
+
+ my @unused;
+ find(
+ sub {
+ $done++ if -f;
+ (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks
+
+ # we don't need uncompressed files if an compressed version
+ # exists
+ unlink $_ and return if -f "$_.gz";
+
+ # cut away the first part of the filename and
+ # some optional extension
+ (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
+ exists $block{$rn} and return;
+ push @unused, abs_path $File::Find::name;
+ return;
+
+ },
+ "$dir/data"
+ );
+ $SIG{ALRM}->();
+ alarm 0;
+
+ return if not @unused;
+
+ say sprintf "found %d (%.1f%%) unused files",
+ 0 + @unused,
+ 100 * (@unused / $total);
+
+ if ($o{yes}) {
+ verbose("# deleting " . @unused . " files");
+ unlink @unused;
+ return;
+ }
+
+ if (-t) {
+ while (1) {
+ print "delete? [y/N/v]: ";
+ given (<STDIN>) {
+ when (/^y(?:es)?$/i) { unlink @unused; last }
+ when (/^v/) { say join "\n", @unused; next }
+ default { last }
+ }
+ }
+ }
+
+}
+
+sub check_images {
+ my ($dir, %block) = @_;
+
+ my $total = grep { $_ ne "" } keys(%block);
+ my $done = 0;
+
+ verbose("# pass 2 - checking image completeness");
+
+ # progress
+ local $SIG{ALRM} = sub {
+ return alarm 1 if not $done;
+ my $speed = $done / (time - $^T + 1);
+ verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
+ 100 * $done / $total,
+ scalar(localtime($^T + ($total - $done) * $speed)),
+ length($total) => $done,
+ $total;
+ alarm 5;
+ };
+ $SIG{ALRM}->();
+
+ my %invalid;
+ foreach my $k (keys %block) {
+ my $i = $block{$k};
+ next if $k eq "";
+ ++$done;
+
+ next
+ if -f "$dir/data/$k"
+ or -f "$dir/data/$k.gz";
+ say "missing $k @$i";
+ @invalid{@$i} = ();
+ }
+ $SIG{ALRM}->();
+ alarm 0;
+
+ # invalid now contains the numbers of the idx files beiing
+ # invalid
+ my @invalid = sort @{ $block{""} }[keys %invalid];
+
+ return if not @invalid;
+
+ say sprintf "found %d (%.1f%%) invalid images:",
+ 0 + @invalid,
+ 100 * (@invalid / $total);
+
+ if ($o{yes}) {
+ unlink @invalid;
+ return;
+ }
+
+ while (-t) {
+ print "delete? [y/N/v] ";
+ given (<STDIN>) {
+ when (/^y(?:es)?$/i) { unlink @invalid; last }
+ when (/^v/i) { say join "\n" => @invalid; next }
+ default { last }
+ }
+ }
+}
+__END__
+
+=head1 NAME
+
+ imager.check - checks the imager data and index files
+
+=head1 SYNOPSIS
+
+ imager.check [options] {directory}
+
+=head1 DESCRIPTION
+
+This tool loads all the index files from I<directory>F</idx/>,
+checks if all mentioned files are existing and optionally purges
+unreferenced files.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-y>|B<--yes>
+
+Assume "yes" for all questions (dangerous!). (default: no)
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+The short and longer help.
+
+=back
+
+=cut