# HG changeset patch
# User Heiko Schlittermann (JUMPER) <hs@schlittermann.de>
# Date 1311587945 -7200
# Node ID 70d578b200ba43836ad8a6991d7df33f70f95623
# Parent  6bc08224c44e52196d47ff330654bf969ae821d7
checker done

diff -r 6bc08224c44e -r 70d578b200ba checker
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/checker	Mon Jul 25 11:59:05 2011 +0200
@@ -0,0 +1,201 @@
+#! /usr/bin/perl
+
+use 5.010;
+use strict;
+use warnings;
+use Pod::Usage;
+use Hash::Util qw(lock_keys);
+use File::Find;
+use File::Temp;
+use DB_File;
+use File::Basename;
+use autodie qw(:all);
+use Cwd qw(abs_path);
+
+use Getopt::Long;
+sub get_block_list;
+sub purge_unused;
+sub check_images;
+
+my %o = (
+    yes => undef,
+    verbose => undef,
+    check => undef,
+); lock_keys(%o);
+
+MAIN: {
+    Getopt::Long::Configure qw(Bundling);
+    GetOptions(
+	"y|yes!" => \$o{yes},
+	"v|verbose!" => \$o{verbose},
+	"c|check" => \$o{check},
+	"h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
+	"m|man"  => sub { pod2usage(-verbose => 2, -exit 0, 
+			  -noperldoc => system("perldoc -V 1>/dev/null
+			  2>&1"))},
+    ) and @ARGV or pod2usage;
+    my $dir = shift;
+    my $tmp = File::Temp->new;
+
+    # load the index files, remember the latest
+    # timestamp we see
+    #tie %idx, "DB_File" => $tmp->filename;
+    my %block = get_block_list($dir);
+
+    verbose("# indexed: "
+	. scalar(@{$block{".idx"}//[]}) . " images with "
+	. (grep /^\.idx/ => keys(%block) - 1)." files");
+
+    purge_unused($dir => %block);
+    check_images($dir => %block);
+}
+
+sub verbose { say @_ if $o{verbose} }
+
+sub get_file_list {
+    my ($list) = @_;
+    my @files = ();
+
+    open(my $fh => $list);
+    while (<$fh>) {
+	push @files, (split)[2];
+    }
+    return grep /^[a-z\d.\/]+$/ => @files;
+}
+
+sub get_block_list {
+    my $dir = shift;
+    my %block;
+    find(sub {
+	(-f) and (-M > 0) or return;
+	#verbose("idx: $File::Find::name");
+	push @{$block{".idx"}}, abs_path $_;
+	foreach my $f (get_file_list($_)) {
+	    push @{$block{$f}} => $#{$block{".idx"}};
+	}
+    }, "$dir/idx");
+    return %block;
+}
+
+sub purge_unused {
+    my ($dir, %block) = @_;
+
+    my ($total, $done);
+
+    # calculate the number of files we expect
+    find(sub {
+	-d or return;
+	opendir(my $dh => $_);
+	map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh;
+	closedir($dh);
+    }, "$dir/data");
+
+
+    # progress
+    local $SIG{ALRM} = sub {
+	return alarm 1 if not $done;
+	my $speed = $done / (time - $^T + 1);
+	verbose sprintf "# done %5.1f%% | %25s (%*d of %d files)",
+	    100 * ($done/$total),
+	    scalar(localtime($^T + $speed * ($total - $done))),
+	    length($total) => $done,
+	    $total;
+	alarm 5;
+    };
+    $SIG{ALRM}->();
+
+    find(sub {
+	$done++ if -f;
+	(-f _) and ((-M _) > 0) or return;
+
+	# cut away the first part of the filename and
+	# some optional extension
+	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
+	exists $block{$rn} and return;
+
+	if ($o{yes}) {
+	    verbose("unlinking abs_path $File::Find::name");
+	    unlink abs_path $File::Find::name;
+	    return;
+	}
+
+	verbose("unused abs_path $File::Find::name");
+	return;
+
+    }, "$dir/data");
+    $SIG{ALRM}->();
+    alarm 0;
+
+}
+
+sub check_images {
+    my ($dir, %block) = @_;
+
+    my $total = keys(%block) - 1; # .idx
+    my $done = 0;
+
+    # progress
+    local $SIG{ALRM} = sub {
+	return alarm 1 if not $done;
+	my $speed = $done / (time - $^T + 1);
+	say sprintf "# done %5.1f%% | %25s (%*d of %d files)",
+	    100 * $done/$total, 
+	    scalar(localtime($^T + ($total - $done) * $speed)),
+	    length($total) => $done,
+	    $total;
+	    alarm 5;
+    };
+    $SIG{ALRM}->();
+
+    my %invalid;
+    foreach my $k (keys %block) {
+	my $i = $block{$k};
+	next if $k eq ".idx";
+	++$done;
+	
+	next if -f "$dir/data/$k"
+	    or -f "$dir/data/$k.gz";
+	say "missing $k";
+	@invalid{@{$block{".idx"}}} = ();
+    }
+    $SIG{ALRM}->();
+    alarm 0;
+
+    return if not %invalid;
+
+    say "invalid images:\n", join "\n", sort keys %invalid;
+    unlink keys %invalid if $o{yes};
+}
+__END__
+
+=head1 NAME
+
+    checker - checks the imager data and index files
+
+=head1 SYNOPSIS
+
+    checker [options] {directory}
+
+=head1 DESCRIPTION
+
+This tool loads all the index files from I<directory>F</idx/>,
+checks if all mentioned files are existing and optionally purges
+unreferenced files.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-y>|B<--yes>
+
+Assume "yes" for all questions (dangerous!). (default: no)
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+The short and longer help.
+
+=back
+
+=cut
diff -r 6bc08224c44e -r 70d578b200ba cleaner
--- a/cleaner	Mon Jul 25 08:58:47 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-#! /usr/bin/perl
-
-use 5.010;
-use strict;
-use warnings;
-use Pod::Usage;
-use Hash::Util qw(lock_keys);
-use File::Find;
-use File::Temp;
-use DB_File;
-use File::Basename;
-use autodie qw(:all);
-use Cwd qw(abs_path);
-
-use Getopt::Long;
-
-my %o = (
-    dry => undef,
-    verbose => undef,
-    check => undef,
-); lock_keys(%o);
-
-MAIN: {
-    Getopt::Long::Configure qw(Bundling);
-    GetOptions(
-	"n|dry!" => \$o{dry},
-	"v|verbose!" => \$o{verbose},
-	"c|check" => \$o{check},
-	"h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
-	"m|man"  => sub { pod2usage(-verbose => 2, -exit 0, 
-			  -noperldoc => system("perldoc -V 1>/dev/null
-			  2>&1"))},
-    ) and @ARGV or pod2usage;
-    my $dir = shift;
-    my $tmp = File::Temp->new;
-
-    # load the index files, remember the latest
-    # timestamp we see
-    my (%inuse, @idx);
-    #tie %idx, "DB_File" => $tmp->filename;
-
-    find(sub {
-	(-f) and (-M > 0) or return;
-	#verbose("idx: $File::Find::name");
-	push @idx, abs_path $_;
-	foreach my $f (get_file_list($_)) {
-	    push @{$inuse{$f}} => $#idx;
-	}
-    }, "$dir/idx");
-
-    verbose("indexed: ".scalar(keys %inuse)." files");
-
-    # simple "forward" check: existence of mentioned files
-    if ($o{check}) {
-	my $total = scalar keys %inuse;
-	my $done = 0;
-	local $SIG{ALRM} = sub {
-	    say sprintf "done %5.1f%% (%*d of $total)" 
-		=> 100 * $done/$total, length($total), $done
-		if $total;
-	    alarm 5;
-	};
-	$SIG{ALRM}->();
-	while (my ($f, $i) = each %inuse) {
-	    ++$done;
-	    next if -f "$dir/data/$f"
-		or -f "$dir/data/$f.gz";
-	    say "missing $f from\n",
-		join "-\t" => "", map { "$_\n" } @idx[@$i];
-	}
-	$SIG{ALRM}->();
-	alarm 0;
-	exit 0;
-    }
-
-    # full check and probably cleaning: all files, not mentioned
-    # in some index will be purged
-#   my (%file);
-#-    find(sub {
-#-	(-f) and (-M > 0) or return;
-#-	$File::Find::name =~ s/^$dir\/data\///;
-#-	$file{$_} = $_;
-#-    }, "$dir/data");
-#-
-#-    verbose("file system: ".scalar(keys %file)." files");
-#-    exit 0;
-
-    # ok, now go through all the data files and remove
-    # files not mentioned in some index, but never remove
-    # files created after the cleaner started
-    find(sub {
-	(-f) and (-M > 0) or return;
-
-	# cut away the first part of the filename and
-	# some optional extension
-	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
-	exists $inuse{$rn} and return;
-
-	if ($o{dry}) {
-	    verbose("(unlinking) abs_path $File::Find::name");
-	    return;
-	}
-
-	verbose("unlinking abs_path $File::Find::name");
-	unlink abs_path $File::Find::name;
-
-    }, "$dir/data");
-
-}
-
-sub verbose { say @_ if $o{verbose} }
-
-sub get_file_list {
-    my ($list) = @_;
-    my @files = ();
-
-    open(my $fh => $list);
-    while (<$fh>) {
-	push @files, (split)[2];
-    }
-    return grep /^[a-z\d.\/]+$/ => @files;
-}
-
-
-__END__
-
-=head1 NAME
-
-    cleaner - cleans the imager data directory
-
-=head1 SYNOPSIS
-
-    cleaner [options] {directory}
-
-=head1 DESCRIPTION
-
-This tool loads all the index files from I<directory>F</idx/>
-and purges all not mentioned files below I<directory>F</data/>.
-
-=head1 OPTIONS
-
-=over
-
-=item B<-c>|B<--check>
-
-Check (and exit) if nothing is missing.
-
-=item B<-n>|B<--dry>
-
-Do nothing, just print what should be removed. (default: off)
-
-=item B<-h>|B<--help>
-
-=item B<-m>|B<--man>
-
-The short and longer help.
-
-=back
-
-=cut