checker
changeset 19 49ff641055a3
parent 18 4a01ae9db5c4
child 20 6c5ad12e1f2d
child 21 e0f19213f8b6
--- a/checker	Tue Jul 26 11:54:40 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,244 +0,0 @@
-#! /usr/bin/perl
-
-use 5.010;
-use strict;
-use warnings;
-use Pod::Usage;
-use Hash::Util qw(lock_keys);
-use File::Find;
-use File::Temp;
-use DB_File;
-use File::Basename;
-use autodie qw(:all);
-use Cwd qw(abs_path);
-
-use Getopt::Long;
-sub get_block_list;
-sub purge_unused;
-sub check_images;
-
-my %o = (
-    yes => undef,
-    verbose => undef,
-    check => undef,
-); lock_keys(%o);
-
-MAIN: {
-    Getopt::Long::Configure qw(Bundling);
-    GetOptions(
-	"y|yes!" => \$o{yes},
-	"v|verbose!" => \$o{verbose},
-	"c|check" => \$o{check},
-	"h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
-	"m|man"  => sub { pod2usage(-verbose => 2, -exit 0, 
-			  -noperldoc => system("perldoc -V 1>/dev/null
-			  2>&1"))},
-    ) and @ARGV or pod2usage;
-    my $dir = shift;
-    my $tmp = File::Temp->new;
-
-    # load the index files, remember the latest
-    # timestamp we see
-    #tie %idx, "DB_File" => $tmp->filename;
-    my %block = get_block_list($dir);
-
-    verbose("# indexed: "
-	. scalar(@{$block{""}//[]}) . " images with "
-	. (grep !/^\.idx$/ => keys(%block))." blocks");
-
-    purge_unused($dir => %block);
-    check_images($dir => %block);
-}
-
-sub verbose { say @_ if $o{verbose} }
-
-sub get_file_list {
-    my ($list) = @_;
-    my @files = ();
-
-    open(my $fh => $list);
-    while (<$fh>) {
-	push @files, (split)[2];
-    }
-    return grep /^[a-z\d.\/]+$/ => @files;
-}
-
-sub get_block_list {
-    my $dir = shift;
-    my %block;
-    find(sub {
-	(-f) or return;	# we need to include the tmp files!
-	push @{$block{""}}, abs_path $_;
-	foreach my $f (get_file_list($_)) {
-	    push @{$block{$f}} => $#{$block{""}};
-	}
-    }, "$dir/idx");
-    return %block;
-}
-
-sub purge_unused {
-    my ($dir, %block) = @_;
-
-    my ($total, $done);
-    verbose("# pass 1 - checking for unused blocks");
-    verbose("# pass 1 - estimating file count");
-
-    # calculate the number of files we expect
-    find(sub {
-	-d or return;
-	opendir(my $dh => $_);
-	map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh;
-	closedir($dh);
-    }, "$dir/data");
-
-
-    # progress
-    local $SIG{ALRM} = sub {
-	return alarm 1 if not $done;
-	my $speed = $done / (time - $^T + 1);
-	verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
-	    100 * ($done/$total),
-	    scalar(localtime($^T + $speed * ($total - $done))),
-	    length($total) => $done,
-	    $total;
-	alarm 5;
-    };
-    $SIG{ALRM}->();
-
-    my @unused;
-    find(sub {
-	$done++ if -f;
-	(-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks
-
-	# we don't need uncompressed files if an compressed version
-	# exists
-	unlink $_ and return if -f "$_.gz";
-
-	# cut away the first part of the filename and
-	# some optional extension
-	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
-	exists $block{$rn} and return;
-	push @unused, abs_path $File::Find::name;
-	return;
-
-    }, "$dir/data");
-    $SIG{ALRM}->();
-    alarm 0;
-
-    return if not @unused;
-
-    say sprintf "found %d (%.1f%%) unused files",
-	0+@unused,
-	100 * (@unused/$total);
-
-    if ($o{yes}) {
-	verbose("# deleting ".@unused." files");
-	unlink @unused;
-	return;
-    }
-
-    if (-t) {
-	while(1) {
-	    print "delete? [y/N/v]: ";
-	    given (<STDIN>) {
-		when (/^y(?:es)?$/i) { unlink @unused; last }
-		when (/^v/) { say join "\n", @unused; next }
-		default { last }
-	    }
-	}
-    }
-
-}
-
-sub check_images {
-    my ($dir, %block) = @_;
-
-    my $total = grep { $_ ne "" } keys(%block);
-    my $done = 0;
-
-    verbose("# pass 2 - checking image completeness");
-
-    # progress
-    local $SIG{ALRM} = sub {
-	return alarm 1 if not $done;
-	my $speed = $done / (time - $^T + 1);
-	verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
-	    100 * $done/$total, 
-	    scalar(localtime($^T + ($total - $done) * $speed)),
-	    length($total) => $done,
-	    $total;
-	    alarm 5;
-    };
-    $SIG{ALRM}->();
-
-    my %invalid;
-    foreach my $k (keys %block) {
-	my $i = $block{$k};
-	next if $k eq "";
-	++$done;
-	
-	next if -f "$dir/data/$k"
-	    or -f "$dir/data/$k.gz";
-	say "missing $k @$i";
-	@invalid{@$i} = ();
-    }
-    $SIG{ALRM}->();
-    alarm 0;
-
-    # invalid now contains the numbers of the idx files beiing
-    # invalid
-    my @invalid = sort @{$block{""}}[keys %invalid];
-
-    return if not @invalid;
-
-    say sprintf "found %d (%.1f%%) invalid images:",
-	0+@invalid,
-	100 * (@invalid/$total);
-
-    if ($o{yes}) {
-	unlink @invalid;
-	return;
-    }
-
-    while (-t) {
-	print "delete? [y/N/v] ";
-	given (<STDIN>) {
-	    when (/^y(?:es)?$/i) { unlink @invalid; last }
-	    when (/^v/i)	 { say join "\n" => @invalid; next }
-	default		         { last }
-	}
-    }
-}
-__END__
-
-=head1 NAME
-
-    checker - checks the imager data and index files
-
-=head1 SYNOPSIS
-
-    checker [options] {directory}
-
-=head1 DESCRIPTION
-
-This tool loads all the index files from I<directory>F</idx/>,
-checks if all mentioned files are existing and optionally purges
-unreferenced files.
-
-=head1 OPTIONS
-
-=over
-
-=item B<-y>|B<--yes>
-
-Assume "yes" for all questions (dangerous!). (default: no)
-
-=item B<-h>|B<--help>
-
-=item B<-m>|B<--man>
-
-The short and longer help.
-
-=back
-
-=cut