--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/checker Mon Jul 25 11:59:05 2011 +0200
@@ -0,0 +1,201 @@
+#! /usr/bin/perl
+
+use 5.010;
+use strict;
+use warnings;
+use Pod::Usage;
+use Hash::Util qw(lock_keys);
+use File::Find;
+use File::Temp;
+use DB_File;
+use File::Basename;
+use autodie qw(:all);
+use Cwd qw(abs_path);
+
+use Getopt::Long;
+sub get_block_list;
+sub purge_unused;
+sub check_images;
+
+my %o = (
+ yes => undef,
+ verbose => undef,
+ check => undef,
+); lock_keys(%o);
+
+MAIN: {
+ Getopt::Long::Configure qw(Bundling);
+ GetOptions(
+ "y|yes!" => \$o{yes},
+ "v|verbose!" => \$o{verbose},
+ "c|check" => \$o{check},
+ "h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
+ "m|man" => sub { pod2usage(-verbose => 2, -exit 0,
+ -noperldoc => system("perldoc -V 1>/dev/null
+ 2>&1"))},
+ ) and @ARGV or pod2usage;
+ my $dir = shift;
+ my $tmp = File::Temp->new;
+
+ # load the index files, remember the latest
+ # timestamp we see
+ #tie %idx, "DB_File" => $tmp->filename;
+ my %block = get_block_list($dir);
+
+ verbose("# indexed: "
+ . scalar(@{$block{".idx"}//[]}) . " images with "
+ . (grep /^\.idx/ => keys(%block) - 1)." files");
+
+ purge_unused($dir => %block);
+ check_images($dir => %block);
+}
+
+sub verbose { say @_ if $o{verbose} }
+
+sub get_file_list {
+ my ($list) = @_;
+ my @files = ();
+
+ open(my $fh => $list);
+ while (<$fh>) {
+ push @files, (split)[2];
+ }
+ return grep /^[a-z\d.\/]+$/ => @files;
+}
+
+sub get_block_list {
+ my $dir = shift;
+ my %block;
+ find(sub {
+ (-f) and (-M > 0) or return;
+ #verbose("idx: $File::Find::name");
+ push @{$block{".idx"}}, abs_path $_;
+ foreach my $f (get_file_list($_)) {
+ push @{$block{$f}} => $#{$block{".idx"}};
+ }
+ }, "$dir/idx");
+ return %block;
+}
+
+sub purge_unused {
+ my ($dir, %block) = @_;
+
+ my ($total, $done);
+
+ # calculate the number of files we expect
+ find(sub {
+ -d or return;
+ opendir(my $dh => $_);
+ map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh;
+ closedir($dh);
+ }, "$dir/data");
+
+
+ # progress
+ local $SIG{ALRM} = sub {
+ return alarm 1 if not $done;
+ my $speed = $done / (time - $^T + 1);
+ verbose sprintf "# done %5.1f%% | %25s (%*d of %d files)",
+ 100 * ($done/$total),
+ scalar(localtime($^T + $speed * ($total - $done))),
+ length($total) => $done,
+ $total;
+ alarm 5;
+ };
+ $SIG{ALRM}->();
+
+ find(sub {
+ $done++ if -f;
+ (-f _) and ((-M _) > 0) or return;
+
+ # cut away the first part of the filename and
+ # some optional extension
+ (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
+ exists $block{$rn} and return;
+
+ if ($o{yes}) {
+ verbose("unlinking abs_path $File::Find::name");
+ unlink abs_path $File::Find::name;
+ return;
+ }
+
+ verbose("unused abs_path $File::Find::name");
+ return;
+
+ }, "$dir/data");
+ $SIG{ALRM}->();
+ alarm 0;
+
+}
+
+sub check_images {
+ my ($dir, %block) = @_;
+
+ my $total = keys(%block) - 1; # .idx
+ my $done = 0;
+
+ # progress
+ local $SIG{ALRM} = sub {
+ return alarm 1 if not $done;
+ my $speed = $done / (time - $^T + 1);
+ say sprintf "# done %5.1f%% | %25s (%*d of %d files)",
+ 100 * $done/$total,
+ scalar(localtime($^T + ($total - $done) * $speed)),
+ length($total) => $done,
+ $total;
+ alarm 5;
+ };
+ $SIG{ALRM}->();
+
+ my %invalid;
+ foreach my $k (keys %block) {
+ my $i = $block{$k};
+ next if $k eq ".idx";
+ ++$done;
+
+ next if -f "$dir/data/$k"
+ or -f "$dir/data/$k.gz";
+ say "missing $k";
+ @invalid{@{$block{".idx"}}} = ();
+ }
+ $SIG{ALRM}->();
+ alarm 0;
+
+ return if not %invalid;
+
+ say "invalid images:\n", join "\n", sort keys %invalid;
+ unlink keys %invalid if $o{yes};
+}
+__END__
+
+=head1 NAME
+
+ checker - checks the imager data and index files
+
+=head1 SYNOPSIS
+
+ checker [options] {directory}
+
+=head1 DESCRIPTION
+
+This tool loads all the index files from I<directory>F</idx/>,
+checks if all mentioned files are existing and optionally purges
+unreferenced files.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-y>|B<--yes>
+
+Assume "yes" for all questions (dangerous!). (default: no)
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+The short and longer help.
+
+=back
+
+=cut
--- a/cleaner Mon Jul 25 08:58:47 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-#! /usr/bin/perl
-
-use 5.010;
-use strict;
-use warnings;
-use Pod::Usage;
-use Hash::Util qw(lock_keys);
-use File::Find;
-use File::Temp;
-use DB_File;
-use File::Basename;
-use autodie qw(:all);
-use Cwd qw(abs_path);
-
-use Getopt::Long;
-
-my %o = (
- dry => undef,
- verbose => undef,
- check => undef,
-); lock_keys(%o);
-
-MAIN: {
- Getopt::Long::Configure qw(Bundling);
- GetOptions(
- "n|dry!" => \$o{dry},
- "v|verbose!" => \$o{verbose},
- "c|check" => \$o{check},
- "h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
- "m|man" => sub { pod2usage(-verbose => 2, -exit 0,
- -noperldoc => system("perldoc -V 1>/dev/null
- 2>&1"))},
- ) and @ARGV or pod2usage;
- my $dir = shift;
- my $tmp = File::Temp->new;
-
- # load the index files, remember the latest
- # timestamp we see
- my (%inuse, @idx);
- #tie %idx, "DB_File" => $tmp->filename;
-
- find(sub {
- (-f) and (-M > 0) or return;
- #verbose("idx: $File::Find::name");
- push @idx, abs_path $_;
- foreach my $f (get_file_list($_)) {
- push @{$inuse{$f}} => $#idx;
- }
- }, "$dir/idx");
-
- verbose("indexed: ".scalar(keys %inuse)." files");
-
- # simple "forward" check: existence of mentioned files
- if ($o{check}) {
- my $total = scalar keys %inuse;
- my $done = 0;
- local $SIG{ALRM} = sub {
- say sprintf "done %5.1f%% (%*d of $total)"
- => 100 * $done/$total, length($total), $done
- if $total;
- alarm 5;
- };
- $SIG{ALRM}->();
- while (my ($f, $i) = each %inuse) {
- ++$done;
- next if -f "$dir/data/$f"
- or -f "$dir/data/$f.gz";
- say "missing $f from\n",
- join "-\t" => "", map { "$_\n" } @idx[@$i];
- }
- $SIG{ALRM}->();
- alarm 0;
- exit 0;
- }
-
- # full check and probably cleaning: all files, not mentioned
- # in some index will be purged
-# my (%file);
-#- find(sub {
-#- (-f) and (-M > 0) or return;
-#- $File::Find::name =~ s/^$dir\/data\///;
-#- $file{$_} = $_;
-#- }, "$dir/data");
-#-
-#- verbose("file system: ".scalar(keys %file)." files");
-#- exit 0;
-
- # ok, now go through all the data files and remove
- # files not mentioned in some index, but never remove
- # files created after the cleaner started
- find(sub {
- (-f) and (-M > 0) or return;
-
- # cut away the first part of the filename and
- # some optional extension
- (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
- exists $inuse{$rn} and return;
-
- if ($o{dry}) {
- verbose("(unlinking) abs_path $File::Find::name");
- return;
- }
-
- verbose("unlinking abs_path $File::Find::name");
- unlink abs_path $File::Find::name;
-
- }, "$dir/data");
-
-}
-
-sub verbose { say @_ if $o{verbose} }
-
-sub get_file_list {
- my ($list) = @_;
- my @files = ();
-
- open(my $fh => $list);
- while (<$fh>) {
- push @files, (split)[2];
- }
- return grep /^[a-z\d.\/]+$/ => @files;
-}
-
-
-__END__
-
-=head1 NAME
-
- cleaner - cleans the imager data directory
-
-=head1 SYNOPSIS
-
- cleaner [options] {directory}
-
-=head1 DESCRIPTION
-
-This tool loads all the index files from I<directory>F</idx/>
-and purges all not mentioned files below I<directory>F</data/>.
-
-=head1 OPTIONS
-
-=over
-
-=item B<-c>|B<--check>
-
-Check (and exit) if nothing is missing.
-
-=item B<-n>|B<--dry>
-
-Do nothing, just print what should be removed. (default: off)
-
-=item B<-h>|B<--help>
-
-=item B<-m>|B<--man>
-
-The short and longer help.
-
-=back
-
-=cut