--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cleaner Fri Jul 22 17:06:09 2011 +0200
@@ -0,0 +1,159 @@
+#! /usr/bin/perl
+
+use 5.010;
+use strict;
+use warnings;
+use Pod::Usage;
+use Hash::Util qw(lock_keys);
+use File::Find;
+use File::Temp;
+use DB_File;
+use File::Basename;
+use autodie qw(:all);
+use Cwd qw(abs_path);
+
+use Getopt::Long;
+
+my %o = (
+ dry => undef,
+ verbose => undef,
+ check => undef,
+); lock_keys(%o);
+
+MAIN: {
+ GetOptions(
+ "n|dry!" => \$o{dry},
+ "v|verbose!" => \$o{verbose},
+ "c|check" => \$o{check},
+ "h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
+ "m|man" => sub { pod2usage(-verbose => 2, -exit 0,
+ -noperldoc => system("perldoc -V 1>/dev/null
+ 2>&1"))},
+ ) and @ARGV or pod2usage;
+ my $dir = shift;
+ my $tmp = File::Temp->new;
+
+ # load the index files, remember the latest
+ # timestamp we see
+ my (%inuse, @idx);
+ #tie %idx, "DB_File" => $tmp->filename;
+
+ find(sub {
+ (-f) and (-M > 0) or return;
+ verbose("idx: $File::Find::name");
+ push @idx, abs_path $_;
+ foreach my $f (get_file_list($_)) {
+ push @{$inuse{$f}} => $#idx;
+ }
+ }, "$dir/idx");
+
+ verbose("indexed: ".scalar(keys %inuse)." files");
+
+ # simple "forward" check: existence of mentioned files
+ if ($o{check}) {
+ my $total = scalar keys %inuse;
+ my $done = 0;
+ local $SIG{ALRM} = sub {
+ say sprintf "done %5.1f%% (%*d of $total)"
+ => 100 * $done/$total, length($total), $done;
+ alarm 5;
+ };
+ $SIG{ALRM}->();
+ while (my ($f, $i) = each %inuse) {
+ ++$done;
+ next if -f "$dir/data/$f"
+ or -f "$dir/data/$f.gz";
+ say "missing $f from\n",
+ join "-\t" => "", map { "$_\n" } @idx[@$i];
+ }
+ $SIG{ALRM}->();
+ alarm 0;
+ exit 0;
+ }
+
+ # full check and probably cleaning: all files, not mentioned
+ # in some index will be purged
+# my (%file);
+#- find(sub {
+#- (-f) and (-M > 0) or return;
+#- $File::Find::name =~ s/^$dir\/data\///;
+#- $file{$_} = $_;
+#- }, "$dir/data");
+#-
+#- verbose("file system: ".scalar(keys %file)." files");
+#- exit 0;
+
+ # ok, now go through all the data files and remove
+ # files not mentioned in some index, but never remove
+ # files created after the cleaner started
+ find(sub {
+ (-f) and (-M > 0) or return;
+
+ # cut away the first part of the filename and
+ # some optional extension
+ $File::Find::name = abs_path $File::Find::name;
+ (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
+ exists $inuse{$rn} and return;
+
+ if ($o{dry}) {
+ verbose("(unlinking) $File::Find::name");
+ return;
+ }
+
+ verbose("unlinking $File::Find::name");
+ unlink $File::Find::name;
+
+ }, "$dir/data");
+
+}
+
+sub verbose { say @_ if $o{verbose} }
+
+sub get_file_list {
+ my ($list) = @_;
+ my @files = ();
+
+ open(my $fh => $list);
+ while (<$fh>) {
+ push @files, (split)[2];
+ }
+ return grep /^[a-z\d.\/]+$/ => @files;
+}
+
+
+__END__
+
+=head1 NAME
+
+ cleaner - cleans the imager data directory
+
+=head1 SYNOPSIS
+
+ cleaner [options] {directory}
+
+=head1 DESCRIPTION
+
+This tool loads all the index files from I<directory>F</idx/>
+and purges all not mentioned files below I<directory>F</data/>.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-c>|B<--check>
+
+Check (and exit) if nothing is missing.
+
+=item B<-n>|B<--dry>
+
+Do nothing, just print what should be removed. (default: off)
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+The short and longer help.
+
+=back
+
+=cut