cleaner
changeset 4 fb2455a007a7
child 6 129b39480dc5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cleaner	Fri Jul 22 17:06:09 2011 +0200
@@ -0,0 +1,159 @@
+#! /usr/bin/perl
+
+use 5.010;
+use strict;
+use warnings;
+use Pod::Usage;
+use Hash::Util qw(lock_keys);
+use File::Find;
+use File::Temp;
+use DB_File;
+use File::Basename;
+use autodie qw(:all);
+use Cwd qw(abs_path);
+
+use Getopt::Long;
+
+my %o = (
+    dry => undef,
+    verbose => undef,
+    check => undef,
+); lock_keys(%o);
+
+MAIN: {
+    GetOptions(
+	"n|dry!" => \$o{dry},
+	"v|verbose!" => \$o{verbose},
+	"c|check" => \$o{check},
+	"h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
+	"m|man"  => sub { pod2usage(-verbose => 2, -exit 0, 
+			  -noperldoc => system("perldoc -V 1>/dev/null
+			  2>&1"))},
+    ) and @ARGV or pod2usage;
+    my $dir = shift;
+    my $tmp = File::Temp->new;
+
+    # load the index files, remember the latest
+    # timestamp we see
+    my (%inuse, @idx);
+    #tie %idx, "DB_File" => $tmp->filename;
+
+    find(sub {
+	(-f) and (-M > 0) or return;
+	verbose("idx: $File::Find::name");
+	push @idx, abs_path $_;
+	foreach my $f (get_file_list($_)) {
+	    push @{$inuse{$f}} => $#idx;
+	}
+    }, "$dir/idx");
+
+    verbose("indexed: ".scalar(keys %inuse)." files");
+
+    # simple "forward" check: existence of mentioned files
+    if ($o{check}) {
+	my $total = scalar keys %inuse;
+	my $done = 0;
+	local $SIG{ALRM} = sub {
+	    say sprintf "done %5.1f%% (%*d of $total)" 
+		=> 100 * $done/$total, length($total), $done;
+	    alarm 5;
+	};
+	$SIG{ALRM}->();
+	while (my ($f, $i) = each %inuse) {
+	    ++$done;
+	    next if -f "$dir/data/$f"
+		or -f "$dir/data/$f.gz";
+	    say "missing $f from\n",
+		join "-\t" => "", map { "$_\n" } @idx[@$i];
+	}
+	$SIG{ALRM}->();
+	alarm 0;
+	exit 0;
+    }
+
+    # full check and probably cleaning: all files, not mentioned
+    # in some index will be purged
+#   my (%file);
+#-    find(sub {
+#-	(-f) and (-M > 0) or return;
+#-	$File::Find::name =~ s/^$dir\/data\///;
+#-	$file{$_} = $_;
+#-    }, "$dir/data");
+#-
+#-    verbose("file system: ".scalar(keys %file)." files");
+#-    exit 0;
+
+    # ok, now go through all the data files and remove
+    # files not mentioned in some index, but never remove
+    # files created after the cleaner started
+    find(sub {
+	(-f) and (-M > 0) or return;
+
+	# cut away the first part of the filename and
+	# some optional extension
+	$File::Find::name = abs_path $File::Find::name;
+	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
+	exists $inuse{$rn} and return;
+
+	if ($o{dry}) {
+	    verbose("(unlinking) $File::Find::name");
+	    return;
+	}
+
+	verbose("unlinking $File::Find::name");
+	unlink $File::Find::name;
+
+    }, "$dir/data");
+
+}
+
+sub verbose { say @_ if $o{verbose} }
+
+sub get_file_list {
+    my ($list) = @_;
+    my @files = ();
+
+    open(my $fh => $list);
+    while (<$fh>) {
+	push @files, (split)[2];
+    }
+    return grep /^[a-z\d.\/]+$/ => @files;
+}
+
+
+__END__
+
+=head1 NAME
+
+    cleaner - cleans the imager data directory
+
+=head1 SYNOPSIS
+
+    cleaner [options] {directory}
+
+=head1 DESCRIPTION
+
+This tool loads all the index files from I<directory>F</idx/>
+and purges all not mentioned files below I<directory>F</data/>.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-c>|B<--check>
+
+Check (and exit) if nothing is missing.
+
+=item B<-n>|B<--dry>
+
+Do nothing, just print what should be removed. (default: off)
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+The short and longer help.
+
+=back
+
+=cut