diff -r 910cff130541 -r fb2455a007a7 cleaner --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cleaner Fri Jul 22 17:06:09 2011 +0200 @@ -0,0 +1,159 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use Pod::Usage; +use Hash::Util qw(lock_keys); +use File::Find; +use File::Temp; +use DB_File; +use File::Basename; +use autodie qw(:all); +use Cwd qw(abs_path); + +use Getopt::Long; + +my %o = ( + dry => undef, + verbose => undef, + check => undef, +); lock_keys(%o); + +MAIN: { + GetOptions( + "n|dry!" => \$o{dry}, + "v|verbose!" => \$o{verbose}, + "c|check" => \$o{check}, + "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, + "m|man" => sub { pod2usage(-verbose => 2, -exit 0, + -noperldoc => system("perldoc -V 1>/dev/null + 2>&1"))}, + ) and @ARGV or pod2usage; + my $dir = shift; + my $tmp = File::Temp->new; + + # load the index files, remember the latest + # timestamp we see + my (%inuse, @idx); + #tie %idx, "DB_File" => $tmp->filename; + + find(sub { + (-f) and (-M > 0) or return; + verbose("idx: $File::Find::name"); + push @idx, abs_path $_; + foreach my $f (get_file_list($_)) { + push @{$inuse{$f}} => $#idx; + } + }, "$dir/idx"); + + verbose("indexed: ".scalar(keys %inuse)." files"); + + # simple "forward" check: existence of mentioned files + if ($o{check}) { + my $total = scalar keys %inuse; + my $done = 0; + local $SIG{ALRM} = sub { + say sprintf "done %5.1f%% (%*d of $total)" + => 100 * $done/$total, length($total), $done; + alarm 5; + }; + $SIG{ALRM}->(); + while (my ($f, $i) = each %inuse) { + ++$done; + next if -f "$dir/data/$f" + or -f "$dir/data/$f.gz"; + say "missing $f from\n", + join "-\t" => "", map { "$_\n" } @idx[@$i]; + } + $SIG{ALRM}->(); + alarm 0; + exit 0; + } + + # full check and probably cleaning: all files, not mentioned + # in some index will be purged +# my (%file); +#- find(sub { +#- (-f) and (-M > 0) or return; +#- $File::Find::name =~ s/^$dir\/data\///; +#- $file{$_} = $_; +#- }, "$dir/data"); +#- +#- verbose("file system: ".scalar(keys %file)." files"); +#- exit 0; + + # ok, now go through all the data files and remove + # files not mentioned in some index, but never remove + # files created after the cleaner started + find(sub { + (-f) and (-M > 0) or return; + + # cut away the first part of the filename and + # some optional extension + $File::Find::name = abs_path $File::Find::name; + (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; + exists $inuse{$rn} and return; + + if ($o{dry}) { + verbose("(unlinking) $File::Find::name"); + return; + } + + verbose("unlinking $File::Find::name"); + unlink $File::Find::name; + + }, "$dir/data"); + +} + +sub verbose { say @_ if $o{verbose} } + +sub get_file_list { + my ($list) = @_; + my @files = (); + + open(my $fh => $list); + while (<$fh>) { + push @files, (split)[2]; + } + return grep /^[a-z\d.\/]+$/ => @files; +} + + +__END__ + +=head1 NAME + + cleaner - cleans the imager data directory + +=head1 SYNOPSIS + + cleaner [options] {directory} + +=head1 DESCRIPTION + +This tool loads all the index files from IF +and purges all not mentioned files below IF. + +=head1 OPTIONS + +=over + +=item B<-c>|B<--check> + +Check (and exit) if nothing is missing. + +=item B<-n>|B<--dry> + +Do nothing, just print what should be removed. (default: off) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The short and longer help. + +=back + +=cut