diff -r 6bc08224c44e -r 70d578b200ba cleaner --- a/cleaner Mon Jul 25 08:58:47 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,160 +0,0 @@ -#! /usr/bin/perl - -use 5.010; -use strict; -use warnings; -use Pod::Usage; -use Hash::Util qw(lock_keys); -use File::Find; -use File::Temp; -use DB_File; -use File::Basename; -use autodie qw(:all); -use Cwd qw(abs_path); - -use Getopt::Long; - -my %o = ( - dry => undef, - verbose => undef, - check => undef, -); lock_keys(%o); - -MAIN: { - Getopt::Long::Configure qw(Bundling); - GetOptions( - "n|dry!" => \$o{dry}, - "v|verbose!" => \$o{verbose}, - "c|check" => \$o{check}, - "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, - "m|man" => sub { pod2usage(-verbose => 2, -exit 0, - -noperldoc => system("perldoc -V 1>/dev/null - 2>&1"))}, - ) and @ARGV or pod2usage; - my $dir = shift; - my $tmp = File::Temp->new; - - # load the index files, remember the latest - # timestamp we see - my (%inuse, @idx); - #tie %idx, "DB_File" => $tmp->filename; - - find(sub { - (-f) and (-M > 0) or return; - #verbose("idx: $File::Find::name"); - push @idx, abs_path $_; - foreach my $f (get_file_list($_)) { - push @{$inuse{$f}} => $#idx; - } - }, "$dir/idx"); - - verbose("indexed: ".scalar(keys %inuse)." files"); - - # simple "forward" check: existence of mentioned files - if ($o{check}) { - my $total = scalar keys %inuse; - my $done = 0; - local $SIG{ALRM} = sub { - say sprintf "done %5.1f%% (%*d of $total)" - => 100 * $done/$total, length($total), $done - if $total; - alarm 5; - }; - $SIG{ALRM}->(); - while (my ($f, $i) = each %inuse) { - ++$done; - next if -f "$dir/data/$f" - or -f "$dir/data/$f.gz"; - say "missing $f from\n", - join "-\t" => "", map { "$_\n" } @idx[@$i]; - } - $SIG{ALRM}->(); - alarm 0; - exit 0; - } - - # full check and probably cleaning: all files, not mentioned - # in some index will be purged -# my (%file); -#- find(sub { -#- (-f) and (-M > 0) or return; -#- $File::Find::name =~ s/^$dir\/data\///; -#- $file{$_} = $_; -#- }, "$dir/data"); -#- -#- verbose("file system: ".scalar(keys %file)." files"); -#- exit 0; - - # ok, now go through all the data files and remove - # files not mentioned in some index, but never remove - # files created after the cleaner started - find(sub { - (-f) and (-M > 0) or return; - - # cut away the first part of the filename and - # some optional extension - (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; - exists $inuse{$rn} and return; - - if ($o{dry}) { - verbose("(unlinking) abs_path $File::Find::name"); - return; - } - - verbose("unlinking abs_path $File::Find::name"); - unlink abs_path $File::Find::name; - - }, "$dir/data"); - -} - -sub verbose { say @_ if $o{verbose} } - -sub get_file_list { - my ($list) = @_; - my @files = (); - - open(my $fh => $list); - while (<$fh>) { - push @files, (split)[2]; - } - return grep /^[a-z\d.\/]+$/ => @files; -} - - -__END__ - -=head1 NAME - - cleaner - cleans the imager data directory - -=head1 SYNOPSIS - - cleaner [options] {directory} - -=head1 DESCRIPTION - -This tool loads all the index files from IF -and purges all not mentioned files below IF. - -=head1 OPTIONS - -=over - -=item B<-c>|B<--check> - -Check (and exit) if nothing is missing. - -=item B<-n>|B<--dry> - -Do nothing, just print what should be removed. (default: off) - -=item B<-h>|B<--help> - -=item B<-m>|B<--man> - -The short and longer help. - -=back - -=cut