cleaner
changeset 4 fb2455a007a7
child 6 129b39480dc5
equal deleted inserted replaced
3:910cff130541 4:fb2455a007a7
       
     1 #! /usr/bin/perl
       
     2 
       
     3 use 5.010;
       
     4 use strict;
       
     5 use warnings;
       
     6 use Pod::Usage;
       
     7 use Hash::Util qw(lock_keys);
       
     8 use File::Find;
       
     9 use File::Temp;
       
    10 use DB_File;
       
    11 use File::Basename;
       
    12 use autodie qw(:all);
       
    13 use Cwd qw(abs_path);
       
    14 
       
    15 use Getopt::Long;
       
    16 
       
    17 my %o = (
       
    18     dry => undef,
       
    19     verbose => undef,
       
    20     check => undef,
       
    21 ); lock_keys(%o);
       
    22 
       
    23 MAIN: {
       
    24     GetOptions(
       
    25 	"n|dry!" => \$o{dry},
       
    26 	"v|verbose!" => \$o{verbose},
       
    27 	"c|check" => \$o{check},
       
    28 	"h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
       
    29 	"m|man"  => sub { pod2usage(-verbose => 2, -exit 0, 
       
    30 			  -noperldoc => system("perldoc -V 1>/dev/null
       
    31 			  2>&1"))},
       
    32     ) and @ARGV or pod2usage;
       
    33     my $dir = shift;
       
    34     my $tmp = File::Temp->new;
       
    35 
       
    36     # load the index files, remember the latest
       
    37     # timestamp we see
       
    38     my (%inuse, @idx);
       
    39     #tie %idx, "DB_File" => $tmp->filename;
       
    40 
       
    41     find(sub {
       
    42 	(-f) and (-M > 0) or return;
       
    43 	verbose("idx: $File::Find::name");
       
    44 	push @idx, abs_path $_;
       
    45 	foreach my $f (get_file_list($_)) {
       
    46 	    push @{$inuse{$f}} => $#idx;
       
    47 	}
       
    48     }, "$dir/idx");
       
    49 
       
    50     verbose("indexed: ".scalar(keys %inuse)." files");
       
    51 
       
    52     # simple "forward" check: existence of mentioned files
       
    53     if ($o{check}) {
       
    54 	my $total = scalar keys %inuse;
       
    55 	my $done = 0;
       
    56 	local $SIG{ALRM} = sub {
       
    57 	    say sprintf "done %5.1f%% (%*d of $total)" 
       
    58 		=> 100 * $done/$total, length($total), $done;
       
    59 	    alarm 5;
       
    60 	};
       
    61 	$SIG{ALRM}->();
       
    62 	while (my ($f, $i) = each %inuse) {
       
    63 	    ++$done;
       
    64 	    next if -f "$dir/data/$f"
       
    65 		or -f "$dir/data/$f.gz";
       
    66 	    say "missing $f from\n",
       
    67 		join "-\t" => "", map { "$_\n" } @idx[@$i];
       
    68 	}
       
    69 	$SIG{ALRM}->();
       
    70 	alarm 0;
       
    71 	exit 0;
       
    72     }
       
    73 
       
    74     # full check and probably cleaning: all files, not mentioned
       
    75     # in some index will be purged
       
    76 #   my (%file);
       
    77 #-    find(sub {
       
    78 #-	(-f) and (-M > 0) or return;
       
    79 #-	$File::Find::name =~ s/^$dir\/data\///;
       
    80 #-	$file{$_} = $_;
       
    81 #-    }, "$dir/data");
       
    82 #-
       
    83 #-    verbose("file system: ".scalar(keys %file)." files");
       
    84 #-    exit 0;
       
    85 
       
    86     # ok, now go through all the data files and remove
       
    87     # files not mentioned in some index, but never remove
       
    88     # files created after the cleaner started
       
    89     find(sub {
       
    90 	(-f) and (-M > 0) or return;
       
    91 
       
    92 	# cut away the first part of the filename and
       
    93 	# some optional extension
       
    94 	$File::Find::name = abs_path $File::Find::name;
       
    95 	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
       
    96 	exists $inuse{$rn} and return;
       
    97 
       
    98 	if ($o{dry}) {
       
    99 	    verbose("(unlinking) $File::Find::name");
       
   100 	    return;
       
   101 	}
       
   102 
       
   103 	verbose("unlinking $File::Find::name");
       
   104 	unlink $File::Find::name;
       
   105 
       
   106     }, "$dir/data");
       
   107 
       
   108 }
       
   109 
       
   110 sub verbose { say @_ if $o{verbose} }
       
   111 
       
   112 sub get_file_list {
       
   113     my ($list) = @_;
       
   114     my @files = ();
       
   115 
       
   116     open(my $fh => $list);
       
   117     while (<$fh>) {
       
   118 	push @files, (split)[2];
       
   119     }
       
   120     return grep /^[a-z\d.\/]+$/ => @files;
       
   121 }
       
   122 
       
   123 
       
   124 __END__
       
   125 
       
   126 =head1 NAME
       
   127 
       
   128     cleaner - cleans the imager data directory
       
   129 
       
   130 =head1 SYNOPSIS
       
   131 
       
   132     cleaner [options] {directory}
       
   133 
       
   134 =head1 DESCRIPTION
       
   135 
       
   136 This tool loads all the index files from I<directory>F</idx/>
       
   137 and purges all not mentioned files below I<directory>F</data/>.
       
   138 
       
   139 =head1 OPTIONS
       
   140 
       
   141 =over
       
   142 
       
   143 =item B<-c>|B<--check>
       
   144 
       
   145 Check (and exit) if nothing is missing.
       
   146 
       
   147 =item B<-n>|B<--dry>
       
   148 
       
   149 Do nothing, just print what should be removed. (default: off)
       
   150 
       
   151 =item B<-h>|B<--help>
       
   152 
       
   153 =item B<-m>|B<--man>
       
   154 
       
   155 The short and longer help.
       
   156 
       
   157 =back
       
   158 
       
   159 =cut