checker
changeset 9 70d578b200ba
parent 7 a508df65c738
child 10 fd5225120ee9
equal deleted inserted replaced
8:6bc08224c44e 9:70d578b200ba
       
     1 #! /usr/bin/perl
       
     2 
       
     3 use 5.010;
       
     4 use strict;
       
     5 use warnings;
       
     6 use Pod::Usage;
       
     7 use Hash::Util qw(lock_keys);
       
     8 use File::Find;
       
     9 use File::Temp;
       
    10 use DB_File;
       
    11 use File::Basename;
       
    12 use autodie qw(:all);
       
    13 use Cwd qw(abs_path);
       
    14 
       
    15 use Getopt::Long;
       
    16 sub get_block_list;
       
    17 sub purge_unused;
       
    18 sub check_images;
       
    19 
       
    20 my %o = (
       
    21     yes => undef,
       
    22     verbose => undef,
       
    23     check => undef,
       
    24 ); lock_keys(%o);
       
    25 
       
    26 MAIN: {
       
    27     Getopt::Long::Configure qw(Bundling);
       
    28     GetOptions(
       
    29 	"y|yes!" => \$o{yes},
       
    30 	"v|verbose!" => \$o{verbose},
       
    31 	"c|check" => \$o{check},
       
    32 	"h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
       
    33 	"m|man"  => sub { pod2usage(-verbose => 2, -exit 0, 
       
    34 			  -noperldoc => system("perldoc -V 1>/dev/null
       
    35 			  2>&1"))},
       
    36     ) and @ARGV or pod2usage;
       
    37     my $dir = shift;
       
    38     my $tmp = File::Temp->new;
       
    39 
       
    40     # load the index files, remember the latest
       
    41     # timestamp we see
       
    42     #tie %idx, "DB_File" => $tmp->filename;
       
    43     my %block = get_block_list($dir);
       
    44 
       
    45     verbose("# indexed: "
       
    46 	. scalar(@{$block{".idx"}//[]}) . " images with "
       
    47 	. (grep /^\.idx/ => keys(%block) - 1)." files");
       
    48 
       
    49     purge_unused($dir => %block);
       
    50     check_images($dir => %block);
       
    51 }
       
    52 
       
    53 sub verbose { say @_ if $o{verbose} }
       
    54 
       
    55 sub get_file_list {
       
    56     my ($list) = @_;
       
    57     my @files = ();
       
    58 
       
    59     open(my $fh => $list);
       
    60     while (<$fh>) {
       
    61 	push @files, (split)[2];
       
    62     }
       
    63     return grep /^[a-z\d.\/]+$/ => @files;
       
    64 }
       
    65 
       
    66 sub get_block_list {
       
    67     my $dir = shift;
       
    68     my %block;
       
    69     find(sub {
       
    70 	(-f) and (-M > 0) or return;
       
    71 	#verbose("idx: $File::Find::name");
       
    72 	push @{$block{".idx"}}, abs_path $_;
       
    73 	foreach my $f (get_file_list($_)) {
       
    74 	    push @{$block{$f}} => $#{$block{".idx"}};
       
    75 	}
       
    76     }, "$dir/idx");
       
    77     return %block;
       
    78 }
       
    79 
       
    80 sub purge_unused {
       
    81     my ($dir, %block) = @_;
       
    82 
       
    83     my ($total, $done);
       
    84 
       
    85     # calculate the number of files we expect
       
    86     find(sub {
       
    87 	-d or return;
       
    88 	opendir(my $dh => $_);
       
    89 	map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh;
       
    90 	closedir($dh);
       
    91     }, "$dir/data");
       
    92 
       
    93 
       
    94     # progress
       
    95     local $SIG{ALRM} = sub {
       
    96 	return alarm 1 if not $done;
       
    97 	my $speed = $done / (time - $^T + 1);
       
    98 	verbose sprintf "# done %5.1f%% | %25s (%*d of %d files)",
       
    99 	    100 * ($done/$total),
       
   100 	    scalar(localtime($^T + $speed * ($total - $done))),
       
   101 	    length($total) => $done,
       
   102 	    $total;
       
   103 	alarm 5;
       
   104     };
       
   105     $SIG{ALRM}->();
       
   106 
       
   107     find(sub {
       
   108 	$done++ if -f;
       
   109 	(-f _) and ((-M _) > 0) or return;
       
   110 
       
   111 	# cut away the first part of the filename and
       
   112 	# some optional extension
       
   113 	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
       
   114 	exists $block{$rn} and return;
       
   115 
       
   116 	if ($o{yes}) {
       
   117 	    verbose("unlinking abs_path $File::Find::name");
       
   118 	    unlink abs_path $File::Find::name;
       
   119 	    return;
       
   120 	}
       
   121 
       
   122 	verbose("unused abs_path $File::Find::name");
       
   123 	return;
       
   124 
       
   125     }, "$dir/data");
       
   126     $SIG{ALRM}->();
       
   127     alarm 0;
       
   128 
       
   129 }
       
   130 
       
   131 sub check_images {
       
   132     my ($dir, %block) = @_;
       
   133 
       
   134     my $total = keys(%block) - 1; # .idx
       
   135     my $done = 0;
       
   136 
       
   137     # progress
       
   138     local $SIG{ALRM} = sub {
       
   139 	return alarm 1 if not $done;
       
   140 	my $speed = $done / (time - $^T + 1);
       
   141 	say sprintf "# done %5.1f%% | %25s (%*d of %d files)",
       
   142 	    100 * $done/$total, 
       
   143 	    scalar(localtime($^T + ($total - $done) * $speed)),
       
   144 	    length($total) => $done,
       
   145 	    $total;
       
   146 	    alarm 5;
       
   147     };
       
   148     $SIG{ALRM}->();
       
   149 
       
   150     my %invalid;
       
   151     foreach my $k (keys %block) {
       
   152 	my $i = $block{$k};
       
   153 	next if $k eq ".idx";
       
   154 	++$done;
       
   155 	
       
   156 	next if -f "$dir/data/$k"
       
   157 	    or -f "$dir/data/$k.gz";
       
   158 	say "missing $k";
       
   159 	@invalid{@{$block{".idx"}}} = ();
       
   160     }
       
   161     $SIG{ALRM}->();
       
   162     alarm 0;
       
   163 
       
   164     return if not %invalid;
       
   165 
       
   166     say "invalid images:\n", join "\n", sort keys %invalid;
       
   167     unlink keys %invalid if $o{yes};
       
   168 }
       
   169 __END__
       
   170 
       
   171 =head1 NAME
       
   172 
       
   173     checker - checks the imager data and index files
       
   174 
       
   175 =head1 SYNOPSIS
       
   176 
       
   177     checker [options] {directory}
       
   178 
       
   179 =head1 DESCRIPTION
       
   180 
       
   181 This tool loads all the index files from I<directory>F</idx/>,
       
   182 checks if all mentioned files are existing and optionally purges
       
   183 unreferenced files.
       
   184 
       
   185 =head1 OPTIONS
       
   186 
       
   187 =over
       
   188 
       
   189 =item B<-y>|B<--yes>
       
   190 
       
   191 Assume "yes" for all questions (dangerous!). (default: no)
       
   192 
       
   193 =item B<-h>|B<--help>
       
   194 
       
   195 =item B<-m>|B<--man>
       
   196 
       
   197 The short and longer help.
       
   198 
       
   199 =back
       
   200 
       
   201 =cut