bin/checker
changeset 21 e0f19213f8b6
parent 19 49ff641055a3
equal deleted inserted replaced
19:49ff641055a3 21:e0f19213f8b6
    16 sub get_block_list;
    16 sub get_block_list;
    17 sub purge_unused;
    17 sub purge_unused;
    18 sub check_images;
    18 sub check_images;
    19 
    19 
    20 my %o = (
    20 my %o = (
    21     yes => undef,
    21     yes     => undef,
    22     verbose => undef,
    22     verbose => undef,
    23     check => undef,
    23     check   => undef,
    24 ); lock_keys(%o);
    24 );
       
    25 lock_keys(%o);
    25 
    26 
    26 MAIN: {
    27 MAIN: {
    27     Getopt::Long::Configure qw(Bundling);
    28     Getopt::Long::Configure qw(Bundling);
    28     GetOptions(
    29     GetOptions(
    29 	"y|yes!" => \$o{yes},
    30         "y|yes!"     => \$o{yes},
    30 	"v|verbose!" => \$o{verbose},
    31         "v|verbose!" => \$o{verbose},
    31 	"c|check" => \$o{check},
    32         "c|check"    => \$o{check},
    32 	"h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
    33         "h|help"     => sub { pod2usage(-verbose => 1, -exit 0) },
    33 	"m|man"  => sub { pod2usage(-verbose => 2, -exit 0, 
    34         "m|man"      => sub {
    34 			  -noperldoc => system("perldoc -V 1>/dev/null
    35             pod2usage(
    35 			  2>&1"))},
    36                 -verbose => 2,
    36     ) and @ARGV or pod2usage;
    37                 -exit 0,
       
    38                 -noperldoc => system(
       
    39                     "perldoc -V 1>/dev/null
       
    40 			  2>&1"
       
    41                 )
       
    42             );
       
    43         },
       
    44       )
       
    45       and @ARGV
       
    46       or pod2usage;
    37     my $dir = shift;
    47     my $dir = shift;
    38     my $tmp = File::Temp->new;
    48     my $tmp = File::Temp->new;
    39 
    49 
    40     # load the index files, remember the latest
    50     # load the index files, remember the latest
    41     # timestamp we see
    51     # timestamp we see
    42     #tie %idx, "DB_File" => $tmp->filename;
    52     #tie %idx, "DB_File" => $tmp->filename;
    43     my %block = get_block_list($dir);
    53     my %block = get_block_list($dir);
    44 
    54 
    45     verbose("# indexed: "
    55     verbose("# indexed: "
    46 	. scalar(@{$block{""}//[]}) . " images with "
    56           . scalar(@{ $block{""} // [] })
    47 	. (grep !/^\.idx$/ => keys(%block))." blocks");
    57           . " images with "
       
    58           . (grep !/^\.idx$/ => keys(%block))
       
    59           . " blocks");
    48 
    60 
    49     purge_unused($dir => %block);
    61     purge_unused($dir => %block);
    50     check_images($dir => %block);
    62     check_images($dir => %block);
    51 }
    63 }
    52 
    64 
    56     my ($list) = @_;
    68     my ($list) = @_;
    57     my @files = ();
    69     my @files = ();
    58 
    70 
    59     open(my $fh => $list);
    71     open(my $fh => $list);
    60     while (<$fh>) {
    72     while (<$fh>) {
    61 	push @files, (split)[2];
    73         push @files, (split)[2];
    62     }
    74     }
    63     return grep /^[a-z\d.\/]+$/ => @files;
    75     return grep /^[a-z\d.\/]+$/ => @files;
    64 }
    76 }
    65 
    77 
    66 sub get_block_list {
    78 sub get_block_list {
    67     my $dir = shift;
    79     my $dir = shift;
    68     my %block;
    80     my %block;
    69     find(sub {
    81     find(
    70 	(-f) or return;	# we need to include the tmp files!
    82         sub {
    71 	push @{$block{""}}, abs_path $_;
    83             (-f) or return;    # we need to include the tmp files!
    72 	foreach my $f (get_file_list($_)) {
    84             push @{ $block{""} }, abs_path $_;
    73 	    push @{$block{$f}} => $#{$block{""}};
    85             foreach my $f (get_file_list($_)) {
    74 	}
    86                 push @{ $block{$f} } => $#{ $block{""} };
    75     }, "$dir/idx");
    87             }
       
    88         },
       
    89         "$dir/idx"
       
    90     );
    76     return %block;
    91     return %block;
    77 }
    92 }
    78 
    93 
    79 sub purge_unused {
    94 sub purge_unused {
    80     my ($dir, %block) = @_;
    95     my ($dir, %block) = @_;
    82     my ($total, $done);
    97     my ($total, $done);
    83     verbose("# pass 1 - checking for unused blocks");
    98     verbose("# pass 1 - checking for unused blocks");
    84     verbose("# pass 1 - estimating file count");
    99     verbose("# pass 1 - estimating file count");
    85 
   100 
    86     # calculate the number of files we expect
   101     # calculate the number of files we expect
    87     find(sub {
   102     find(
    88 	-d or return;
   103         sub {
    89 	opendir(my $dh => $_);
   104             -d or return;
    90 	map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh;
   105             opendir(my $dh => $_);
    91 	closedir($dh);
   106             map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh;
    92     }, "$dir/data");
   107             closedir($dh);
    93 
   108         },
       
   109         "$dir/data"
       
   110     );
    94 
   111 
    95     # progress
   112     # progress
    96     local $SIG{ALRM} = sub {
   113     local $SIG{ALRM} = sub {
    97 	return alarm 1 if not $done;
   114         return alarm 1 if not $done;
    98 	my $speed = $done / (time - $^T + 1);
   115         my $speed = $done / (time - $^T + 1);
    99 	verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
   116         verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
   100 	    100 * ($done/$total),
   117           100 * ($done / $total),
   101 	    scalar(localtime($^T + $speed * ($total - $done))),
   118           scalar(localtime($^T + $speed * ($total - $done))),
   102 	    length($total) => $done,
   119           length($total) => $done,
   103 	    $total;
   120           $total;
   104 	alarm 5;
   121         alarm 5;
   105     };
   122     };
   106     $SIG{ALRM}->();
   123     $SIG{ALRM}->();
   107 
   124 
   108     my @unused;
   125     my @unused;
   109     find(sub {
   126     find(
   110 	$done++ if -f;
   127         sub {
   111 	(-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks
   128             $done++ if -f;
   112 
   129             (-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks
   113 	# we don't need uncompressed files if an compressed version
   130 
   114 	# exists
   131             # we don't need uncompressed files if an compressed version
   115 	unlink $_ and return if -f "$_.gz";
   132             # exists
   116 
   133             unlink $_ and return if -f "$_.gz";
   117 	# cut away the first part of the filename and
   134 
   118 	# some optional extension
   135             # cut away the first part of the filename and
   119 	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
   136             # some optional extension
   120 	exists $block{$rn} and return;
   137             (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
   121 	push @unused, abs_path $File::Find::name;
   138             exists $block{$rn} and return;
   122 	return;
   139             push @unused, abs_path $File::Find::name;
   123 
   140             return;
   124     }, "$dir/data");
   141 
       
   142         },
       
   143         "$dir/data"
       
   144     );
   125     $SIG{ALRM}->();
   145     $SIG{ALRM}->();
   126     alarm 0;
   146     alarm 0;
   127 
   147 
   128     return if not @unused;
   148     return if not @unused;
   129 
   149 
   130     say sprintf "found %d (%.1f%%) unused files",
   150     say sprintf "found %d (%.1f%%) unused files",
   131 	0+@unused,
   151       0 + @unused,
   132 	100 * (@unused/$total);
   152       100 * (@unused / $total);
   133 
   153 
   134     if ($o{yes}) {
   154     if ($o{yes}) {
   135 	verbose("# deleting ".@unused." files");
   155         verbose("# deleting " . @unused . " files");
   136 	unlink @unused;
   156         unlink @unused;
   137 	return;
   157         return;
   138     }
   158     }
   139 
   159 
   140     if (-t) {
   160     if (-t) {
   141 	while(1) {
   161         while (1) {
   142 	    print "delete? [y/N/v]: ";
   162             print "delete? [y/N/v]: ";
   143 	    given (<STDIN>) {
   163             given (<STDIN>) {
   144 		when (/^y(?:es)?$/i) { unlink @unused; last }
   164                 when (/^y(?:es)?$/i) { unlink @unused; last }
   145 		when (/^v/) { say join "\n", @unused; next }
   165                 when (/^v/) { say join "\n", @unused; next }
   146 		default { last }
   166                 default { last }
   147 	    }
   167             }
   148 	}
   168         }
   149     }
   169     }
   150 
   170 
   151 }
   171 }
   152 
   172 
   153 sub check_images {
   173 sub check_images {
   158 
   178 
   159     verbose("# pass 2 - checking image completeness");
   179     verbose("# pass 2 - checking image completeness");
   160 
   180 
   161     # progress
   181     # progress
   162     local $SIG{ALRM} = sub {
   182     local $SIG{ALRM} = sub {
   163 	return alarm 1 if not $done;
   183         return alarm 1 if not $done;
   164 	my $speed = $done / (time - $^T + 1);
   184         my $speed = $done / (time - $^T + 1);
   165 	verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
   185         verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
   166 	    100 * $done/$total, 
   186           100 * $done / $total,
   167 	    scalar(localtime($^T + ($total - $done) * $speed)),
   187           scalar(localtime($^T + ($total - $done) * $speed)),
   168 	    length($total) => $done,
   188           length($total) => $done,
   169 	    $total;
   189           $total;
   170 	    alarm 5;
   190         alarm 5;
   171     };
   191     };
   172     $SIG{ALRM}->();
   192     $SIG{ALRM}->();
   173 
   193 
   174     my %invalid;
   194     my %invalid;
   175     foreach my $k (keys %block) {
   195     foreach my $k (keys %block) {
   176 	my $i = $block{$k};
   196         my $i = $block{$k};
   177 	next if $k eq "";
   197         next if $k eq "";
   178 	++$done;
   198         ++$done;
   179 	
   199 
   180 	next if -f "$dir/data/$k"
   200         next
   181 	    or -f "$dir/data/$k.gz";
   201           if -f "$dir/data/$k"
   182 	say "missing $k @$i";
   202               or -f "$dir/data/$k.gz";
   183 	@invalid{@$i} = ();
   203         say "missing $k @$i";
       
   204         @invalid{@$i} = ();
   184     }
   205     }
   185     $SIG{ALRM}->();
   206     $SIG{ALRM}->();
   186     alarm 0;
   207     alarm 0;
   187 
   208 
   188     # invalid now contains the numbers of the idx files beiing
   209     # invalid now contains the numbers of the idx files beiing
   189     # invalid
   210     # invalid
   190     my @invalid = sort @{$block{""}}[keys %invalid];
   211     my @invalid = sort @{ $block{""} }[keys %invalid];
   191 
   212 
   192     return if not @invalid;
   213     return if not @invalid;
   193 
   214 
   194     say sprintf "found %d (%.1f%%) invalid images:",
   215     say sprintf "found %d (%.1f%%) invalid images:",
   195 	0+@invalid,
   216       0 + @invalid,
   196 	100 * (@invalid/$total);
   217       100 * (@invalid / $total);
   197 
   218 
   198     if ($o{yes}) {
   219     if ($o{yes}) {
   199 	unlink @invalid;
   220         unlink @invalid;
   200 	return;
   221         return;
   201     }
   222     }
   202 
   223 
   203     while (-t) {
   224     while (-t) {
   204 	print "delete? [y/N/v] ";
   225         print "delete? [y/N/v] ";
   205 	given (<STDIN>) {
   226         given (<STDIN>) {
   206 	    when (/^y(?:es)?$/i) { unlink @invalid; last }
   227             when (/^y(?:es)?$/i) { unlink @invalid; last }
   207 	    when (/^v/i)	 { say join "\n" => @invalid; next }
   228             when (/^v/i) { say join "\n" => @invalid; next }
   208 	default		         { last }
   229             default { last }
   209 	}
   230         }
   210     }
   231     }
   211 }
   232 }
   212 __END__
   233 __END__
   213 
   234 
   214 =head1 NAME
   235 =head1 NAME