--- a/checker Mon Jul 25 15:13:07 2011 +0200
+++ b/checker Mon Jul 25 16:36:53 2011 +0200
@@ -81,7 +81,8 @@
my ($dir, %block) = @_;
my ($total, $done);
- verbose("# pass 1 - purge unused blocks");
+ verbose("# pass 1 - checking for unused blocks");
+ verbose("# estimating file count");
# calculate the number of files we expect
find(sub {
@@ -105,29 +106,49 @@
};
$SIG{ALRM}->();
+ my @unused;
find(sub {
$done++ if -f;
(-f _) and ((-M _) > 0) or return;
+ # we don't need uncompressed files if a compressed version
+ # exists
+ unlink $_ and return if -f "$_.gz";
+
# cut away the first part of the filename and
# some optional extension
(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
exists $block{$rn} and return;
-
-
- if ($o{yes}) {
- verbose("unlinking " . abs_path $File::Find::name);
- unlink abs_path $File::Find::name;
- return;
- }
-
- verbose("unused " . abs_path $File::Find::name);
+ push @unused, abs_path $File::Find::name;
return;
}, "$dir/data");
$SIG{ALRM}->();
alarm 0;
+ return if not @unused;
+
+ say sprintf "found %d (%.1f%%) unused files",
+ 0+@unused,
+ 100 * (@unused/$total);
+
+ if ($o{yes}) {
+ verbose("# deleting ".@unused." files");
+ unlink @unused;
+ return;
+ }
+
+ if (-t) {
+ while(1) {
+ print "delete? [y/N/v]: ";
+ given (<STDIN>) {
+ when (/^y(?:es)?$/i) { unlink @unused; last }
+ when (/^v/) { say join "\n", @unused; next }
+ default { last }
+ }
+ }
+ }
+
}
sub check_images {
@@ -159,16 +180,35 @@
next if -f "$dir/data/$k"
or -f "$dir/data/$k.gz";
- say "missing $k";
- @invalid{@{$block{""}}} = ();
+ say "missing $k @$i";
+ @invalid{@$i} = ();
}
$SIG{ALRM}->();
alarm 0;
- return if not %invalid;
+ # invalid now contains the numbers of the idx files being
+ # invalid
+ my @invalid = sort @{$block{""}}[keys %invalid];
+
+ return if not @invalid;
+
+ say sprintf "found %d (%.1f%%) invalid images:",
+ 0+@invalid,
+ 100 * (@invalid/$total);
- say "invalid images:\n", join "\n", sort keys %invalid;
- unlink keys %invalid if $o{yes};
+ if ($o{yes}) {
+ unlink @invalid;
+ return;
+ }
+
+ while (-t) {
+ print "delete? [y/N/v] ";
+ given (<STDIN>) {
+ when (/^y(?:es)?$/i) { unlink @invalid; last }
+ when (/^v/i) { say join "\n" => @invalid; next }
+ default { last }
+ }
+ }
}
__END__
--- a/imager Mon Jul 25 15:13:07 2011 +0200
+++ b/imager Mon Jul 25 16:36:53 2011 +0200
@@ -110,14 +110,13 @@
my ($file, $ext, $cs);
$file = $cs = md5_hex($buffer);
$file =~ s/(?<fn>(?<prefix>...).*)/$+{prefix}\/$+{fn}/g;
- $ext = $o{compress} ? ".gz" : "";
# the extension we do not put into the index
my $log = sprintf "%12d %s %s" => ($.-1), $cs, $file;
- if (not (-e "$data/$file" or -e "$data/$file$ext")) {
- mkpath dirname("$data/$file$ext");
- my $out = File::Temp->new(TEMPLATE => ".XXXXXXX", DIR => dirname("$data/$file$ext"));
+ if (not (-e "$data/$file" or -e "$data/$file.gz")) {
+ mkpath dirname("$data/$file.gz");
+ my $out = File::Temp->new(TEMPLATE => ".XXXXXXX", DIR => dirname("$data/$file.gz"));
binmode($out);
if ($o{compress}) {
gzip(\$buffer => $out,
@@ -128,7 +127,7 @@
}
else { print {$out} $buffer }
close($out);
- rename($out => "$data/$file$ext");
+ rename($out => "$data/$file.gz");
$log .= " *";
$stats{written}++;
}
@@ -137,7 +136,6 @@
$stats{skipped}++;
}
- #say $log . sprintf "%3d%%" => 100 * ($. * $o{blocksize})/$size;
say {$index} $log;
}
$SIG{ALRM}->();