fixed checker
author Heiko Schlittermann (JUMPER) <hs@schlittermann.de>
Mon, 25 Jul 2011 16:36:53 +0200
changeset 11 675ef249d116
parent 10 fd5225120ee9
child 12 46a3e65e850f
fixed checker
checker
imager
--- a/checker	Mon Jul 25 15:13:07 2011 +0200
+++ b/checker	Mon Jul 25 16:36:53 2011 +0200
@@ -81,7 +81,8 @@
     my ($dir, %block) = @_;
 
     my ($total, $done);
-    verbose("# pass 1 - purge unused blocks");
+    verbose("# pass 1 - checking for unused blocks");
+    verbose("# estimating file count");
 
     # calculate the number of files we expect
     find(sub {
@@ -105,29 +106,49 @@
     };
     $SIG{ALRM}->();
 
+    my @unused;
     find(sub {
 	$done++ if -f;
 	(-f _) and ((-M _) > 0) or return;
 
+	# we don't need uncompressed files if an compressed version
+	# exists
+	unlink $_ and return if -f "$_.gz";
+
 	# cut away the first part of the filename and
 	# some optional extension
 	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
 	exists $block{$rn} and return;
-
-
-	if ($o{yes}) {
-	   verbose("unlinking " . abs_path $File::Find::name);
-	   unlink abs_path $File::Find::name;
-	   return;
-	}
-
-	verbose("unused " . abs_path $File::Find::name);
+	push @unused, abs_path $File::Find::name;
 	return;
 
     }, "$dir/data");
     $SIG{ALRM}->();
     alarm 0;
 
+    return if not @unused;
+
+    say sprintf "found %d (%.1f%%) unused files",
+	0+@unused,
+	100 * (@unused/$total);
+
+    if ($o{yes}) {
+	verbose("# deleting ".@unused." files");
+	unlink @unused;
+	return;
+    }
+
+    if (-t) {
+	while(1) {
+	    print "delete? [y/N/v]: ";
+	    given (<STDIN>) {
+		when (/^y(?:es)?$/i) { unlink @unused; last }
+		when (/^v/) { say join "\n", @unused; next }
+		default { last }
+	    }
+	}
+    }
+
 }
 
 sub check_images {
@@ -159,16 +180,35 @@
 	
 	next if -f "$dir/data/$k"
 	    or -f "$dir/data/$k.gz";
-	say "missing $k";
-	@invalid{@{$block{""}}} = ();
+	say "missing $k @$i";
+	@invalid{@$i} = ();
     }
     $SIG{ALRM}->();
     alarm 0;
 
-    return if not %invalid;
+    # invalid now contains the numbers of the idx files beiing
+    # invalid
+    my @invalid = sort @{$block{""}}[keys %invalid];
+
+    return if not @invalid;
+
+    say sprintf "found %d (%.1f%%) invalid images:",
+	0+@invalid,
+	100 * (@invalid/$total);
 
-    say "invalid images:\n", join "\n", sort keys %invalid;
-    unlink keys %invalid if $o{yes};
+    if ($o{yes}) {
+	unlink @invalid;
+	return;
+    }
+
+    while (-t) {
+	print "delete? [y/N/v] ";
+	given (<STDIN>) {
+	    when (/^y(?:es)?$/i) { unlink @invalid; last }
+	    when (/^v/i)	 { say join "\n" => @invalid; next }
+	default		         { last }
+	}
+    }
 }
 __END__
 
--- a/imager	Mon Jul 25 15:13:07 2011 +0200
+++ b/imager	Mon Jul 25 16:36:53 2011 +0200
@@ -110,14 +110,13 @@
 	my ($file, $ext, $cs);
 	$file = $cs = md5_hex($buffer);
 	$file =~ s/(?<fn>(?<prefix>...).*)/$+{prefix}\/$+{fn}/g;
-	$ext = $o{compress} ? ".gz" : "";
 
 	# the extension we do not put into the index
 	my $log = sprintf "%12d %s %s" => ($.-1), $cs, $file;
 
-	if (not (-e "$data/$file" or -e "$data/$file$ext")) {
-	    mkpath dirname("$data/$file$ext");
-	    my $out = File::Temp->new(TEMPLATE => ".XXXXXXX", DIR => dirname("$data/$file$ext"));
+	if (not (-e "$data/$file" or -e "$data/$file.gz")) {
+	    mkpath dirname("$data/$file.gz");
+	    my $out = File::Temp->new(TEMPLATE => ".XXXXXXX", DIR => dirname("$data/$file.gz"));
 	    binmode($out);
 	    if ($o{compress}) { 
 		gzip(\$buffer => $out, 
@@ -128,7 +127,7 @@
 	    } 
 	    else { print {$out} $buffer }
 	    close($out);
-	    rename($out => "$data/$file$ext");
+	    rename($out => "$data/$file.gz");
 	    $log .= " *";
 	    $stats{written}++;
 	}
@@ -137,7 +136,6 @@
 	    $stats{skipped}++;
 	}
 
-	#say $log . sprintf "%3d%%" => 100 * ($. * $o{blocksize})/$size;
 	say {$index} $log;
     }
     $SIG{ALRM}->();