checker und catter should work now
authorHeiko Schlittermann (JUMPER) <hs@schlittermann.de>
Mon, 25 Jul 2011 15:13:07 +0200
changeset 10 fd5225120ee9
parent 9 70d578b200ba
child 11 675ef249d116
checker und catter should work now
catter
checker
--- a/catter	Mon Jul 25 11:59:05 2011 +0200
+++ b/catter	Mon Jul 25 15:13:07 2011 +0200
@@ -1,42 +1,105 @@
 #! /usr/bin/perl
 # Eigentlich geht das selbe mit:
-# grep -v '^\[' IDX-File | while read x x file x; do test "$file" && cat DATA/$file; done
-# 
+# grep '^[[:space:]]*[[:digit:]]' IDX-file | tr -d | cut -f4 -d' ' | while read f; do
+#	cat DATA/$f || zcat DATA/$f.gz
+# done
+# ODER
+# perl -ne '/^\s*\d/ and print "DATA/" . (split)[2] . "\n"' IDX-File | while read f; do
+#	cat DATA/$f || zcat DATA/$f.gz
+# done
+
+
 use 5.010;
 use strict;
 use warnings;
 use File::Basename;
 use Cwd qw(abs_path);
 use autodie qw(:all);
+use Pod::Usage;
+use Getopt::Long;
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
 
 use constant KiB => 1024;
 use constant MiB => 1024 * KiB;
 use constant GiB => 1024 * MiB;
+use constant ME => basename $0;
 
-my $BS = 64 * MiB;
-my $IDX = shift // die "Need index file\n";
-my $DST = shift // die "Need destination for writing the image.\n";
-my $DATA = abs_path(dirname($IDX) . "/../data");
+sub find_data_dir;
+
+MAIN: {
+
+    Getopt::Long::Configure(qw(Bundling));
+    GetOptions(
+	"h|help" => sub { pod2usage(-verbose => 1, -exit => 0) },
+	"m|man"  => sub { pod2usage(-verbose => 2, -exit => 0,
+			  -noperldoc => system("perldoc -V 1>/dev/null
+			  2>&1")) },
+    ) and @ARGV == 2 or pod2usage;
+
+    my $idx = shift;
+    my $dst = shift;
+    my $blocksize = undef;
+    my $data = find_data_dir($idx);
+
+    open(my $fh => $idx);
+    { local $/ = ""; $_ = <$fh>; }
+    /^format:\s*1$/m or die ME.": expected index format 1\n";
+    ($blocksize) = /^blocksize:\s*(\d+)/m or die ME.": no blocksize found\n";
+
 
-open(my $idx => $IDX);
+    my $out;
+    if ($dst eq "-") { open($out => ">&STDOUT") } 
+    else { open($out => ">", $dst) };
+
+    while (<$fh>) {
+	next if /^#/;
+	my ($blk, $hash, $path) = split;
+	my ($in, $buffer);
 
-{ local $/ = "";
-  scalar <$idx>;
+	if (-f "$data/$path") {
+	    open($in => "$data/$path");
+	    binmode($in);
+	    local $/ = \$blocksize;
+	    $buffer = <$in>;
+	}
+	elsif (-f "$data/$path.gz") {
+	    open($in => "$data/$path.gz");
+	    binmode($in);
+	    gunzip($in => \$buffer)
+		or die $GunzipError;
+	}
+	else {
+	    die ME.": Can't open $data/$path: $!\n";
+	}
+	print {$out} $buffer;
+	close($in);
+    }
+    close($out);
+    close($fh);
 }
 
-my $out;
-if ($DST eq "-") { open($out => ">&STDOUT") } 
-else { open($out => ">", $DST) };
+sub find_data_dir {
+    for (my $dir = shift; $dir ne "/"; $dir = abs_path("$dir/..")) {
+	return "$dir/data" if -d "$dir/data" and -d "$dir/idx";
+    }
+    die ME.": no data directory found!\n";
+}
+
+__END__
+
+=head1 NAME
+
+    catter - cats the blocks of the imager
 
-while (<$idx>) {
-    next if /^#/;
-    my ($blk, $hash, $path) = split;
-    open(my $in => "$DATA/$path");
-    {
-	my $buffer;
-	local $/ = \$BS;
-	print {$out} $buffer while defined($buffer = <$in>);
-    }
-    close($in);
-}
-close($out);
+=head1 SYNOPSIS
+
+    catter {idx} {destination}
+
+=head1 DESCRIPTION
+
+The B<catter> takes all the blocks from the IDX file and
+cats them as one data stream. The destination can be any block device,
+a file name or even B<-> (STDOUT).
+
+
+=cut
--- a/checker	Mon Jul 25 11:59:05 2011 +0200
+++ b/checker	Mon Jul 25 15:13:07 2011 +0200
@@ -43,8 +43,8 @@
     my %block = get_block_list($dir);
 
     verbose("# indexed: "
-	. scalar(@{$block{".idx"}//[]}) . " images with "
-	. (grep /^\.idx/ => keys(%block) - 1)." files");
+	. scalar(@{$block{""}//[]}) . " images with "
+	. (grep !/^\.idx$/ => keys(%block))." blocks");
 
     purge_unused($dir => %block);
     check_images($dir => %block);
@@ -69,9 +69,9 @@
     find(sub {
 	(-f) and (-M > 0) or return;
 	#verbose("idx: $File::Find::name");
-	push @{$block{".idx"}}, abs_path $_;
+	push @{$block{""}}, abs_path $_;
 	foreach my $f (get_file_list($_)) {
-	    push @{$block{$f}} => $#{$block{".idx"}};
+	    push @{$block{$f}} => $#{$block{""}};
 	}
     }, "$dir/idx");
     return %block;
@@ -81,6 +81,7 @@
     my ($dir, %block) = @_;
 
     my ($total, $done);
+    verbose("# pass 1 - purge unused blocks");
 
     # calculate the number of files we expect
     find(sub {
@@ -95,7 +96,7 @@
     local $SIG{ALRM} = sub {
 	return alarm 1 if not $done;
 	my $speed = $done / (time - $^T + 1);
-	verbose sprintf "# done %5.1f%% | %25s (%*d of %d files)",
+	verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
 	    100 * ($done/$total),
 	    scalar(localtime($^T + $speed * ($total - $done))),
 	    length($total) => $done,
@@ -113,13 +114,14 @@
 	(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
 	exists $block{$rn} and return;
 
+
 	if ($o{yes}) {
-	    verbose("unlinking abs_path $File::Find::name");
-	    unlink abs_path $File::Find::name;
-	    return;
+	   verbose("unlinking " . abs_path $File::Find::name);
+	   unlink abs_path $File::Find::name;
+	   return;
 	}
 
-	verbose("unused abs_path $File::Find::name");
+	verbose("unused " . abs_path $File::Find::name);
 	return;
 
     }, "$dir/data");
@@ -131,14 +133,16 @@
 sub check_images {
     my ($dir, %block) = @_;
 
-    my $total = keys(%block) - 1; # .idx
+    my $total = grep { $_ ne "" } keys(%block);
     my $done = 0;
 
+    verbose("# pass 2 - check image completness");
+
     # progress
     local $SIG{ALRM} = sub {
 	return alarm 1 if not $done;
 	my $speed = $done / (time - $^T + 1);
-	say sprintf "# done %5.1f%% | %25s (%*d of %d files)",
+	verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
 	    100 * $done/$total, 
 	    scalar(localtime($^T + ($total - $done) * $speed)),
 	    length($total) => $done,
@@ -150,13 +154,13 @@
     my %invalid;
     foreach my $k (keys %block) {
 	my $i = $block{$k};
-	next if $k eq ".idx";
+	next if $k eq "";
 	++$done;
 	
 	next if -f "$dir/data/$k"
 	    or -f "$dir/data/$k.gz";
 	say "missing $k";
-	@invalid{@{$block{".idx"}}} = ();
+	@invalid{@{$block{""}}} = ();
     }
     $SIG{ALRM}->();
     alarm 0;