catter
changeset 10 fd5225120ee9
parent 0 e92e765779e7
--- a/catter	Mon Jul 25 11:59:05 2011 +0200
+++ b/catter	Mon Jul 25 15:13:07 2011 +0200
@@ -1,42 +1,105 @@
 #! /usr/bin/perl
 # Eigentlich geht das selbe mit:
-# grep -v '^\[' IDX-File | while read x x file x; do test "$file" && cat DATA/$file; done
-# 
+# grep '^[[:space:]]*[[:digit:]]' IDX-file | tr -d | cut -f4 -d' ' | while read f; do
+#	cat DATA/$f || zcat DATA/$f.gz
+# done
+# ODER
+# perl -ne '/^\s*\d/ and print "DATA/" . (split)[2] . "\n"' IDX-File | while read f; do
+#	cat DATA/$f || zcat DATA/$f.gz
+# done
+
+
 use 5.010;
 use strict;
 use warnings;
 use File::Basename;
 use Cwd qw(abs_path);
 use autodie qw(:all);
+use Pod::Usage;
+use Getopt::Long;
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
 
 use constant KiB => 1024;
 use constant MiB => 1024 * KiB;
 use constant GiB => 1024 * MiB;
+use constant ME => basename $0;
 
-my $BS = 64 * MiB;
-my $IDX = shift // die "Need index file\n";
-my $DST = shift // die "Need destination for writing the image.\n";
-my $DATA = abs_path(dirname($IDX) . "/../data");
+sub find_data_dir;
+
+MAIN: {
+
+    Getopt::Long::Configure(qw(Bundling));
+    GetOptions(
+	"h|help" => sub { pod2usage(-verbose => 1, -exit => 0) },
+	"m|man"  => sub { pod2usage(-verbose => 2, -exit => 0,
+			  -noperldoc => system("perldoc -V 1>/dev/null
+			  2>&1")) },
+    ) and @ARGV == 2 or pod2usage;
+
+    my $idx = shift;
+    my $dst = shift;
+    my $blocksize = undef;
+    my $data = find_data_dir($idx);
+
+    open(my $fh => $idx);
+    { local $/ = ""; $_ = <$fh>; }
+    /^format:\s*1$/m or die ME.": expected index format 1\n";
+    ($blocksize) = /^blocksize:\s*(\d+)/m or die ME.": no blocksize found\n";
+
 
-open(my $idx => $IDX);
+    my $out;
+    if ($dst eq "-") { open($out => ">&STDOUT") } 
+    else { open($out => ">", $dst) };
+
+    while (<$fh>) {
+	next if /^#/;
+	my ($blk, $hash, $path) = split;
+	my ($in, $buffer);
 
-{ local $/ = "";
-  scalar <$idx>;
+	if (-f "$data/$path") {
+	    open($in => "$data/$path");
+	    binmode($in);
+	    local $/ = \$blocksize;
+	    $buffer = <$in>;
+	}
+	elsif (-f "$data/$path.gz") {
+	    open($in => "$data/$path.gz");
+	    binmode($in);
+	    gunzip($in => \$buffer)
+		or die $GunzipError;
+	}
+	else {
+	    die ME.": Can't open $data/$path: $!\n";
+	}
+	print {$out} $buffer;
+	close($in);
+    }
+    close($out);
+    close($fh);
 }
 
-my $out;
-if ($DST eq "-") { open($out => ">&STDOUT") } 
-else { open($out => ">", $DST) };
+sub find_data_dir {
+    for (my $dir = shift; $dir ne "/"; $dir = abs_path("$dir/..")) {
+	return "$dir/data" if -d "$dir/data" and -d "$dir/idx";
+    }
+    die ME.": no data directory found!\n";
+}
+
+__END__
+
+=head1 NAME
+
+    catter - cats the blocks of the imager
 
-while (<$idx>) {
-    next if /^#/;
-    my ($blk, $hash, $path) = split;
-    open(my $in => "$DATA/$path");
-    {
-	my $buffer;
-	local $/ = \$BS;
-	print {$out} $buffer while defined($buffer = <$in>);
-    }
-    close($in);
-}
-close($out);
+=head1 SYNOPSIS
+
+    catter {idx} {destination}
+
+=head1 DESCRIPTION
+
+The B<catter> takes all the blocks from the IDX file and
+cats them as one data stream. The destination can be any block device,
+a file name or even B<-> (STDOUT).
+
+
+=cut