diff -r 70d578b200ba -r fd5225120ee9 catter --- a/catter Mon Jul 25 11:59:05 2011 +0200 +++ b/catter Mon Jul 25 15:13:07 2011 +0200 @@ -1,42 +1,105 @@ #! /usr/bin/perl # Eigentlich geht das selbe mit: -# grep -v '^\[' IDX-File | while read x x file x; do test "$file" && cat DATA/$file; done -# +# grep '^[[:space:]]*[[:digit:]]' IDX-file | tr -d | cut -f4 -d' ' | while read f; do +# cat DATA/$f || zcat DATA/$f.gz +# done +# ODER +# perl -ne '/^\s*\d/ and print "DATA/" . (split)[2] . "\n"' IDX-File | while read f; do +# cat DATA/$f || zcat DATA/$f.gz +# done + + use 5.010; use strict; use warnings; use File::Basename; use Cwd qw(abs_path); use autodie qw(:all); +use Pod::Usage; +use Getopt::Long; +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); use constant KiB => 1024; use constant MiB => 1024 * KiB; use constant GiB => 1024 * MiB; +use constant ME => basename $0; -my $BS = 64 * MiB; -my $IDX = shift // die "Need index file\n"; -my $DST = shift // die "Need destination for writing the image.\n"; -my $DATA = abs_path(dirname($IDX) . "/../data"); +sub find_data_dir; + +MAIN: { + + Getopt::Long::Configure(qw(Bundling)); + GetOptions( + "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, + "m|man" => sub { pod2usage(-verbose => 2, -exit => 0, + -noperldoc => system("perldoc -V 1>/dev/null + 2>&1")) }, + ) and @ARGV == 2 or pod2usage; + + my $idx = shift; + my $dst = shift; + my $blocksize = undef; + my $data = find_data_dir($idx); + + open(my $fh => $idx); + { local $/ = ""; $_ = <$fh>; } + /^format:\s*1$/m or die ME.": expected index format 1\n"; + ($blocksize) = /^blocksize:\s*(\d+)/m or die ME.": no blocksize found\n"; + -open(my $idx => $IDX); + my $out; + if ($dst eq "-") { open($out => ">&STDOUT") } + else { open($out => ">", $dst) }; + + while (<$fh>) { + next if /^#/; + my ($blk, $hash, $path) = split; + my ($in, $buffer); -{ local $/ = ""; - scalar <$idx>; + if (-f "$data/$path") { + open($in => "$data/$path"); + binmode($in); + local $/ = \$blocksize; + $buffer = <$in>; + } + elsif (-f "$data/$path.gz") { + open($in => "$data/$path.gz"); + binmode($in); + gunzip($in => \$buffer) + or die $GunzipError; + } + else { + die ME.": Can't open $data/$path: $!\n"; + } + print {$out} $buffer; + close($in); + } + close($out); + close($fh); } -my $out; -if ($DST eq "-") { open($out => ">&STDOUT") } -else { open($out => ">", $DST) }; +sub find_data_dir { + for (my $dir = shift; $dir ne "/"; $dir = abs_path("$dir/..")) { + return "$dir/data" if -d "$dir/data" and -d "$dir/idx"; + } + die ME.": no data directory found!\n"; +} + +__END__ + +=head1 NAME + + catter - cats the blocks of the imager -while (<$idx>) { - next if /^#/; - my ($blk, $hash, $path) = split; - open(my $in => "$DATA/$path"); - { - my $buffer; - local $/ = \$BS; - print {$out} $buffer while defined($buffer = <$in>); - } - close($in); -} -close($out); +=head1 SYNOPSIS + + catter {idx} {destination} + +=head1 DESCRIPTION + +The B takes all the blocks from the IDX file and +cats them as one data stream. The destination can be any block device, +a file name or even B<-> (STDOUT). + + +=cut