--- a/catter Mon Jul 25 11:59:05 2011 +0200
+++ b/catter Mon Jul 25 15:13:07 2011 +0200
@@ -1,42 +1,105 @@
#! /usr/bin/perl
# Eigentlich geht das selbe mit:
-# grep -v '^\[' IDX-File | while read x x file x; do test "$file" && cat DATA/$file; done
-#
+# grep '^[[:space:]]*[[:digit:]]' IDX-file | tr -d | cut -f4 -d' ' | while read f; do
+# cat DATA/$f || zcat DATA/$f.gz
+# done
+# ODER
+# perl -ne '/^\s*\d/ and print "DATA/" . (split)[2] . "\n"' IDX-File | while read f; do
+# cat DATA/$f || zcat DATA/$f.gz
+# done
+
+
use 5.010;
use strict;
use warnings;
use File::Basename;
use Cwd qw(abs_path);
use autodie qw(:all);
+use Pod::Usage;
+use Getopt::Long;
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use constant KiB => 1024;
use constant MiB => 1024 * KiB;
use constant GiB => 1024 * MiB;
+use constant ME => basename $0;
-my $BS = 64 * MiB;
-my $IDX = shift // die "Need index file\n";
-my $DST = shift // die "Need destination for writing the image.\n";
-my $DATA = abs_path(dirname($IDX) . "/../data");
+sub find_data_dir;
+
+MAIN: {
+
+ Getopt::Long::Configure(qw(Bundling));
+ GetOptions(
+ "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) },
+ "m|man" => sub { pod2usage(-verbose => 2, -exit => 0,
+ -noperldoc => system("perldoc -V 1>/dev/null
+ 2>&1")) },
+ ) and @ARGV == 2 or pod2usage;
+
+ my $idx = shift;
+ my $dst = shift;
+ my $blocksize = undef;
+ my $data = find_data_dir($idx);
+
+ open(my $fh => $idx);
+ { local $/ = ""; $_ = <$fh>; }
+ /^format:\s*1$/m or die ME.": expected index format 1\n";
+ ($blocksize) = /^blocksize:\s*(\d+)/m or die ME.": no blocksize found\n";
+
-open(my $idx => $IDX);
+ my $out;
+ if ($dst eq "-") { open($out => ">&STDOUT") }
+ else { open($out => ">", $dst) };
+
+ while (<$fh>) {
+ next if /^#/;
+ my ($blk, $hash, $path) = split;
+ my ($in, $buffer);
-{ local $/ = "";
- scalar <$idx>;
+ if (-f "$data/$path") {
+ open($in => "$data/$path");
+ binmode($in);
+ local $/ = \$blocksize;
+ $buffer = <$in>;
+ }
+ elsif (-f "$data/$path.gz") {
+ open($in => "$data/$path.gz");
+ binmode($in);
+ gunzip($in => \$buffer)
+ or die $GunzipError;
+ }
+ else {
+ die ME.": Can't open $data/$path: $!\n";
+ }
+ print {$out} $buffer;
+ close($in);
+ }
+ close($out);
+ close($fh);
}
-my $out;
-if ($DST eq "-") { open($out => ">&STDOUT") }
-else { open($out => ">", $DST) };
+sub find_data_dir {
+ for (my $dir = shift; $dir ne "/"; $dir = abs_path("$dir/..")) {
+ return "$dir/data" if -d "$dir/data" and -d "$dir/idx";
+ }
+ die ME.": no data directory found!\n";
+}
+
+__END__
+
+=head1 NAME
+
+ catter - cats the blocks of the imager
-while (<$idx>) {
- next if /^#/;
- my ($blk, $hash, $path) = split;
- open(my $in => "$DATA/$path");
- {
- my $buffer;
- local $/ = \$BS;
- print {$out} $buffer while defined($buffer = <$in>);
- }
- close($in);
-}
-close($out);
+=head1 SYNOPSIS
+
+ catter {idx} {destination}
+
+=head1 DESCRIPTION
+
+The B<catter> takes all the blocks from the IDX file and
+cats them as one data stream. The destination can be any block device,
+a file name or even B<-> (STDOUT).
+
+
+=cut
--- a/checker Mon Jul 25 11:59:05 2011 +0200
+++ b/checker Mon Jul 25 15:13:07 2011 +0200
@@ -43,8 +43,8 @@
my %block = get_block_list($dir);
verbose("# indexed: "
- . scalar(@{$block{".idx"}//[]}) . " images with "
- . (grep /^\.idx/ => keys(%block) - 1)." files");
+ . scalar(@{$block{""}//[]}) . " images with "
+ . (grep !/^\.idx$/ => keys(%block))." blocks");
purge_unused($dir => %block);
check_images($dir => %block);
@@ -69,9 +69,9 @@
find(sub {
(-f) and (-M > 0) or return;
#verbose("idx: $File::Find::name");
- push @{$block{".idx"}}, abs_path $_;
+ push @{$block{""}}, abs_path $_;
foreach my $f (get_file_list($_)) {
- push @{$block{$f}} => $#{$block{".idx"}};
+ push @{$block{$f}} => $#{$block{""}};
}
}, "$dir/idx");
return %block;
@@ -81,6 +81,7 @@
my ($dir, %block) = @_;
my ($total, $done);
+ verbose("# pass 1 - purge unused blocks");
# calculate the number of files we expect
find(sub {
@@ -95,7 +96,7 @@
local $SIG{ALRM} = sub {
return alarm 1 if not $done;
my $speed = $done / (time - $^T + 1);
- verbose sprintf "# done %5.1f%% | %25s (%*d of %d files)",
+ verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
100 * ($done/$total),
scalar(localtime($^T + $speed * ($total - $done))),
length($total) => $done,
@@ -113,13 +114,14 @@
(my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
exists $block{$rn} and return;
+
if ($o{yes}) {
- verbose("unlinking abs_path $File::Find::name");
- unlink abs_path $File::Find::name;
- return;
+ verbose("unlinking " . abs_path $File::Find::name);
+ unlink abs_path $File::Find::name;
+ return;
}
- verbose("unused abs_path $File::Find::name");
+ verbose("unused " . abs_path $File::Find::name);
return;
}, "$dir/data");
@@ -131,14 +133,16 @@
sub check_images {
my ($dir, %block) = @_;
- my $total = keys(%block) - 1; # .idx
+ my $total = grep { $_ ne "" } keys(%block);
my $done = 0;
+ verbose("# pass 2 - check image completness");
+
# progress
local $SIG{ALRM} = sub {
return alarm 1 if not $done;
my $speed = $done / (time - $^T + 1);
- say sprintf "# done %5.1f%% | %25s (%*d of %d files)",
+ verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
100 * $done/$total,
scalar(localtime($^T + ($total - $done) * $speed)),
length($total) => $done,
@@ -150,13 +154,13 @@
my %invalid;
foreach my $k (keys %block) {
my $i = $block{$k};
- next if $k eq ".idx";
+ next if $k eq "";
++$done;
next if -f "$dir/data/$k"
or -f "$dir/data/$k.gz";
say "missing $k";
- @invalid{@{$block{".idx"}}} = ();
+ @invalid{@{$block{""}}} = ();
}
$SIG{ALRM}->();
alarm 0;