# HG changeset patch # User Heiko Schlittermann (JUMPER) # Date 1311599587 -7200 # Node ID fd5225120ee921a108e2d1efea23eed2662784fb # Parent 70d578b200ba43836ad8a6991d7df33f70f95623 checker und catter should work now diff -r 70d578b200ba -r fd5225120ee9 catter --- a/catter Mon Jul 25 11:59:05 2011 +0200 +++ b/catter Mon Jul 25 15:13:07 2011 +0200 @@ -1,42 +1,105 @@ #! /usr/bin/perl # Eigentlich geht das selbe mit: -# grep -v '^\[' IDX-File | while read x x file x; do test "$file" && cat DATA/$file; done -# +# grep '^[[:space:]]*[[:digit:]]' IDX-file | tr -d | cut -f4 -d' ' | while read f; do +# cat DATA/$f || zcat DATA/$f.gz +# done +# ODER +# perl -ne '/^\s*\d/ and print "DATA/" . (split)[2] . "\n"' IDX-File | while read f; do +# cat DATA/$f || zcat DATA/$f.gz +# done + + use 5.010; use strict; use warnings; use File::Basename; use Cwd qw(abs_path); use autodie qw(:all); +use Pod::Usage; +use Getopt::Long; +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); use constant KiB => 1024; use constant MiB => 1024 * KiB; use constant GiB => 1024 * MiB; +use constant ME => basename $0; -my $BS = 64 * MiB; -my $IDX = shift // die "Need index file\n"; -my $DST = shift // die "Need destination for writing the image.\n"; -my $DATA = abs_path(dirname($IDX) . "/../data"); +sub find_data_dir; + +MAIN: { + + Getopt::Long::Configure(qw(Bundling)); + GetOptions( + "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, + "m|man" => sub { pod2usage(-verbose => 2, -exit => 0, + -noperldoc => system("perldoc -V 1>/dev/null + 2>&1")) }, + ) and @ARGV == 2 or pod2usage; + + my $idx = shift; + my $dst = shift; + my $blocksize = undef; + my $data = find_data_dir($idx); + + open(my $fh => $idx); + { local $/ = ""; $_ = <$fh>; } + /^format:\s*1$/m or die ME.": expected index format 1\n"; + ($blocksize) = /^blocksize:\s*(\d+)/m or die ME.": no blocksize found\n"; + -open(my $idx => $IDX); + my $out; + if ($dst eq "-") { open($out => ">&STDOUT") } + else { open($out => ">", $dst) }; + + while (<$fh>) { + next if /^#/; + my ($blk, $hash, $path) = split; + my ($in, $buffer); -{ local $/ = ""; - scalar <$idx>; + if (-f "$data/$path") { + open($in => "$data/$path"); + binmode($in); + local $/ = \$blocksize; + $buffer = <$in>; + } + elsif (-f "$data/$path.gz") { + open($in => "$data/$path.gz"); + binmode($in); + gunzip($in => \$buffer) + or die $GunzipError; + } + else { + die ME.": Can't open $data/$path: $!\n"; + } + print {$out} $buffer; + close($in); + } + close($out); + close($fh); } -my $out; -if ($DST eq "-") { open($out => ">&STDOUT") } -else { open($out => ">", $DST) }; +sub find_data_dir { + for (my $dir = shift; $dir ne "/"; $dir = abs_path("$dir/..")) { + return "$dir/data" if -d "$dir/data" and -d "$dir/idx"; + } + die ME.": no data directory found!\n"; +} + +__END__ + +=head1 NAME + + catter - cats the blocks of the imager -while (<$idx>) { - next if /^#/; - my ($blk, $hash, $path) = split; - open(my $in => "$DATA/$path"); - { - my $buffer; - local $/ = \$BS; - print {$out} $buffer while defined($buffer = <$in>); - } - close($in); -} -close($out); +=head1 SYNOPSIS + + catter {idx} {destination} + +=head1 DESCRIPTION + +The B takes all the blocks from the IDX file and +cats them as one data stream. The destination can be any block device, +a file name or even B<-> (STDOUT). + + +=cut diff -r 70d578b200ba -r fd5225120ee9 checker --- a/checker Mon Jul 25 11:59:05 2011 +0200 +++ b/checker Mon Jul 25 15:13:07 2011 +0200 @@ -43,8 +43,8 @@ my %block = get_block_list($dir); verbose("# indexed: " - . scalar(@{$block{".idx"}//[]}) . " images with " - . (grep /^\.idx/ => keys(%block) - 1)." files"); + . scalar(@{$block{""}//[]}) . " images with " + . (grep !/^\.idx$/ => keys(%block))." blocks"); purge_unused($dir => %block); check_images($dir => %block); @@ -69,9 +69,9 @@ find(sub { (-f) and (-M > 0) or return; #verbose("idx: $File::Find::name"); - push @{$block{".idx"}}, abs_path $_; + push @{$block{""}}, abs_path $_; foreach my $f (get_file_list($_)) { - push @{$block{$f}} => $#{$block{".idx"}}; + push @{$block{$f}} => $#{$block{""}}; } }, "$dir/idx"); return %block; @@ -81,6 +81,7 @@ my ($dir, %block) = @_; my ($total, $done); + verbose("# pass 1 - purge unused blocks"); # calculate the number of files we expect find(sub { @@ -95,7 +96,7 @@ local $SIG{ALRM} = sub { return alarm 1 if not $done; my $speed = $done / (time - $^T + 1); - verbose sprintf "# done %5.1f%% | %25s (%*d of %d files)", + verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", 100 * ($done/$total), scalar(localtime($^T + $speed * ($total - $done))), length($total) => $done, @@ -113,13 +114,14 @@ (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; exists $block{$rn} and return; + if ($o{yes}) { - verbose("unlinking abs_path $File::Find::name"); - unlink abs_path $File::Find::name; - return; + verbose("unlinking " . abs_path $File::Find::name); + unlink abs_path $File::Find::name; + return; } - verbose("unused abs_path $File::Find::name"); + verbose("unused " . abs_path $File::Find::name); return; }, "$dir/data"); @@ -131,14 +133,16 @@ sub check_images { my ($dir, %block) = @_; - my $total = keys(%block) - 1; # .idx + my $total = grep { $_ ne "" } keys(%block); my $done = 0; + verbose("# pass 2 - check image completness"); + # progress local $SIG{ALRM} = sub { return alarm 1 if not $done; my $speed = $done / (time - $^T + 1); - say sprintf "# done %5.1f%% | %25s (%*d of %d files)", + verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", 100 * $done/$total, scalar(localtime($^T + ($total - $done) * $speed)), length($total) => $done, @@ -150,13 +154,13 @@ my %invalid; foreach my $k (keys %block) { my $i = $block{$k}; - next if $k eq ".idx"; + next if $k eq ""; ++$done; next if -f "$dir/data/$k" or -f "$dir/data/$k.gz"; say "missing $k"; - @invalid{@{$block{".idx"}}} = (); + @invalid{@{$block{""}}} = (); } $SIG{ALRM}->(); alarm 0;