# HG changeset patch # User Heiko Schlittermann (JUMPER) # Date 1311929594 -7200 # Node ID 496ee9b0f48805547fa057146480b348576767d1 # Parent 94a50c69de28b33cd8a347afe451c2b60d16d6b4 renamed and restrurcted diff -r 94a50c69de28 -r 496ee9b0f488 bin/catter --- a/bin/catter Thu Jul 28 12:21:00 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -#! /usr/bin/perl -# Eigentlich geht das selbe mit: -# grep '^[[:space:]]*[[:digit:]]' IDX-file | tr -d | cut -f4 -d' ' | while read f; do -# cat DATA/$f || zcat DATA/$f.gz -# done -# ODER -# perl -ne '/^\s*\d/ and print "DATA/" . (split)[2] . "\n"' IDX-File | while read f; do -# cat DATA/$f || zcat DATA/$f.gz -# done - -use 5.010; -use strict; -use warnings; -use File::Basename; -use Cwd qw(abs_path); -use autodie qw(:all); -use Pod::Usage; -use Getopt::Long; -use IO::Uncompress::Gunzip qw(gunzip $GunzipError); - -use constant KiB => 1024; -use constant MiB => 1024 * KiB; -use constant GiB => 1024 * MiB; -use constant ME => basename $0; - -sub find_data_dir; - -MAIN: { - - Getopt::Long::Configure(qw(Bundling)); - GetOptions( - "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, - "m|man" => sub { - pod2usage( - -verbose => 2, - -exit => 0, - -noperldoc => system( - "perldoc -V 1>/dev/null - 2>&1" - ) - ); - }, - ) - and @ARGV == 2 - or pod2usage; - - my $idx = shift; - my $dst = shift; - my $blocksize = undef; - my $data = find_data_dir($idx); - - open(my $fh => $idx); - { local $/ = ""; $_ = <$fh>; } - /^format:\s*1$/m or die ME . ": expected index format 1\n"; - ($blocksize) = /^blocksize:\s*(\d+)/m or die ME . ": no blocksize found\n"; - - my $out; - if ($dst eq "-") { open($out => ">&STDOUT") } - else { open($out => ">", $dst) } - - while (<$fh>) { - next if /^#/; - my ($blk, $hash, $path) = split; - my ($in, $buffer); - - if (-f "$data/$path") { - open($in => "$data/$path"); - binmode($in); - local $/ = \$blocksize; - $buffer = <$in>; - } - elsif (-f "$data/$path.gz") { - open($in => "$data/$path.gz"); - binmode($in); - gunzip($in => \$buffer) - or die $GunzipError; - } - else { - die ME . ": Can't open $data/$path: $!\n"; - } - print {$out} $buffer; - close($in); - } - close($out); - close($fh); -} - -sub find_data_dir { - for (my $dir = shift ; $dir ne "/" ; $dir = abs_path("$dir/..")) { - return "$dir/data" if -d "$dir/data" and -d "$dir/idx"; - } - die ME . ": no data directory found!\n"; -} - -__END__ - -=head1 NAME - - catter - cats the blocks of the imager - -=head1 SYNOPSIS - - catter {idx} {destination} - -=head1 DESCRIPTION - -The B takes all the blocks from the IDX file and -cats them as one data stream. The destination can be any block device, -a file name or even B<-> (STDOUT). - - -=cut diff -r 94a50c69de28 -r 496ee9b0f488 bin/checker --- a/bin/checker Thu Jul 28 12:21:00 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,265 +0,0 @@ -#! /usr/bin/perl - -use 5.010; -use strict; -use warnings; -use Pod::Usage; -use Hash::Util qw(lock_keys); -use File::Find; -use File::Temp; -use DB_File; -use File::Basename; -use autodie qw(:all); -use Cwd qw(abs_path); - -use Getopt::Long; -sub get_block_list; -sub purge_unused; -sub check_images; - -my %o = ( - yes => undef, - verbose => undef, - check => undef, -); -lock_keys(%o); - -MAIN: { - Getopt::Long::Configure qw(Bundling); - GetOptions( - "y|yes!" => \$o{yes}, - "v|verbose!" => \$o{verbose}, - "c|check" => \$o{check}, - "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, - "m|man" => sub { - pod2usage( - -verbose => 2, - -exit 0, - -noperldoc => system( - "perldoc -V 1>/dev/null - 2>&1" - ) - ); - }, - ) - and @ARGV - or pod2usage; - my $dir = shift; - my $tmp = File::Temp->new; - - # load the index files, remember the latest - # timestamp we see - #tie %idx, "DB_File" => $tmp->filename; - my %block = get_block_list($dir); - - verbose("# indexed: " - . scalar(@{ $block{""} // [] }) - . " images with " - . (grep !/^\.idx$/ => keys(%block)) - . " blocks"); - - purge_unused($dir => %block); - check_images($dir => %block); -} - -sub verbose { say @_ if $o{verbose} } - -sub get_file_list { - my ($list) = @_; - my @files = (); - - open(my $fh => $list); - while (<$fh>) { - push @files, (split)[2]; - } - return grep /^[a-z\d.\/]+$/ => @files; -} - -sub get_block_list { - my $dir = shift; - my %block; - find( - sub { - (-f) or return; # we need to include the tmp files! - push @{ $block{""} }, abs_path $_; - foreach my $f (get_file_list($_)) { - push @{ $block{$f} } => $#{ $block{""} }; - } - }, - "$dir/idx" - ); - return %block; -} - -sub purge_unused { - my ($dir, %block) = @_; - - my ($total, $done); - verbose("# pass 1 - checking for unused blocks"); - verbose("# pass 1 - estimating file count"); - - # calculate the number of files we expect - find( - sub { - -d or return; - opendir(my $dh => $_); - map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh; - closedir($dh); - }, - "$dir/data" - ); - - # progress - local $SIG{ALRM} = sub { - return alarm 1 if not $done; - my $speed = $done / (time - $^T + 1); - verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", - 100 * ($done / $total), - scalar(localtime($^T + $speed * ($total - $done))), - length($total) => $done, - $total; - alarm 5; - }; - $SIG{ALRM}->(); - - my @unused; - find( - sub { - $done++ if -f; - (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks - - # we don't need uncompressed files if an compressed version - # exists - unlink $_ and return if -f "$_.gz"; - - # cut away the first part of the filename and - # some optional extension - (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; - exists $block{$rn} and return; - push @unused, abs_path $File::Find::name; - return; - - }, - "$dir/data" - ); - $SIG{ALRM}->(); - alarm 0; - - return if not @unused; - - say sprintf "found %d (%.1f%%) unused files", - 0 + @unused, - 100 * (@unused / $total); - - if ($o{yes}) { - verbose("# deleting " . @unused . " files"); - unlink @unused; - return; - } - - if (-t) { - while (1) { - print "delete? [y/N/v]: "; - given () { - when (/^y(?:es)?$/i) { unlink @unused; last } - when (/^v/) { say join "\n", @unused; next } - default { last } - } - } - } - -} - -sub check_images { - my ($dir, %block) = @_; - - my $total = grep { $_ ne "" } keys(%block); - my $done = 0; - - verbose("# pass 2 - checking image completeness"); - - # progress - local $SIG{ALRM} = sub { - return alarm 1 if not $done; - my $speed = $done / (time - $^T + 1); - verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", - 100 * $done / $total, - scalar(localtime($^T + ($total - $done) * $speed)), - length($total) => $done, - $total; - alarm 5; - }; - $SIG{ALRM}->(); - - my %invalid; - foreach my $k (keys %block) { - my $i = $block{$k}; - next if $k eq ""; - ++$done; - - next - if -f "$dir/data/$k" - or -f "$dir/data/$k.gz"; - say "missing $k @$i"; - @invalid{@$i} = (); - } - $SIG{ALRM}->(); - alarm 0; - - # invalid now contains the numbers of the idx files beiing - # invalid - my @invalid = sort @{ $block{""} }[keys %invalid]; - - return if not @invalid; - - say sprintf "found %d (%.1f%%) invalid images:", - 0 + @invalid, - 100 * (@invalid / $total); - - if ($o{yes}) { - unlink @invalid; - return; - } - - while (-t) { - print "delete? [y/N/v] "; - given () { - when (/^y(?:es)?$/i) { unlink @invalid; last } - when (/^v/i) { say join "\n" => @invalid; next } - default { last } - } - } -} -__END__ - -=head1 NAME - - checker - checks the imager data and index files - -=head1 SYNOPSIS - - checker [options] {directory} - -=head1 DESCRIPTION - -This tool loads all the index files from IF, -checks if all mentioned files are existing and optionally purges -unreferenced files. - -=head1 OPTIONS - -=over - -=item B<-y>|B<--yes> - -Assume "yes" for all questions (dangerous!). (default: no) - -=item B<-h>|B<--help> - -=item B<-m>|B<--man> - -The short and longer help. - -=back - -=cut diff -r 94a50c69de28 -r 496ee9b0f488 bin/fuse-imager --- a/bin/fuse-imager Thu Jul 28 12:21:00 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,284 +0,0 @@ -#! /usr/bin/perl - -use 5.010; -use strict; -use warnings; -use autodie qw(:all); -use Getopt::Long; -use Fuse; -use POSIX qw(setpgid :errno_h); -use IO::Uncompress::Gunzip qw(gunzip $GunzipError); -use Pod::Usage; -use Hash::Util qw(lock_keys); -use File::Temp; -use DB_File; -use File::Basename; - -my %o = ( - debug => undef, - detach => 1, - tmp => undef, -); -lock_keys %o; - -use constant ME => basename $0; -my ($DATA, $IDX); - -sub tie_vars; - -MAIN: { - - GetOptions( - "d|debug!" => \$o{debug}, - "detach!" => \$o{detach}, - "tmp:s" => sub { $o{tmp} = length $_[1] ? $_[1] : $ENV{TMP} // "/tmp" }, - "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, - "m|man" => sub { - pod2usage( - -verbose => 2, - -exit => 0, - -noperlpod => system("perldoc -V 1>/dev/null 2>&1") - ); - }, - ) - and @ARGV == 2 - or pod2usage; - - my ($src, $mp) = @ARGV; - - $DATA = "$src/data"; - $IDX = "$src/idx"; - - die ME . ": $DATA: $!" if not -d $DATA; - die ME . ": $IDX: $!" if not -d $IDX; - - if (!$o{debug} and $o{detach}) { - fork() and exit; - $0 = "FUSE $src $mp"; - open(STDOUT => ">/dev/null"); - open(STDIN => "/dev/null"); - - setpgid($$ => $$); - } - - tie_vars $o{tmp}; - - Fuse::main( - mountpoint => $mp, - debug => $o{debug} // 0, - getattr => \&getattr, - getdir => \&getdir, - open => \&openfile, - read => \&readbuffer, - write => \&writebuffer, - ); - - exit; - -} - -# not the fuse functions - -{ - my (%IMAGE, %DIRTY); - - sub tie_vars { - return if not defined $_[0]; - my $file = - -d $_[0] - ? File::Temp->new(DIR => shift, TEMPLATE => "tmp.fuse.XXXXXX") - ->filename - : shift; - tie %DIRTY, "DB_File" => $file - or die "Can't tie to $file: $!\n"; - } - - sub getattr { - my $path = $IDX . shift; - return stat $path if -d $path; - my @attr = stat $path or return -(ENOENT); - my %meta = _get_meta($path); - $attr[7] = $meta{devsize}; - $attr[9] = $meta{timestamp}; - $attr[2] &= ~0222; # r/o - return @attr; - } - - sub getdir { - my $path = $IDX . shift; - opendir(my $dh, $path) or return 0; - return (readdir($dh), 0); - } - - sub openfile { - my $path = $IDX . shift; - return 0 if exists $IMAGE{$path}; - $IMAGE{$path}{meta} = { _get_meta($path) }; - $IMAGE{$path}{blocklist} = {}; - - # skip the file header - open(my $fh => $path); - { local $/ = ""; scalar <$fh> } - - # should check for the format - # $IMAGE{$path}{meta}{format} - - # now read the block list - while (<$fh>) { - /^#/ and last; - my ($block, $cs, $file) = split; - $IMAGE{$path}{blocklist}{$block} = $file; - } - close $fh; - return 0; - } - - sub readbuffer { - my $path = $IDX . shift; - my ($size, $offset) = @_; - my $finfo = $IMAGE{$path} or die "File $path is not opened!"; - return "" if $offset >= $finfo->{meta}{devsize}; - - my $buffer = ""; - for (my $need = $size ; $need > 0 ; $need = $size - length($buffer)) { - $buffer .= _readblock($finfo, $need, $offset + length($buffer)); - } - - return $buffer; - } - - sub _readblock { - my ($finfo, $size, $offset) = @_; - - my $block = int($offset / $finfo->{meta}{blocksize}); - my $blockoffset = $offset % $finfo->{meta}{blocksize}; - - my $length = $finfo->{meta}{blocksize} - $blockoffset; - $length = $size if $size <= $length; - - if (exists $DIRTY{ $finfo . $block }) { - return substr $DIRTY{ $finfo . $block }, $blockoffset, $length; - } - - my $fn = "$DATA/" . $finfo->{blocklist}{$block}; - if (-e $fn) { - open(my $fh => $fn); - binmode($fh); - seek($fh => $blockoffset, 0) or die "seek: $!"; - local $/ = \$length; - return scalar <$fh>; - } - elsif (-e "$fn.gz") { - open(my $fh => "$fn.gz"); - binmode($fh); - my $buffer; - gunzip($fh => \$buffer) - or die $GunzipError; - close($fh); - return substr($buffer, $blockoffset, $size); - } - - die "$fn: $!\n"; - } - - sub writebuffer { - my $path = $IDX . shift; - my ($buffer, $offset) = @_; - my $size = length($buffer); - my $finfo = $IMAGE{$path} or die "File $path is not opened!"; - - for (my $written = 0 ; $written < $size ;) { - - # OPTIMIZE: we should not ask for writing more than the - # blocksize - my $n = - _writeblock($finfo, substr($buffer, $written), $offset + $written) - or return $written; - $written += $n; - } - return $size; - } - - sub _writeblock { - my ($finfo, $buffer, $offset) = @_; - my $size = length($buffer); - - my $block = int($offset / $finfo->{meta}{blocksize}); - my $blockoffset = $offset % $finfo->{meta}{blocksize}; - - if (not exists $DIRTY{ $finfo . $block }) { - $DIRTY{ $finfo . $block } = _readblock( - $finfo, - $finfo->{meta}{blocksize}, - $block * $finfo->{meta}{blocksize} - ); - } - - my $length = $finfo->{meta}{blocksize} - $blockoffset; - $length = $size if $size < $length; - - substr($DIRTY{ $finfo . $block }, $blockoffset, $length) = - substr($buffer, 0, $length); - - return $length; - } - - sub _get_meta { - my $path = shift; - my %meta; - open(my $fh => $path); - while (<$fh>) { - last if /^$/; - /^(?\S+):\s+(?.*?)\s*$/ - and do { $meta{ $+{k} } = $+{v}; next; }; - } - return %meta; - } - -} - -__END__ - -=head1 NAME - - fuse-imager - the fuse mount helper for imagers backups - -=head1 SYNOPSIS - - fuse-imager [options] {src} {mount point} - -=head1 DESCRIPTION - -B mounts the src directory (containing F and F -directories) the the specified mount point. - -=head1 OPTIONS - -=over 4 - -=item B<--tmp> [I] - -Write dirty blocks into a buffer file in the specified tmp directory. -If no directory is specified, the system default (usually F) will -be used. (default: no temp file) - -B: The temporary file may get B. - -=item B<-d>|B<--debug> - -Enables debugging output from B. When using this option, -B does not detach from the terminal. (default: off) - -=item B<-->I<[no]>B - -Detach or don't detach from the terminal. (default: detach) - -=item B<-h>|B<--help> - -=item B<-m>|B<--man> - -The common help and man options. - -=back - -=cut diff -r 94a50c69de28 -r 496ee9b0f488 bin/imager --- a/bin/imager Thu Jul 28 12:21:00 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,228 +0,0 @@ -#! /usr/bin/perl - -use 5.010; -use strict; -use warnings; -use POSIX qw(strftime); -use autodie qw(:all); -use Digest::MD5 qw(md5_hex); -use File::Path qw(mkpath); -use File::Basename; -use File::Temp; -use Sys::Hostname; -use IO::Compress::Gzip qw(gzip $GzipError :level :strategy); -use Hash::Util qw(lock_keys); -use Getopt::Long; -use Pod::Usage; - -use constant KiB => 1024; -use constant MiB => 1024 * KiB; -use constant GiB => 1024 * MiB; -use constant NOW => time(); -use constant DATETIME => strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime(NOW)); - -sub get_devsize; -sub get_devname; - -$SIG{INT} = sub { die "Got INT\n" }; - -my %o = ( - compress => undef, - verbose => undef, - blocksize => 2 * MiB, -); -lock_keys(%o); - -my $NOW = time(); - -MAIN: { - my ($src, $dst); - - my $idx = "{DIR}/idx/{HOSTNAME}/{DEVICE}/"; - my $data = "{DIR}/data"; - my $size; - - GetOptions( - "h|help" => sub { pod2usage(-verbose => 1, exit => 0) }, - "m|man" => sub { - pod2usage( - -verbose => 2, - exit => 0, - -noperldoc => system("perldoc -V >/dev/null 2>&1") - ); - }, - "z|compress:i" => sub { $o{compress} = $_[1] ? $_[1] : Z_BEST_SPEED }, - "b|blocksize=s" => sub { - given ($_[1]) { - when (/(\d+)G/i) { $o{blocksize} = $1 * GiB }; - when (/(\d+)M/i) { $o{blocksize} = $1 * MiB }; - when (/(\d+)K/i) { $o{blocksize} = $1 * KiB }; - when (/^(\d+)$/) { $o{blocksize} = $1 }; - default { - die "Blocksize $_[1] is incorrect!\n" - }; - } - }, - ) - and @ARGV == 2 - or pod2usage; - ($src, $dst) = @ARGV; - - foreach ($idx, $data) { - s/{DIR}/$dst/g; - s/{HOSTNAME}/hostname/eg; - s/{DEVICE}/get_devname($src)/eg; - } - $size = get_devsize($src); - - -d $dst or die "$0: $dst: $!\n"; - mkpath([$data, $idx]); - - my $index = File::Temp->new(DIR => $idx); - print {$index} <<__EOT; -# imager -format: 1 -host: @{[hostname]} -filesystem: $src -blocksize: $o{blocksize} -devsize: $size -timestamp: @{[NOW]} -datetime: @{[DATETIME]} - -__EOT - - open(my $in => $src); - binmode($in); - local $/ = \(my $bs = $o{blocksize}); - local $| = 1; - - my %stats = ( - written => 0, - skipped => 0, - todo => 1 + int($size / $o{blocksize}), - ); - - local $SIG{ALRM} = sub { - my $speed = ($stats{written} + $stats{skipped}) / (time - $^T + 1); - say sprintf -"# done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)", - 100 * (($stats{written} + $stats{skipped}) / $stats{todo}), - ($speed ? (scalar localtime($^T + $stats{todo} / $speed)) : ""), - length($stats{todo}) => $stats{written} + $stats{skipped}, - length($stats{todo}) => $stats{written}, - length($stats{todo}) => $stats{skipped}; - alarm(5); - }; - $SIG{ALRM}->(); - - while (my $buffer = <$in>) { - my ($file, $ext, $cs); - $file = $cs = md5_hex($buffer); - $file =~ s/(?(?...).*)/$+{prefix}\/$+{fn}/g; - $ext = $o{compress} ? ".gz" : ""; - - # the extension we do not put into the index - my $log = sprintf "%12d %s %s" => ($. - 1), $cs, $file; - - if (not(-e "$data/$file" or -e "$data/$file.gz")) { - mkpath dirname("$data/$file.gz"); - my $out = File::Temp->new( - TEMPLATE => ".XXXXXXX", - DIR => dirname("$data/$file") - ); - binmode($out); - if ($o{compress}) { - gzip( - \$buffer => $out, - -Minimal => 1, - -Level => Z_BEST_SPEED, - -Strategy => Z_FILTERED - ) or die $GzipError; - } - else { print {$out} $buffer } - close($out); - rename($out => "$data/$file$ext"); - $log .= " *"; - $stats{written}++; - } - else { - $log .= " "; - $stats{skipped}++; - } - - say {$index} $log; - } - $SIG{ALRM}->(); - alarm 0; - - say {$index} "# DONE (runtime " . (time() - $^T) . "s)"; - - say "# DONE (runtime " . (time() - $^T) . "s)"; - say "# WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks"; - say "# SAVINGS " - . sprintf "%3d%%" => 100 * - ($stats{skipped} / ($stats{written} + $stats{skipped})); - - rename $index->filename => "$idx/" . DATETIME; - close $index; - -} - -sub get_devsize { - my ($devname) = @_; - open(my $fh => $devname); - seek($fh, 0, 2); - return tell($fh); -} - -sub get_devname { - my $_ = shift; - s/^\/dev\///; - s/_/__/g; - s/\//_/g; - return $_; -} - -__END__ - -=head1 NAME - - imager - create a block device snapshot - -=head1 SYNOPSIS - - imager [options] {device} {destination} - -=head1 DESCRIPTION - -This tool creates a snapshot of a blockdevice. -Just call it like - - imager /dev/sda1 /media/backup - -This will create F, if not already existing. -The index (blocklist) goes to -IFIFI. The data goes to -I/F. - -=head1 OPTIONS - -=over - -=item B<-z> [I]|B<--compress>[=I] - -Use compression when writing the blocks to disk. (default: off) - -=item B<-b> I|B<--blocksize>=I - -The blocksize used. (may be suffixed with K, M, G). (default: 2MiB) - -=item B<-h>|B<--help> - -=item B<-m>|B<--man> - -The short and longer help. - -=back - -=cut diff -r 94a50c69de28 -r 496ee9b0f488 bin/imager.check --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/imager.check Fri Jul 29 10:53:14 2011 +0200 @@ -0,0 +1,265 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use Pod::Usage; +use Hash::Util qw(lock_keys); +use File::Find; +use File::Temp; +use DB_File; +use File::Basename; +use autodie qw(:all); +use Cwd qw(abs_path); + +use Getopt::Long; +sub get_block_list; +sub purge_unused; +sub check_images; + +my %o = ( + yes => undef, + verbose => undef, + check => undef, +); +lock_keys(%o); + +MAIN: { + Getopt::Long::Configure qw(Bundling); + GetOptions( + "y|yes!" => \$o{yes}, + "v|verbose!" => \$o{verbose}, + "c|check" => \$o{check}, + "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, + "m|man" => sub { + pod2usage( + -verbose => 2, + -exit 0, + -noperldoc => system( + "perldoc -V 1>/dev/null + 2>&1" + ) + ); + }, + ) + and @ARGV + or pod2usage; + my $dir = shift; + my $tmp = File::Temp->new; + + # load the index files, remember the latest + # timestamp we see + #tie %idx, "DB_File" => $tmp->filename; + my %block = get_block_list($dir); + + verbose("# indexed: " + . scalar(@{ $block{""} // [] }) + . " images with " + . (grep !/^\.idx$/ => keys(%block)) + . " blocks"); + + purge_unused($dir => %block); + check_images($dir => %block); +} + +sub verbose { say @_ if $o{verbose} } + +sub get_file_list { + my ($list) = @_; + my @files = (); + + open(my $fh => $list); + while (<$fh>) { + push @files, (split)[2]; + } + return grep /^[a-z\d.\/]+$/ => @files; +} + +sub get_block_list { + my $dir = shift; + my %block; + find( + sub { + (-f) or return; # we need to include the tmp files! + push @{ $block{""} }, abs_path $_; + foreach my $f (get_file_list($_)) { + push @{ $block{$f} } => $#{ $block{""} }; + } + }, + "$dir/idx" + ); + return %block; +} + +sub purge_unused { + my ($dir, %block) = @_; + + my ($total, $done); + verbose("# pass 1 - checking for unused blocks"); + verbose("# pass 1 - estimating file count"); + + # calculate the number of files we expect + find( + sub { + -d or return; + opendir(my $dh => $_); + map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh; + closedir($dh); + }, + "$dir/data" + ); + + # progress + local $SIG{ALRM} = sub { + return alarm 1 if not $done; + my $speed = $done / (time - $^T + 1); + verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", + 100 * ($done / $total), + scalar(localtime($^T + $speed * ($total - $done))), + length($total) => $done, + $total; + alarm 5; + }; + $SIG{ALRM}->(); + + my @unused; + find( + sub { + $done++ if -f; + (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks + + # we don't need uncompressed files if an compressed version + # exists + unlink $_ and return if -f "$_.gz"; + + # cut away the first part of the filename and + # some optional extension + (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; + exists $block{$rn} and return; + push @unused, abs_path $File::Find::name; + return; + + }, + "$dir/data" + ); + $SIG{ALRM}->(); + alarm 0; + + return if not @unused; + + say sprintf "found %d (%.1f%%) unused files", + 0 + @unused, + 100 * (@unused / $total); + + if ($o{yes}) { + verbose("# deleting " . @unused . " files"); + unlink @unused; + return; + } + + if (-t) { + while (1) { + print "delete? [y/N/v]: "; + given () { + when (/^y(?:es)?$/i) { unlink @unused; last } + when (/^v/) { say join "\n", @unused; next } + default { last } + } + } + } + +} + +sub check_images { + my ($dir, %block) = @_; + + my $total = grep { $_ ne "" } keys(%block); + my $done = 0; + + verbose("# pass 2 - checking image completeness"); + + # progress + local $SIG{ALRM} = sub { + return alarm 1 if not $done; + my $speed = $done / (time - $^T + 1); + verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", + 100 * $done / $total, + scalar(localtime($^T + ($total - $done) * $speed)), + length($total) => $done, + $total; + alarm 5; + }; + $SIG{ALRM}->(); + + my %invalid; + foreach my $k (keys %block) { + my $i = $block{$k}; + next if $k eq ""; + ++$done; + + next + if -f "$dir/data/$k" + or -f "$dir/data/$k.gz"; + say "missing $k @$i"; + @invalid{@$i} = (); + } + $SIG{ALRM}->(); + alarm 0; + + # invalid now contains the numbers of the idx files beiing + # invalid + my @invalid = sort @{ $block{""} }[keys %invalid]; + + return if not @invalid; + + say sprintf "found %d (%.1f%%) invalid images:", + 0 + @invalid, + 100 * (@invalid / $total); + + if ($o{yes}) { + unlink @invalid; + return; + } + + while (-t) { + print "delete? [y/N/v] "; + given () { + when (/^y(?:es)?$/i) { unlink @invalid; last } + when (/^v/i) { say join "\n" => @invalid; next } + default { last } + } + } +} +__END__ + +=head1 NAME + + imager.check - checks the imager data and index files + +=head1 SYNOPSIS + + imager.check [options] {directory} + +=head1 DESCRIPTION + +This tool loads all the index files from IF, +checks if all mentioned files are existing and optionally purges +unreferenced files. + +=head1 OPTIONS + +=over + +=item B<-y>|B<--yes> + +Assume "yes" for all questions (dangerous!). (default: no) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The short and longer help. + +=back + +=cut diff -r 94a50c69de28 -r 496ee9b0f488 bin/imager.fuse --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/imager.fuse Fri Jul 29 10:53:14 2011 +0200 @@ -0,0 +1,284 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use autodie qw(:all); +use Getopt::Long; +use Fuse; +use POSIX qw(setpgid :errno_h); +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); +use Pod::Usage; +use Hash::Util qw(lock_keys); +use File::Temp; +use DB_File; +use File::Basename; + +my %o = ( + debug => undef, + detach => 1, + tmp => undef, +); +lock_keys %o; + +use constant ME => basename $0; +my ($DATA, $IDX); + +sub tie_vars; + +MAIN: { + + GetOptions( + "d|debug!" => \$o{debug}, + "detach!" => \$o{detach}, + "tmp:s" => sub { $o{tmp} = length $_[1] ? $_[1] : $ENV{TMP} // "/tmp" }, + "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, + "m|man" => sub { + pod2usage( + -verbose => 2, + -exit => 0, + -noperlpod => system("perldoc -V 1>/dev/null 2>&1") + ); + }, + ) + and @ARGV == 2 + or pod2usage; + + my ($src, $mp) = @ARGV; + + $DATA = "$src/data"; + $IDX = "$src/idx"; + + die ME . ": $DATA: $!" if not -d $DATA; + die ME . ": $IDX: $!" if not -d $IDX; + + if (!$o{debug} and $o{detach}) { + fork() and exit; + $0 = "FUSE $src $mp"; + open(STDOUT => ">/dev/null"); + open(STDIN => "/dev/null"); + + setpgid($$ => $$); + } + + tie_vars $o{tmp}; + + Fuse::main( + mountpoint => $mp, + debug => $o{debug} // 0, + getattr => \&getattr, + getdir => \&getdir, + open => \&openfile, + read => \&readbuffer, + write => \&writebuffer, + ); + + exit; + +} + +# not the fuse functions + +{ + my (%IMAGE, %DIRTY); + + sub tie_vars { + return if not defined $_[0]; + my $file = + -d $_[0] + ? File::Temp->new(DIR => shift, TEMPLATE => "tmp.fuse.XXXXXX") + ->filename + : shift; + tie %DIRTY, "DB_File" => $file + or die "Can't tie to $file: $!\n"; + } + + sub getattr { + my $path = $IDX . shift; + return stat $path if -d $path; + my @attr = stat $path or return -(ENOENT); + my %meta = _get_meta($path); + $attr[7] = $meta{devsize}; + $attr[9] = $meta{timestamp}; + $attr[2] &= ~0222; # r/o + return @attr; + } + + sub getdir { + my $path = $IDX . shift; + opendir(my $dh, $path) or return 0; + return (readdir($dh), 0); + } + + sub openfile { + my $path = $IDX . shift; + return 0 if exists $IMAGE{$path}; + $IMAGE{$path}{meta} = { _get_meta($path) }; + $IMAGE{$path}{blocklist} = {}; + + # skip the file header + open(my $fh => $path); + { local $/ = ""; scalar <$fh> } + + # should check for the format + # $IMAGE{$path}{meta}{format} + + # now read the block list + while (<$fh>) { + /^#/ and last; + my ($block, $cs, $file) = split; + $IMAGE{$path}{blocklist}{$block} = $file; + } + close $fh; + return 0; + } + + sub readbuffer { + my $path = $IDX . shift; + my ($size, $offset) = @_; + my $finfo = $IMAGE{$path} or die "File $path is not opened!"; + return "" if $offset >= $finfo->{meta}{devsize}; + + my $buffer = ""; + for (my $need = $size ; $need > 0 ; $need = $size - length($buffer)) { + $buffer .= _readblock($finfo, $need, $offset + length($buffer)); + } + + return $buffer; + } + + sub _readblock { + my ($finfo, $size, $offset) = @_; + + my $block = int($offset / $finfo->{meta}{blocksize}); + my $blockoffset = $offset % $finfo->{meta}{blocksize}; + + my $length = $finfo->{meta}{blocksize} - $blockoffset; + $length = $size if $size <= $length; + + if (exists $DIRTY{ $finfo . $block }) { + return substr $DIRTY{ $finfo . $block }, $blockoffset, $length; + } + + my $fn = "$DATA/" . $finfo->{blocklist}{$block}; + if (-e $fn) { + open(my $fh => $fn); + binmode($fh); + seek($fh => $blockoffset, 0) or die "seek: $!"; + local $/ = \$length; + return scalar <$fh>; + } + elsif (-e "$fn.gz") { + open(my $fh => "$fn.gz"); + binmode($fh); + my $buffer; + gunzip($fh => \$buffer) + or die $GunzipError; + close($fh); + return substr($buffer, $blockoffset, $size); + } + + die "$fn: $!\n"; + } + + sub writebuffer { + my $path = $IDX . shift; + my ($buffer, $offset) = @_; + my $size = length($buffer); + my $finfo = $IMAGE{$path} or die "File $path is not opened!"; + + for (my $written = 0 ; $written < $size ;) { + + # OPTIMIZE: we should not ask for writing more than the + # blocksize + my $n = + _writeblock($finfo, substr($buffer, $written), $offset + $written) + or return $written; + $written += $n; + } + return $size; + } + + sub _writeblock { + my ($finfo, $buffer, $offset) = @_; + my $size = length($buffer); + + my $block = int($offset / $finfo->{meta}{blocksize}); + my $blockoffset = $offset % $finfo->{meta}{blocksize}; + + if (not exists $DIRTY{ $finfo . $block }) { + $DIRTY{ $finfo . $block } = _readblock( + $finfo, + $finfo->{meta}{blocksize}, + $block * $finfo->{meta}{blocksize} + ); + } + + my $length = $finfo->{meta}{blocksize} - $blockoffset; + $length = $size if $size < $length; + + substr($DIRTY{ $finfo . $block }, $blockoffset, $length) = + substr($buffer, 0, $length); + + return $length; + } + + sub _get_meta { + my $path = shift; + my %meta; + open(my $fh => $path); + while (<$fh>) { + last if /^$/; + /^(?\S+):\s+(?.*?)\s*$/ + and do { $meta{ $+{k} } = $+{v}; next; }; + } + return %meta; + } + +} + +__END__ + +=head1 NAME + + imager.fuse - the fuse mount helper for imagers backups + +=head1 SYNOPSIS + + imager.fuse [options] {src} {mount point} + +=head1 DESCRIPTION + +B mounts the src directory (containing F and F +directories) the the specified mount point. + +=head1 OPTIONS + +=over 4 + +=item B<--tmp> [I] + +Write dirty blocks into a buffer file in the specified tmp directory. +If no directory is specified, the system default (usually F) will +be used. (default: no temp file) + +B: The temporary file may get B. + +=item B<-d>|B<--debug> + +Enables debugging output from B. When using this option, +B does not detach from the terminal. (default: off) + +=item B<-->I<[no]>B + +Detach or don't detach from the terminal. (default: detach) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The common help and man options. + +=back + +=cut diff -r 94a50c69de28 -r 496ee9b0f488 bin/imager.restore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/imager.restore Fri Jul 29 10:53:14 2011 +0200 @@ -0,0 +1,112 @@ +#! /usr/bin/perl +# Eigentlich geht das selbe mit: +# grep '^[[:space:]]*[[:digit:]]' IDX-file | tr -d | cut -f4 -d' ' | while read f; do +# cat DATA/$f || zcat DATA/$f.gz +# done +# ODER +# perl -ne '/^\s*\d/ and print "DATA/" . (split)[2] . "\n"' IDX-File | while read f; do +# cat DATA/$f || zcat DATA/$f.gz +# done + +use 5.010; +use strict; +use warnings; +use File::Basename; +use Cwd qw(abs_path); +use autodie qw(:all); +use Pod::Usage; +use Getopt::Long; +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); + +use constant KiB => 1024; +use constant MiB => 1024 * KiB; +use constant GiB => 1024 * MiB; +use constant ME => basename $0; + +sub find_data_dir; + +MAIN: { + + Getopt::Long::Configure(qw(Bundling)); + GetOptions( + "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, + "m|man" => sub { + pod2usage( + -verbose => 2, + -exit => 0, + -noperldoc => system( + "perldoc -V 1>/dev/null + 2>&1" + ) + ); + }, + ) + and @ARGV == 2 + or pod2usage; + + my $idx = shift; + my $dst = shift; + my $blocksize = undef; + my $data = find_data_dir($idx); + + open(my $fh => $idx); + { local $/ = ""; $_ = <$fh>; } + /^format:\s*1$/m or die ME . ": expected index format 1\n"; + ($blocksize) = /^blocksize:\s*(\d+)/m or die ME . ": no blocksize found\n"; + + my $out; + if ($dst eq "-") { open($out => ">&STDOUT") } + else { open($out => ">", $dst) } + + while (<$fh>) { + next if /^#/; + my ($blk, $hash, $path) = split; + my ($in, $buffer); + + if (-f "$data/$path") { + open($in => "$data/$path"); + binmode($in); + local $/ = \$blocksize; + $buffer = <$in>; + } + elsif (-f "$data/$path.gz") { + open($in => "$data/$path.gz"); + binmode($in); + gunzip($in => \$buffer) + or die $GunzipError; + } + else { + die ME . ": Can't open $data/$path: $!\n"; + } + print {$out} $buffer; + close($in); + } + close($out); + close($fh); +} + +sub find_data_dir { + for (my $dir = shift ; $dir ne "/" ; $dir = abs_path("$dir/..")) { + return "$dir/data" if -d "$dir/data" and -d "$dir/idx"; + } + die ME . ": no data directory found!\n"; +} + +__END__ + +=head1 NAME + + imager.restore - cats the blocks of the imager + +=head1 SYNOPSIS + + imager.restore {idx} {destination} + +=head1 DESCRIPTION + +The B takes all the blocks from the IDX file and +cats them as one data stream. The destination can be any block device, +a file name or even B<-> (STDOUT). + + +=cut diff -r 94a50c69de28 -r 496ee9b0f488 bin/imager.save --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/imager.save Fri Jul 29 10:53:14 2011 +0200 @@ -0,0 +1,228 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use POSIX qw(strftime); +use autodie qw(:all); +use Digest::MD5 qw(md5_hex); +use File::Path qw(mkpath); +use File::Basename; +use File::Temp; +use Sys::Hostname; +use IO::Compress::Gzip qw(gzip $GzipError :level :strategy); +use Hash::Util qw(lock_keys); +use Getopt::Long; +use Pod::Usage; + +use constant KiB => 1024; +use constant MiB => 1024 * KiB; +use constant GiB => 1024 * MiB; +use constant NOW => time(); +use constant DATETIME => strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime(NOW)); + +sub get_devsize; +sub get_devname; + +$SIG{INT} = sub { die "Got INT\n" }; + +my %o = ( + compress => undef, + verbose => undef, + blocksize => 2 * MiB, +); +lock_keys(%o); + +my $NOW = time(); + +MAIN: { + my ($src, $dst); + + my $idx = "{DIR}/idx/{HOSTNAME}/{DEVICE}/"; + my $data = "{DIR}/data"; + my $size; + + GetOptions( + "h|help" => sub { pod2usage(-verbose => 1, exit => 0) }, + "m|man" => sub { + pod2usage( + -verbose => 2, + exit => 0, + -noperldoc => system("perldoc -V >/dev/null 2>&1") + ); + }, + "z|compress:i" => sub { $o{compress} = $_[1] ? $_[1] : Z_BEST_SPEED }, + "b|blocksize=s" => sub { + given ($_[1]) { + when (/(\d+)G/i) { $o{blocksize} = $1 * GiB }; + when (/(\d+)M/i) { $o{blocksize} = $1 * MiB }; + when (/(\d+)K/i) { $o{blocksize} = $1 * KiB }; + when (/^(\d+)$/) { $o{blocksize} = $1 }; + default { + die "Blocksize $_[1] is incorrect!\n" + }; + } + }, + ) + and @ARGV == 2 + or pod2usage; + ($src, $dst) = @ARGV; + + foreach ($idx, $data) { + s/{DIR}/$dst/g; + s/{HOSTNAME}/hostname/eg; + s/{DEVICE}/get_devname($src)/eg; + } + $size = get_devsize($src); + + -d $dst or die "$0: $dst: $!\n"; + mkpath([$data, $idx]); + + my $index = File::Temp->new(DIR => $idx); + print {$index} <<__EOT; +# imager +format: 1 +host: @{[hostname]} +filesystem: $src +blocksize: $o{blocksize} +devsize: $size +timestamp: @{[NOW]} +datetime: @{[DATETIME]} + +__EOT + + open(my $in => $src); + binmode($in); + local $/ = \(my $bs = $o{blocksize}); + local $| = 1; + + my %stats = ( + written => 0, + skipped => 0, + todo => 1 + int($size / $o{blocksize}), + ); + + local $SIG{ALRM} = sub { + my $speed = ($stats{written} + $stats{skipped}) / (time - $^T + 1); + say sprintf +"# done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)", + 100 * (($stats{written} + $stats{skipped}) / $stats{todo}), + ($speed ? (scalar localtime($^T + $stats{todo} / $speed)) : ""), + length($stats{todo}) => $stats{written} + $stats{skipped}, + length($stats{todo}) => $stats{written}, + length($stats{todo}) => $stats{skipped}; + alarm(5); + }; + $SIG{ALRM}->(); + + while (my $buffer = <$in>) { + my ($file, $ext, $cs); + $file = $cs = md5_hex($buffer); + $file =~ s/(?(?...).*)/$+{prefix}\/$+{fn}/g; + $ext = $o{compress} ? ".gz" : ""; + + # the extension we do not put into the index + my $log = sprintf "%12d %s %s" => ($. - 1), $cs, $file; + + if (not(-e "$data/$file" or -e "$data/$file.gz")) { + mkpath dirname("$data/$file.gz"); + my $out = File::Temp->new( + TEMPLATE => ".XXXXXXX", + DIR => dirname("$data/$file") + ); + binmode($out); + if ($o{compress}) { + gzip( + \$buffer => $out, + -Minimal => 1, + -Level => Z_BEST_SPEED, + -Strategy => Z_FILTERED + ) or die $GzipError; + } + else { print {$out} $buffer } + close($out); + rename($out => "$data/$file$ext"); + $log .= " *"; + $stats{written}++; + } + else { + $log .= " "; + $stats{skipped}++; + } + + say {$index} $log; + } + $SIG{ALRM}->(); + alarm 0; + + say {$index} "# DONE (runtime " . (time() - $^T) . "s)"; + + say "# DONE (runtime " . (time() - $^T) . "s)"; + say "# WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks"; + say "# SAVINGS " + . sprintf "%3d%%" => 100 * + ($stats{skipped} / ($stats{written} + $stats{skipped})); + + rename $index->filename => "$idx/" . DATETIME; + close $index; + +} + +sub get_devsize { + my ($devname) = @_; + open(my $fh => $devname); + seek($fh, 0, 2); + return tell($fh); +} + +sub get_devname { + my $_ = shift; + s/^\/dev\///; + s/_/__/g; + s/\//_/g; + return $_; +} + +__END__ + +=head1 NAME + + imager.save - create a block device snapshot + +=head1 SYNOPSIS + + imager.save [options] {device} {destination} + +=head1 DESCRIPTION + +This tool creates a snapshot of a blockdevice. +Just call it like + + imager.save /dev/sda1 /media/backup + +This will create F, if not already existing. +The index (blocklist) goes to +IFIFI. The data goes to +I/F. + +=head1 OPTIONS + +=over + +=item B<-z> [I]|B<--compress>[=I] + +Use compression when writing the blocks to disk. (default: off) + +=item B<-b> I|B<--blocksize>=I + +The blocksize used. (may be suffixed with K, M, G). (default: 2MiB) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The short and longer help. + +=back + +=cut