# HG changeset patch # User Heiko Schlittermann (JUMPER) # Date 1311840195 -7200 # Node ID 49ff641055a36938c655d94a2e5b7da4c44ae50a # Parent 4a01ae9db5c462e2a51d2750bb5ca00cba5fa90f added Build.PL and moved towards a reasonable build structure diff -r 4a01ae9db5c4 -r 49ff641055a3 .hgignore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgignore Thu Jul 28 10:03:15 2011 +0200 @@ -0,0 +1,3 @@ +_build +Build +blib diff -r 4a01ae9db5c4 -r 49ff641055a3 Build.PL --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Build.PL Thu Jul 28 10:03:15 2011 +0200 @@ -0,0 +1,14 @@ +use strict; +use warnings; +use Module::Build; + +Module::Build->new( + dist_name => "imager", + dist_version => "0.0", + dist_author => "Heiko Schlittermann ", + requires => { + perl => "5.0.10", + "IO::Uncompress::Gunzip" => 0, + "IO::Compress::Gzip" => 0, + } +)->create_build_script; diff -r 4a01ae9db5c4 -r 49ff641055a3 bin/catter --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/catter Thu Jul 28 10:03:15 2011 +0200 @@ -0,0 +1,105 @@ +#! /usr/bin/perl +# Eigentlich geht das selbe mit: +# grep '^[[:space:]]*[[:digit:]]' IDX-file | tr -d | cut -f4 -d' ' | while read f; do +# cat DATA/$f || zcat DATA/$f.gz +# done +# ODER +# perl -ne '/^\s*\d/ and print "DATA/" . (split)[2] . "\n"' IDX-File | while read f; do +# cat DATA/$f || zcat DATA/$f.gz +# done + + +use 5.010; +use strict; +use warnings; +use File::Basename; +use Cwd qw(abs_path); +use autodie qw(:all); +use Pod::Usage; +use Getopt::Long; +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); + +use constant KiB => 1024; +use constant MiB => 1024 * KiB; +use constant GiB => 1024 * MiB; +use constant ME => basename $0; + +sub find_data_dir; + +MAIN: { + + Getopt::Long::Configure(qw(Bundling)); + GetOptions( + "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, + "m|man" => sub { pod2usage(-verbose => 2, -exit => 0, + -noperldoc => system("perldoc -V 1>/dev/null + 2>&1")) }, + ) and @ARGV == 2 or pod2usage; + + my $idx = shift; + my $dst = shift; + my $blocksize = undef; + my $data = find_data_dir($idx); + + open(my $fh => $idx); + { local $/ = ""; $_ = <$fh>; } + /^format:\s*1$/m or die ME.": expected index format 1\n"; + ($blocksize) = /^blocksize:\s*(\d+)/m or die ME.": no blocksize found\n"; + + + my $out; + if ($dst eq "-") { open($out => ">&STDOUT") } + else { open($out => ">", $dst) }; + + while (<$fh>) { + next if /^#/; + my ($blk, $hash, $path) = split; + my ($in, $buffer); + + if (-f "$data/$path") { + open($in => "$data/$path"); + binmode($in); + local $/ = \$blocksize; + $buffer = <$in>; + } + elsif (-f "$data/$path.gz") { + open($in => "$data/$path.gz"); + binmode($in); + gunzip($in => \$buffer) + or die $GunzipError; + } + else { + die ME.": Can't open $data/$path: $!\n"; + } + print {$out} $buffer; + close($in); + } + close($out); + close($fh); +} + +sub find_data_dir { + for (my $dir = shift; $dir ne "/"; $dir = abs_path("$dir/..")) { + return "$dir/data" if -d "$dir/data" and -d "$dir/idx"; + } + die ME.": no data directory found!\n"; +} + +__END__ + +=head1 NAME + + catter - cats the blocks of the imager + +=head1 SYNOPSIS + + catter {idx} {destination} + +=head1 DESCRIPTION + +The B takes all the blocks from the IDX file and +cats them as one data stream. The destination can be any block device, +a file name or even B<-> (STDOUT). + + +=cut diff -r 4a01ae9db5c4 -r 49ff641055a3 bin/checker --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/checker Thu Jul 28 10:03:15 2011 +0200 @@ -0,0 +1,244 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use Pod::Usage; +use Hash::Util qw(lock_keys); +use File::Find; +use File::Temp; +use DB_File; +use File::Basename; +use autodie qw(:all); +use Cwd qw(abs_path); + +use Getopt::Long; +sub get_block_list; +sub purge_unused; +sub check_images; + +my %o = ( + yes => undef, + verbose => undef, + check => undef, +); lock_keys(%o); + +MAIN: { + Getopt::Long::Configure qw(Bundling); + GetOptions( + "y|yes!" => \$o{yes}, + "v|verbose!" => \$o{verbose}, + "c|check" => \$o{check}, + "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, + "m|man" => sub { pod2usage(-verbose => 2, -exit 0, + -noperldoc => system("perldoc -V 1>/dev/null + 2>&1"))}, + ) and @ARGV or pod2usage; + my $dir = shift; + my $tmp = File::Temp->new; + + # load the index files, remember the latest + # timestamp we see + #tie %idx, "DB_File" => $tmp->filename; + my %block = get_block_list($dir); + + verbose("# indexed: " + . scalar(@{$block{""}//[]}) . " images with " + . (grep !/^\.idx$/ => keys(%block))." blocks"); + + purge_unused($dir => %block); + check_images($dir => %block); +} + +sub verbose { say @_ if $o{verbose} } + +sub get_file_list { + my ($list) = @_; + my @files = (); + + open(my $fh => $list); + while (<$fh>) { + push @files, (split)[2]; + } + return grep /^[a-z\d.\/]+$/ => @files; +} + +sub get_block_list { + my $dir = shift; + my %block; + find(sub { + (-f) or return; # we need to include the tmp files! + push @{$block{""}}, abs_path $_; + foreach my $f (get_file_list($_)) { + push @{$block{$f}} => $#{$block{""}}; + } + }, "$dir/idx"); + return %block; +} + +sub purge_unused { + my ($dir, %block) = @_; + + my ($total, $done); + verbose("# pass 1 - checking for unused blocks"); + verbose("# pass 1 - estimating file count"); + + # calculate the number of files we expect + find(sub { + -d or return; + opendir(my $dh => $_); + map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh; + closedir($dh); + }, "$dir/data"); + + + # progress + local $SIG{ALRM} = sub { + return alarm 1 if not $done; + my $speed = $done / (time - $^T + 1); + verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", + 100 * ($done/$total), + scalar(localtime($^T + $speed * ($total - $done))), + length($total) => $done, + $total; + alarm 5; + }; + $SIG{ALRM}->(); + + my @unused; + find(sub { + $done++ if -f; + (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks + + # we don't need uncompressed files if an compressed version + # exists + unlink $_ and return if -f "$_.gz"; + + # cut away the first part of the filename and + # some optional extension + (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; + exists $block{$rn} and return; + push @unused, abs_path $File::Find::name; + return; + + }, "$dir/data"); + $SIG{ALRM}->(); + alarm 0; + + return if not @unused; + + say sprintf "found %d (%.1f%%) unused files", + 0+@unused, + 100 * (@unused/$total); + + if ($o{yes}) { + verbose("# deleting ".@unused." files"); + unlink @unused; + return; + } + + if (-t) { + while(1) { + print "delete? [y/N/v]: "; + given () { + when (/^y(?:es)?$/i) { unlink @unused; last } + when (/^v/) { say join "\n", @unused; next } + default { last } + } + } + } + +} + +sub check_images { + my ($dir, %block) = @_; + + my $total = grep { $_ ne "" } keys(%block); + my $done = 0; + + verbose("# pass 2 - checking image completeness"); + + # progress + local $SIG{ALRM} = sub { + return alarm 1 if not $done; + my $speed = $done / (time - $^T + 1); + verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", + 100 * $done/$total, + scalar(localtime($^T + ($total - $done) * $speed)), + length($total) => $done, + $total; + alarm 5; + }; + $SIG{ALRM}->(); + + my %invalid; + foreach my $k (keys %block) { + my $i = $block{$k}; + next if $k eq ""; + ++$done; + + next if -f "$dir/data/$k" + or -f "$dir/data/$k.gz"; + say "missing $k @$i"; + @invalid{@$i} = (); + } + $SIG{ALRM}->(); + alarm 0; + + # invalid now contains the numbers of the idx files beiing + # invalid + my @invalid = sort @{$block{""}}[keys %invalid]; + + return if not @invalid; + + say sprintf "found %d (%.1f%%) invalid images:", + 0+@invalid, + 100 * (@invalid/$total); + + if ($o{yes}) { + unlink @invalid; + return; + } + + while (-t) { + print "delete? [y/N/v] "; + given () { + when (/^y(?:es)?$/i) { unlink @invalid; last } + when (/^v/i) { say join "\n" => @invalid; next } + default { last } + } + } +} +__END__ + +=head1 NAME + + checker - checks the imager data and index files + +=head1 SYNOPSIS + + checker [options] {directory} + +=head1 DESCRIPTION + +This tool loads all the index files from IF, +checks if all mentioned files are existing and optionally purges +unreferenced files. + +=head1 OPTIONS + +=over + +=item B<-y>|B<--yes> + +Assume "yes" for all questions (dangerous!). (default: no) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The short and longer help. + +=back + +=cut diff -r 4a01ae9db5c4 -r 49ff641055a3 bin/fuse-imager --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/fuse-imager Thu Jul 28 10:03:15 2011 +0200 @@ -0,0 +1,268 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use autodie qw(:all); +use Getopt::Long; +use Fuse; +use POSIX qw(setpgid :errno_h); +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); +use Pod::Usage; +use Hash::Util qw(lock_keys); +use File::Temp; +use DB_File; +use File::Basename; + +my %o = ( + debug => undef, + detach => 1, + tmp => undef, +); lock_keys %o; + +use constant ME => basename $0; +my ($DATA, $IDX); + +sub tie_vars; + +MAIN: { + + GetOptions( + "d|debug!" => \$o{debug}, + "detach!" => \$o{detach}, + "tmp:s" => sub { $o{tmp} = length $_[1] ? $_[1] : $ENV{TMP}// "/tmp" }, + "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, + "m|man" => sub { pod2usage(-verbose => 2, -exit => 0, + -noperlpod => system("perldoc -V 1>/dev/null 2>&1")) }, + ) and @ARGV == 2 or pod2usage; + + my ($src, $mp) = @ARGV; + + $DATA = "$src/data"; + $IDX = "$src/idx"; + + die ME.": $DATA: $!" if not -d $DATA; + die ME.": $IDX: $!" if not -d $IDX; + + if (!$o{debug} and $o{detach}) { + fork() and exit; + $0 = "FUSE $src $mp"; + open(STDOUT => ">/dev/null"); + open(STDIN => "/dev/null"); + + setpgid($$ => $$); + } + + tie_vars $o{tmp}; + + Fuse::main(mountpoint => $mp, + debug => $o{debug} // 0, + getattr => \&getattr, + getdir => \&getdir, + open => \&openfile, + read => \&readbuffer, + write => \&writebuffer, + ); + + exit; + +} + +# not the fuse functions + +{ + my (%IMAGE, %DIRTY); + +sub tie_vars { + return if not defined $_[0]; + my $file = -d $_[0] ? File::Temp->new(DIR => shift, TEMPLATE => "tmp.fuse.XXXXXX")->filename : shift; + tie %DIRTY, "DB_File" => $file + or die "Can't tie to $file: $!\n"; +} + +sub getattr { + my $path = $IDX . shift; + return stat $path if -d $path; + my @attr = stat $path or return -(ENOENT); + my %meta = _get_meta($path); + $attr[7] = $meta{devsize}; + $attr[9] = $meta{timestamp}; + $attr[2] &= ~0222; # r/o + return @attr; +} + +sub getdir { + my $path = $IDX . shift; + opendir(my $dh, $path) or return 0; + return (readdir($dh), 0); +} + +sub openfile { + my $path = $IDX . shift; + return 0 if exists $IMAGE{$path}; + $IMAGE{$path}{meta} = { _get_meta($path) }; + $IMAGE{$path}{blocklist} = {}; + + # skip the file header + open(my $fh => $path); + { local $/ = ""; scalar <$fh> } + + # should check for the format + # $IMAGE{$path}{meta}{format} + + # now read the block list + while (<$fh>) { + /^#/ and last; + my ($block, $cs, $file) = split; + $IMAGE{$path}{blocklist}{$block} = $file; + } + close $fh; + return 0; +} + +sub readbuffer { + my $path = $IDX . shift; + my ($size, $offset) = @_; + my $finfo = $IMAGE{$path} or die "File $path is not opened!"; + return "" if $offset >= $finfo->{meta}{devsize}; + + my $buffer = ""; + for (my $need = $size; $need > 0; $need = $size - length($buffer)) { + $buffer .= _readblock($finfo, $need, $offset + length($buffer)); + } + + return $buffer; +} + +sub _readblock { + my ($finfo, $size, $offset) = @_; + + my $block = int($offset / $finfo->{meta}{blocksize}); + my $blockoffset = $offset % $finfo->{meta}{blocksize}; + + my $length = $finfo->{meta}{blocksize} - $blockoffset; + $length = $size if $size <= $length; + + if (exists $DIRTY{$finfo.$block}) { + return substr $DIRTY{$finfo.$block}, $blockoffset, $length; + } + + my $fn = "$DATA/" . $finfo->{blocklist}{$block}; + if (-e $fn) { + open(my $fh => $fn); + binmode($fh); + seek($fh => $blockoffset, 0) or die "seek: $!"; + local $/ = \$length; + return scalar <$fh>; + } + elsif (-e "$fn.gz") { + open(my $fh => "$fn.gz"); + binmode($fh); + my $buffer; + gunzip($fh => \$buffer) + or die $GunzipError; + close($fh); + return substr($buffer, $blockoffset, $size); + } + + die "$fn: $!\n"; +} + +sub writebuffer { + my $path = $IDX . shift; + my ($buffer, $offset) = @_; + my $size = length($buffer); + my $finfo = $IMAGE{$path} or die "File $path is not opened!"; + + for (my $written = 0; $written < $size;) { + # OPTIMIZE: we should not ask for writing more than the + # blocksize + my $n = _writeblock($finfo, substr($buffer, $written), $offset + $written) + or return $written; + $written += $n; + } + return $size; +} + +sub _writeblock { + my ($finfo, $buffer, $offset) = @_; + my $size = length($buffer); + + my $block = int($offset / $finfo->{meta}{blocksize}); + my $blockoffset = $offset % $finfo->{meta}{blocksize}; + + if (not exists $DIRTY{$finfo.$block}) { + $DIRTY{$finfo.$block} = _readblock( + $finfo, + $finfo->{meta}{blocksize}, + $block * $finfo->{meta}{blocksize}); + } + + my $length = $finfo->{meta}{blocksize} - $blockoffset; + $length = $size if $size < $length; + + substr($DIRTY{$finfo.$block}, $blockoffset, $length) + = substr($buffer, 0, $length); + + return $length; +} + +sub _get_meta { + my $path = shift; + my %meta; + open(my $fh => $path); + while(<$fh>) { + last if /^$/; + /^(?\S+):\s+(?.*?)\s*$/ and do { $meta{$+{k}} = $+{v}; next; }; + } + return %meta; +} + +} + + +__END__ + +=head1 NAME + + fuse-imager - the fuse mount helper for imagers backups + +=head1 SYNOPSIS + + fuse-imager [options] {src} {mount point} + +=head1 DESCRIPTION + +B mounts the src directory (containing F and F +directories) the the specified mount point. + +=head1 OPTIONS + +=over 4 + +=item B<--tmp> [I] + +Write dirty blocks into a buffer file in the specified tmp directory. +If no directory is specified, the system default (usually F) will +be used. (default: no temp file) + +B: The temporary file may get B. + +=item B<-d>|B<--debug> + +Enables debugging output from B. When using this option, +B does not detach from the terminal. (default: off) + +=item B<-->I<[no]>B + +Detach or don't detach from the terminal. (default: detach) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The common help and man options. + +=back + +=cut diff -r 4a01ae9db5c4 -r 49ff641055a3 bin/imager --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/imager Thu Jul 28 10:03:15 2011 +0200 @@ -0,0 +1,210 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use POSIX qw(strftime); +use autodie qw(:all); +use Digest::MD5 qw(md5_hex); +use File::Path qw(mkpath); +use File::Basename; +use File::Temp; +use Sys::Hostname; +use IO::Compress::Gzip qw(gzip $GzipError :level :strategy); +use Hash::Util qw(lock_keys); +use Getopt::Long; +use Pod::Usage; + +use constant KiB => 1024; +use constant MiB => 1024 * KiB; +use constant GiB => 1024 * MiB; +use constant NOW => time(); +use constant DATETIME => strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime(NOW)); + +sub get_devsize; +sub get_devname; + +$SIG{INT} = sub { die "Got INT\n" }; + +my %o = ( + compress => undef, + verbose => undef, + blocksize => 4 * MiB, +); lock_keys(%o); + +my $NOW = time(); + +MAIN: { + my ($src, $dst); + + my $idx = "{DIR}/idx/{HOSTNAME}/{DEVICE}/"; + my $data = "{DIR}/data"; + my $size; + + GetOptions( + "h|help" => sub { pod2usage(-verbose => 1, exit => 0) }, + "m|man" => sub { pod2usage(-verbose => 2, exit => 0, + -noperldoc => system("perldoc -V >/dev/null 2>&1")) + }, + "z|compress:i" => sub { $o{compress} = $_[1] ? $_[1] : Z_BEST_SPEED }, + "b|blocksize=s" => sub { + given ($_[1]) { + when (/(\d+)G/i) { $o{blocksize} = $1 * GiB }; + when (/(\d+)M/i) { $o{blocksize} = $1 * MiB }; + when (/(\d+)K/i) { $o{blocksize} = $1 * KiB }; + when (/^(\d+)$/) { $o{blocksize} = $1 }; + default { die "Blocksize $_[1] is incorrect!\n" + }; + } + }, + ) and @ARGV == 2 or pod2usage; + ($src, $dst) = @ARGV; + + foreach ($idx, $data) { + s/{DIR}/$dst/g; + s/{HOSTNAME}/hostname/eg; + s/{DEVICE}/get_devname($src)/eg; + } + $size = get_devsize($src); + + -d $dst or die "$0: $dst: $!\n"; + mkpath([$data, $idx]); + + my $index = File::Temp->new(DIR => $idx); + print {$index} <<__EOT; +# imager +format: 1 +host: @{[hostname]} +filesystem: $src +blocksize: $o{blocksize} +devsize: $size +timestamp: @{[NOW]} +datetime: @{[DATETIME]} + +__EOT + + open(my $in => $src); + binmode($in); + local $/ = \(my $bs = $o{blocksize}); + local $| = 1; + + my %stats = ( + written => 0, + skipped => 0, + todo => 1 + int($size / $o{blocksize}), + ); + + local $SIG{ALRM} = sub { + my $speed = ($stats{written} + $stats{skipped}) / (time - $^T + 1); + say sprintf "# done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)", + 100 * (($stats{written}+$stats{skipped})/$stats{todo}), + ($speed ? (scalar localtime($^T + $stats{todo} / $speed)) : ""), + length($stats{todo}) => $stats{written} + $stats{skipped}, + length($stats{todo}) => $stats{written}, + length($stats{todo}) => $stats{skipped}; + alarm(5); + }; + $SIG{ALRM}->(); + + while (my $buffer = <$in>) { + my ($file, $ext, $cs); + $file = $cs = md5_hex($buffer); + $file =~ s/(?(?...).*)/$+{prefix}\/$+{fn}/g; + $ext = ".gz" if $o{compress}; + + # the extension we do not put into the index + my $log = sprintf "%12d %s %s" => ($.-1), $cs, $file; + + if (not (-e "$data/$file" or -e "$data/$file.gz")) { + mkpath dirname("$data/$file.gz"); + my $out = File::Temp->new(TEMPLATE => ".XXXXXXX", DIR => dirname("$data/$file")); + binmode($out); + if ($o{compress}) { + gzip(\$buffer => $out, + -Minimal => 1, + -Level => Z_BEST_SPEED, + -Strategy => Z_FILTERED) + or die $GzipError + } + else { print {$out} $buffer } + close($out); + rename($out => "$data/$file$ext"); + $log .= " *"; + $stats{written}++; + } + else { + $log .= " "; + $stats{skipped}++; + } + + say {$index} $log; + } + $SIG{ALRM}->(); + alarm 0; + + say {$index} "# DONE (runtime " . (time() - $^T) . "s)"; + + say "# DONE (runtime " . (time() - $^T) . "s)"; + say "# WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks"; + say "# SAVINGS " + . sprintf "%3d%%" => 100 * ($stats{skipped}/($stats{written}+$stats{skipped})); + + rename $index->filename => "$idx/".DATETIME; + close $index; + +} + +sub get_devsize { + my ($devname) = @_; + open(my $fh => $devname); + seek($fh, 0, 2); + return tell($fh); +} + +sub get_devname { + my $_ = shift; + s/^\/dev\///; + s/_/__/g; + s/\//_/g; + return $_; +} + +__END__ + +=head1 NAME + + imager - create a block device snapshot + +=head1 SYNOPSIS + + imager [options] {device} {destination} + +=head1 DESCRIPTION + +This tool creates a snapshot of a blockdevice. +Just call it like + + imager /dev/sda1 /media/backup + +This will create F, if not already existing. +The index (blocklist) goes to +IFIFI. The data goes to +I/F. + +=head1 OPTIONS + +=over + +=item B<-z>[I]|B<--compress>[=I] + +Use compression when writing the blocks to disk. (default: off) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The short and longer help. + +=back + +=cut diff -r 4a01ae9db5c4 -r 49ff641055a3 catter --- a/catter Tue Jul 26 11:54:40 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -#! /usr/bin/perl -# Eigentlich geht das selbe mit: -# grep '^[[:space:]]*[[:digit:]]' IDX-file | tr -d | cut -f4 -d' ' | while read f; do -# cat DATA/$f || zcat DATA/$f.gz -# done -# ODER -# perl -ne '/^\s*\d/ and print "DATA/" . (split)[2] . "\n"' IDX-File | while read f; do -# cat DATA/$f || zcat DATA/$f.gz -# done - - -use 5.010; -use strict; -use warnings; -use File::Basename; -use Cwd qw(abs_path); -use autodie qw(:all); -use Pod::Usage; -use Getopt::Long; -use IO::Uncompress::Gunzip qw(gunzip $GunzipError); - -use constant KiB => 1024; -use constant MiB => 1024 * KiB; -use constant GiB => 1024 * MiB; -use constant ME => basename $0; - -sub find_data_dir; - -MAIN: { - - Getopt::Long::Configure(qw(Bundling)); - GetOptions( - "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, - "m|man" => sub { pod2usage(-verbose => 2, -exit => 0, - -noperldoc => system("perldoc -V 1>/dev/null - 2>&1")) }, - ) and @ARGV == 2 or pod2usage; - - my $idx = shift; - my $dst = shift; - my $blocksize = undef; - my $data = find_data_dir($idx); - - open(my $fh => $idx); - { local $/ = ""; $_ = <$fh>; } - /^format:\s*1$/m or die ME.": expected index format 1\n"; - ($blocksize) = /^blocksize:\s*(\d+)/m or die ME.": no blocksize found\n"; - - - my $out; - if ($dst eq "-") { open($out => ">&STDOUT") } - else { open($out => ">", $dst) }; - - while (<$fh>) { - next if /^#/; - my ($blk, $hash, $path) = split; - my ($in, $buffer); - - if (-f "$data/$path") { - open($in => "$data/$path"); - binmode($in); - local $/ = \$blocksize; - $buffer = <$in>; - } - elsif (-f "$data/$path.gz") { - open($in => "$data/$path.gz"); - binmode($in); - gunzip($in => \$buffer) - or die $GunzipError; - } - else { - die ME.": Can't open $data/$path: $!\n"; - } - print {$out} $buffer; - close($in); - } - close($out); - close($fh); -} - -sub find_data_dir { - for (my $dir = shift; $dir ne "/"; $dir = abs_path("$dir/..")) { - return "$dir/data" if -d "$dir/data" and -d "$dir/idx"; - } - die ME.": no data directory found!\n"; -} - -__END__ - -=head1 NAME - - catter - cats the blocks of the imager - -=head1 SYNOPSIS - - catter {idx} {destination} - -=head1 DESCRIPTION - -The B takes all the blocks from the IDX file and -cats them as one data stream. The destination can be any block device, -a file name or even B<-> (STDOUT). - - -=cut diff -r 4a01ae9db5c4 -r 49ff641055a3 checker --- a/checker Tue Jul 26 11:54:40 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,244 +0,0 @@ -#! /usr/bin/perl - -use 5.010; -use strict; -use warnings; -use Pod::Usage; -use Hash::Util qw(lock_keys); -use File::Find; -use File::Temp; -use DB_File; -use File::Basename; -use autodie qw(:all); -use Cwd qw(abs_path); - -use Getopt::Long; -sub get_block_list; -sub purge_unused; -sub check_images; - -my %o = ( - yes => undef, - verbose => undef, - check => undef, -); lock_keys(%o); - -MAIN: { - Getopt::Long::Configure qw(Bundling); - GetOptions( - "y|yes!" => \$o{yes}, - "v|verbose!" => \$o{verbose}, - "c|check" => \$o{check}, - "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, - "m|man" => sub { pod2usage(-verbose => 2, -exit 0, - -noperldoc => system("perldoc -V 1>/dev/null - 2>&1"))}, - ) and @ARGV or pod2usage; - my $dir = shift; - my $tmp = File::Temp->new; - - # load the index files, remember the latest - # timestamp we see - #tie %idx, "DB_File" => $tmp->filename; - my %block = get_block_list($dir); - - verbose("# indexed: " - . scalar(@{$block{""}//[]}) . " images with " - . (grep !/^\.idx$/ => keys(%block))." blocks"); - - purge_unused($dir => %block); - check_images($dir => %block); -} - -sub verbose { say @_ if $o{verbose} } - -sub get_file_list { - my ($list) = @_; - my @files = (); - - open(my $fh => $list); - while (<$fh>) { - push @files, (split)[2]; - } - return grep /^[a-z\d.\/]+$/ => @files; -} - -sub get_block_list { - my $dir = shift; - my %block; - find(sub { - (-f) or return; # we need to include the tmp files! - push @{$block{""}}, abs_path $_; - foreach my $f (get_file_list($_)) { - push @{$block{$f}} => $#{$block{""}}; - } - }, "$dir/idx"); - return %block; -} - -sub purge_unused { - my ($dir, %block) = @_; - - my ($total, $done); - verbose("# pass 1 - checking for unused blocks"); - verbose("# pass 1 - estimating file count"); - - # calculate the number of files we expect - find(sub { - -d or return; - opendir(my $dh => $_); - map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh; - closedir($dh); - }, "$dir/data"); - - - # progress - local $SIG{ALRM} = sub { - return alarm 1 if not $done; - my $speed = $done / (time - $^T + 1); - verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", - 100 * ($done/$total), - scalar(localtime($^T + $speed * ($total - $done))), - length($total) => $done, - $total; - alarm 5; - }; - $SIG{ALRM}->(); - - my @unused; - find(sub { - $done++ if -f; - (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks - - # we don't need uncompressed files if an compressed version - # exists - unlink $_ and return if -f "$_.gz"; - - # cut away the first part of the filename and - # some optional extension - (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; - exists $block{$rn} and return; - push @unused, abs_path $File::Find::name; - return; - - }, "$dir/data"); - $SIG{ALRM}->(); - alarm 0; - - return if not @unused; - - say sprintf "found %d (%.1f%%) unused files", - 0+@unused, - 100 * (@unused/$total); - - if ($o{yes}) { - verbose("# deleting ".@unused." files"); - unlink @unused; - return; - } - - if (-t) { - while(1) { - print "delete? [y/N/v]: "; - given () { - when (/^y(?:es)?$/i) { unlink @unused; last } - when (/^v/) { say join "\n", @unused; next } - default { last } - } - } - } - -} - -sub check_images { - my ($dir, %block) = @_; - - my $total = grep { $_ ne "" } keys(%block); - my $done = 0; - - verbose("# pass 2 - checking image completeness"); - - # progress - local $SIG{ALRM} = sub { - return alarm 1 if not $done; - my $speed = $done / (time - $^T + 1); - verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", - 100 * $done/$total, - scalar(localtime($^T + ($total - $done) * $speed)), - length($total) => $done, - $total; - alarm 5; - }; - $SIG{ALRM}->(); - - my %invalid; - foreach my $k (keys %block) { - my $i = $block{$k}; - next if $k eq ""; - ++$done; - - next if -f "$dir/data/$k" - or -f "$dir/data/$k.gz"; - say "missing $k @$i"; - @invalid{@$i} = (); - } - $SIG{ALRM}->(); - alarm 0; - - # invalid now contains the numbers of the idx files beiing - # invalid - my @invalid = sort @{$block{""}}[keys %invalid]; - - return if not @invalid; - - say sprintf "found %d (%.1f%%) invalid images:", - 0+@invalid, - 100 * (@invalid/$total); - - if ($o{yes}) { - unlink @invalid; - return; - } - - while (-t) { - print "delete? [y/N/v] "; - given () { - when (/^y(?:es)?$/i) { unlink @invalid; last } - when (/^v/i) { say join "\n" => @invalid; next } - default { last } - } - } -} -__END__ - -=head1 NAME - - checker - checks the imager data and index files - -=head1 SYNOPSIS - - checker [options] {directory} - -=head1 DESCRIPTION - -This tool loads all the index files from IF, -checks if all mentioned files are existing and optionally purges -unreferenced files. - -=head1 OPTIONS - -=over - -=item B<-y>|B<--yes> - -Assume "yes" for all questions (dangerous!). (default: no) - -=item B<-h>|B<--help> - -=item B<-m>|B<--man> - -The short and longer help. - -=back - -=cut diff -r 4a01ae9db5c4 -r 49ff641055a3 fuse --- a/fuse Tue Jul 26 11:54:40 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,268 +0,0 @@ -#! /usr/bin/perl - -use 5.010; -use strict; -use warnings; -use autodie qw(:all); -use Getopt::Long; -use Fuse; -use POSIX qw(setpgid :errno_h); -use IO::Uncompress::Gunzip qw(gunzip $GunzipError); -use Pod::Usage; -use Hash::Util qw(lock_keys); -use File::Temp; -use DB_File; -use File::Basename; - -my %o = ( - debug => undef, - detach => 1, - tmp => undef, -); lock_keys %o; - -use constant ME => basename $0; -my ($DATA, $IDX); - -sub tie_vars; - -MAIN: { - - GetOptions( - "d|debug!" => \$o{debug}, - "detach!" => \$o{detach}, - "tmp:s" => sub { $o{tmp} = length $_[1] ? $_[1] : $ENV{TMP}// "/tmp" }, - "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, - "m|man" => sub { pod2usage(-verbose => 2, -exit => 0, - -noperlpod => system("perldoc -V 1>/dev/null 2>&1")) }, - ) and @ARGV == 2 or pod2usage; - - my ($src, $mp) = @ARGV; - - $DATA = "$src/data"; - $IDX = "$src/idx"; - - die ME.": $DATA: $!" if not -d $DATA; - die ME.": $IDX: $!" if not -d $IDX; - - if (!$o{debug} and $o{detach}) { - fork() and exit; - $0 = "FUSE $src $mp"; - open(STDOUT => ">/dev/null"); - open(STDIN => "/dev/null"); - - setpgid($$ => $$); - } - - tie_vars $o{tmp}; - - Fuse::main(mountpoint => $mp, - debug => $o{debug} // 0, - getattr => \&getattr, - getdir => \&getdir, - open => \&openfile, - read => \&readbuffer, - write => \&writebuffer, - ); - - exit; - -} - -# not the fuse functions - -{ - my (%IMAGE, %DIRTY); - -sub tie_vars { - return if not defined $_[0]; - my $file = -d $_[0] ? File::Temp->new(DIR => shift, TEMPLATE => "tmp.fuse.XXXXXX")->filename : shift; - tie %DIRTY, "DB_File" => $file - or die "Can't tie to $file: $!\n"; -} - -sub getattr { - my $path = $IDX . shift; - return stat $path if -d $path; - my @attr = stat $path or return -(ENOENT); - my %meta = _get_meta($path); - $attr[7] = $meta{devsize}; - $attr[9] = $meta{timestamp}; - $attr[2] &= ~0222; # r/o - return @attr; -} - -sub getdir { - my $path = $IDX . shift; - opendir(my $dh, $path) or return 0; - return (readdir($dh), 0); -} - -sub openfile { - my $path = $IDX . shift; - return 0 if exists $IMAGE{$path}; - $IMAGE{$path}{meta} = { _get_meta($path) }; - $IMAGE{$path}{blocklist} = {}; - - # skip the file header - open(my $fh => $path); - { local $/ = ""; scalar <$fh> } - - # should check for the format - # $IMAGE{$path}{meta}{format} - - # now read the block list - while (<$fh>) { - /^#/ and last; - my ($block, $cs, $file) = split; - $IMAGE{$path}{blocklist}{$block} = $file; - } - close $fh; - return 0; -} - -sub readbuffer { - my $path = $IDX . shift; - my ($size, $offset) = @_; - my $finfo = $IMAGE{$path} or die "File $path is not opened!"; - return "" if $offset >= $finfo->{meta}{devsize}; - - my $buffer = ""; - for (my $need = $size; $need > 0; $need = $size - length($buffer)) { - $buffer .= _readblock($finfo, $need, $offset + length($buffer)); - } - - return $buffer; -} - -sub _readblock { - my ($finfo, $size, $offset) = @_; - - my $block = int($offset / $finfo->{meta}{blocksize}); - my $blockoffset = $offset % $finfo->{meta}{blocksize}; - - my $length = $finfo->{meta}{blocksize} - $blockoffset; - $length = $size if $size <= $length; - - if (exists $DIRTY{$finfo.$block}) { - return substr $DIRTY{$finfo.$block}, $blockoffset, $length; - } - - my $fn = "$DATA/" . $finfo->{blocklist}{$block}; - if (-e $fn) { - open(my $fh => $fn); - binmode($fh); - seek($fh => $blockoffset, 0) or die "seek: $!"; - local $/ = \$length; - return scalar <$fh>; - } - elsif (-e "$fn.gz") { - open(my $fh => "$fn.gz"); - binmode($fh); - my $buffer; - gunzip($fh => \$buffer) - or die $GunzipError; - close($fh); - return substr($buffer, $blockoffset, $size); - } - - die "$fn: $!\n"; -} - -sub writebuffer { - my $path = $IDX . shift; - my ($buffer, $offset) = @_; - my $size = length($buffer); - my $finfo = $IMAGE{$path} or die "File $path is not opened!"; - - for (my $written = 0; $written < $size;) { - # OPTIMIZE: we should not ask for writing more than the - # blocksize - my $n = _writeblock($finfo, substr($buffer, $written), $offset + $written) - or return $written; - $written += $n; - } - return $size; -} - -sub _writeblock { - my ($finfo, $buffer, $offset) = @_; - my $size = length($buffer); - - my $block = int($offset / $finfo->{meta}{blocksize}); - my $blockoffset = $offset % $finfo->{meta}{blocksize}; - - if (not exists $DIRTY{$finfo.$block}) { - $DIRTY{$finfo.$block} = _readblock( - $finfo, - $finfo->{meta}{blocksize}, - $block * $finfo->{meta}{blocksize}); - } - - my $length = $finfo->{meta}{blocksize} - $blockoffset; - $length = $size if $size < $length; - - substr($DIRTY{$finfo.$block}, $blockoffset, $length) - = substr($buffer, 0, $length); - - return $length; -} - -sub _get_meta { - my $path = shift; - my %meta; - open(my $fh => $path); - while(<$fh>) { - last if /^$/; - /^(?\S+):\s+(?.*?)\s*$/ and do { $meta{$+{k}} = $+{v}; next; }; - } - return %meta; -} - -} - - -__END__ - -=head1 NAME - - fuse - the fuse mount helper for imagers backups - -=head1 SYNOPSIS - - fuse [options] {src} {mount point} - -=head1 DESCRIPTION - -B mounts the src directory (containing F and F -directories) the the specified mount point. - -=head1 OPTIONS - -=over 4 - -=item B<--tmp> [I] - -Write dirty blocks into a buffer file in the specified tmp directory. -If no directory is specified, the system default (usually F) will -be used. (default: no temp file) - -B: The temporary file may get B. - -=item B<-d>|B<--debug> - -Enables debugging output from B. When using this option, -B does not detach from the terminal. (default: off) - -=item B<-->I<[no]>B - -Detach or don't detach from the terminal. (default: detach) - -=item B<-h>|B<--help> - -=item B<-m>|B<--man> - -The common help and man options. - -=back - -=cut diff -r 4a01ae9db5c4 -r 49ff641055a3 imager --- a/imager Tue Jul 26 11:54:40 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -#! /usr/bin/perl - -use 5.010; -use strict; -use warnings; -use POSIX qw(strftime); -use autodie qw(:all); -use Digest::MD5 qw(md5_hex); -use File::Path qw(mkpath); -use File::Basename; -use File::Temp; -use Sys::Hostname; -use IO::Compress::Gzip qw(gzip $GzipError :level :strategy); -use Hash::Util qw(lock_keys); -use Getopt::Long; -use Pod::Usage; - -use constant KiB => 1024; -use constant MiB => 1024 * KiB; -use constant GiB => 1024 * MiB; -use constant NOW => time(); -use constant DATETIME => strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime(NOW)); - -sub get_devsize; -sub get_devname; - -$SIG{INT} = sub { die "Got INT\n" }; - -my %o = ( - compress => undef, - verbose => undef, - blocksize => 4 * MiB, -); lock_keys(%o); - -my $NOW = time(); - -MAIN: { - my ($src, $dst); - - my $idx = "{DIR}/idx/{HOSTNAME}/{DEVICE}/"; - my $data = "{DIR}/data"; - my $size; - - GetOptions( - "h|help" => sub { pod2usage(-verbose => 1, exit => 0) }, - "m|man" => sub { pod2usage(-verbose => 2, exit => 0, - -noperldoc => system("perldoc -V >/dev/null 2>&1")) - }, - "z|compress:i" => sub { $o{compress} = $_[1] ? $_[1] : Z_BEST_SPEED }, - "b|blocksize=s" => sub { - given ($_[1]) { - when (/(\d+)G/i) { $o{blocksize} = $1 * GiB }; - when (/(\d+)M/i) { $o{blocksize} = $1 * MiB }; - when (/(\d+)K/i) { $o{blocksize} = $1 * KiB }; - when (/^(\d+)$/) { $o{blocksize} = $1 }; - default { die "Blocksize $_[1] is incorrect!\n" - }; - } - }, - ) and @ARGV == 2 or pod2usage; - ($src, $dst) = @ARGV; - - foreach ($idx, $data) { - s/{DIR}/$dst/g; - s/{HOSTNAME}/hostname/eg; - s/{DEVICE}/get_devname($src)/eg; - } - $size = get_devsize($src); - - -d $dst or die "$0: $dst: $!\n"; - mkpath([$data, $idx]); - - my $index = File::Temp->new(DIR => $idx); - print {$index} <<__EOT; -# imager -format: 1 -host: @{[hostname]} -filesystem: $src -blocksize: $o{blocksize} -devsize: $size -timestamp: @{[NOW]} -datetime: @{[DATETIME]} - -__EOT - - open(my $in => $src); - binmode($in); - local $/ = \(my $bs = $o{blocksize}); - local $| = 1; - - my %stats = ( - written => 0, - skipped => 0, - todo => 1 + int($size / $o{blocksize}), - ); - - local $SIG{ALRM} = sub { - my $speed = ($stats{written} + $stats{skipped}) / (time - $^T + 1); - say sprintf "# done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)", - 100 * (($stats{written}+$stats{skipped})/$stats{todo}), - ($speed ? (scalar localtime($^T + $stats{todo} / $speed)) : ""), - length($stats{todo}) => $stats{written} + $stats{skipped}, - length($stats{todo}) => $stats{written}, - length($stats{todo}) => $stats{skipped}; - alarm(5); - }; - $SIG{ALRM}->(); - - while (my $buffer = <$in>) { - my ($file, $ext, $cs); - $file = $cs = md5_hex($buffer); - $file =~ s/(?(?...).*)/$+{prefix}\/$+{fn}/g; - $ext = ".gz" if $o{compress}; - - # the extension we do not put into the index - my $log = sprintf "%12d %s %s" => ($.-1), $cs, $file; - - if (not (-e "$data/$file" or -e "$data/$file.gz")) { - mkpath dirname("$data/$file.gz"); - my $out = File::Temp->new(TEMPLATE => ".XXXXXXX", DIR => dirname("$data/$file")); - binmode($out); - if ($o{compress}) { - gzip(\$buffer => $out, - -Minimal => 1, - -Level => Z_BEST_SPEED, - -Strategy => Z_FILTERED) - or die $GzipError - } - else { print {$out} $buffer } - close($out); - rename($out => "$data/$file$ext"); - $log .= " *"; - $stats{written}++; - } - else { - $log .= " "; - $stats{skipped}++; - } - - say {$index} $log; - } - $SIG{ALRM}->(); - alarm 0; - - say {$index} "# DONE (runtime " . (time() - $^T) . "s)"; - - say "# DONE (runtime " . (time() - $^T) . "s)"; - say "# WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks"; - say "# SAVINGS " - . sprintf "%3d%%" => 100 * ($stats{skipped}/($stats{written}+$stats{skipped})); - - rename $index->filename => "$idx/".DATETIME; - close $index; - -} - -sub get_devsize { - my ($devname) = @_; - open(my $fh => $devname); - seek($fh, 0, 2); - return tell($fh); -} - -sub get_devname { - my $_ = shift; - s/^\/dev\///; - s/_/__/g; - s/\//_/g; - return $_; -} - -__END__ - -=head1 NAME - - imager - create a block device snapshot - -=head1 SYNOPSIS - - imager [options] {device} {destination} - -=head1 DESCRIPTION - -This tool creates a snapshot of a blockdevice. -Just call it like - - imager /dev/sda1 /media/backup - -This will create F, if not already existing. -The index (blocklist) goes to -IFIFI. The data goes to -I/F. - -=head1 OPTIONS - -=over - -=item B<-z>[I]|B<--compress>[=I] - -Use compression when writing the blocks to disk. (default: off) - -=item B<-h>|B<--help> - -=item B<-m>|B<--man> - -The short and longer help. - -=back - -=cut