# HG changeset patch # User Heiko Schlittermann (JUMPER) # Date 1311943925 -7200 # Node ID 02ef2d1b190aa06964067ff4c8f2e106d57ae54b # Parent 4a1820d504c403a7252bc4461a0d8ab3593a8017# Parent 221af7ffe05027d6ba212d81cb38601bafacdd18 [merged] and parallelisation in save diff -r 4a1820d504c4 -r 02ef2d1b190a bin/imager diff -r 4a1820d504c4 -r 02ef2d1b190a bin/imager.fuse --- a/bin/imager.fuse Fri Jul 29 11:09:36 2011 +0200 +++ b/bin/imager.fuse Fri Jul 29 14:52:05 2011 +0200 @@ -22,9 +22,18 @@ lock_keys %o; use constant ME => basename $0; +use constant BS => 4 * 1024; + my ($DATA, $IDX); sub tie_vars; +sub min { (sort {$a <=> $b} @_)[0] } +sub max { (sort {$a <=> $b} @_)[-1] } +my $debug = sub { print STDERR @_ }; + $debug = sub { }; + + +#$SIG{INT} = sub { warn "Got ^C or INT signal\n"; exit 1; }; MAIN: { @@ -57,7 +66,6 @@ $0 = "FUSE $src $mp"; open(STDOUT => ">/dev/null"); open(STDIN => "/dev/null"); - setpgid($$ => $$); } @@ -71,6 +79,7 @@ open => \&openfile, read => \&readbuffer, write => \&writebuffer, + release => \&release, ); exit; @@ -133,6 +142,13 @@ return 0; } + sub release { + my $path = $IDX . shift; + return 0 if not exists $IMAGE{$path}; + $debug->("Currently we have " . keys(%DIRTY) . " dirty blocks\n"); + return 0; + } + sub readbuffer { my $path = $IDX . shift; my ($size, $offset) = @_; @@ -149,36 +165,65 @@ sub _readblock { my ($finfo, $size, $offset) = @_; + my ($block, $blockoffset, $length); - my $block = int($offset / $finfo->{meta}{blocksize}); - my $blockoffset = $offset % $finfo->{meta}{blocksize}; + $debug->("<<< block offset:$offset size:$size\n"); + $debug->( " block @{[int($offset/BS)]} + @{[$offset % BS]}\n"); - my $length = $finfo->{meta}{blocksize} - $blockoffset; - $length = $size if $size <= $length; + # first check if it's an dirty block + $block = int($offset / BS); + if (exists $DIRTY{ $finfo . $block }) { + $blockoffset = $offset % BS; + $length = min(BS - $blockoffset, $size); - if (exists $DIRTY{ $finfo . $block }) { + $debug->("+++ dirty offset:$block*@{[BS]} + $blockoffset size:$length\n"); return substr $DIRTY{ $finfo . $block }, $blockoffset, $length; } + + # if not dirty, we've to find it on disk + + $block = int($offset / $finfo->{meta}{blocksize}); + $blockoffset = $offset % $finfo->{meta}{blocksize}; + $length = min($finfo->{meta}{blocksize} - $blockoffset, $size); + + # find the max length we can satisfy w/o colliding + # with dirty blocks + for (my $l = BS; $l < $length; $l += BS) { + my $b = int(($offset + $l)/BS); + if ($DIRTY{$finfo . $b}) { + $length = $l; + last; + } + } + + $debug->("=== $length\n"); + $debug->("+++ disk offset:$block*$finfo->{meta}{blocksize} + $blockoffset size:$length\n"); + my $fn = "$DATA/" . $finfo->{blocklist}{$block}; - if (-e $fn) { - open(my $fh => $fn); - binmode($fh); - seek($fh => $blockoffset, 0) or die "seek: $!"; - local $/ = \$length; - return scalar <$fh>; - } - elsif (-e "$fn.gz") { - open(my $fh => "$fn.gz"); - binmode($fh); - my $buffer; - gunzip($fh => \$buffer) - or die $GunzipError; - close($fh); - return substr($buffer, $blockoffset, $size); - } + + state %cache; + if (not defined $cache{fn} + or ($cache{fn} ne $fn)) { - die "$fn: $!\n"; + if (-e $fn) { + open(my $fh => $fn); + binmode($fh); + local $/ = undef; + $cache{data} = <$fh>; + } + elsif (-e "$fn.gz") { + open(my $fh => "$fn.gz"); + binmode($fh); + gunzip($fh => \$cache{data}) + or die $GunzipError; + } + $cache{fn} = $fn; + } + + return substr($cache{data}, $blockoffset, $size); + die "$fn: $!\n"; + } sub writebuffer { @@ -201,22 +246,22 @@ sub _writeblock { my ($finfo, $buffer, $offset) = @_; + my ($block, $blockoffset, $length); my $size = length($buffer); - my $block = int($offset / $finfo->{meta}{blocksize}); - my $blockoffset = $offset % $finfo->{meta}{blocksize}; + $block = int($offset / BS); + $blockoffset = $offset % BS; + $length = min(BS - $blockoffset, $size); + + $debug->(">>> offset:$offset size:$length of $size\n"); + $debug->(" block @{[int($offset/BS)]} + @{[$offset % BS]}\n"); if (not exists $DIRTY{ $finfo . $block }) { + $debug->("+++ missing $block+$blockoffset\n"); $DIRTY{ $finfo . $block } = _readblock( - $finfo, - $finfo->{meta}{blocksize}, - $block * $finfo->{meta}{blocksize} - ); + $finfo, BS, $block * BS); } - my $length = $finfo->{meta}{blocksize} - $blockoffset; - $length = $size if $size < $length; - substr($DIRTY{ $finfo . $block }, $blockoffset, $length) = substr($buffer, 0, $length); diff -r 4a1820d504c4 -r 02ef2d1b190a bin/imager.save --- a/bin/imager.save Fri Jul 29 11:09:36 2011 +0200 +++ b/bin/imager.save Fri Jul 29 14:52:05 2011 +0200 @@ -15,6 +15,7 @@ use Getopt::Long; use Pod::Usage; + use constant KiB => 1024; use constant MiB => 1024 * KiB; use constant GiB => 1024 * MiB; @@ -23,25 +24,20 @@ sub get_devsize; sub get_devname; +sub save; $SIG{INT} = sub { die "Got INT\n" }; my %o = ( compress => undef, verbose => undef, - blocksize => 2 * MiB, + blocksize => 4 * MiB, ); lock_keys(%o); my $NOW = time(); MAIN: { - my ($src, $dst); - - my $idx = "{DIR}/idx/{HOSTNAME}/{DEVICE}/"; - my $data = "{DIR}/data"; - my $size; - GetOptions( "h|help" => sub { pod2usage(-verbose => 1, exit => 0) }, "m|man" => sub { @@ -64,9 +60,29 @@ } }, ) - and @ARGV == 2 - or pod2usage; - ($src, $dst) = @ARGV; + and @ARGV >= 2 or pod2usage; + + my $dst = pop @ARGV; + foreach my $src (@ARGV) { + if (my $pid = fork()) { + next; + } + elsif (not defined $pid) { + die "Can't fork: $!\n" + } + save($src, $dst); + exit; + } + + do 1 while wait != -1; + +} + +sub save { + my ($src, $dst) = @_; + my $idx = "{DIR}/idx/{HOSTNAME}/{DEVICE}/"; + my $data = "{DIR}/data"; + my $size; foreach ($idx, $data) { s/{DIR}/$dst/g; @@ -105,7 +121,8 @@ local $SIG{ALRM} = sub { my $speed = ($stats{written} + $stats{skipped}) / (time - $^T + 1); say sprintf -"# done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)", +"# %*s done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)", + (sort {$a<=>$b} map { length basename $_ } @ARGV)[-1] => basename($src), 100 * (($stats{written} + $stats{skipped}) / $stats{todo}), ($speed ? (scalar localtime($^T + $stats{todo} / $speed)) : ""), length($stats{todo}) => $stats{written} + $stats{skipped}, @@ -157,9 +174,9 @@ say {$index} "# DONE (runtime " . (time() - $^T) . "s)"; - say "# DONE (runtime " . (time() - $^T) . "s)"; - say "# WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks"; - say "# SAVINGS " + say "# $src DONE (runtime " . (time() - $^T) . "s)"; + say "# $src WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks"; + say "# $src SAVINGS " . sprintf "%3d%%" => 100 * ($stats{skipped} / ($stats{written} + $stats{skipped})); @@ -215,7 +232,7 @@ =item B<-b> I|B<--blocksize>=I -The blocksize used. (may be suffixed with K, M, G). (default: 2MiB) +The blocksize used. (may be suffixed with K, M, G). (default: 4 MiB) =item B<-h>|B<--help> diff -r 4a1820d504c4 -r 02ef2d1b190a t/000-syntax.t --- a/t/000-syntax.t Fri Jul 29 11:09:36 2011 +0200 +++ b/t/000-syntax.t Fri Jul 29 14:52:05 2011 +0200 @@ -6,7 +6,7 @@ use File::Find; my @scripts; -find(sub { push @scripts, $File::Find::name if -f and -x }, "blib"); +find(sub { /^\./ and return; push @scripts, $File::Find::name if -f and -x }, "blib"); plan tests => scalar @scripts;