[merged] and parallelisation in save
author Heiko Schlittermann (JUMPER) <hs@schlittermann.de>
Fri, 29 Jul 2011 14:52:05 +0200
changeset 32 02ef2d1b190a
parent 29 4a1820d504c4 (current diff)
parent 31 221af7ffe050 (diff)
child 34 3c71ae4facca
[merged] and parallelisation in save
bin/imager
bin/imager.fuse
bin/imager.save
t/000-syntax.t
--- a/bin/imager.fuse	Fri Jul 29 11:09:36 2011 +0200
+++ b/bin/imager.fuse	Fri Jul 29 14:52:05 2011 +0200
@@ -22,9 +22,18 @@
 lock_keys %o;
 
 use constant ME => basename $0;
+use constant BS => 4 * 1024;
+
 my ($DATA, $IDX);
 
 sub tie_vars;
+sub min { (sort {$a <=> $b} @_)[0] }
+sub max { (sort {$a <=> $b} @_)[-1] }
+my $debug = sub { print STDERR @_ };
+   $debug = sub { };
+
+
+#$SIG{INT} = sub { warn "Got ^C or INT signal\n"; exit 1; };
 
 MAIN: {
 
@@ -57,7 +66,6 @@
         $0 = "FUSE $src $mp";
         open(STDOUT => ">/dev/null");
         open(STDIN  => "/dev/null");
-
         setpgid($$ => $$);
     }
 
@@ -71,6 +79,7 @@
         open       => \&openfile,
         read       => \&readbuffer,
         write      => \&writebuffer,
+	release	   => \&release,
     );
 
     exit;
@@ -133,6 +142,13 @@
         return 0;
     }
 
+    sub release {
+	my $path = $IDX . shift;
+	return 0 if not exists $IMAGE{$path};
+	$debug->("Currently we have " . keys(%DIRTY) . " dirty blocks\n");
+	return 0;
+    }
+
     sub readbuffer {
         my $path = $IDX . shift;
         my ($size, $offset) = @_;
@@ -149,36 +165,65 @@
 
     sub _readblock {
         my ($finfo, $size, $offset) = @_;
+	my ($block, $blockoffset, $length);
 
-        my $block       = int($offset / $finfo->{meta}{blocksize});
-        my $blockoffset = $offset % $finfo->{meta}{blocksize};
+	$debug->("<<< block offset:$offset size:$size\n");
+	$debug->( "    block @{[int($offset/BS)]} + @{[$offset % BS]}\n");
 
-        my $length = $finfo->{meta}{blocksize} - $blockoffset;
-        $length = $size if $size <= $length;
+	# first check if it's a dirty block
+        $block       = int($offset / BS);
+        if (exists $DIRTY{ $finfo . $block }) {
+	    $blockoffset = $offset % BS;
+	    $length = min(BS - $blockoffset, $size);
 
-        if (exists $DIRTY{ $finfo . $block }) {
+	    $debug->("+++ dirty offset:$block*@{[BS]} + $blockoffset size:$length\n");
             return substr $DIRTY{ $finfo . $block }, $blockoffset, $length;
         }
 
+
+	# if not dirty, we have to find it on disk
+
+	$block = int($offset / $finfo->{meta}{blocksize});
+	$blockoffset = $offset % $finfo->{meta}{blocksize};
+	$length = min($finfo->{meta}{blocksize} - $blockoffset, $size);
+
+	# find the max length we can satisfy w/o colliding 
+	# with dirty blocks
+	for (my $l = BS; $l < $length; $l += BS) {
+	    my $b = int(($offset + $l)/BS);
+	    if ($DIRTY{$finfo . $b}) {
+		$length = $l;
+		last;
+	    }
+	}
+
+	$debug->("=== $length\n");
+	$debug->("+++ disk offset:$block*$finfo->{meta}{blocksize} + $blockoffset size:$length\n");
+
         my $fn = "$DATA/" . $finfo->{blocklist}{$block};
-        if (-e $fn) {
-            open(my $fh => $fn);
-            binmode($fh);
-            seek($fh => $blockoffset, 0) or die "seek: $!";
-            local $/ = \$length;
-            return scalar <$fh>;
-        }
-        elsif (-e "$fn.gz") {
-            open(my $fh => "$fn.gz");
-            binmode($fh);
-            my $buffer;
-            gunzip($fh => \$buffer)
-              or die $GunzipError;
-            close($fh);
-            return substr($buffer, $blockoffset, $size);
-        }
+
+	state %cache;
+	if (not defined $cache{fn} 
+	    or ($cache{fn} ne $fn)) {
 
-        die "$fn: $!\n";
+	    if (-e $fn) {
+		open(my $fh => $fn);
+		binmode($fh);
+		local $/ = undef;
+		$cache{data} = <$fh>;
+	    }
+	    elsif (-e "$fn.gz") {
+		open(my $fh => "$fn.gz");
+		binmode($fh);
+		gunzip($fh => \$cache{data})
+		      or die $GunzipError;
+	    }
+	    $cache{fn} = $fn;
+	}
+
+	return substr($cache{data}, $blockoffset, $size);
+	die "$fn: $!\n";
+        
     }
 
     sub writebuffer {
@@ -201,22 +246,22 @@
 
     sub _writeblock {
         my ($finfo, $buffer, $offset) = @_;
+	my ($block, $blockoffset, $length);
         my $size = length($buffer);
 
-        my $block       = int($offset / $finfo->{meta}{blocksize});
-        my $blockoffset = $offset % $finfo->{meta}{blocksize};
+	$block = int($offset / BS);
+	$blockoffset = $offset % BS;
+        $length = min(BS - $blockoffset, $size);
+
+	$debug->(">>> offset:$offset size:$length of $size\n");
+	$debug->("    block @{[int($offset/BS)]} + @{[$offset % BS]}\n");
 
         if (not exists $DIRTY{ $finfo . $block }) {
+	    $debug->("+++ missing $block+$blockoffset\n");
             $DIRTY{ $finfo . $block } = _readblock(
-                $finfo,
-                $finfo->{meta}{blocksize},
-                $block * $finfo->{meta}{blocksize}
-            );
+                $finfo, BS, $block * BS);
         }
 
-        my $length = $finfo->{meta}{blocksize} - $blockoffset;
-        $length = $size if $size < $length;
-
         substr($DIRTY{ $finfo . $block }, $blockoffset, $length) =
           substr($buffer, 0, $length);
 
--- a/bin/imager.save	Fri Jul 29 11:09:36 2011 +0200
+++ b/bin/imager.save	Fri Jul 29 14:52:05 2011 +0200
@@ -15,6 +15,7 @@
 use Getopt::Long;
 use Pod::Usage;
 
+
 use constant KiB      => 1024;
 use constant MiB      => 1024 * KiB;
 use constant GiB      => 1024 * MiB;
@@ -23,25 +24,20 @@
 
 sub get_devsize;
 sub get_devname;
+sub save;
 
 $SIG{INT} = sub { die "Got INT\n" };
 
 my %o = (
     compress  => undef,
     verbose   => undef,
-    blocksize => 2 * MiB,
+    blocksize => 4 * MiB,
 );
 lock_keys(%o);
 
 my $NOW = time();
 
 MAIN: {
-    my ($src, $dst);
-
-    my $idx  = "{DIR}/idx/{HOSTNAME}/{DEVICE}/";
-    my $data = "{DIR}/data";
-    my $size;
-
     GetOptions(
         "h|help" => sub { pod2usage(-verbose => 1, exit => 0) },
         "m|man"  => sub {
@@ -64,9 +60,29 @@
             }
         },
       )
-      and @ARGV == 2
-      or pod2usage;
-    ($src, $dst) = @ARGV;
+      and @ARGV >= 2 or pod2usage;
+
+    my $dst = pop @ARGV;
+    foreach my $src (@ARGV) {
+	if (my $pid = fork()) {
+	    next;
+	}
+	elsif (not defined $pid) {
+	    die "Can't fork: $!\n"
+	}
+	save($src, $dst);
+	exit;
+    }
+
+    do 1 while wait != -1;
+
+}
+
+sub save {
+    my ($src, $dst) = @_;
+    my $idx  = "{DIR}/idx/{HOSTNAME}/{DEVICE}/";
+    my $data = "{DIR}/data";
+    my $size;
 
     foreach ($idx, $data) {
         s/{DIR}/$dst/g;
@@ -105,7 +121,8 @@
     local $SIG{ALRM} = sub {
         my $speed = ($stats{written} + $stats{skipped}) / (time - $^T + 1);
         say sprintf
-"# done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)",
+"# %*s done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)",
+	  (sort {$a<=>$b} map { length basename $_ } @ARGV)[-1] => basename($src),
           100 * (($stats{written} + $stats{skipped}) / $stats{todo}),
           ($speed ? (scalar localtime($^T + $stats{todo} / $speed)) : ""),
           length($stats{todo}) => $stats{written} + $stats{skipped},
@@ -157,9 +174,9 @@
 
     say {$index} "# DONE (runtime " . (time() - $^T) . "s)";
 
-    say "# DONE (runtime " . (time() - $^T) . "s)";
-    say "# WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks";
-    say "# SAVINGS "
+    say "# $src DONE (runtime " . (time() - $^T) . "s)";
+    say "# $src WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks";
+    say "# $src SAVINGS "
       . sprintf "%3d%%" => 100 *
       ($stats{skipped} / ($stats{written} + $stats{skipped}));
 
@@ -215,7 +232,7 @@
 
 =item B<-b> I<blocksize>|B<--blocksize>=I<blocksize>
 
-The blocksize used. (may be suffixed with K, M, G). (default: 2MiB)
+The blocksize used. (may be suffixed with K, M, G). (default: 4 MiB)
 
 =item B<-h>|B<--help>
 
--- a/t/000-syntax.t	Fri Jul 29 11:09:36 2011 +0200
+++ b/t/000-syntax.t	Fri Jul 29 14:52:05 2011 +0200
@@ -6,7 +6,7 @@
 use File::Find;
 
 my @scripts;
-find(sub { push @scripts, $File::Find::name if -f and -x }, "blib");
+find(sub { /^\./ and return; push @scripts, $File::Find::name if -f and -x }, "blib");
 
 plan tests => scalar @scripts;