bin/imager.save
changeset 137 dd11d1262b6c
parent 136 a5d087334439
child 138 790ac145bccc
equal deleted inserted replaced
136:a5d087334439 137:dd11d1262b6c
     1 #! /usr/bin/perl
       
     2 
       
     3 use 5.010;
       
     4 use strict;
       
     5 use warnings;
       
     6 use POSIX qw(strftime);
       
     7 use autodie qw(:all);
       
     8 use Digest::MD5 qw(md5_hex);
       
     9 use File::Path qw(mkpath);
       
    10 use File::Basename;
       
    11 use File::Temp;
       
    12 use Sys::Hostname;
       
    13 use IO::Compress::Gzip qw(gzip $GzipError :level :strategy);
       
    14 use Hash::Util qw(lock_keys);
       
    15 use Getopt::Long;
       
    16 use Pod::Usage;
       
    17 use Imager 0.1;
       
    18 use if $^V >= v5.18 => (experimental => qw'lexical_topic smartmatch');
       
    19 
       
    20 use constant KiB     => 1024;
       
    21 use constant MiB     => 1024 * KiB;
       
    22 use constant GiB     => 1024 * MiB;
       
    23 use constant BS      => 4 * MiB;
       
    24 use constant DATEFMT => "%Y-%m-%dT%H:%M:%SZ";
       
    25 use constant CIPHER  => "aes-128-cbc";
       
    26 
       
    27 sub get_devsize;
       
    28 sub get_devname;
       
    29 sub save;
       
    30 
       
    31 $SIG{INT} = sub { die "Got INT\n" };
       
    32 
       
    33 my %o = (
       
    34     compress  => undef,
       
    35     verbose   => undef,
       
    36     blocksize => BS,
       
    37     pass      => undef,
       
    38     comment   => undef,
       
    39     now       => time(),
       
    40 );
       
    41 lock_keys(%o);
       
    42 
       
    43 MAIN: {
       
    44     GetOptions(
       
    45         "h|help" => sub { pod2usage(-verbose => 1, exit => 0) },
       
    46         "m|man"  => sub {
       
    47             pod2usage(
       
    48                 -verbose   => 2,
       
    49                 exit       => 0,
       
    50                 -noperldoc => system("perldoc -V >/dev/null 2>&1")
       
    51             );
       
    52         },
       
    53         "c|comment=s"   => \$o{comment},
       
    54         "z|compress:i"  => sub { $o{compress} = $_[1] ? $_[1] : Z_BEST_SPEED },
       
    55         "p|pass=s"      => \$o{pass},
       
    56         "now=i"         => \$o{now},
       
    57         "b|blocksize=s" => sub {
       
    58             given ($_[1]) {
       
    59                 when (/(\d+)G/i) { $o{blocksize} = $1 * GiB };
       
    60                 when (/(\d+)M/i) { $o{blocksize} = $1 * MiB };
       
    61                 when (/(\d+)K/i) { $o{blocksize} = $1 * KiB };
       
    62                 when (/^(\d+)$/) { $o{blocksize} = $1 };
       
    63                 default {
       
    64                     die "Blocksize $_[1] is incorrect!\n"
       
    65                 };
       
    66             }
       
    67         },
       
    68       )
       
    69       and @ARGV >= 2
       
    70       or pod2usage;
       
    71 
       
    72     my $dst = pop @ARGV;
       
    73     foreach my $src (@ARGV) {
       
    74         if (my $pid = fork()) {
       
    75             next;
       
    76         }
       
    77         elsif (not defined $pid) {
       
    78             die "Can't fork: $!\n";
       
    79         }
       
    80         save($src, $dst);
       
    81         exit;
       
    82     }
       
    83 
       
    84     my $rc = 0;
       
    85     while (wait != -1) {
       
    86         $rc = ($? >> 8) if ($? >> 8) > $rc;
       
    87     }
       
    88     exit $rc;
       
    89 
       
    90 }
       
    91 
       
    92 sub save {
       
    93     my ($src, $dst) = @_;
       
    94     my $idx  = "{DIR}/idx/{HOSTNAME}/{DEVICE}/";
       
    95     my $data = "{DIR}/data";
       
    96     my $info = "{DIR}/data/info";
       
    97     my ($size, $name);
       
    98 
       
    99     if ($src =~ /(?<dev>.+?):(?<name>.+)/) {
       
   100         $src  = $+{dev};
       
   101         $name = $+{name};
       
   102     }
       
   103     else { $name = $src }
       
   104 
       
   105     foreach ($idx, $data, $info) {
       
   106         s/{DIR}/$dst/g;
       
   107         s/{HOSTNAME}/hostname/eg;
       
   108         s/{DEVICE}/$name/g;
       
   109     }
       
   110     $size = get_devsize($src);
       
   111 
       
   112     -d $dst or die "$0: $dst: $!\n";
       
   113     mkpath([$data, $idx, $info]);
       
   114 
       
   115     my %index;
       
   116     $index{META} = {
       
   117         format     => 1,
       
   118         host       => hostname,
       
   119         filesystem => $src,
       
   120         blocksize  => $o{blocksize},
       
   121         devsize    => $size,
       
   122         timestamp  => $o{now},
       
   123         datetime   => strftime(DATEFMT, gmtime $o{now}),
       
   124         (defined $o{comment} ? (comment => $o{comment}) : ()),
       
   125         encryption => $o{pass} ? CIPHER : "none",
       
   126     };
       
   127 
       
   128     open(my $in => $src);
       
   129     binmode($in);
       
   130     local $| = 1;
       
   131 
       
   132     my %stats = (
       
   133         written => 0,
       
   134         skipped => 0,
       
   135         todo    => 1 + int($size / $o{blocksize}),
       
   136     );
       
   137 
       
   138     local $SIG{ALRM} = sub {
       
   139         my $speed = ($stats{written} + $stats{skipped}) / (time - $^T + 1);
       
   140         say sprintf
       
   141 "# %*s done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)",
       
   142           (sort { $a <=> $b } map { length basename $_ } @ARGV)[-1] =>
       
   143           basename($name),
       
   144           100 * (($stats{written} + $stats{skipped}) / $stats{todo}),
       
   145           ($speed ? (scalar localtime($^T + $stats{todo} / $speed)) : ""),
       
   146           length($stats{todo}) => $stats{written} + $stats{skipped},
       
   147           length($stats{todo}) => $stats{written},
       
   148           length($stats{todo}) => $stats{skipped};
       
   149         alarm(5);
       
   150     };
       
   151     $SIG{ALRM}->();
       
   152 
       
   153     for (
       
   154         my $blknr = 0 ;
       
   155         sysread($in => my $buffer, $o{blocksize}) > 0 ;
       
   156         ++$blknr
       
   157       )
       
   158     {
       
   159 
       
   160         my ($file, $ext, $cs);
       
   161         $file = $cs = md5_hex($buffer);
       
   162         $file =~ s/(?<fn>(?<prefix>...).*)/$+{prefix}\/$+{fn}/g;
       
   163         $ext .= $o{pass} ? ".x" : "";
       
   164 
       
   165         # the extension we do not put into the index
       
   166         push @{ $index{BLOCKS} }, sprintf "%12d %s %s" => $blknr,
       
   167           $cs,                    $file;
       
   168 
       
   169         if (not Imager::get_file("$data/$file")) {
       
   170             mkpath dirname("$data/$file");
       
   171             my $out = File::Temp->new(
       
   172                 TEMPLATE => "tmp-XXXXXXX",
       
   173                 DIR      => dirname("$data/$file")
       
   174             );
       
   175 
       
   176             if ($o{pass}) {
       
   177                 open($out, "|openssl @{[CIPHER]} -pass $o{pass} -out $out");
       
   178             }
       
   179             binmode($out);
       
   180 
       
   181             my $bufref = \$buffer;
       
   182             if ($o{compress}) {
       
   183                 my $zbuffer;
       
   184                 gzip(
       
   185                     \$buffer  => \$zbuffer,
       
   186                     -Minimal  => 1,
       
   187                     -Level    => Z_BEST_SPEED,
       
   188                     -Strategy => Z_FILTERED
       
   189                 ) or die $GzipError;
       
   190                 if (length($zbuffer) / length($buffer) < 0.9) {
       
   191                     $bufref = \$zbuffer;
       
   192                     $ext    = ".gz$ext";
       
   193                 }
       
   194             }
       
   195 
       
   196             #for(my $todo = length $$bufref;
       
   197             #    $todo -= syswrite $out => $$bufref, $todo, -$todo; 1)
       
   198             #{
       
   199             #}
       
   200             syswrite $out => $$bufref or die "$0: write: $!\n";
       
   201             close($out) or die "$0: close output file: $!";
       
   202 
       
   203             rename($out => "$data/$file$ext");
       
   204             $index{BLOCKS}[$blknr] .= " *";
       
   205             $stats{written}++;
       
   206         }
       
   207         else {
       
   208             $stats{skipped}++;
       
   209         }
       
   210     }
       
   211     $SIG{ALRM}->();
       
   212     alarm 0;
       
   213 
       
   214     $index{META}{blocks}  = @{ $index{BLOCKS} };
       
   215     $index{META}{runtime} = time() - $^T . "s";
       
   216 
       
   217     my $index = File::Temp->new(DIR => $idx);
       
   218     say $index join "\n" => "# imager",
       
   219       (map { "$_: $index{META}{$_}" } sort(keys %{ $index{META} })),
       
   220       "",
       
   221       @{ $index{BLOCKS} };
       
   222     close($index);
       
   223     rename $index->filename => "$idx/" . strftime(DATEFMT, gmtime $o{now});
       
   224 
       
   225     say "# $src DONE (runtime " . (time() - $^T) . "s)";
       
   226     say "# $src WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks";
       
   227     say "# $src SAVINGS "
       
   228       . sprintf "%3d%%" => 100 *
       
   229       ($stats{skipped} / ($stats{written} + $stats{skipped}));
       
   230 
       
   231 }
       
   232 
       
   233 sub get_devsize {
       
   234     my ($devname) = @_;
       
   235     open(my $fh => $devname);
       
   236     seek($fh, 0, 2);
       
   237     return tell($fh);
       
   238 }
       
   239 
       
   240 sub get_devname {
       
   241     my $_ = shift;
       
   242     s/^\/dev\///;
       
   243     s/_/__/g;
       
   244     s/\//_/g;
       
   245     return $_;
       
   246 }
       
   247 
       
   248 __END__
       
   249 
       
   250 =head1 NAME
       
   251 
       
   252     imager.save - create a block device snapshot
       
   253 
       
   254 =head1 SYNOPSIS
       
   255 
       
   256     imager.save [options] {device}[:name] {destination}
       
   257 
       
   258 =head1 DESCRIPTION
       
   259 
       
   260 This tool creates a snapshot of a blockdevice.
       
   261 Just call it like
       
   262 
       
   263     imager.save /dev/sda1 /media/backup
       
   264 
       
   265 This will create F</media/backup/{data,idx}>, if not already existing.
       
   266 The index (blocklist) goes to
       
   267 I<destination>F</idx/>I<hostname>F</>I<devicename>.  The data goes to
       
   268 I<destination>/F<data/>.
       
   269 
       
   270 If :I<name> is appended on to the device name, the blocklist file and
       
   271 the data directory are named acording to this I<name>, not the original
       
   272 device name. You may welcome this extension if you save LVM snapshots
       
   273 or simiar stuff.
       
   274 
       
   275 =head1 OPTIONS
       
   276 
       
   277 =over
       
   278 
       
   279 =item B<-b>|B<--blocksize> I<blocksize>
       
   280 
       
   281 The blocksize used. (may be suffixed with K, M, G). (default: 4 MiB,
       
   282 or taken from F<data/info/blocksize>)
       
   283 
       
   284 =item B<-c>|B<--comment> I<comment>
       
   285 
       
   286 Comment to be included in the header of the index file. (default: none)
       
   287 
       
   288 =item B<--now> I<timestamp>
       
   289 
       
   290 Set the timestamp used for naming the idx files. (default: now)
       
   291 
       
   292 =item B<-p>|B<--pass> I<pass>
       
   293 
       
   294 Use symmetric encryption for writing the data blocks. This option
       
   295 is passed to L<openssl(1)>.
       
   296 
       
   297 =item B<-z>|B<--compress> [I<level>]
       
   298 
       
   299 Use compression when writing the blocks to disk. B<NOTE:> There may
       
   300 remain uncompressed files, since we only save compressed data if we 
       
   301 can save more then 10% of the size. (default: off)
       
   302 
       
   303 =item B<-h>|B<--help>
       
   304 
       
   305 =item B<-m>|B<--man>
       
   306 
       
   307 The short and longer help. 
       
   308 
       
   309 =back
       
   310 
       
   311 =head1 PERFORMANCE
       
   312 
       
   313 Some experiments have shown that if compression and encryption is used,
       
   314 about 1/3 of the time is consumed by the encryption, and 2/3 are used
       
   315 for compression. The compression is done before(!) encrypting the file,
       
   316 since otherwise there is almost no benefit in compressing an encrypted
       
   317 file!
       
   318 
       
   319 =cut