#! /usr/bin/perl

use 5.010;
use strict;
use warnings;
use POSIX qw(strftime);
use autodie qw(:all);
use Digest::MD5 qw(md5_hex);
use File::Path qw(mkpath);
use File::Basename;
use File::Temp;
use Sys::Hostname;
use IO::Compress::Gzip qw(gzip $GzipError :level :strategy);
use Hash::Util qw(lock_keys);
use Getopt::Long;
use Pod::Usage;


use constant KiB      => 1024;
use constant MiB      => 1024 * KiB;
use constant GiB      => 1024 * MiB;
use constant NOW      => time();
use constant DATETIME => strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime(NOW));

sub get_devsize;
sub get_devname;

$SIG{INT} = sub { die "Got INT\n" };

my %o = (
    compress  => undef,
    verbose   => undef,
    blocksize => 4 * MiB,
);
lock_keys(%o);

my $NOW = time();

MAIN: {
    my ($src, $dst);

    my $idx  = "{DIR}/idx/{HOSTNAME}/{DEVICE}/";
    my $data = "{DIR}/data";
    my $size;

    GetOptions(
        "h|help" => sub { pod2usage(-verbose => 1, exit => 0) },
        "m|man"  => sub {
            pod2usage(
                -verbose   => 2,
                exit       => 0,
                -noperldoc => system("perldoc -V >/dev/null 2>&1")
            );
        },
        "z|compress:i" => sub { $o{compress} = $_[1] ? $_[1] : Z_BEST_SPEED },
        "b|blocksize=s" => sub {
            given ($_[1]) {
                when (/(\d+)G/i) { $o{blocksize} = $1 * GiB };
                when (/(\d+)M/i) { $o{blocksize} = $1 * MiB };
                when (/(\d+)K/i) { $o{blocksize} = $1 * KiB };
                when (/^(\d+)$/) { $o{blocksize} = $1 };
                default {
                    die "Blocksize $_[1] is incorrect!\n"
                };
            }
        },
      )
      and @ARGV == 2
      or pod2usage;
    ($src, $dst) = @ARGV;

    foreach ($idx, $data) {
        s/{DIR}/$dst/g;
        s/{HOSTNAME}/hostname/eg;
        s/{DEVICE}/get_devname($src)/eg;
    }
    $size = get_devsize($src);

    -d $dst or die "$0: $dst: $!\n";
    mkpath([$data, $idx]);

    my $index = File::Temp->new(DIR => $idx);
    print {$index} <<__EOT;
# imager
format: 1
host: @{[hostname]}
filesystem: $src
blocksize: $o{blocksize}
devsize: $size
timestamp: @{[NOW]}
datetime: @{[DATETIME]}

__EOT

    open(my $in => $src);
    binmode($in);
    local $/ = \(my $bs = $o{blocksize});
    local $| = 1;

    my %stats = (
        written => 0,
        skipped => 0,
        todo    => 1 + int($size / $o{blocksize}),
    );

    local $SIG{ALRM} = sub {
        my $speed = ($stats{written} + $stats{skipped}) / (time - $^T + 1);
        say sprintf
"# done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)",
          100 * (($stats{written} + $stats{skipped}) / $stats{todo}),
          ($speed ? (scalar localtime($^T + $stats{todo} / $speed)) : ""),
          length($stats{todo}) => $stats{written} + $stats{skipped},
          length($stats{todo}) => $stats{written},
          length($stats{todo}) => $stats{skipped};
        alarm(5);
    };
    $SIG{ALRM}->();

    while (my $buffer = <$in>) {
	my ($file, $ext, $cs);
	$file = $cs = md5_hex($buffer);
	$file =~ s/(?<fn>(?<prefix>...).*)/$+{prefix}\/$+{fn}/g;
	$ext = $o{compress} ? ".gz" : "";

        # the extension we do not put into the index
        my $log = sprintf "%12d %s %s" => ($. - 1), $cs, $file;

        if (not(-e "$data/$file" or -e "$data/$file.gz")) {
            mkpath dirname("$data/$file.gz");
            my $out = File::Temp->new(
                TEMPLATE => ".XXXXXXX",
                DIR      => dirname("$data/$file")
            );
            binmode($out);
            if ($o{compress}) {
                gzip(
                    \$buffer  => $out,
                    -Minimal  => 1,
                    -Level    => Z_BEST_SPEED,
                    -Strategy => Z_FILTERED
                ) or die $GzipError;
            }
            else { print {$out} $buffer }
            close($out);
            rename($out => "$data/$file$ext");
            $log .= " *";
            $stats{written}++;
        }
        else {
            $log .= "  ";
            $stats{skipped}++;
        }

        say {$index} $log;
    }
    $SIG{ALRM}->();
    alarm 0;

    say {$index} "# DONE (runtime " . (time() - $^T) . "s)";

    say "# DONE (runtime " . (time() - $^T) . "s)";
    say "# WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks";
    say "# SAVINGS "
      . sprintf "%3d%%" => 100 *
      ($stats{skipped} / ($stats{written} + $stats{skipped}));

    rename $index->filename => "$idx/" . DATETIME;
    close $index;

}

sub get_devsize {
    my ($devname) = @_;
    open(my $fh => $devname);
    seek($fh, 0, 2);
    return tell($fh);
}

sub get_devname {
    my $_ = shift;
    s/^\/dev\///;
    s/_/__/g;
    s/\//_/g;
    return $_;
}

__END__

=head1 NAME

    imager - create a block device snapshot

=head1 SYNOPSIS

    imager [options] {device} {destination}

=head1 DESCRIPTION

This tool creates a snapshot of a blockdevice.
Just call it like

    imager /dev/sda1 /media/backup

This will create F</media/backup/{data,idx}>, if not already existing.
The index (blocklist) goes to
I<destination>F</idx/>I<hostname>F</>I<devicename>.  The data goes to
I<destination>/F<data/>.

=head1 OPTIONS

=over

=item B<-z> [I<level>]|B<--compress>[=I<level>]

Use compression when writing the blocks to disk. (default: off)

=item B<-b> I<blocksize>|B<--blocksize>=I<blocksize>

The blocksize used. (may be suffixed with K, M, G). (default: 4 MiB)

=item B<-h>|B<--help>

=item B<-m>|B<--man>

The short and longer help. 

=back

=cut
