#! /usr/bin/perl

use 5.010;
use strict;
use warnings;
use POSIX qw(strftime);
use autodie qw(:all);
use Digest::MD5 qw(md5_hex);
use File::Path qw(mkpath);
use File::Basename;
use File::Temp;
use Sys::Hostname;
use IO::Compress::Gzip qw(gzip $GzipError :level :strategy);
use Hash::Util qw(lock_keys);
use Getopt::Long;
use Pod::Usage;

use constant KiB => 1024;
use constant MiB => 1024 * KiB;
use constant GiB => 1024 * MiB;
use constant NOW => time();
use constant DATETIME => strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime(NOW));

sub get_devsize;
sub get_devname;

$SIG{INT} = sub { die "Got INT\n" };

my %o = (
    compress => undef,
    verbose => undef,
    blocksize => 4 * MiB,
); lock_keys(%o);

my $NOW = time();

MAIN: {
    my ($src, $dst);

    my $idx = "{DIR}/idx/{HOSTNAME}/{DEVICE}/";
    my $data = "{DIR}/data";
    my $size;

    GetOptions(
	"h|help" => sub { pod2usage(-verbose => 1, exit => 0) },
	"m|man" => sub  { pod2usage(-verbose => 2, exit => 0,
			 -noperldoc => system("perldoc -V >/dev/null 2>&1"))
		   },
	"z|compress:i" => sub { $o{compress} = $_[1] ? $_[1] : Z_BEST_SPEED },
	"b|blocksize=s" => sub {
	    given ($_[1]) {
		when (/(\d+)G/i) { $o{blocksize} = $1 * GiB };
		when (/(\d+)M/i) { $o{blocksize} = $1 * MiB };
		when (/(\d+)K/i) { $o{blocksize} = $1 * KiB };
		when (/^(\d+)$/) { $o{blocksize} = $1 };
		default	       { die "Blocksize $_[1] is incorrect!\n"
		};
	    }
	},
    ) and @ARGV == 2 or pod2usage;
    ($src, $dst) = @ARGV;

    foreach ($idx, $data) {
	s/{DIR}/$dst/g;
	s/{HOSTNAME}/hostname/eg;
	s/{DEVICE}/get_devname($src)/eg;
    }
    $size = get_devsize($src);

    -d $dst or die "$0: $dst: $!\n";
    mkpath([$data, $idx]);

    my $index = File::Temp->new(DIR => $idx);
    print {$index} <<__EOT;
# imager
format: 1
host: @{[hostname]}
filesystem: $src
blocksize: $o{blocksize}
devsize: $size
timestamp: @{[NOW]}
datetime: @{[DATETIME]}

__EOT

    open(my $in => $src);
    binmode($in);
    local $/ = \(my $bs = $o{blocksize});
    local $| = 1;

    my %stats = (
	written => 0,
	skipped => 0,
	todo => 1 + int($size / $o{blocksize}),
    );

    local $SIG{ALRM} = sub {
	my $speed = ($stats{written} + $stats{skipped}) / (time - $^T + 1);
	say sprintf "# done %5.1f%% | %24s (%*d of $stats{todo}, written %*d, skipped %*d)",
	    100 * (($stats{written}+$stats{skipped})/$stats{todo}), 
	    ($speed ? (scalar localtime($^T + $stats{todo} / $speed)) : ""),
	    length($stats{todo}) => $stats{written} + $stats{skipped},
	    length($stats{todo}) => $stats{written},
	    length($stats{todo}) => $stats{skipped};
	alarm(5);
    };
    $SIG{ALRM}->();

    while (my $buffer = <$in>) {
	my ($file, $ext, $cs);
	$file = $cs = md5_hex($buffer);
	$file =~ s/(?<fn>(?<prefix>...).*)/$+{prefix}\/$+{fn}/g;
	$ext = ".gz" if $o{compress};

	# the extension we do not put into the index
	my $log = sprintf "%12d %s %s" => ($.-1), $cs, $file;

	if (not (-e "$data/$file" or -e "$data/$file.gz")) {
	    mkpath dirname("$data/$file.gz");
	    my $out = File::Temp->new(TEMPLATE => ".XXXXXXX", DIR => dirname("$data/$file"));
	    binmode($out);
	    if ($o{compress}) { 
		gzip(\$buffer => $out, 
			-Minimal => 1, 
			-Level => Z_BEST_SPEED, 
			-Strategy => Z_FILTERED) 
		or die $GzipError 
	    } 
	    else { print {$out} $buffer }
	    close($out);
	    rename($out => "$data/$file$ext");
	    $log .= " *";
	    $stats{written}++;
	}
	else { 
	    $log .= "  "; 
	    $stats{skipped}++;
	}

	say {$index} $log;
    }
    $SIG{ALRM}->();
    alarm 0;

    say {$index} "# DONE (runtime " . (time() - $^T) . "s)";

    say "# DONE (runtime " . (time() - $^T) . "s)";
    say "# WRITTEN $stats{written}, SKIPPED $stats{skipped} blocks";
    say "# SAVINGS " 
	. sprintf "%3d%%" => 100 * ($stats{skipped}/($stats{written}+$stats{skipped}));

    rename $index->filename => "$idx/".DATETIME;
    close $index;

}

sub get_devsize {
    my ($devname) = @_;
    open(my $fh => $devname);
    seek($fh, 0, 2);
    return tell($fh);
}

sub get_devname {
    my $_ = shift;
    s/^\/dev\///;
    s/_/__/g;
    s/\//_/g;
    return $_;
}
    
__END__

=head1 NAME

    imager - create a block device snapshot

=head1 SYNOPSIS

    imager [options] {device} {destination}

=head1 DESCRIPTION

This tool creates a snapshot of a blockdevice.
Just call it like

    imager /dev/sda1 /media/backup

This will create F</media/backup/{data,idx}>, if not already existing.
The index (blocklist) goes to
I<destination>F</idx/>I<hostname>F</>I<devicename>.  The data goes to
I<destination>/F<data/>.

=head1 OPTIONS

=over

=item B<-z>[I<level>]|B<--compress>[=I<level>]

Use compression when writing the blocks to disk. (default: off)

=item B<-h>|B<--help>

=item B<-m>|B<--man>

The short and longer help. 

=back

=cut
