#! /usr/bin/perl
# (c) 2013 Heiko Schlittermann <hs@schlittermann.de>
# source: hg clone https://ssl.schlittermann.de/amanda-plugin-dumpext
#
# This script should be a plugin to use ext2/3/4 dump/restore via the
# APPLICATION interface of Amanda. The rationale behind is, that I'd like to use
# dump(8) with different dumpdates files for the different backup types
# (daily, weekly, …)
#
# The commands we need to support are required by the
# API:  http://wiki.zmanda.com/index.php/Application_API/Operations
#
# This script tries do be as standalone as possible.
# Though we need the tools dump/restore as they exists for the ext2/3/4
# filesystems.

use 5.010;
use strict;
use warnings;
use Pod::Usage;
use Getopt::Long;
use File::Basename;
use POSIX;

#use Readonly;

our $VERSION = '0.01';
my $ME = basename $0;

# to avoid stupid "not found"
$ENV{PATH} .= ':/usr/local/sbin:/usr/sbin:/sbin';

use constant YES       => 'YES';
use constant NO        => 'NO';
use constant DUMPDATES => '/var/lib/dumpdates';
use constant FD3       => 3;
use constant FD4       => 4;

$SIG{__DIE__} = sub { die "$ME: ", @_ };

my %SUPPORT = (
    CONFIG          => YES,    # --config … (default)
    DISK            => NO,     # --disk …
    HOST            => NO,     # --host …
    MAX_LEVEL       => 9,      # --level …
    INDEX_LINE      => YES,    # --index line
    MESSAGE_LINE    => YES,    # --message line
                               #
    CLIENT_ESTIMATE => YES,    # estimate
    MULTI_ESTIMATE  => YES,    # estimate for multiple levels
    CALCSIZE        => NO,     # estimate --calcsize
                               #
    RECORD          => YES,    # --record
);

sub exec_support;
sub exec_selfcheck;
sub exec_estimate;
sub exec_backup;

# some helper functions

sub device;
sub OK;
sub ERROR;

# bad but common style - the global options

my $opt_config;     # $config
my $opt_device;     # $device DLE[2]
my $opt_message;    # line / <>
my $opt_index;      # line / <>
my $opt_record;     # true / <>
my @opt_level;      # 0…99

my $opt_dumpdates;

MAIN: {
    my @argv = @ARGV;
    my $command = shift // pod2usage;
    GetOptions(
        'config=s'    => \$opt_config,
        'device=s'    => \$opt_device,       # --device $device
        'message=s'   => \$opt_message,      # --message line|xml
        'index=s'     => \$opt_index,        # --index line
        'record!'     => \$opt_record,       # --record
        'level=i@'    => \@opt_level,        # --level n
        'dumpdates=s' => \$opt_dumpdates,    # --dumpdates <file>
        'host=s'      => sub { },            # ignore
	'disk=s'      => sub { },	     # ignore
    ) or pod2usage;

    given ($command) {
        when ("support") { exec_support }
        when ("selfcheck") {
            pod2usage if undef ~~ $opt_device;
            exec_selfcheck
        }
        when ("estimate") {
            pod2usage if undef ~~ [$opt_device, $opt_level[0]];
            exec_estimate
        }
        when ("backup") {
            pod2usage if undef ~~ [$opt_device, $opt_level[0]];
            exec_backup
        }
        default { pod2usage }
    }
}

# output a list of supported options
sub exec_support {
    print map { "$_ $SUPPORT{$_}\n" =~ s/_/-/gr } sort keys %SUPPORT;
    exit 0;
}

sub exec_selfcheck {

    # must: $opt_device
    # may: $opt_level

    OK "$ME version $VERSION";
    OK "euid=$> (" . getpwuid($>) . ')'; 
    OK "egid=$) (" . join(', ' => map { '' . getgrgid $_ } split ' ' => $)) . ')';

    if ($_ = (grep { -x ($_ .= "/dump") } split /:/ => $ENV{PATH})[0]) {
        chomp(my $version = (`$_ 2>&1`)[0]);
        OK "dump is $version";
    }
    else {
        ERROR "dump not found in $ENV{PATH}";
    }

    # check the device
    # the opt_disk is just a label, the device is in opt_device!
    my $device = device($opt_device);

    if    (-b $device) { OK "$opt_device ($device is block special)" }
    elsif (-d $device) { OK "$opt_device ($device is directory)" }
    else               { ERROR "$opt_device not recognized" }

    # check the dumpdates file
    if ($opt_record) {
        my $dumpdates = $opt_dumpdates ? expand($opt_dumpdates) : DUMPDATES;

        eval { open(my $x, "+>>", $dumpdates) or die "$!\n" };
        if   (chomp $@) { ERROR "dumpdates file \"$dumpdates\": $@" }
        else            { OK "dumpdates file: \"$dumpdates\"" }
    }

    exit 0;
}

sub exec_estimate {

    # must: $opt_level, $opt_device
    # may:  $opt_record, $opt_dumpdates
    my (@errors, @results);

    foreach my $level (@opt_level) {
        my @cmd = (
            dump => "-$level",
            '-S',   # estimate
            $opt_record && $opt_dumpdates ? (-D => expand($opt_dumpdates)) : (),
            device($opt_device),
        );

        my @output = `@cmd 2>&1`;

        given ($?) {
            when (-1) { say "command not found: $cmd[0]" }
            when ($_ > 0) {
                my $rc  = ($? & 0xffff) >> 8;
                my $sig = ($? & 0xff);
                say
"unexpected return code (exit: $rc, signal: $sig) from `@cmd':\n",
                  join "\n" => @output;
                exit 1;
            }
        }
        chomp @output;

        # the last line should be the number of 1K blocks
        my $blocks = do {
            my $_ = pop @output;
            /^(\d+)/ or do {
                say "can't get estimate";
                exit 1;
            };
            $1 / 1024;
        };

        # level blocks blocksize
        say join "\n" => @output if @output;
        say "$level $blocks 1";
    }

    exit 0;
}

sub exec_backup {

    # fd1: data channel
    # fd3: message channel
    # fd4: index channel

    my @dump = (
        dump => "-$opt_level[0]",
	#'-v', # verbose
        -f   => '-',
        $opt_record ? '-u' : (),
        $opt_record && $opt_dumpdates ? (-D => expand($opt_dumpdates)) : (),
        device($opt_device)
    );

    #   ,---------> fd3 ---->          (messages)
    # dump --o----> fd1 ---->          (data)
    #         `---> restore -t --> fd4 (index)

    open(my $msg, '>&=', FD3) or die "Can't open fd3: $!\n";
    open(my $idx, '>&=', FD4) or die "Can't open fd4: $!\n" if $opt_index;

    if ($opt_index) {
        my $pid = fork // die "Can't fork: $!\n";
        if (not $pid) {
	    $0 = "$ME [about to exec dump]";

	    # dump will be execed soon, first we've to establish
	    # the channels - one for STDOUT, and one for STDIN
            open(STDOUT, '|-') or do {
		# this is the child that will read
		# the STDOUT from dump
		$0 = "$ME [stdout < dump]";

                my $pid = open(my $restore, '|-') or do {
		    $0 = "$ME [toc]";
                    open(STDOUT, '|-') or do {
                        postprocess_toc($idx);
                        exit 0;
                    };
                    exec 'restore', -tvf => '-';
                    die "Can't exec `restore -tvf -`: $!";
                };

                local $/ = \(my $x = 64 * 1024);
                while (<STDIN>) {
                    print $_;
                    print $restore $_;
                }
		close($restore);
                exit 0;
            };
	    
            open(STDERR, '|-') or do {
		$0 = "$ME [stderr < dump]";
                postprocess_dump_messages($msg);
                exit 0;
            };

	     # we need to fork again, otherwise dump sees
	     # the end of the above children and complains
	     my $pid = fork // die "Can't fork: $!\n";
	     if (not $pid) {
		exec @dump;
		die "Can't exec `@dump': $!\n";
	    }

	    waitpid($pid, 0);
        }

        waitpid($pid, 0);
        exit $?;
    }

    # no need to send an index
    # dump [2] --- (postprocess_dump_messages) --> [fd3]
    #      [1] ----------------------------------> [fd1]

    my $pid = fork // die "Can't fork: $!\n";

    # child does all the work
    if (not $pid) {

        # create the subprocess that will read the
        # stderr output  from dump, convert it and send it
        # to the message channel
        open(STDERR, '|-') or do {
            postprocess_dump_messages($msg);
            exit 0;
        };
        exec @dump;
        die "Can't exec `@dump`: $!\n";
    }
    waitpid($pid, 0);
    exit $?;

}

sub postprocess_dump_messages {

    select +shift;  # send output to the message channel

    while (<STDIN>) {
        print "| $_";
        if (/^\s+DUMP: (\d+) blocks?/) {
            # we assume a block size of 1K
            say "sendbackup: size $1";
        }
        elsif (/^\s+DUMP: DUMP IS DONE/) {
            say 'sendbackup: end';
        }
    }
}

sub postprocess_toc {

    # the output of restore -tv looks
    # about like this:
    #
    # dir  4711  ./aaa
    # leaf 4712  ./bbb/xxx
    # leaf 4713  ./bbb/a
    # b
    # leaf 8819  ./bbb/x
    #
    # it may break if there is a lf/cr
    # embedded in the filename
    #
    # the more generic solution would be to force
    # restore to use a \0 separated output format

    select +shift;
    local $/ = "\n";	# make sure to have it line separated!

    my $buffer = undef;
    my $type   = undef;

    while (1) {

        $_ = <STDIN>;

        # skip the header lines
        if (1 .. defined && /\Adir\s+\d+\s+(.*)\Z/) {
            $buffer = '';
            $type   = 'dir';
	    die "Unexpected end of input\n" if not defined;
            next;
        }

        # if we match really good the buffer may be output
        if (not defined
            or chomp and /\A(?'type' dir|leaf)\s+\d+\s+\.(?'name' \/.*)\Z/x)
        {

            # output
            say $buffer . ($type eq 'dir' ? '/' : '');

            # we're done if this was the last line of output
            last if not defined;

            # order matters, do not exchange the next two lines! The %+
            # will break
            $type = $+{type};
            $buffer = $+{name} =~ s/\\/\\\\/gr;

            next;
        }

        $buffer .= "\\n$_";

    }

}

sub device {
    my $_ = shift;
    return $_ if /^\//;
    return "/dev/$_";
}

sub expand {
    my $_ = shift;
    s/\${c}/$opt_config/g;
    return $_;
}

sub OK    { say "OK @_" }
sub ERROR { say "ERROR [@_]" }

=head1 NAME

  amdumpext - the amanda dump application

=head1 SYNOPSIS

  amdumpext support
  amdumpext selfcheck [options] [--level <level>]    --device <device> 
  amdumpext estimate [options]  [--level <level>]... --device <device> 

=head1 COMMANDS

=over

=item B<support>

Send a list of supported features.

=back

=head1 OPTIONS

=head2 Common Options

The following options have to be supported by the application.

=over 4

=item B<--config> I<config>

The configuration to be used (the backup set).

=item B<--host> I<host>

The host from the DLE.

=item B<--disk> I<disk>

The disk to be saved. It's some "label" for the device to be backed up.

=item B<--device> I<device>

The device to be backed up (may be a device name, a mountpoint).

=back

=head2 Optional options

The following options need to be supported if indicated by the "support"
command.

=over

=item B<--message> "line"

Send messages line by line.

=item B<--index> "line"

Send the index line by line.

=back

=cut

=head2 Properties

=over 4

=item B<--dumpdates> I<dumpdates>

The location of the dumpdates file. Placeholder "${c}" is allowed and
replaced by the name of the current config.

=back

=head1 EXAMPLE

    define application "dump" {
        plugin "amdumpext"
	# optional - define some additional parameters
        property "dumpdates" "/tmp/dumpdates.${c}"
   }


=cut

# vim:sts=4 sw=4 aw ai sm:
