#! /usr/bin/perl
# (c) 2015 Heiko Schlittermann <hs@schlittermann.de>
use strict;
use 5.10.0;
use warnings;
use Pod::Usage;
use Getopt::Long;
use File::Basename;
use File::Spec::Functions;

our $VERSION = '0.1.0';

my $opt_tasks;
my $opt_dry;
my $opt_keep;
my $opt_basedir = '.';
my $opt_min_age = txt2days('1d');

my $dir_template = '$version-$cluster-$date';    # do not change!

delete @ENV{ grep /^LC_/ => keys %ENV };

exit main() if not caller;

sub main {

    GetOptions(
        'k|keep=i'    => \$opt_keep,
        'd|basedir=s' => \$opt_basedir,
        'dry'         => \$opt_dry,
        'min-age=s'   => sub { $opt_min_age = txt2days($_[1]) },
        'h|help'      => sub { pod2usage(-verbose => 1, -exit => 0) },
        'm|man'       => sub {
            pod2usage(
                -verbose   => 2,
                -exit      => 0,
                -noperldoc => system('perldoc -V >/dev/null 2>&1')
            );
        },
    );

    my @clusters = ls_clusters();

    # process the command line and get a list of tasks to do
    my @tasks = sort { $a->{name} cmp $b->{name} } map {
        state $date = date();
        my $version = $_->{version};
        my $cluster = $_->{cluster};
        my $dirname = eval "\"$dir_template\"";
        $_->{dirname} = catfile($opt_basedir, $dirname);
        $_;
      } grep { defined and $_->{status} eq 'online' } do {
        if (@ARGV) {
            my $v = $ARGV[0];
            if (@ARGV > 1) {
                my %h =
                  map { $_->{cluster} => $_ }
                  grep { $_->{version} eq $v } @clusters;
                @h{ @ARGV[1 .. $#ARGV] };
            }
            else {
                grep { $_->{version} eq $v } @clusters;
            }
        }
        else {
            @clusters;
        }
      };

    # check for consistency
    if (@ARGV == 1 and !@tasks) {
        die "$0: no tasks for version $ARGV[0]\n";
    }
    elsif (@ARGV > 1 and @tasks < @ARGV - 1) {
        die "$0: no tasks for version $ARGV[0] and clusters @ARGV[1..$#ARGV]\n";
    }

    # now get the real jobe done
    # run for all tasks, regardless of errors

    foreach my $task (@tasks) {

        rmdir glob("$opt_basedir/*");
        mkdir $task->{dirname}
          or die "$0: Can't mkdir $task->{dirname}: $!\n"
          unless $opt_dry;

        my @cmd = (
            pg_basebackup => '--format' => 't',
            '--xlog',
            '--cluster' => $task->{name},
            '--pgdata'  => $task->{dirname},
            '--gzip',
            -t 0 ? '--progress' : ()
        );

        if ($opt_dry) {
            print sprintf "%s %s\n", $task->{name}, "@cmd";
            $task->{exit} = 0;
            next;
        }

        system @cmd;
        warn "$0: `@cmd` failed\n" if $?;
        $task->{exit} = $?;
    }

    # check the results

    foreach my $task (@tasks) {
        printf "%-10s %-20s %s\n",
          $task->{exit} ? "FAIL:$task->{exit}" : 'OK',
          $task->{cluster},
          $task->{dirname};
    }

    return 0 if not $opt_keep;

    # care about the backups to keep, if everything went fine so far
    rmdir glob "$opt_basedir/*";    # remove empty directories

    foreach my $task (grep { !$_->{exit} } @tasks) {

        # sorted list, from oldest to youngest backup
        my @old = grep { -M > $opt_min_age } glob catfile $opt_basedir, do {
            my $version = $task->{version};
            my $cluster = $task->{cluster};
            my $date    = '*';
            eval "\"$dir_template\"";
        };

        if ($opt_keep <= @old) {
            splice @old, -1 * $opt_keep;
            foreach (@old) {
                if ($opt_dry) {
                    print "would unlink $_\n";
                    next;
                }
                unlink glob "$_/*.tar.gz";
                rmdir $_;
            }
        }

    }

}

sub date {
    my @now = localtime;
    sprintf '%4d-%02d-%02dT%02d-%02d-%02d',
      $now[5] + 1900,
      $now[4] + 1,
      $now[3],
      @now[reverse 0 .. 2];
}

sub ls_clusters {
    my @clusters;

    foreach (map { [(split)[0 .. 3]] } `pg_lsclusters -h`) {
        push @clusters,
          {
            name    => "$_->[0]/$_->[1]",
            version => $_->[0],
            cluster => $_->[1],
            port    => $_->[2],
            status  => $_->[3],
          };
    }
    return @clusters;
}

sub txt2days {
    local $_ = shift;
    my $seconds;
    if (/(\d+)w/)   { $seconds += $1 * 604800; }
    if (/(\d+)d/)   { $seconds += $1 * 86400; }
    if (/(\d+)h/)   { $seconds += $1 * 3600; }
    if (/(\d+)m/)   { $seconds += $1 * 60; }
    if (/(\d+)s?$/) { $seconds += $1; }
    return $seconds / 86400;
}

exit main() if not caller;

__END__

=head1 NAME
 
 pg-backup - backup all active PostgreSQL clusters

=head1 SYNOPSIS

 pg-backup [options] [<version> [<cluster>]...]
 pg-backup -h|--help|-m|--man

=head1 DESCRIPTION

This creates backups using C<pg_basebackup> 
for all active PostgreSQL clusters and writes the backups as TAR archives.

If a I<version> and optionally I<cluster>s are specified on the command line,
all clusters (or the specified clusters) of the I<version> are saved.

Without any I<version>/I<cluster> specification all B<online> clusters are backed up.

=head1 OPTIONS

=over

=item B<-d>|B<--basedir> I<dir>

The directory where the subdirectories for the backups will be placed. (default: F<.>)

=item B<-k>|B<--keep> I<n>

The number of generations to keep. Old backups will be only removed if B<all>
specified backups succeed. (no default)

=item B<--dry>

Dry run, show what will be done. (default: undef)

=item B<--min-age> I<time>

The minimum age of old backups before the B<--keep> option tries to cleanup. 
The timestamp of the directory with the tar archive is relevant. Nothing else!
(default: 1d)

=head1 RECOVERY

For recovery, create a fresh PostgreSQL cluster, stop the cluster, unpack the archive in the 
data directory, and start the cluster.

NOTE: The version of the server and the saved backup need to match!

=back

=head1 AUTHOR

Heiko Schlittermann L<mailto:hs@schlittermann.de>

=cut
