#! /usr/bin/perl

use 5.010;
use strict;
use warnings;
use Pod::Usage;
use Hash::Util qw(lock_keys);
use File::Find;
use File::Temp;
use DB_File;
use File::Basename;
use autodie qw(:all);
use Cwd qw(abs_path);

use Getopt::Long;
sub get_block_list;
sub purge_unused;
sub check_images;

my %o = (
    yes     => undef,
    verbose => undef,
    check   => undef,
);
lock_keys(%o);

MAIN: {
    Getopt::Long::Configure qw(Bundling);
    GetOptions(
        "y|yes!"     => \$o{yes},
        "v|verbose!" => \$o{verbose},
        "c|check"    => \$o{check},
        "h|help"     => sub { pod2usage(-verbose => 1, -exit 0) },
        "m|man"      => sub {
            pod2usage(
                -verbose => 2,
                -exit 0,
                -noperldoc => system(
                    "perldoc -V 1>/dev/null
			  2>&1"
                )
            );
        },
      )
      and @ARGV
      or pod2usage;
    my $dir = shift;
    my $tmp = File::Temp->new;

    # load the index files, remember the latest
    # timestamp we see
    #tie %idx, "DB_File" => $tmp->filename;
    my %block = get_block_list($dir);

    verbose("# indexed: "
          . scalar(@{ $block{""} // [] })
          . " images with "
          . (grep !/^\.idx$/ => keys(%block))
          . " blocks");

    purge_unused($dir => %block);
    check_images($dir => %block);
}

sub verbose { say @_ if $o{verbose} }

sub get_file_list {
    my ($list) = @_;
    my @files = ();

    open(my $fh => $list);
    while (<$fh>) {
        push @files, (split)[2];
    }
    return grep /^[a-z\d.\/]+$/ => @files;
}

sub get_block_list {
    my $dir = shift;
    my %block;
    find(
        sub {
            (-f) or return;    # we need to include the tmp files!
            push @{ $block{""} }, abs_path $_;
            foreach my $f (get_file_list($_)) {
                push @{ $block{$f} } => $#{ $block{""} };
            }
        },
        "$dir/idx"
    );
    return %block;
}

sub purge_unused {
    my ($dir, %block) = @_;

    my ($total, $done);
    verbose("# pass 1 - checking for unused blocks");
    verbose("# pass 1 - estimating file count");

    # calculate the number of files we expect
    find(
        sub {
            -d or return;
            opendir(my $dh => $_);
            map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh;
            closedir($dh);
        },
        "$dir/data"
    );

    # progress
    local $SIG{ALRM} = sub {
        return alarm 1 if not $done;
        my $speed = $done / (time - $^T + 1);
        verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)",
          100 * ($done / $total),
          scalar(localtime($^T + $speed * ($total - $done))),
          length($total) => $done,
          $total;
        alarm 5;
    };
    $SIG{ALRM}->();

    my @unused;
    find(
        sub {
            $done++ if -f;
            (-f _) and ((-M _) > 0) or return;  # don't process the fresh blocks

            # we don't need uncompressed files if an compressed version
            # exists
            unlink $_ and return if -f "$_.gz";

            # cut away the first part of the filename and
            # some optional extension
            (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
            exists $block{$rn} and return;
            push @unused, abs_path $File::Find::name;
            return;

        },
        "$dir/data"
    );
    $SIG{ALRM}->();
    alarm 0;

    return if not @unused;

    say sprintf "found %d (%.1f%%) unused files",
      0 + @unused,
      100 * (@unused / $total);

    if ($o{yes}) {
        verbose("# deleting " . @unused . " files");
        unlink @unused;
        return;
    }

    if (-t) {
        while (1) {
            print "delete? [y/N/v]: ";
            given (<STDIN>) {
                when (/^y(?:es)?$/i) { unlink @unused; last }
                when (/^v/) { say join "\n", @unused; next }
                default { last }
            }
        }
    }

}

sub check_images {
    my ($dir, %block) = @_;

    my $total = grep { $_ ne "" } keys(%block);
    my $done = 0;

    verbose("# pass 2 - checking image completeness");

    # progress
    local $SIG{ALRM} = sub {
        return alarm 1 if not $done;
        my $speed = $done / (time - $^T + 1);
        verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)",
          100 * $done / $total,
          scalar(localtime($^T + ($total - $done) * $speed)),
          length($total) => $done,
          $total;
        alarm 5;
    };
    $SIG{ALRM}->();

    my %invalid;
    foreach my $k (keys %block) {
        my $i = $block{$k};
        next if $k eq "";
        ++$done;

        next
          if -f "$dir/data/$k"
              or -f "$dir/data/$k.gz";
        say "missing $k @$i";
        @invalid{@$i} = ();
    }
    $SIG{ALRM}->();
    alarm 0;

    # invalid now contains the numbers of the idx files beiing
    # invalid
    my @invalid = sort @{ $block{""} }[keys %invalid];

    return if not @invalid;

    say sprintf "found %d (%.1f%%) invalid images:",
      0 + @invalid,
      100 * (@invalid / $total);

    if ($o{yes}) {
        unlink @invalid;
        return;
    }

    while (-t) {
        print "delete? [y/N/v] ";
        given (<STDIN>) {
            when (/^y(?:es)?$/i) { unlink @invalid; last }
            when (/^v/i) { say join "\n" => @invalid; next }
            default { last }
        }
    }
}
__END__

=head1 NAME

    checker - checks the imager data and index files

=head1 SYNOPSIS

    checker [options] {directory}

=head1 DESCRIPTION

This tool loads all the index files from I<directory>F</idx/>,
checks if all mentioned files are existing and optionally purges
unreferenced files.

=head1 OPTIONS

=over

=item B<-y>|B<--yes>

Assume "yes" for all questions (dangerous!). (default: no)

=item B<-h>|B<--help>

=item B<-m>|B<--man>

The short and longer help.

=back

=cut
