package Exim::Grey;
# for usage please see at the end

use strict;
use warnings;
use base 'Exporter';
use Carp;

our @EXPORT_OK   = qw(unseen seen getDBDir connectDB getDefault);
our %EXPORT_TAGS = (all => \@EXPORT_OK,);
our $VERSION     = '2.0';

our $verbose;

sub verbose {
    return if not $verbose;
    print STDERR __PACKAGE__ . ': ' . map { s/\0//gr } @_, "\n";
}

sub exim_bool { $_[0] ? 'yes' : 'no' }

# You may choose, but DB_File's footprint is smaller.
# perl -MDB_File -e 'tie %h, ...':	real    0m0.063s
# perl -MBerkeleyDB -e 'tie %h, ...':	real	0m0.112s
# And DB_File is part of the Perl core distribution (?)
# use BerkeleyDB;
# use DB_File;
#
# But we need locking! DB_File::Lock isn't part of the corelist.
use DB_File::Lock;

my %DEFAULT = (
    delay => 600,
    db    => 'seen',
);

# some helper functions
sub getDBDir();
sub findExim(;$);
sub connectDB($$);
sub getDefault() { %DEFAULT }

# dbm file is relativ to $spool_directory/grey, EXCEPT its name
# starts with "/".

sub unseen_ {
    my $item   = shift;
    my $delay  = shift // $DEFAULT{delay};
    my $db     = shift // $DEFAULT{db};
    my $now    = time();
    my ($auto) = $item =~ /.*?\/(.+?)$/ # remember the /<autokey> from the item
        and $item =~ s/\/.*?$//;        # and remove it from the item
    my $rc;

    connectDB(\my %h, $db);

    return 1    # not unseen, ergo known
      if defined $auto and is_whitelisted($auto, \%h);

    my $key = "$item\0";                # for compatibility with Exim's dbm functions

    # We do not know anything about the client -> unknown.
    # But remember that key with the associated "auto" subkey
    if (not exists $h{$key}) {
        $h{$key} = serialize(t0 => $now, t1 => $now, count => 0, auto => [defined $auto ? $auto : ()]);

        verbose "unseen: $item" if $verbose;
        return 1;
    }

    my %entry = deserialize($h{$key});

    # we know the client, but last contact was recently (too fast)
    # should we add it to our list auto entries too?
    if ($now - $entry{t0} < $delay) {
        return 1;
    }

    # we know the client, was patiently enough
    whitelist(\%h, uniq($auto, $entry{auto})) if defined $auto;
    $entry{count}++;
    $h{$key} = $_ = serialize(%entry);
    verbose "seen: $_" if $verbose;
    return 0;
}

sub unseen { exim_bool unseen_ @_ }
sub seen { exim_bool !unseen_ @_ }

# According to a thought from "David Woodhouse <dwmw2@infradead.org>"
# on exim-users@exim.org (Thu, 08 May 2008 13:10:21 +0100,
# Message-Id: <1210248621.25560.1088.camel@pmac.infradead.org>) we
# should have the ability to "auto whitelist" hosts which are known
# for retries, because there is no benefit in greylisting them.
#
# Most safe approach would be something based on message id.
# If we see the message id a second time it indicates successful retry.
# But we do not see the message id the first time we reject the message.

# This function has to be called twice per message delivery attempt
# <KEY> <$sender_host_address> <$sender_helo_name>
# (Where <KEY> is something like <$sender_address>+<$local_part@$domain>
# If we see the same message a second time (same message means here:
# same greylist criteria

sub whitelist {
    my ($h, @items) = (shift, uniq(@_));
    my $now = time;
    warn __PACKAGE__ . ": whitelist: @items\n"
        if $verbose;
    $h->{"$_\0"} = "$now $now 1 auto\0"
        foreach uniq(@items);
}

sub uniq {
    my %h = map { $_, undef } @_;
    return keys %h;
}

sub is_whitelisted {
    my ($item, $h) = @_;
    my $key = "$item\0";

    warn __PACKAGE__ . 'is '
        . (exists $h->{$key} ? '' : 'not')
        . "whitelisted: $item\n" if $verbose;

    return 0 if not exists $h->{$key};

    my ($t0, undef, $cnt, $flag) = split /[ \0]/, $h->{$key};
    $h->{$key} = join(' ' => $t0, time, ++$cnt, $flag ? $flag : ()) . "\0";


    return 1;
}

# Get the directory where we could store the database file(s)
# If we're running under exim it's easy, otherwise we've to find exim
# and then ask...
sub getDBDir() {
    my ($spooldir, $dbdir);
    eval { $spooldir = Exim::expand_string('$spool_directory') };
    if (not defined $spooldir) {
        my $exim = findExim();
        chomp($spooldir = `$exim -be '\$spool_directory'`);
        die "Can't find spooldir" if not defined $spooldir;
    }
    -d ($dbdir = "$spooldir/grey") and return $dbdir;

    my ($mode, $owner, $group) = (stat $spooldir)[2, 4, 5];
    {
        local $) = $group;
        local $> = $owner;
        $mode &= 0777;
        mkdir $dbdir, $mode or die "Can't create $dbdir: $!";
    }
    return $dbdir;
}

sub findExim(;$) {
    my $path = shift || $ENV{PATH};
    my $exim;
    foreach (split /:/, $ENV{PATH}) {
        -x ($exim = "$_/exim")  and return $exim;
        -x ($exim = "$_/exim4") and return $exim;
    }
    die "Can't find exim binary (missing .../sbin dirs in PATH?";
}

sub connectDB($$) {
    my ($h, $db) = @_;
    $db = getDBDir() . "/$db" unless $db =~ m(^/);

    # Creation of DB-File if it doesn't exist
    # to avoid races we change our own uid/gid for creation of
    # this file.
    if (!-f $db) {
        (my $dir = $db) =~ s/^(.*)\/.*?$/$1/;

        # copy mode, uid, gid from the directory
        my ($mode, $user, $group) = (stat $dir)[2, 4, 5]
          or die "Can't stat $dir: $!";
        my $umask = umask(($mode & 0777) ^ 0777);
        local $) = $group;
        local $> = $user;
        open(X, ">>$db") or die "Can't create $db: $!";
        close(X);
        umask $umask;
    }

    # now test which of the DB-Modules has been loaded

    if (exists &BerkeleyDB::Hash::TIEHASH) {
        no strict;
        my $umask = umask 077;
        tie %$h, "BerkeleyDB::Hash", -Filename => $db
          or die "$0: $db: $!";
        return $db;
    }

    if (exists &DB_File::Lock::TIEHASH) {
        tie %$h, 'DB_File::Lock', [$db], 'write'
          or die "$0: $db: $!";
        return $db;
    }

    if (exists &DB_File::TIEHASH) {
        tie %$h, 'DB_File', $db or die "$0: $db: $!";
        warn "$0: using DB_File, no locking is possible!\n";
        return $db;
    }

    die "Can't connect to database driver";
}

# These two functions do not truly serialize/de-serialize the data
# passed. They're specialiased to a fixed data format:
# serialized: <t0> <t1> <count> [auto=<item>[,<item>]...]
# structured: (
#   t0 => <t0>,
#   t1 => <t1>,
#   count => <count>,
#   auto => [item, item, …],
#   )
sub serialize {
    my %data = @_;
    my $auto = (ref $data{auto} && @{$data{auto}}) ?  join ',', @{$data{auto}} : '';
    return "$data{t0} $data{t1} $data{count} auto=$auto\0";
}

sub deserialize {
    my @data = split / /, $_[0] =~ s/\0$//r;
    my %data;
    ($data{t0}, $data{t1}, $data{count}) = splice @data, 0, 3;
    if ($data[0] =~ /^auto=(.*)/) {
        $data{auto} = [split /,/, $1];
    }
    return %data;
}

1;

__END__
=head1 NAME

 Exim::Grey

=head1 SYNOPSIS

 perl_startup use Exim::Grey qw(unseen);
 ...
 acl rcpt
    defer condition = ${perl{unseen}{<$sender_address>:<$local_part@$domain>}}

=head1 DESCRIPTION

This is a module to be loade by Exim, the MTA. On request it exports
a single function C<unseen()>. This function may be used in the ACL section
to support greylisting.

=head1 FUNCTIONS

=over

=item scalar B<unseen>(I<key>, I<delay>, I<db>)

This function returns I<true> if the key is already known in the I<db> database
for the minimum I<delay> time. (Note: The database may be cleaned regularly by
the compangion L<exigrey> tool.)

The I<key> is mandotory, the default I<delay> is 600 seconds and the default I<db>
is called F<seen>.

I<Key> may contain a suffix, separated by "/". This suffix is used for
automatic whitelisting.

=item scalar B<seen>(I<key>, I<delay>, I<db>)

The same as C<unseen()>, but with reversed result.

=back

=head1 EXAMPLES

=head2 Greylisting

First you have to include B<Exim::Grey> into your Exim. If Exim is built with Perl
support, the configuration syntax allows for C<perl_startup>:

    perl_startup = use Exim::Grey qw(unseen);

In the ACL section of the configuration can check if a given key (sender, or combination
of sender and recipient, or whatever) is new (unseen):

    defer condition = ${perl{unseen}{<$sender_address>:$<local_part@$domain>}}

If the same condition is checked more then I<delay> later, the C<unseen> function returns
false.

=head2 Greylisting + automatic whitelisting

Greylisting gets annoying if you do it for senders that are already known to retry. Thus it might be
good to maintain a whitelist. You may use a suffix to your key, separated by "/". Once the greylist
filter is passed, the used suffixes are registered with the whitelist.

  t
  |
  0  a->b/x  # a->b never seen, suffix never seen: greylist
  1  a->b/y  # a->b again: accept AND put x and y to the whitelist,
  |          # as they are known to retry
  2  c->b/x  # c->b unknown, but x is already whitelisted: accept
  3  d->b/y  # d->b unknown, but y is already whitelisted: accept
  |
  v

This can be implemented in your ACL as:

    defer condition = ${perl{unseen}{<$sender_address>:$<local_part@$domain>/$sender_host_address}}

But, if I<a> and I<b> are the sender and the recipient address, and the
subkey is the sender host address, a spammer might send a forged message
after t0, to get whitelisted.

=head1 INTERNALS

=head2 Format of the database

The record structure is

 key:   item\0
 value: timestamp(creation) timestamp(usage) counter[ flags]\0

This way we are compatible with ${lookup{...}dbm{...}}

=head1 FILES

The database files are placed in C<$spool_directory/grey/>.

=head1 SEE ALSO

The companion tool L<exigrey> should be used for inspection and manipulation
of the database.

=cut

# vim:aw et sw=4 ts=4:
