1 package Exim::Grey; |
|
2 # for usage please see at the end |
|
3 |
|
4 use strict; |
|
5 use warnings; |
|
6 use base 'Exporter'; |
|
7 use Carp; |
|
8 |
|
9 our @EXPORT_OK = qw(unseen seen getDBDir connectDB getDefault); |
|
10 our %EXPORT_TAGS = (all => \@EXPORT_OK,); |
|
11 our $VERSION = '2.0'; |
|
12 |
|
13 our $verbose; |
|
14 |
|
15 sub verbose { |
|
16 return if not $verbose; |
|
17 print STDERR __PACKAGE__ . ': ' . map { s/\0//gr } @_, "\n"; |
|
18 } |
|
19 |
|
20 sub exim_bool { $_[0] ? 'yes' : 'no' } |
|
21 |
|
22 # You may choose, but DB_File's footprint is smaller. |
|
23 # perl -MDB_File -e 'tie %h, ...': real 0m0.063s |
|
24 # perl -MBerkeleyDB -e 'tie %h, ...': real 0m0.112s |
|
25 # And DB_File is part of the Perl core distribution (?) |
|
26 # use BerkeleyDB; |
|
27 # use DB_File; |
|
28 # |
|
29 # But we need locking! DB_File::Lock isn't part of the corelist. |
|
30 use DB_File::Lock; |
|
31 |
|
32 my %DEFAULT = ( |
|
33 delay => 600, |
|
34 db => 'seen', |
|
35 ); |
|
36 |
|
37 # some helper functions |
|
38 sub getDBDir(); |
|
39 sub findExim(;$); |
|
40 sub connectDB($$); |
|
41 sub getDefault() { %DEFAULT } |
|
42 |
|
43 # dbm file is relativ to $spool_directory/grey, EXCEPT its name |
|
44 # starts with "/". |
|
45 |
|
46 sub unseen_ { |
|
47 my $item = shift; |
|
48 my $delay = shift // $DEFAULT{delay}; |
|
49 my $db = shift // $DEFAULT{db}; |
|
50 my $now = time(); |
|
51 my ($auto) = $item =~ /.*?\/(.+?)$/ # remember the /<autokey> from the item |
|
52 and $item =~ s/\/.*?$//; # and remove it from the item |
|
53 my $rc; |
|
54 |
|
55 connectDB(\my %h, $db); |
|
56 |
|
57 return 1 # not unseen, ergo known |
|
58 if defined $auto and is_whitelisted($auto, \%h); |
|
59 |
|
60 my $key = "$item\0"; # for compatibility with Exim's dbm functions |
|
61 |
|
62 # We do not know anything about the client -> unknown. |
|
63 # But remember that key with the associated "auto" subkey |
|
64 if (not exists $h{$key}) { |
|
65 $h{$key} = serialize(t0 => $now, t1 => $now, count => 0, auto => [defined $auto ? $auto : ()]); |
|
66 |
|
67 verbose "unseen: $item" if $verbose; |
|
68 return 1; |
|
69 } |
|
70 |
|
71 my %entry = deserialize($h{$key}); |
|
72 |
|
73 # we know the client, but last contact was recently (too fast) |
|
74 # should we add it to our list auto entries too? |
|
75 if ($now - $entry{t0} < $delay) { |
|
76 return 1; |
|
77 } |
|
78 |
|
79 # we know the client, was patiently enough |
|
80 whitelist(\%h, uniq($auto, $entry{auto})) if defined $auto; |
|
81 $entry{count}++; |
|
82 $h{$key} = $_ = serialize(%entry); |
|
83 verbose "seen: $_" if $verbose; |
|
84 return 0; |
|
85 } |
|
86 |
|
87 sub unseen { exim_bool unseen_ @_ } |
|
88 sub seen { exim_bool !unseen_ @_ } |
|
89 |
|
90 # According to a thought from "David Woodhouse <dwmw2@infradead.org>" |
|
91 # on exim-users@exim.org (Thu, 08 May 2008 13:10:21 +0100, |
|
92 # Message-Id: <1210248621.25560.1088.camel@pmac.infradead.org>) we |
|
93 # should have the ability to "auto whitelist" hosts which are known |
|
94 # for retries, because there is no benefit in greylisting them. |
|
95 # |
|
96 # Most safe approach would be something based on message id. |
|
97 # If we see the message id a second time it indicates successful retry. |
|
98 # But we do not see the message id the first time we reject the message. |
|
99 |
|
100 # This function has to be called twice per message delivery attempt |
|
101 # <KEY> <$sender_host_address> <$sender_helo_name> |
|
102 # (Where <KEY> is something like <$sender_address>+<$local_part@$domain> |
|
103 # If we see the same message a second time (same message means here: |
|
104 # same greylist criteria |
|
105 |
|
106 sub whitelist { |
|
107 my ($h, @items) = (shift, uniq(@_)); |
|
108 my $now = time; |
|
109 warn __PACKAGE__ . ": whitelist: @items\n" |
|
110 if $verbose; |
|
111 $h->{"$_\0"} = "$now $now 1 auto\0" |
|
112 foreach uniq(@items); |
|
113 } |
|
114 |
|
115 sub uniq { |
|
116 my %h = map { $_, undef } @_; |
|
117 return keys %h; |
|
118 } |
|
119 |
|
120 sub is_whitelisted { |
|
121 my ($item, $h) = @_; |
|
122 my $key = "$item\0"; |
|
123 |
|
124 warn __PACKAGE__ . 'is ' |
|
125 . (exists $h->{$key} ? '' : 'not') |
|
126 . "whitelisted: $item\n" if $verbose; |
|
127 |
|
128 return 0 if not exists $h->{$key}; |
|
129 |
|
130 my ($t0, undef, $cnt, $flag) = split /[ \0]/, $h->{$key}; |
|
131 $h->{$key} = join(' ' => $t0, time, ++$cnt, $flag ? $flag : ()) . "\0"; |
|
132 |
|
133 |
|
134 return 1; |
|
135 } |
|
136 |
|
137 # Get the directory where we could store the database file(s) |
|
138 # If we're running under exim it's easy, otherwise we've to find exim |
|
139 # and then ask... |
|
140 sub getDBDir() { |
|
141 my ($spooldir, $dbdir); |
|
142 eval { $spooldir = Exim::expand_string('$spool_directory') }; |
|
143 if (not defined $spooldir) { |
|
144 my $exim = findExim(); |
|
145 chomp($spooldir = `$exim -be '\$spool_directory'`); |
|
146 die "Can't find spooldir" if not defined $spooldir; |
|
147 } |
|
148 -d ($dbdir = "$spooldir/grey") and return $dbdir; |
|
149 |
|
150 my ($mode, $owner, $group) = (stat $spooldir)[2, 4, 5]; |
|
151 { |
|
152 local $) = $group; |
|
153 local $> = $owner; |
|
154 $mode &= 0777; |
|
155 mkdir $dbdir, $mode or die "Can't create $dbdir: $!"; |
|
156 } |
|
157 return $dbdir; |
|
158 } |
|
159 |
|
160 sub findExim(;$) { |
|
161 my $path = shift || $ENV{PATH}; |
|
162 my $exim; |
|
163 foreach (split /:/, $ENV{PATH}) { |
|
164 -x ($exim = "$_/exim") and return $exim; |
|
165 -x ($exim = "$_/exim4") and return $exim; |
|
166 } |
|
167 die "Can't find exim binary (missing .../sbin dirs in PATH?"; |
|
168 } |
|
169 |
|
170 sub connectDB($$) { |
|
171 my ($h, $db) = @_; |
|
172 $db = getDBDir() . "/$db" unless $db =~ m(^/); |
|
173 |
|
174 # Creation of DB-File if it doesn't exist |
|
175 # to avoid races we change our own uid/gid for creation of |
|
176 # this file. |
|
177 if (!-f $db) { |
|
178 (my $dir = $db) =~ s/^(.*)\/.*?$/$1/; |
|
179 |
|
180 # copy mode, uid, gid from the directory |
|
181 my ($mode, $user, $group) = (stat $dir)[2, 4, 5] |
|
182 or die "Can't stat $dir: $!"; |
|
183 my $umask = umask(($mode & 0777) ^ 0777); |
|
184 local $) = $group; |
|
185 local $> = $user; |
|
186 open(X, ">>$db") or die "Can't create $db: $!"; |
|
187 close(X); |
|
188 umask $umask; |
|
189 } |
|
190 |
|
191 # now test which of the DB-Modules has been loaded |
|
192 |
|
193 if (exists &BerkeleyDB::Hash::TIEHASH) { |
|
194 no strict; |
|
195 my $umask = umask 077; |
|
196 tie %$h, "BerkeleyDB::Hash", -Filename => $db |
|
197 or die "$0: $db: $!"; |
|
198 return $db; |
|
199 } |
|
200 |
|
201 if (exists &DB_File::Lock::TIEHASH) { |
|
202 tie %$h, 'DB_File::Lock', [$db], 'write' |
|
203 or die "$0: $db: $!"; |
|
204 return $db; |
|
205 } |
|
206 |
|
207 if (exists &DB_File::TIEHASH) { |
|
208 tie %$h, 'DB_File', $db or die "$0: $db: $!"; |
|
209 warn "$0: using DB_File, no locking is possible!\n"; |
|
210 return $db; |
|
211 } |
|
212 |
|
213 die "Can't connect to database driver"; |
|
214 } |
|
215 |
|
216 # These two functions do not truly serialize/de-serialize the data |
|
217 # passed. They're specialiased to a fixed data format: |
|
218 # serialized: <t0> <t1> <count> [auto=<item>[,<item>]...] |
|
219 # structured: ( |
|
220 # t0 => <t0>, |
|
221 # t1 => <t1>, |
|
222 # count => <count>, |
|
223 # auto => [item, item, …], |
|
224 # ) |
|
225 sub serialize { |
|
226 my %data = @_; |
|
227 my $auto = (ref $data{auto} && @{$data{auto}}) ? join ',', @{$data{auto}} : ''; |
|
228 return "$data{t0} $data{t1} $data{count} auto=$auto\0"; |
|
229 } |
|
230 |
|
231 sub deserialize { |
|
232 my @data = split / /, $_[0] =~ s/\0$//r; |
|
233 my %data; |
|
234 ($data{t0}, $data{t1}, $data{count}) = splice @data, 0, 3; |
|
235 if ($data[0] =~ /^auto=(.*)/) { |
|
236 $data{auto} = [split /,/, $1]; |
|
237 } |
|
238 return %data; |
|
239 } |
|
240 |
|
241 1; |
|
242 |
|
243 __END__ |
|
244 =head1 NAME |
|
245 |
|
246 Exim::Grey |
|
247 |
|
248 =head1 SYNOPSIS |
|
249 |
|
250 perl_startup use Exim::Grey qw(unseen); |
|
251 ... |
|
252 acl rcpt |
|
253 defer condition = ${perl{unseen}{<$sender_address>:<$local_part@$domain>}} |
|
254 |
|
255 =head1 DESCRIPTION |
|
256 |
|
257 This is a module to be loade by Exim, the MTA. On request it exports |
|
258 a single function C<unseen()>. This function may be used in the ACL section |
|
259 to support greylisting. |
|
260 |
|
261 =head1 FUNCTIONS |
|
262 |
|
263 =over |
|
264 |
|
265 =item scalar B<unseen>(I<key>, I<delay>, I<db>) |
|
266 |
|
267 This function returns I<true> if the key is already known in the I<db> database |
|
268 for the minimum I<delay> time. (Note: The database may be cleaned regularly by |
|
269 the compangion L<exigrey> tool.) |
|
270 |
|
271 The I<key> is mandotory, the default I<delay> is 600 seconds and the default I<db> |
|
272 is called F<seen>. |
|
273 |
|
274 I<Key> may contain a suffix, separated by "/". This suffix is used for |
|
275 automatic whitelisting. |
|
276 |
|
277 =item scalar B<seen>(I<key>, I<delay>, I<db>) |
|
278 |
|
279 The same as C<unseen()>, but with reversed result. |
|
280 |
|
281 =back |
|
282 |
|
283 =head1 EXAMPLES |
|
284 |
|
285 =head2 Greylisting |
|
286 |
|
287 First you have to include B<Exim::Grey> into your Exim. If Exim is built with Perl |
|
288 support, the configuration syntax allows for C<perl_startup>: |
|
289 |
|
290 perl_startup = use Exim::Grey qw(unseen); |
|
291 |
|
292 In the ACL section of the configuration can check if a given key (sender, or combination |
|
293 of sender and recipient, or whatever) is new (unseen): |
|
294 |
|
295 defer condition = ${perl{unseen}{<$sender_address>:$<local_part@$domain>}} |
|
296 |
|
297 If the same condition is checked more then I<delay> later, the C<unseen> function returns |
|
298 false. |
|
299 |
|
300 =head2 Greylisting + automatic whitelisting |
|
301 |
|
302 Greylisting gets annoying if you do it for senders that are already known to retry. Thus it might be |
|
303 good to maintain a whitelist. You may use a suffix to your key, separated by "/". Once the greylist |
|
304 filter is passed, the used suffixes are registered with the whitelist. |
|
305 |
|
306 t |
|
307 | |
|
308 0 a->b/x # a->b never seen, suffix never seen: greylist |
|
309 1 a->b/y # a->b again: accept AND put x and y to the whitelist, |
|
310 | # as they are known to retry |
|
311 2 c->b/x # c->b unknown, but x is already whitelisted: accept |
|
312 3 d->b/y # d->b unknown, but y is already whitelisted: accept |
|
313 | |
|
314 v |
|
315 |
|
316 This can be implemented in your ACL as: |
|
317 |
|
318 defer condition = ${perl{unseen}{<$sender_address>:$<local_part@$domain>/$sender_host_address}} |
|
319 |
|
320 But, if I<a> and I<b> are the sender and the recipient address, and the |
|
321 subkey is the sender host address, a spammer might send a forged message |
|
322 after t0, to get whitelisted. |
|
323 |
|
324 =head1 INTERNALS |
|
325 |
|
326 =head2 Format of the database |
|
327 |
|
328 The record structure is |
|
329 |
|
330 key: item\0 |
|
331 value: timestamp(creation) timestamp(usage) counter[ flags]\0 |
|
332 |
|
333 This way we are compatible with ${lookup{...}dbm{...}} |
|
334 |
|
335 =head1 FILES |
|
336 |
|
337 The database files are placed in C<$spool_directory/grey/>. |
|
338 |
|
339 =head1 SEE ALSO |
|
340 |
|
341 The companion tool L<exigrey> should be used for inspection and manipulation |
|
342 of the database. |
|
343 |
|
344 =cut |
|
345 |
|
346 # vim:aw et sw=4 ts=4: |
|