--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,24 @@
+# Makefile
+# $Id$
+# $URL$
+prefix = /usr/local
+sbindir = ${prefix}/sbin
+man8dir = ${prefix}/man/man8
+
+.PHONY: all clean install
+
+all: .ok.mailscan
+
+install: all
+ install -d -m 0755 ${DESTDIR}${sbindir}/
+ install -d -m 0755 ${DESTDIR}${man8dir}/
+ install -m 0755 mailscan ${DESTDIR}${sbindir}/
+ umask 022; \
+ pod2man --section 8 mailscan | gzip >${DESTDIR}${man8dir}/mailscan.8.gz
+
+clean:
+ -rm -f .ok.*
+
+.ok.%: %
+ @perl -c $<
+ @touch $@
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/db.mysql Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,66 @@
+-- MySQL dump 10.11
+--
+-- Host: schnuffi Database: mail
+-- ------------------------------------------------------
+-- Server version 5.0.32-Debian_7etch6-log
+
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+/*!40101 SET NAMES utf8 */;
+/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
+/*!40103 SET TIME_ZONE='+00:00' */;
+/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
+/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
+/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
+/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
+
+--
+-- Table structure for table `header_field`
+--
+
+DROP TABLE IF EXISTS `header_field`;
+CREATE TABLE `header_field` (
+ `id` int(11) NOT NULL auto_increment,
+ `name` char(255) default NULL,
+ KEY `id` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+--
+-- Table structure for table `message`
+--
+
+DROP TABLE IF EXISTS `message`;
+CREATE TABLE `message` (
+ `id` int(11) NOT NULL auto_increment,
+ `content` longtext,
+ `timestamp` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
+ KEY `id` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+--
+-- Table structure for table `message_header`
+--
+
+DROP TABLE IF EXISTS `message_header`;
+CREATE TABLE `message_header` (
+ `message_id` int(11) NOT NULL,
+ `header_field_id` int(11) NOT NULL,
+ `idx` int(11) default NULL,
+ `content` text,
+ KEY `header_field_id` (`header_field_id`),
+ KEY `message_id` (`message_id`),
+ CONSTRAINT `message_header_ibfk_1` FOREIGN KEY (`header_field_id`) REFERENCES `header_field` (`id`) ON DELETE CASCADE,
+ CONSTRAINT `message_header_ibfk_2` FOREIGN KEY (`message_id`) REFERENCES `message` (`id`) ON DELETE CASCADE
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
+
+/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
+/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
+/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
+
+-- Dump completed on 2008-07-20 20:54:13
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mail2db Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,265 @@
+#! /usr/bin/perl
+# © 2008 Heiko Schlittermann <hs@schlittermann.de>
+# $Id$
+# $URL$
+
+use strict;
+use warnings;
+use Getopt::Long;
+use DBI;
+use Pod::Usage;
+use MIME::Parser;
+use MIME::Words qw(:all);
+use File::Temp;
+use Unix::Syslog qw(:macros :subs);
+use Text::Iconv;
+use if $ENV{DEBUG} => "Data::Dumper";
+
+my $DSN = "DBI:mysql:mail:hostname=schnuffi";
+my @CREDENTIALS = qw(mail PWUle5Eimi);
+
+my $OUTPUT_CHARSET = "UTF8";
+my $DEFAULT_INPUT_CHARSET = "ASCII";
+
+my $opt_help = 0;
+my $opt_man = 0;
+my $opt_dry = 0;
+
+my $DBH =
+ DBI->connect($DSN, @CREDENTIALS,
+ { RaiseError => 1, FetchHashKeyName => "NAME_lc", AutoCommit => 0 })
+ or die;
+
+# read the message into tmpfile (passed as arg) and return "Message" object
+sub get_message();
+sub get_headers($);
+sub decode_headers($$);
+
+MAIN: {
+ openlog("mail2db", LOG_PID | (-t STDERR ? LOG_PERROR : 0), LOG_MAIL);
+ GetOptions(
+ "h|help" => \$opt_help,
+ "m|man" => \$opt_man,
+ "n|dry" => \$opt_dry,
+ ) or pod2usage();
+
+ pod2usage(-verbose => 1, -exitval => 0) if $opt_help;
+ pod2usage(-verbose => 2, -exitval => 0) if $opt_man;
+
+ my ($tmpfile, $message) = get_message();
+ decode_headers($message, $OUTPUT_CHARSET);
+
+ # $message->print;
+
+ # now we have the mime entity with completly decoded
+ # headers and an unmodified body. the body we do not need
+ # actually.
+
+ # database job starts
+
+ my $insert_message = $DBH->prepare(qq{
+ INSERT INTO message (id, content, timestamp) VALUES(NULL, ?, NOW())
+ });
+
+ my $insert_header_id = $DBH->prepare(qq{
+ INSERT INTO header_field (id, name) VALUES(NULL, ?)
+ });
+ my $insert_message_header = $DBH->prepare(qq{
+ INSERT INTO message_header
+ (message_id, header_field_id, idx, content)
+ VALUES(?, (SELECT id FROM header_field WHERE name = ?), ?, ?)
+ });
+
+ # first insert the message and get the database message id
+ my $msg_id;
+ {
+ seek($tmpfile, 0, 0);
+ local $/ = undef;
+ $insert_message->execute(<$tmpfile>);
+ $msg_id = $DBH->last_insert_id(undef, undef, message => "id");
+ syslog(LOG_DEBUG, "message id: $msg_id");
+ }
+
+ # now insert the message headers
+ foreach my $tag (map { lc } $message->head->tags) {
+ for (my $idx = 0 ;
+ my $header = $message->head->get($tag, $idx) ; ++$idx)
+ {
+ $header =~ s/\s*$//;
+ syslog(LOG_DEBUG, "$tag\[$idx]\n");
+
+ # first we'll give it a try, but it may fail, because the
+ # header_field.id is missing
+ eval {
+ local $insert_message_header->{PrintError} = 0;
+ $insert_message_header->execute($msg_id, $tag, $idx, $header);
+ };
+ # if there was a problem, we'll try to insert the header_field
+ if ($@) {
+ $insert_header_id->execute($tag);
+ $insert_message_header->execute($msg_id, $tag, $idx, $header);
+ }
+ }
+ }
+
+ $DBH->commit if not $opt_dry;
+
+}
+
+sub get_message() {
+
+ # we'll create a tmp file containing the complete message
+ # if speed matters we should use a ram disk.
+ # unfortunely the MIME::Parser may temporary files too
+ my $tmpfile = new File::Temp(TEMPLATE => "mail2db-XXXXXX");
+ local $_ = <>;
+ if (!/^From\s/) {
+ my $nl = /\r?\n$/;
+ print {$tmpfile} "From - @{[scalar localtime]}$nl", $_;
+ }
+ $/ = undef;
+ print {$tmpfile} $_, <>;
+ $tmpfile->autoflush(1);
+ seek($tmpfile, 0, 0);
+
+ my $parser = new MIME::Parser or die "internal error";
+ $parser->output_to_core(1);
+ my $msg = $parser->parse($tmpfile);
+
+ return ($tmpfile, $msg);
+}
+
+{
+ my %converter;
+
+ sub decode_headers($$) {
+ my ($msg, $charset) = @_;
+
+ $msg->head->unfold;
+ my %converter;
+ foreach my $tag (map { lc } $msg->head->tags) {
+ for (my $i = 0 ; my $h = $msg->head->get($tag, $i) ; ++$i) {
+ my $line = "";
+ foreach my $w (decode_mimewords $h) {
+ if (!defined $w->[1]) {
+ $line .= $w->[0];
+ next;
+ }
+ $converter{ $w->[1] } ||= new Text::Iconv($w->[1], $charset)
+ or die "Can't create converter $w->[1] => $charset\n";
+ $line .= $converter{ $w->[1] }->convert($w->[0]);
+ }
+ $msg->head->replace($tag, $line, $i);
+ }
+ }
+ }
+}
+
+{
+
+ # this code is not used anymore!
+ my %converter;
+
+ sub get_headers($) {
+ my $msg = shift;
+ my %header;
+
+ $msg->head->unfold;
+
+ # go through all header fields
+ foreach my $tag (map { lc } $msg->head->tags) {
+
+ # but we may have more than one...
+ foreach my $t ($msg->head->get_all($tag)) {
+ push @{ $header{$tag} }, "";
+ my $current = \$header{$tag}[-1];
+ foreach my $w (decode_mimewords $t) {
+ if (!defined $w->[1]) {
+ $$current .= $w->[0];
+ next;
+ }
+ $converter{ $w->[1] } ||=
+ new Text::Iconv($w->[1], $OUTPUT_CHARSET)
+ or die
+ "Can't create converter $w->[1] => $OUTPUT_CHARSET\n";
+ $$current .= $converter{ $w->[1] }->convert($w->[0]);
+ }
+ $$current =~ s/\s+$//;
+ }
+ }
+ return %header;
+ }
+}
+
+=head1 NAME
+
+ mail2db - scan in/outgoing mails and save some in database
+
+=head1 SYNOPSIS
+
+ mail2db [-n|--dry]
+
+ mail2db [-h|--help]
+ mail2db [-m|--man]
+
+=head1 DESCRIPTION
+
+B<mail2db> reads a RFC822 message from stdin and saves it into a database.
+For more information please the the source code itself.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-n>|B<--dry>
+
+Do not modify the database, just start a transaction but do not commit it.
+(This feature needs a database with transactions (as MySQL 5.x with InnoDB tables
+is). [default: off]
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+Short help resp. manpage.
+
+=back
+
+=head1 DATABASE LAYOUT
+
+ header_field
+ -------------------
+ id: int, unique
+ name: char(255)
+
+ message_header
+ -------------------
+ message_id: int, foreign key message.id
+ header_field_id: int, foreign key header_field.id
+ idx: int -- position in unfolded header fields
+ content: text
+
+ message
+ ------------------
+ id: int, autoincrement
+ timestamp: timestamp -- date of insertion
+ content: text -- complete message (incl. all headers)
+
+
+If you want to see the message headers in a nice form, you may create view or just
+use the following statement:
+
+ SELECT message_header.message_id,
+ header_field.name,
+ message_header.idx,
+ message_header.content
+ FROM message_header
+ JOIN header_field ON header_field.id = message_header.header_field_id
+ ORDER BY message_header.message_id, header_field.name, message_header.idx;
+
+=head1 AUTHOR
+
+Heiko Schlittermann L<hs@schlittermann.de>
+
+=cut
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/aaa.crlf Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,48 @@
+From hs@schlittermann.de Fri Jul 18 10:11:07 2008
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+ (envelope-from <hs@schlittermann.de>)
+ id 1KJl3T-0007mK-US
+ for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+Subject: Das ist =?utf-8?B?w5Z0emk=?=
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+ protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/aaa.nl Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,49 @@
+From hs@schlittermann.de Fri Jul 18 10:11:07 2008
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+ (envelope-from <hs@schlittermann.de>)
+ id 1KJl3T-0007mK-US
+ for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+To: heiko2@localhost
+Subject: Das ist =?utf-8?B?w5Z0emk=?=
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+ protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/bbb.crlf Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,47 @@
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+ (envelope-from <hs@schlittermann.de>)
+ id 1KJl3T-0007mK-US
+ for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+Subject: Das ist =?utf-8?B?w5Z0emk=?=
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+ protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/bbb.nl Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,47 @@
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+ (envelope-from <hs@schlittermann.de>)
+ id 1KJl3T-0007mK-US
+ for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+Subject: Das ist =?utf-8?B?w5Z0emk=?=
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+ protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/latin1 Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,47 @@
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+ (envelope-from <hs@schlittermann.de>)
+ id 1KJl3T-0007mK-US
+ for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+Subject: Das ist =?iso8859-1?Q?=41=3D=D6?= und sonst nichts
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+ protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+