- tags/trunk/branches
authorheiko
Sun, 20 Jul 2008 20:58:37 +0000
changeset 0 68aafeeb1559
child 1 2638ae657240
- tags/trunk/branches
Makefile
db.mysql
mail2db
testmails/aaa.crlf
testmails/aaa.nl
testmails/bbb.crlf
testmails/bbb.nl
testmails/latin1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile	Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,24 @@
+# Makefile
+# $Id$
+# $URL$
+prefix = /usr/local
+sbindir = ${prefix}/sbin
+man8dir = ${prefix}/man/man8
+
+.PHONY:	all clean install
+
+all:			.ok.mailscan
+
+install:		all
+				install -d -m 0755 ${DESTDIR}${sbindir}/
+				install -d -m 0755 ${DESTDIR}${man8dir}/
+				install -m 0755 mailscan ${DESTDIR}${sbindir}/
+				umask 022; \
+				pod2man --section 8 mailscan | gzip >${DESTDIR}${man8dir}/mailscan.8.gz
+
+clean:
+		-rm -f .ok.*
+
+.ok.%:			%
+				@perl -c $<
+				@touch $@
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/db.mysql	Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,66 @@
+-- MySQL dump 10.11
+--
+-- Host: schnuffi    Database: mail
+-- ------------------------------------------------------
+-- Server version	5.0.32-Debian_7etch6-log
+
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+/*!40101 SET NAMES utf8 */;
+/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
+/*!40103 SET TIME_ZONE='+00:00' */;
+/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
+/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
+/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
+/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
+
+--
+-- Table structure for table `header_field`
+--
+
+DROP TABLE IF EXISTS `header_field`;
+CREATE TABLE `header_field` (
+  `id` int(11) NOT NULL auto_increment,
+  `name` char(255) default NULL,
+  KEY `id` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+--
+-- Table structure for table `message`
+--
+
+DROP TABLE IF EXISTS `message`;
+CREATE TABLE `message` (
+  `id` int(11) NOT NULL auto_increment,
+  `content` longtext,
+  `timestamp` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
+  KEY `id` (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+--
+-- Table structure for table `message_header`
+--
+
+DROP TABLE IF EXISTS `message_header`;
+CREATE TABLE `message_header` (
+  `message_id` int(11) NOT NULL,
+  `header_field_id` int(11) NOT NULL,
+  `idx` int(11) default NULL,
+  `content` text,
+  KEY `header_field_id` (`header_field_id`),
+  KEY `message_id` (`message_id`),
+  CONSTRAINT `message_header_ibfk_1` FOREIGN KEY (`header_field_id`) REFERENCES `header_field` (`id`) ON DELETE CASCADE,
+  CONSTRAINT `message_header_ibfk_2` FOREIGN KEY (`message_id`) REFERENCES `message` (`id`) ON DELETE CASCADE
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
+
+/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
+/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
+/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
+
+-- Dump completed on 2008-07-20 20:54:13
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mail2db	Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,265 @@
+#! /usr/bin/perl
+# © 2008 Heiko Schlittermann <hs@schlittermann.de>
+# $Id$
+# $URL$
+
+use strict;
+use warnings;
+use Getopt::Long;
+use DBI;
+use Pod::Usage;
+use MIME::Parser;
+use MIME::Words qw(:all);
+use File::Temp;
+use Unix::Syslog qw(:macros :subs);
+use Text::Iconv;
+use if $ENV{DEBUG} => "Data::Dumper";
+
+my $DSN         = "DBI:mysql:mail:hostname=schnuffi";
+my @CREDENTIALS = qw(mail PWUle5Eimi);
+
+my $OUTPUT_CHARSET        = "UTF8";
+my $DEFAULT_INPUT_CHARSET = "ASCII";
+
+my $opt_help = 0;
+my $opt_man  = 0;
+my $opt_dry  = 0;
+
+my $DBH =
+  DBI->connect($DSN, @CREDENTIALS,
+    { RaiseError => 1, FetchHashKeyName => "NAME_lc", AutoCommit => 0 })
+  or die;
+
+# read the message into tmpfile (passed as arg) and return "Message" object
+sub get_message();
+sub get_headers($);
+sub decode_headers($$);
+
+MAIN: {
+	openlog("mail2db", LOG_PID | (-t STDERR ? LOG_PERROR : 0), LOG_MAIL);
+    GetOptions(
+        "h|help" => \$opt_help,
+        "m|man"  => \$opt_man,
+        "n|dry"  => \$opt_dry,
+    ) or pod2usage();
+
+    pod2usage(-verbose => 1, -exitval => 0) if $opt_help;
+    pod2usage(-verbose => 2, -exitval => 0) if $opt_man;
+
+    my ($tmpfile, $message) = get_message();
+    decode_headers($message, $OUTPUT_CHARSET);
+
+    # $message->print;
+
+    # now we have the mime entity with completly decoded
+    # headers and an unmodified body. the body we do not need
+    # actually.
+
+    # database job starts
+
+    my $insert_message = $DBH->prepare(qq{
+			INSERT INTO message (id, content, timestamp) VALUES(NULL, ?, NOW())
+	});
+
+    my $insert_header_id = $DBH->prepare(qq{
+			INSERT INTO header_field (id, name) VALUES(NULL, ?)
+	});
+    my $insert_message_header = $DBH->prepare(qq{
+			INSERT INTO message_header 
+			(message_id, header_field_id, idx, content) 
+			VALUES(?, (SELECT id FROM header_field WHERE name = ?), ?, ?)
+	});
+
+	# first insert the message and get the database message id
+    my $msg_id;
+    {
+        seek($tmpfile, 0, 0);
+        local $/ = undef;
+        $insert_message->execute(<$tmpfile>);
+        $msg_id = $DBH->last_insert_id(undef, undef, message => "id");
+		syslog(LOG_DEBUG, "message id: $msg_id");
+    }
+
+	# now insert the message headers
+    foreach my $tag (map { lc } $message->head->tags) {
+        for (my $idx = 0 ;
+            my $header = $message->head->get($tag, $idx) ; ++$idx)
+        {
+            $header =~ s/\s*$//;
+			syslog(LOG_DEBUG, "$tag\[$idx]\n");
+
+			# first we'll give it a try, but it may fail, because the
+			# header_field.id is missing
+			eval {
+				local $insert_message_header->{PrintError} = 0;
+				$insert_message_header->execute($msg_id, $tag, $idx, $header);
+			};
+			# if there was a problem, we'll try to insert the header_field
+			if ($@) {
+				$insert_header_id->execute($tag);
+				$insert_message_header->execute($msg_id, $tag, $idx, $header);
+			}
+        }
+    }
+
+	$DBH->commit if not $opt_dry;
+
+}
+
+sub get_message() {
+
+    # we'll create a tmp file containing the complete message
+    # if speed matters we should use a ram disk.
+    # unfortunely the MIME::Parser may temporary files too
+    my $tmpfile = new File::Temp(TEMPLATE => "mail2db-XXXXXX");
+    local $_ = <>;
+    if (!/^From\s/) {
+        my $nl = /\r?\n$/;
+        print {$tmpfile} "From - @{[scalar localtime]}$nl", $_;
+    }
+    $/ = undef;
+    print {$tmpfile} $_, <>;
+    $tmpfile->autoflush(1);
+    seek($tmpfile, 0, 0);
+
+    my $parser = new MIME::Parser or die "internal error";
+    $parser->output_to_core(1);
+    my $msg = $parser->parse($tmpfile);
+
+    return ($tmpfile, $msg);
+}
+
+{
+    my %converter;
+
+    sub decode_headers($$) {
+        my ($msg, $charset) = @_;
+
+        $msg->head->unfold;
+        my %converter;
+        foreach my $tag (map { lc } $msg->head->tags) {
+            for (my $i = 0 ; my $h = $msg->head->get($tag, $i) ; ++$i) {
+                my $line = "";
+                foreach my $w (decode_mimewords $h) {
+                    if (!defined $w->[1]) {
+                        $line .= $w->[0];
+                        next;
+                    }
+                    $converter{ $w->[1] } ||= new Text::Iconv($w->[1], $charset)
+                      or die "Can't create converter $w->[1] => $charset\n";
+                    $line .= $converter{ $w->[1] }->convert($w->[0]);
+                }
+                $msg->head->replace($tag, $line, $i);
+            }
+        }
+    }
+}
+
+{
+
+    # this code is not used anymore!
+    my %converter;
+
+    sub get_headers($) {
+        my $msg = shift;
+        my %header;
+
+        $msg->head->unfold;
+
+        # go through all header fields
+        foreach my $tag (map { lc } $msg->head->tags) {
+
+            # but we may have more than one...
+            foreach my $t ($msg->head->get_all($tag)) {
+                push @{ $header{$tag} }, "";
+                my $current = \$header{$tag}[-1];
+                foreach my $w (decode_mimewords $t) {
+                    if (!defined $w->[1]) {
+                        $$current .= $w->[0];
+                        next;
+                    }
+                    $converter{ $w->[1] } ||=
+                      new Text::Iconv($w->[1], $OUTPUT_CHARSET)
+                      or die
+                      "Can't create converter $w->[1] => $OUTPUT_CHARSET\n";
+                    $$current .= $converter{ $w->[1] }->convert($w->[0]);
+                }
+                $$current =~ s/\s+$//;
+            }
+        }
+        return %header;
+    }
+}
+
+=head1 NAME
+
+ mail2db - scan in/outgoing mails and save some in database
+
+=head1 SYNOPSIS
+
+ mail2db [-n|--dry]
+
+ mail2db [-h|--help]
+ mail2db [-m|--man]
+
+=head1 DESCRIPTION
+
+B<mail2db> reads a RFC822 message from stdin and saves it into a database.
+For more information please the the source code itself.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-n>|B<--dry>
+
+Do not modify the database, just start a transaction but do not commit it.
+(This feature needs a database with transactions (as MySQL 5.x with InnoDB tables
+is). [default: off]
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+Short help resp. manpage.
+
+=back
+
+=head1 DATABASE LAYOUT
+
+	header_field
+	-------------------
+	id:     int, unique
+	name:   char(255)
+
+	message_header
+	-------------------
+	message_id:         int, foreign key message.id
+	header_field_id:    int, foreign key header_field.id
+	idx:                int             -- position in unfolded header fields
+	content:            text
+
+	message
+	------------------
+	id:             int, autoincrement
+	timestamp:      timestamp           -- date of insertion
+	content:        text                -- complete message (incl. all headers)
+
+
+If you want to see the message headers in a nice form, you may create view or just
+use the following statement:
+
+	SELECT message_header.message_id, 
+		   header_field.name, 
+		   message_header.idx, 
+		   message_header.content 
+		FROM message_header 
+		JOIN header_field ON header_field.id = message_header.header_field_id 
+		ORDER BY message_header.message_id, header_field.name, message_header.idx;
+
+=head1 AUTHOR
+
+Heiko Schlittermann L<hs@schlittermann.de>
+
+=cut
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/aaa.crlf	Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,48 @@
+From hs@schlittermann.de Fri Jul 18 10:11:07 2008
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+        (envelope-from <hs@schlittermann.de>)
+        id 1KJl3T-0007mK-US
+        for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+Subject: Das ist =?utf-8?B?w5Z0emk=?=
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+        protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/aaa.nl	Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,49 @@
+From hs@schlittermann.de Fri Jul 18 10:11:07 2008
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+        (envelope-from <hs@schlittermann.de>)
+        id 1KJl3T-0007mK-US
+        for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+To: heiko2@localhost
+Subject: Das ist =?utf-8?B?w5Z0emk=?=
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+        protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/bbb.crlf	Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,47 @@
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+        (envelope-from <hs@schlittermann.de>)
+        id 1KJl3T-0007mK-US
+        for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+Subject: Das ist =?utf-8?B?w5Z0emk=?=
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+        protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/bbb.nl	Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,47 @@
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+        (envelope-from <hs@schlittermann.de>)
+        id 1KJl3T-0007mK-US
+        for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+Subject: Das ist =?utf-8?B?w5Z0emk=?=
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+        protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/testmails/latin1	Sun Jul 20 20:58:37 2008 +0000
@@ -0,0 +1,47 @@
+Envelope-to: heiko@localhost
+Received: from heiko by jumper.site with local (Exim 4.68)
+        (envelope-from <hs@schlittermann.de>)
+        id 1KJl3T-0007mK-US
+        for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200
+Received: from hallo ballo by x with y
+Date: Fri, 18 Jul 2008 10:11:07 +0200
+From: Heiko Schlittermann <hs@schlittermann.de>
+To: heiko@localhost
+Subject: Das ist =?iso8859-1?Q?=41=3D=D6?= und sonst nichts
+Message-ID: <20080718081107.GG21790@schlittermann.de>
+MIME-Version: 1.0
+Content-Type: multipart/signed; micalg=pgp-sha1;
+        protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ"
+Content-Disposition: inline
+X-Phone: +49.172.7909055 / SMS welcome
+Organization: schlittermann -- internet & unix support
+User-Agent: Mutt/1.5.13 (2006-08-11)
+Status: RO
+Content-Length: 531
+Lines: 24
+
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+Hallo, das ist =C3=96tzi
+--=20
+Heiko
+
+--0zDq8CFkxn2hi9iJ
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: Digital signature
+Content-Disposition: inline
+
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.6 (GNU/Linux)
+
+iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym
+YjjtDpEp5uyNQzuv6CgOjEM=
+=bmBu
+-----END PGP SIGNATURE-----
+
+--0zDq8CFkxn2hi9iJ--
+