# HG changeset patch # User heiko # Date 1216587517 0 # Node ID 68aafeeb1559896af767aec00b0b2e21366cbd97 - tags/trunk/branches diff -r 000000000000 -r 68aafeeb1559 Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Makefile Sun Jul 20 20:58:37 2008 +0000 @@ -0,0 +1,24 @@ +# Makefile +# $Id$ +# $URL$ +prefix = /usr/local +sbindir = ${prefix}/sbin +man8dir = ${prefix}/man/man8 + +.PHONY: all clean install + +all: .ok.mailscan + +install: all + install -d -m 0755 ${DESTDIR}${sbindir}/ + install -d -m 0755 ${DESTDIR}${man8dir}/ + install -m 0755 mailscan ${DESTDIR}${sbindir}/ + umask 022; \ + pod2man --section 8 mailscan | gzip >${DESTDIR}${man8dir}/mailscan.8.gz + +clean: + -rm -f .ok.* + +.ok.%: % + @perl -c $< + @touch $@ diff -r 000000000000 -r 68aafeeb1559 db.mysql --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/db.mysql Sun Jul 20 20:58:37 2008 +0000 @@ -0,0 +1,66 @@ +-- MySQL dump 10.11 +-- +-- Host: schnuffi Database: mail +-- ------------------------------------------------------ +-- Server version 5.0.32-Debian_7etch6-log + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!40101 SET NAMES utf8 */; +/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; +/*!40103 SET TIME_ZONE='+00:00' */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; +/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; + +-- +-- Table structure for table `header_field` +-- + +DROP TABLE IF EXISTS `header_field`; +CREATE TABLE `header_field` ( + `id` int(11) NOT NULL auto_increment, + `name` char(255) default NULL, + KEY `id` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + +-- +-- Table structure for table `message` +-- + +DROP TABLE IF EXISTS `message`; +CREATE TABLE `message` ( + `id` int(11) NOT NULL auto_increment, + `content` longtext, + `timestamp` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP, + KEY `id` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + +-- +-- Table structure for table `message_header` +-- + +DROP TABLE IF EXISTS `message_header`; +CREATE TABLE `message_header` ( + `message_id` int(11) NOT NULL, + `header_field_id` int(11) NOT NULL, + `idx` int(11) default NULL, + `content` text, + KEY `header_field_id` (`header_field_id`), + KEY `message_id` (`message_id`), + CONSTRAINT `message_header_ibfk_1` FOREIGN KEY (`header_field_id`) REFERENCES `header_field` (`id`) ON DELETE CASCADE, + CONSTRAINT `message_header_ibfk_2` FOREIGN KEY (`message_id`) REFERENCES `message` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; +/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + +-- Dump completed on 2008-07-20 20:54:13 diff -r 000000000000 -r 68aafeeb1559 mail2db --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mail2db Sun Jul 20 20:58:37 2008 +0000 @@ -0,0 +1,265 @@ +#! /usr/bin/perl +# © 2008 Heiko Schlittermann +# $Id$ +# $URL$ + +use strict; +use warnings; +use Getopt::Long; +use DBI; +use Pod::Usage; +use MIME::Parser; +use MIME::Words qw(:all); +use File::Temp; +use Unix::Syslog qw(:macros :subs); +use Text::Iconv; +use if $ENV{DEBUG} => "Data::Dumper"; + +my $DSN = "DBI:mysql:mail:hostname=schnuffi"; +my @CREDENTIALS = qw(mail PWUle5Eimi); + +my $OUTPUT_CHARSET = "UTF8"; +my $DEFAULT_INPUT_CHARSET = "ASCII"; + +my $opt_help = 0; +my $opt_man = 0; +my $opt_dry = 0; + +my $DBH = + DBI->connect($DSN, @CREDENTIALS, + { RaiseError => 1, FetchHashKeyName => "NAME_lc", AutoCommit => 0 }) + or die; + +# read the message into tmpfile (passed as arg) and return "Message" object +sub get_message(); +sub get_headers($); +sub decode_headers($$); + +MAIN: { + openlog("mail2db", LOG_PID | (-t STDERR ? LOG_PERROR : 0), LOG_MAIL); + GetOptions( + "h|help" => \$opt_help, + "m|man" => \$opt_man, + "n|dry" => \$opt_dry, + ) or pod2usage(); + + pod2usage(-verbose => 1, -exitval => 0) if $opt_help; + pod2usage(-verbose => 2, -exitval => 0) if $opt_man; + + my ($tmpfile, $message) = get_message(); + decode_headers($message, $OUTPUT_CHARSET); + + # $message->print; + + # now we have the mime entity with completly decoded + # headers and an unmodified body. the body we do not need + # actually. + + # database job starts + + my $insert_message = $DBH->prepare(qq{ + INSERT INTO message (id, content, timestamp) VALUES(NULL, ?, NOW()) + }); + + my $insert_header_id = $DBH->prepare(qq{ + INSERT INTO header_field (id, name) VALUES(NULL, ?) + }); + my $insert_message_header = $DBH->prepare(qq{ + INSERT INTO message_header + (message_id, header_field_id, idx, content) + VALUES(?, (SELECT id FROM header_field WHERE name = ?), ?, ?) + }); + + # first insert the message and get the database message id + my $msg_id; + { + seek($tmpfile, 0, 0); + local $/ = undef; + $insert_message->execute(<$tmpfile>); + $msg_id = $DBH->last_insert_id(undef, undef, message => "id"); + syslog(LOG_DEBUG, "message id: $msg_id"); + } + + # now insert the message headers + foreach my $tag (map { lc } $message->head->tags) { + for (my $idx = 0 ; + my $header = $message->head->get($tag, $idx) ; ++$idx) + { + $header =~ s/\s*$//; + syslog(LOG_DEBUG, "$tag\[$idx]\n"); + + # first we'll give it a try, but it may fail, because the + # header_field.id is missing + eval { + local $insert_message_header->{PrintError} = 0; + $insert_message_header->execute($msg_id, $tag, $idx, $header); + }; + # if there was a problem, we'll try to insert the header_field + if ($@) { + $insert_header_id->execute($tag); + $insert_message_header->execute($msg_id, $tag, $idx, $header); + } + } + } + + $DBH->commit if not $opt_dry; + +} + +sub get_message() { + + # we'll create a tmp file containing the complete message + # if speed matters we should use a ram disk. + # unfortunely the MIME::Parser may temporary files too + my $tmpfile = new File::Temp(TEMPLATE => "mail2db-XXXXXX"); + local $_ = <>; + if (!/^From\s/) { + my $nl = /\r?\n$/; + print {$tmpfile} "From - @{[scalar localtime]}$nl", $_; + } + $/ = undef; + print {$tmpfile} $_, <>; + $tmpfile->autoflush(1); + seek($tmpfile, 0, 0); + + my $parser = new MIME::Parser or die "internal error"; + $parser->output_to_core(1); + my $msg = $parser->parse($tmpfile); + + return ($tmpfile, $msg); +} + +{ + my %converter; + + sub decode_headers($$) { + my ($msg, $charset) = @_; + + $msg->head->unfold; + my %converter; + foreach my $tag (map { lc } $msg->head->tags) { + for (my $i = 0 ; my $h = $msg->head->get($tag, $i) ; ++$i) { + my $line = ""; + foreach my $w (decode_mimewords $h) { + if (!defined $w->[1]) { + $line .= $w->[0]; + next; + } + $converter{ $w->[1] } ||= new Text::Iconv($w->[1], $charset) + or die "Can't create converter $w->[1] => $charset\n"; + $line .= $converter{ $w->[1] }->convert($w->[0]); + } + $msg->head->replace($tag, $line, $i); + } + } + } +} + +{ + + # this code is not used anymore! + my %converter; + + sub get_headers($) { + my $msg = shift; + my %header; + + $msg->head->unfold; + + # go through all header fields + foreach my $tag (map { lc } $msg->head->tags) { + + # but we may have more than one... + foreach my $t ($msg->head->get_all($tag)) { + push @{ $header{$tag} }, ""; + my $current = \$header{$tag}[-1]; + foreach my $w (decode_mimewords $t) { + if (!defined $w->[1]) { + $$current .= $w->[0]; + next; + } + $converter{ $w->[1] } ||= + new Text::Iconv($w->[1], $OUTPUT_CHARSET) + or die + "Can't create converter $w->[1] => $OUTPUT_CHARSET\n"; + $$current .= $converter{ $w->[1] }->convert($w->[0]); + } + $$current =~ s/\s+$//; + } + } + return %header; + } +} + +=head1 NAME + + mail2db - scan in/outgoing mails and save some in database + +=head1 SYNOPSIS + + mail2db [-n|--dry] + + mail2db [-h|--help] + mail2db [-m|--man] + +=head1 DESCRIPTION + +B reads a RFC822 message from stdin and saves it into a database. +For more information please the the source code itself. + +=head1 OPTIONS + +=over 4 + +=item B<-n>|B<--dry> + +Do not modify the database, just start a transaction but do not commit it. +(This feature needs a database with transactions (as MySQL 5.x with InnoDB tables +is). [default: off] + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +Short help resp. manpage. + +=back + +=head1 DATABASE LAYOUT + + header_field + ------------------- + id: int, unique + name: char(255) + + message_header + ------------------- + message_id: int, foreign key message.id + header_field_id: int, foreign key header_field.id + idx: int -- position in unfolded header fields + content: text + + message + ------------------ + id: int, autoincrement + timestamp: timestamp -- date of insertion + content: text -- complete message (incl. all headers) + + +If you want to see the message headers in a nice form, you may create view or just +use the following statement: + + SELECT message_header.message_id, + header_field.name, + message_header.idx, + message_header.content + FROM message_header + JOIN header_field ON header_field.id = message_header.header_field_id + ORDER BY message_header.message_id, header_field.name, message_header.idx; + +=head1 AUTHOR + +Heiko Schlittermann L + +=cut + diff -r 000000000000 -r 68aafeeb1559 testmails/aaa.crlf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testmails/aaa.crlf Sun Jul 20 20:58:37 2008 +0000 @@ -0,0 +1,48 @@ +From hs@schlittermann.de Fri Jul 18 10:11:07 2008 +Envelope-to: heiko@localhost +Received: from heiko by jumper.site with local (Exim 4.68) + (envelope-from ) + id 1KJl3T-0007mK-US + for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200 +Received: from hallo ballo by x with y +Date: Fri, 18 Jul 2008 10:11:07 +0200 +From: Heiko Schlittermann +To: heiko@localhost +Subject: Das ist =?utf-8?B?w5Z0emk=?= +Message-ID: <20080718081107.GG21790@schlittermann.de> +MIME-Version: 1.0 +Content-Type: multipart/signed; micalg=pgp-sha1; + protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ" +Content-Disposition: inline +X-Phone: +49.172.7909055 / SMS welcome +Organization: schlittermann -- internet & unix support +User-Agent: Mutt/1.5.13 (2006-08-11) +Status: RO +Content-Length: 531 +Lines: 24 + + +--0zDq8CFkxn2hi9iJ +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + +Hallo, das ist =C3=96tzi +--=20 +Heiko + +--0zDq8CFkxn2hi9iJ +Content-Type: application/pgp-signature; name="signature.asc" +Content-Description: Digital signature +Content-Disposition: inline + +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.6 (GNU/Linux) + +iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym +YjjtDpEp5uyNQzuv6CgOjEM= +=bmBu +-----END PGP SIGNATURE----- + +--0zDq8CFkxn2hi9iJ-- + diff -r 000000000000 -r 68aafeeb1559 testmails/aaa.nl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testmails/aaa.nl Sun Jul 20 20:58:37 2008 +0000 @@ -0,0 +1,49 @@ +From hs@schlittermann.de Fri Jul 18 10:11:07 2008 +Envelope-to: heiko@localhost +Received: from heiko by jumper.site with local (Exim 4.68) + (envelope-from ) + id 1KJl3T-0007mK-US + for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200 +Received: from hallo ballo by x with y +Date: Fri, 18 Jul 2008 10:11:07 +0200 +From: Heiko Schlittermann +To: heiko@localhost +To: heiko2@localhost +Subject: Das ist =?utf-8?B?w5Z0emk=?= +Message-ID: <20080718081107.GG21790@schlittermann.de> +MIME-Version: 1.0 +Content-Type: multipart/signed; micalg=pgp-sha1; + protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ" +Content-Disposition: inline +X-Phone: +49.172.7909055 / SMS welcome +Organization: schlittermann -- internet & unix support +User-Agent: Mutt/1.5.13 (2006-08-11) +Status: RO +Content-Length: 531 +Lines: 24 + + +--0zDq8CFkxn2hi9iJ +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + +Hallo, das ist =C3=96tzi +--=20 +Heiko + +--0zDq8CFkxn2hi9iJ +Content-Type: application/pgp-signature; name="signature.asc" +Content-Description: Digital signature +Content-Disposition: inline + +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.6 (GNU/Linux) + +iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym +YjjtDpEp5uyNQzuv6CgOjEM= +=bmBu +-----END PGP SIGNATURE----- + +--0zDq8CFkxn2hi9iJ-- + diff -r 000000000000 -r 68aafeeb1559 testmails/bbb.crlf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testmails/bbb.crlf Sun Jul 20 20:58:37 2008 +0000 @@ -0,0 +1,47 @@ +Envelope-to: heiko@localhost +Received: from heiko by jumper.site with local (Exim 4.68) + (envelope-from ) + id 1KJl3T-0007mK-US + for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200 +Received: from hallo ballo by x with y +Date: Fri, 18 Jul 2008 10:11:07 +0200 +From: Heiko Schlittermann +To: heiko@localhost +Subject: Das ist =?utf-8?B?w5Z0emk=?= +Message-ID: <20080718081107.GG21790@schlittermann.de> +MIME-Version: 1.0 +Content-Type: multipart/signed; micalg=pgp-sha1; + protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ" +Content-Disposition: inline +X-Phone: +49.172.7909055 / SMS welcome +Organization: schlittermann -- internet & unix support +User-Agent: Mutt/1.5.13 (2006-08-11) +Status: RO +Content-Length: 531 +Lines: 24 + + +--0zDq8CFkxn2hi9iJ +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + +Hallo, das ist =C3=96tzi +--=20 +Heiko + +--0zDq8CFkxn2hi9iJ +Content-Type: application/pgp-signature; name="signature.asc" +Content-Description: Digital signature +Content-Disposition: inline + +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.6 (GNU/Linux) + +iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym +YjjtDpEp5uyNQzuv6CgOjEM= +=bmBu +-----END PGP SIGNATURE----- + +--0zDq8CFkxn2hi9iJ-- + diff -r 000000000000 -r 68aafeeb1559 testmails/bbb.nl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testmails/bbb.nl Sun Jul 20 20:58:37 2008 +0000 @@ -0,0 +1,47 @@ +Envelope-to: heiko@localhost +Received: from heiko by jumper.site with local (Exim 4.68) + (envelope-from ) + id 1KJl3T-0007mK-US + for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200 +Received: from hallo ballo by x with y +Date: Fri, 18 Jul 2008 10:11:07 +0200 +From: Heiko Schlittermann +To: heiko@localhost +Subject: Das ist =?utf-8?B?w5Z0emk=?= +Message-ID: <20080718081107.GG21790@schlittermann.de> +MIME-Version: 1.0 +Content-Type: multipart/signed; micalg=pgp-sha1; + protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ" +Content-Disposition: inline +X-Phone: +49.172.7909055 / SMS welcome +Organization: schlittermann -- internet & unix support +User-Agent: Mutt/1.5.13 (2006-08-11) +Status: RO +Content-Length: 531 +Lines: 24 + + +--0zDq8CFkxn2hi9iJ +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + +Hallo, das ist =C3=96tzi +--=20 +Heiko + +--0zDq8CFkxn2hi9iJ +Content-Type: application/pgp-signature; name="signature.asc" +Content-Description: Digital signature +Content-Disposition: inline + +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.6 (GNU/Linux) + +iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym +YjjtDpEp5uyNQzuv6CgOjEM= +=bmBu +-----END PGP SIGNATURE----- + +--0zDq8CFkxn2hi9iJ-- + diff -r 000000000000 -r 68aafeeb1559 testmails/latin1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/testmails/latin1 Sun Jul 20 20:58:37 2008 +0000 @@ -0,0 +1,47 @@ +Envelope-to: heiko@localhost +Received: from heiko by jumper.site with local (Exim 4.68) + (envelope-from ) + id 1KJl3T-0007mK-US + for heiko@localhost; Fri, 18 Jul 2008 10:11:07 +0200 +Received: from hallo ballo by x with y +Date: Fri, 18 Jul 2008 10:11:07 +0200 +From: Heiko Schlittermann +To: heiko@localhost +Subject: Das ist =?iso8859-1?Q?=41=3D=D6?= und sonst nichts +Message-ID: <20080718081107.GG21790@schlittermann.de> +MIME-Version: 1.0 +Content-Type: multipart/signed; micalg=pgp-sha1; + protocol="application/pgp-signature"; boundary="0zDq8CFkxn2hi9iJ" +Content-Disposition: inline +X-Phone: +49.172.7909055 / SMS welcome +Organization: schlittermann -- internet & unix support +User-Agent: Mutt/1.5.13 (2006-08-11) +Status: RO +Content-Length: 531 +Lines: 24 + + +--0zDq8CFkxn2hi9iJ +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + +Hallo, das ist =C3=96tzi +--=20 +Heiko + +--0zDq8CFkxn2hi9iJ +Content-Type: application/pgp-signature; name="signature.asc" +Content-Description: Digital signature +Content-Disposition: inline + +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.6 (GNU/Linux) + +iD8DBQFIgFAb7k6smEjQNZsRAlIgAJwIIdUILclCSu58kI2hmP10o+6mUQCgqFym +YjjtDpEp5uyNQzuv6CgOjEM= +=bmBu +-----END PGP SIGNATURE----- + +--0zDq8CFkxn2hi9iJ-- +