- Prokejtname "mimecut" wieder verwendet
authorschulze
Fri, 07 Sep 2007 15:30:08 +0000
changeset 16 0e1c0994309a
parent 15 9482d3366306
child 17 e9aa9cb9f61f
- Prokejtname "mimecut" wieder verwendet
Makefile
hs12
mimecut.pl
t/10-x.t
--- a/Makefile	Fri Sep 07 15:25:39 2007 +0000
+++ b/Makefile	Fri Sep 07 15:30:08 2007 +0000
@@ -1,4 +1,4 @@
-BIN = mimecut hs12
+BIN = mimecut
 CLEANFILES = $(BIN)
 
 .PHONY: all clean install test
--- a/hs12	Fri Sep 07 15:25:39 2007 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,169 +0,0 @@
-#! /usr/bin/perl
-# $Id$
-# $URL$
-#
-# ** Just proof of concept ** to see if we really need to decode all the
-# mime parts.
-#
-
-use strict;
-use warnings;
-
-use Fatal qw(:void select);
-use File::Temp qw(tempfile);
-use if $ENV{DEBUG} => "Smart::Comments";
-use FindBin qw($Bin);
-
-sub print_message(*@);
-sub read_message();
-sub pass_mime($$);
-sub forward_to_boundary($*);
-sub read_header(*);
-
-#
-sub process($*;@);
-my $confdir = -f "$Bin/.build" ? $Bin : "/etc/$0";
-my @mimes;
-
-$SIG{__WARN__} = sub { print STDERR "### ", @_ };
-
-MAIN: {
-
-	open ( my $fh, "< $confdir/mimes.conf")
-		or warn "can't read config!\n";
-	my @mimes = map { chomp; $_ } grep !/(?:^\s*#|^\s*$)/, <$fh>;
-	
-    # create an r/o tmp file containing the message  for sequential
-    # processing and optional failback in face of some processing error
-    my $message = read_message();
-
-    # during processing everything is printed into some tmp file
-    # - this way we can abort processing at any time and just send
-    # the above temporary file down the river
-    my $tmpout = tempfile();
-    my $stdout = select $tmpout;
-
-    # now we start processing but at the beginning - of course
-    seek($message, 0, 0);
-    process(\@mimes, $message, boundary => undef);
-
-    # everything is done, probably some rest is still unprocessed (some
-    # epilogue, but this shouldn't be a problem at all
-    {
-        local $/ = \10240;
-        if ($tmpout) {
-            seek($tmpout, 0, 0);
-            select $stdout;
-            print while <$tmpout>;
-        }
-        print while <$message>;
-    }
-
-    exit 0;
-}
-
-sub print_message(*@) {
-    my ($m, %arg) = @_;
-
-    while (<$m>) {
-        print;
-        last if $arg{to} and /^--\Q$arg{to}\E/;
-    }
-}
-
-sub process($*;@) {
-    my ($mimes, $m, %arg)    = @_;
-    my ($header, %header) = read_header($m);
-    my ($type, $boundary);
-
-    if ($header{"content-type"}) {
-        ($type) = ($header{"content-type"} =~ /^([^;]*)/);
-        (undef, $boundary)
-            = ($header{"content-type"} =~ /boundary=(['"])(.*?)\1/);
-        ### h{content-type}:  $header{"content-type"}
-        ### type:	      $type
-        ### bound:            $boundary
-    }
-
-    $boundary ||= $arg{boundary};
-
-    if (not $type or pass_mime($type, $mimes)) {
-        warn "passing: " . ($type ? $type : "no mime type") . "\n";
-        print $header;
-        print_message($m, to => $boundary);
-        return;
-    }
-
-    if ($type =~ m{^multipart/}) {
-        warn "forward to next multipart boundary: $boundary\n";
-        print $header;
-        print_message($m, to => $boundary);
-
-        while (not eof($m)) {
-            process($mimes, $m, boundary => $boundary);
-        }
-
-	return;
-    }
-
-    warn "removed: $type\n";
-
-    my ($eol) = ($header =~ /(\s*)$/);
-    $header =~ s/\s*$//;
-    $header =~ s/^/-- /gm;
-
-    print "Content-Type: text/plain" . $eol x 2
-	. "Content removed (" . localtime() . ")$eol"
-	. $header
-	. $eol;
-
-    while (<$m>) {
-	if (/^--\Q$boundary\E/) {
-	    print;
-	    last;
-	}
-    }
-
-}
-
-sub pass_mime($$) {
-	my ($type, $mimes) = @_;
-    local $_ = $type;
-	my $re = join "|", map { $a = $_; $a =~ s/([^\da-z])/\\$1/gi; $a } @$mimes;
-	return m{$re};
-}
-
-sub read_message() {
-    my $tmp = tempfile();
-
-    local $/ = \102400;
-    print {$tmp} $_ while <>;
-    chmod 0400, $tmp or die "Can't fchmod on tmpfile: $!\n";
-
-    return $tmp;
-}
-
-# in:	current message file handle
-# out:	($orignal_header, %parsed_header)
-sub read_header(*) {
-    my ($msg) = @_;
-    my $h = "";
-
-    while (<$msg>) {
-        $h .= $_;
-        last if /^\s*$/m;
-    }
-
-    $_ = $h;    # unmodified header (excl. $from)
-
-    ### $_
-
-    s/\r?\n\s+(?=\S)/ /gm;    # continuation lines
-    s/^(\S+):/\L$1:/gm;       # header fields to lower case
-
-    return ($h,
-            map { ($a = $_) =~ s/\s*$//; $a }
-                ":unix_from:" => split(/^(\S+):\s*/m, $_));
-}
-__END__
-# vim:ts=4
--- a/mimecut.pl	Fri Sep 07 15:25:39 2007 +0000
+++ b/mimecut.pl	Fri Sep 07 15:30:08 2007 +0000
@@ -1,434 +1,169 @@
 #! /usr/bin/perl
 # $Id$
+# $URL$
+#
+# ** Just proof of concept ** to see if we really need to decode all the
+# mime parts.
+#
 
 use strict;
 use warnings;
-use MIME::Parser;
-use MIME::Entity;
-use MIME::Head;
-use Getopt::Long;
-use File::Basename;
+
+use Fatal qw(:void select);
+use File::Temp qw(tempfile);
+use if $ENV{DEBUG} => "Smart::Comments";
 use FindBin qw($Bin);
-use if $ENV{DEBUG} => "Smart::Comments", $ENV{DEBUG};
-
-BEGIN {
-    delete @ENV{ "LANG", grep /^LC_/, keys %ENV };
-}
-
-my $ME      = basename $0;
-my $MEPID   = $$;
-my $CONFDIR = -f "$Bin/.build" ? $Bin : "/etc/$0";
-my $LOGDIR  = '.';
-my $HELP    = <<EOF;
-Usage:
-    $ME [options] < mail
-    $ME [actions]
-
-Options:
-    -d, --debug
-        Output detailed information how the mail and its parts is parsed using
-        STDERR for output.
-
-    -l, --logfile
-        Writes the debug information to mimcut.log file instead of printing to
-        STDERR.
-
-    -t, --text
-        With this mode only plain/text mimes will pass the cut.
 
-    -f, --fake
-        Do not output anything to STDOUT. (This enables debugging but can't be
-        used with log and/or strain mode!)
-
-    -s, --strain
-        Will output the mail instantly and unchanged to STDOUT.
-
-Actions:
-    -m, --mimes
-        List each mime which is to be kept from mimes.conf.
-
-    -v, --vips
-        List allowed senders/RCPT's/CC's/BCC's who will get unchanged mail.
-
-    -p, --pod
-        In detail information about the way this script works.
+sub print_message(*@);
+sub read_message();
+sub pass_mime($$);
+sub forward_to_boundary($*);
+sub read_header(*);
 
-    -h, --help
-        Show this help screen and exit.
-EOF
-
-my $opt_debug;
-my $opt_log;
-my $opt_text;
-my $opt_fake;
-my $opt_strain;
-my $opt_mimes;
-my $opt_vips;
-my $opt_help;
-my $opt_pod;
-
-my $conf_mimes = "$CONFDIR/mimes.conf";
-my $conf_vips  = "$CONFDIR/vips.conf";
-my @vips;
+#
+sub process($*;@);
+my $confdir = -f "$Bin/.build" ? $Bin : "/etc/$0";
 my @mimes;
 
-my $logfile;
-
-sub read_conf($);
-sub new_parser();
-sub get_mail_header($);
-sub new_mail(%);
-sub new_mail_send($);
-sub check_vip($%);
-sub check_multipart($$$$);
-sub check_part($$$$$);
-sub replace_part($$$);
+$SIG{__WARN__} = sub { print STDERR "### ", @_ };
 
 MAIN: {
-    Getopt::Long::Configure("bundling");
-    GetOptions("d|debug"  => \$opt_debug,
-               "l|log"    => \$opt_log,
-               "t|text"   => \$opt_text,
-               "s|strain" => \$opt_strain,
-               "f|fake"   => \$opt_fake,
-               "m|mimes"  => \$opt_mimes,
-               "v|vips"   => \$opt_vips,
-               "h|help"   => \$opt_help,
-               "p|pod"    => \$opt_pod,
-    ) or die "$ME: try\n  $ME --help\n";
 
-    if ($opt_help) {
-        print $HELP and exit 0;
-    }
-    if ($opt_pod) {
-        system("pod2usage -v 3 $0") and exit 0;
-    }
+	open ( my $fh, "< $confdir/mimes.conf")
+		or warn "can't read config!\n";
+	my @mimes = map { chomp; $_ } grep !/(?:^\s*#|^\s*$)/, <$fh>;
+	
+    # create an r/o tmp file containing the message  for sequential
+    # processing and optional failback in face of some processing error
+    my $message = read_message();
+
+    # during processing everything is printed into some tmp file
+    # - this way we can abort processing at any time and just send
+    # the above temporary file down the river
+    my $tmpout = tempfile();
+    my $stdout = select $tmpout;
 
-    if ($opt_fake) { $opt_debug = 1 }
-    if ($opt_log) {
-        die "$ME: can't fake in log mode!\n"  if $opt_fake;
-        die "$ME: can't debug in log mode!\n" if $opt_debug;
-        open(STDERR, ">> $LOGDIR/$ME.log")
-            or die "$ME: can't open logfile!\n";
-        $opt_debug = 1;
-    }
-    elsif (!$opt_debug) { open(STDERR, ">/dev/null") }
+    # now we start processing but at the beginning - of course
+    seek($message, 0, 0);
+    process(\@mimes, $message, boundary => undef);
 
-    if ($opt_strain) {
-        if ($opt_fake) {
-            warn "$ME: can't fake in strain mode!\n";
-            exit 0;
+    # everything is done, probably some rest is still unprocessed (some
+    # epilogue, but this shouldn't be a problem at all
+    {
+        local $/ = \10240;
+        if ($tmpout) {
+            seek($tmpout, 0, 0);
+            select $stdout;
+            print while <$tmpout>;
         }
-        if ($opt_text) {
-            warn "$ME: can't use text-only in strain mode!\n";
-            exit 0;
-        }
-    }
-
-    @mimes = read_conf($conf_mimes) unless $opt_text;
-    @vips = read_conf($conf_vips);
-
-    if ($opt_mimes) {
-        local $" = ", ";
-        print "mimes: @mimes\n";
-        exit 0;
-    }
-    if ($opt_vips) {
-        local $" = ", ";
-        print "vips: @vips\n";
-        exit 0;
+        print while <$message>;
     }
 
-    die "$ME: no mail on stdin!\n$!" if (-z *STDIN);
-
-    ###
-
-    my $parser = new MIME::Parser;
-    $parser->output_to_core(1);    # FIXME: was ist bei sehr großen Mails?
-
-    my $mail   = $parser->parse(\*STDIN);
-    my %header = get_mail_header($mail);
-
-    warn "\n$ME\[$MEPID\]: @{[scalar localtime]}\n"
-        . "<<$header{from}\n"
-        . ">>$header{to}\n";
-
-    if ($opt_strain) {
-        warn "   STRAIN MODE\n";
-        new_mail_send($mail);
-        exit 0;
-    }
+    exit 0;
+}
 
-    {
-        #### checking vips: \@vips
-        my $result;
-        if (@vips and $result = check_vip(\@vips, %header)) {
-            warn "   $result\n";
-            new_mail_send($mail);
-            exit 0;
-        }
-    }
-
-    if ($header{mtype} =~ /multipart/) {
-
-        my $hl1 = '-' x 32;
-        my $hl2 = '-' x 8;
-        my $hl3 = '-' x 36;
-        my $hl  = '-' x 78;
+sub print_message(*@) {
+    my ($m, %arg) = @_;
 
-        warn ",$hl1.$hl2.$hl3.\n";
-        warn sprintf "| %-30s | %-6s | %-34s |\n", "part [subparts]",
-            "status", "filename";
-        warn "+$hl1+$hl2+$hl3+\n";
-
-        my $mail_new = new_mail(%header);
-        $mail_new = check_multipart(\@mimes, $mail, $mail_new, 0);
-
-        warn "`$hl1'$hl2'$hl3'\n";
-        new_mail_send($mail_new);
-        exit 0;
-
-    }
-    else {
-
-        warn "** SINGLEPART\n";
-        new_mail_send($mail);
-        exit 0;
+    while (<$m>) {
+        print;
+        last if $arg{to} and /^--\Q$arg{to}\E/;
     }
 }
 
-###
-
-sub read_conf($) {
-    my $conf = shift @_;
-
-    die "$ME: can't find $conf!\n" if (!-e $conf);
-
-    my $fh;
-    open($fh, "< $conf") or die "$ME: can't read $conf!\n";
-
-    return map { chomp; $_ } grep !/(?:^\s*#|^\s*$)/, <$fh>;
-}
-
-sub get_mail_header($) {
-    my $mail = shift;
-
-    my %data;
-
-    $data{mtype}    = $mail->mime_type;
-    $data{preamble} = $mail->preamble || '';
-    $data{epilogue} = $mail->epilogue || '';
-    $data{date}     = $mail->head->get('Date') || '';
-    $data{subject}  = $mail->head->get('Subject') || '';
-    $data{from}     = $mail->head->get('From') || '';
-    $data{to}       = $mail->head->get('To') || '';
-    $data{cc}       = $mail->head->get('CC') || '';
-    $data{bcc}      = $mail->head->get('BCC') || '';
-
-    map { chomp } values %data;
-
-    return (%data);
-}
+sub process($*;@) {
+    my ($mimes, $m, %arg)    = @_;
+    my ($header, %header) = read_header($m);
+    my ($type, $boundary);
 
-sub new_mail(%) {
-    my %data = @_;
-    return
-        MIME::Entity->build(Type    => $data{mtype},
-                            Date    => $data{date},
-                            From    => $data{from},
-                            To      => $data{to},
-                            CC      => $data{cc},
-                            BCC     => $data{bcc},
-                            Subject => $data{subject},
-        );
-}
-
-sub new_mail_send($) {
-    my $mail = shift @_;
-    if (!$opt_fake) {
-        print "From $ME " . scalar localtime() . "\n";
-        $mail->print;
-    }
-}
-
-sub check_multipart($$$$) {
-    my ($mimes, $old, $new, $level) = @_;
-    my $parts_count = $old->parts;
-
-    warn sprintf "| %-30s | %-6s | %-34s |\n",
-        " " x $level . $old->mime_type . " [$parts_count]", "", "";
-
-    my @parts = $old->parts;
-    foreach my $part (@parts) {
-        my $mtype = $part->mime_type;
-
-		if ($mtype =~ /multipart/) {
-				$new = check_multipart($mimes, $part, $new, $level + 1);
-		}
-		else {
-				check_part($mimes, $part, $mtype, $new, $level);
-		}
+    if ($header{"content-type"}) {
+        ($type) = ($header{"content-type"} =~ /^([^;]*)/);
+        (undef, $boundary)
+            = ($header{"content-type"} =~ /boundary=(['"])(.*?)\1/);
+        ### h{content-type}:  $header{"content-type"}
+        ### type:	      $type
+        ### bound:            $boundary
     }
 
-    return $new;
-}
+    $boundary ||= $arg{boundary};
 
-sub check_vip($%) {
-    my ($vips, %data) = @_;
-    my $re = join "|", map { $a = $_; $a =~ s/([^\da-z])/\\$1/gi; $a } @$vips;
-    my @matched;
-
-    foreach (qw(from to cc bcc)) {
-        push @matched, "VIP $_" if $data{$_} =~ /$re/o;
+    if (not $type or pass_mime($type, $mimes)) {
+        #warn "passing: " . ($type ? $type : "no mime type") . "\n";
+        print $header;
+        print_message($m, to => $boundary);
+        return;
     }
 
-    return @matched if wantarray;
-    return join ", ", @matched;
-}
-
-sub check_part($$$$$) {
-    my ($mimes, $part, $mtype, $mail_new, $level) = @_;
-    my $status = 'cut';
-
-    my $re = join "|", map { $a = $_; $a =~ s/([^\da-z])/\\$1/gi; $a } @$mimes;
+    if ($type =~ m{^multipart/}) {
+        #warn "forward to next multipart boundary: $boundary\n";
+        print $header;
+        print_message($m, to => $boundary);
 
-    my $disposition = ($part->get('Content-Disposition') =~ /filename="(.+)"/)
-        and my $filename = $1
-        if $part->get('Content-Disposition');
-
-    $filename = 'n/a' unless $filename;
+        while (not eof($m)) {
+            process($mimes, $m, boundary => $boundary);
+        }
 
-    if ($opt_text) {
-        if   ($mtype =~ m[text/plain]) { $status = 'kept' }
-        else                           { $status = 'cut' }
-    }
-    elsif ($re) {
-        if ($mtype =~ /$re/) { $status = 'kept' }
+	return;
     }
 
-    if ($status eq 'kept') {
-        $mail_new->add_part($part);
-    }
-    elsif ($status eq 'cut') { replace_part($part, $filename, $mail_new) }
+    #warn "removed: $type\n";
+
+    my ($eol) = ($header =~ /(\s*)$/);
+    $header =~ s/\s*$//;
+    $header =~ s/^/-- /gm;
 
-    warn sprintf "| %-30s | %-6s | %34.34s |\n", 
-		" " x ($level+1) . $mtype, 
-		$status,
-        $filename;
+    print "Content-Type: text/plain" . $eol x 2
+	. "Content removed (" . localtime() . ")$eol"
+	. $header
+	. $eol;
+
+    while (<$m>) {
+	if (/^--\Q$boundary\E/) {
+	    print;
+	    last;
+	}
+    }
+
 }
 
-sub replace_part($$$) {
-
-    # TODO	part ersetzen ohne boundary zu verlieren
-    # 		kurze info zu mime/filetype oder so
+sub pass_mime($$) {
+	my ($type, $mimes) = @_;
+    local $_ = $type;
+	my $re = join "|", map { $a = $_; $a =~ s/([^\da-z])/\\$1/gi; $a } @$mimes;
+	return m{$re};
 }
 
-=pod
-
-=head1 NAME
-
-mimecut -- entfernt oder ersetzt AnhE<auml>nge anhand MIME's
-
-=head1 SYNOPSIS
-
-=over
-
-=item B<mimecut> [OPTION] < Mail
+sub read_message() {
+    my $tmp = tempfile();
 
-=item B<mimecut> [ACTION]
-
-=back
-
-=head1 DESCRIPTION
-
-Eine e-Mail, die E<uuml>ber STDIN an mimecut E<uuml>bergeben wird, wird auf
-Ihre Struktur untersucht. Ist sie keine multipart e-Mail erfolgt die Ausgabe
-sofort und ungeE<auml>ndert E<uuml>ber STDOUT.
-
-Ist die e-Mail multipart, werden die einzelnen parts, gegebenenfalls rekursiv
-durchlaufen und auf ihre mime-types hin analysiert. 
-mimecut benE<ouml>tigt zwei Konfigurationsdateien:
+    local $/ = \102400;
+    print {$tmp} $_ while <>;
+    chmod 0400, $tmp or die "Can't fchmod on tmpfile: $!\n";
 
-=over
-
-=item B<mimes.conf>
-
-Hier sind die mime-types erfasst, welche in der e-Mail enthalten bleiben sollen.
-
-=item B<vips.conf>
-
-Die Liste der EmpfE<auml>nger/Sender/CC's/BCC's, welche ungekE<uuml>rzte e-Mails bekommen.
-
-=back
+    return $tmp;
+}
 
-Anhand der Konfiguration wird zunE<auml>chst der e-Mail header
-E<uuml>berprE<uuml>ft und falls keine VIP e-Mail vorliegt, werden die parts
-mit den zu entfernenden mimes abgetrennt.
-
-=head1 OPTIONS
-
-=over
-
-=item B<-d, --debug>
-
-Gibt einen Statusbericht auf STDERR aus.
-
-=item B<-l, --logfile>
-
-Schreibt das debug in oder hE<auml>ngt es an die Datei mimecut.log im Pfad von mimecut an.
+# in:	current message file handle
+# out:	($orignal_header, %parsed_header)
+sub read_header(*) {
+    my ($msg) = @_;
+    my $h = "";
 
-=item B<-t, --text>
-
-Entfernt alle AnhE<auml>nge mit einem anderen mimetype als plain/text.
-
-=item B<-f, --fake>
-
-Testet was passieren wE<uuml>rde, ohne jedoch eine e-Mail E<uuml>ber STDOUT auszugeben.
-(aktiviert automatisch B<-d>)
-
-=item B<-s, --strain>
-
-Gibt die e-Mail, ganz gleich VIP oder nicht, sofort und unverE<auml>ndert E<uuml>ber
-STDOUT aus
+    while (<$msg>) {
+        $h .= $_;
+        last if /^\s*$/m;
+    }
 
-=back
-
-=head1 ACTIONS
-
-=over
+    $_ = $h;    # unmodified header (excl. $from)
 
-=item B<-m, --mimes>
-
-Ausgabe der aus der mimes.conf eingelesenen MIME's.
-
-=item B<-v, --vips>
-
-Ausgabe der aus der vips.conf eingelesenen VIP-Adressen.
+    ### $_
 
-=back
-
-=head1 OTHER
-
-=over
-
-=item B<-h, --help>
-
-Kurze Hilfe mit SYNOPSIS, OPTIONS & ACTIONS.
-
-=item B<-p, --pod>
-
-Diese Dokumentation.
+    s/\r?\n\s+(?=\S)/ /gm;    # continuation lines
+    s/^(\S+):/\L$1:/gm;       # header fields to lower case
 
-=back
-
-=head1 FILES
-
-=over
-
-=item B<mimes.conf>, B<vips.conf>, B<mimecut.log>
-
-=back
-
-=cut
-
-# vim:ft=perl:tw=78:ts=4
+    return ($h,
+            map { ($a = $_) =~ s/\s*$//; $a }
+                ":unix_from:" => split(/^(\S+):\s*/m, $_));
+}
+__END__
+# vim:ts=4
--- a/t/10-x.t	Fri Sep 07 15:25:39 2007 +0000
+++ b/t/10-x.t	Fri Sep 07 15:30:08 2007 +0000
@@ -6,7 +6,7 @@
 use File::Temp qw(tempfile);;
 use File::Compare;
 
-my $MIMECUT = "$Bin/../hs12";
+my $MIMECUT = "$Bin/../mimecut";
 
 my $tmpout = tempfile();
 open(my $saveout, ">&STDOUT");