hs12
changeset 12 62cd5aef2cfa
parent 11 1fccf68e52c6
child 14 6f1e07f90b46
--- a/hs12	Wed Sep 05 23:15:45 2007 +0000
+++ b/hs12	Fri Sep 07 11:13:12 2007 +0000
@@ -1,4 +1,10 @@
 #! /usr/bin/perl
+# $Id$
+# $URL$
+#
+# ** Just proof of concept ** to see if we really need to decode all the
+# mime parts.
+#
 
 use strict;
 use warnings;
@@ -7,84 +13,115 @@
 use File::Temp qw(tempfile);
 use if $ENV{DEBUG} => "Smart::Comments";
 
-sub print_message(*$);
+sub print_message(*@);
 sub read_message();
 sub pass_mime($);
 sub forward_to_boundary($*);
-sub read_header(*$);
-sub process(*$$);
+sub read_header(*);
+
+#
+sub process(*;@);
+
+$SIG{__WARN__} = sub { print STDERR "### ", @_ };
 
 MAIN: {
+
+    # create an r/o tmp file containing the message  for sequential
+    # processing and optional failback in face of some processing error
     my $message = read_message();
-    my $tmpout  = tempfile();
-    my $stdout  = select $tmpout;    # print ab jetzt ins tmpout
-
-    seek($message, 0, 0);
-    process($message, undef, undef);
 
-    # spit out everthing
-    select $stdout;
-    seek($tmpout, 0, 0);
+    # during processing everything is printed into some tmp file
+    # - this way we can abort processing at any time and just send
+    # the above temporary file down the river
+    my $tmpout = tempfile();
+    my $stdout = select $tmpout;
 
-    # now output the stuff collected in tmpout
-    # and the rest of the message
+    # now we start processing but at the beginning - of course
+    seek($message, 0, 0);
+    process($message, boundary => undef);
+
+    # everything is done, probably some rest is still unprocessed (some
+    # epilogue, but this shouldn't be a problem at all
     {
         local $/ = \10240;
-        print while <$tmpout>;
+        if ($tmpout) {
+            seek($tmpout, 0, 0);
+            select $stdout;
+            print while <$tmpout>;
+        }
         print while <$message>;
     }
 
     exit 0;
 }
 
-sub print_message(*$) {
-    my ($m, $b) = @_;
-
-    if (not defined $b) {
-        return print while <$m>;
-    }
+sub print_message(*@) {
+    my ($m, %arg) = @_;
 
     while (<$m>) {
         print;
-        last if /^--$b--\s*/;
+        last if $arg{to} and /$arg{to}/;
     }
 }
 
-sub process(*$$) {
-    my ($m, $boundary, $mime_version) = @_;
-    my ($header, %header) = read_header($m, $boundary);
-    my $mime_type;
-
-    $mime_version ||= $header{"mime-version"};
-
-    ### $header
+sub process(*;@) {
+    my ($m,      %arg)    = @_;
+    my ($header, %header) = read_header($m);
+    my ($type, $boundary);
 
-    if (    $mime_version
-        and $header{"content-type"})
-    {
-        ($mime_type, undef, $boundary) = (
-            $header{"content-type"} =~ /^(.*?); # mime type
-	      (?:.*(?:boundary=(['"])(.*?)\2))? # eventuell noch mehr
-	    /x
-        );
+    if ($header{"content-type"}) {
+        ($type) = ($header{"content-type"} =~ /^([^;]*)/);
+        (undef, $boundary)
+            = ($header{"content-type"} =~ /boundary=(['"])(.*?)\1/);
+        ### h{content-type}:  $header{"content-type"}
+        ### type:	      $type
+        ### bound:            $boundary
+    }
+
+    $boundary ||= $arg{boundary};
+
+    if (not $type or pass_mime($type)) {
+        warn "passing: " . ($type ? $type : "no mime type") . "\n";
+        print $header;
+        print_message($m, to => $boundary ? "^--$boundary" : undef);
+        return;
     }
 
-    if (not $mime_type or pass_mime($mime_type)) {
-        warn "passing: " . ($mime_type ? $mime_type : "no mime_type") . "\n";
+    if ($type =~ m{^multipart/}) {
+        warn "forward to next multipart boundary: $boundary\n";
         print $header;
-        print_message($m, $boundary);
-        return;
-    }
-    else {
-        warn "not just passing: $mime_type\n";
+        print_message($m, to => "^--$boundary");
+
+        while (not eof($m)) {
+            process($m, boundary => $boundary);
+        }
+
+	return;
     }
 
-    process($m, $boundary, $mime_version);
+    warn "removed: $type\n";
+
+    my ($eol) = ($header =~ /(\s*)$/);
+    $header =~ s/\s*$//;
+    $header =~ s/^/-- /gm;
+
+    print "Content-Type: text/plain" . $eol x 2
+	. "Content removed (" . localtime() . ")$eol"
+	. $header
+	. $eol;
+
+    while (<$m>) {
+	if (/^--$boundary/) {
+	    print;
+	    last;
+	}
+    }
 
 }
 
 sub pass_mime($) {
-    return $_[0] =~ m{/signed};
+    local $_ = shift;
+    return m{(?:^text/)|(?:/signed)};
 }
 
 sub read_message() {
@@ -99,27 +136,24 @@
 
 # in:	current message file handle
 # out:	($orignal_header, %parsed_header)
-sub read_header(*$) {
-    my ($msg, $start) = @_;
+sub read_header(*) {
+    my ($msg) = @_;
     my $h = "";
 
-    if (defined $start) {
-        while (<$msg>) {
-            $h .= $_;
-            last if /^--$start\s*$/;
-        }
-    }
-
     while (<$msg>) {
         $h .= $_;
-        last if /^\s*$/;
+        last if /^\s*$/m;
     }
 
     $_ = $h;    # unmodified header (excl. $from)
 
+    ### $_
+
     s/\r?\n\s+(?=\S)/ /gm;    # continuation lines
     s/^(\S+):/\L$1:/gm;       # header fields to lower case
 
-    return ($h, ":unix_from:" => split(/^(\S+):\s*/m, $_));
+    return ($h,
+            map { ($a = $_) =~ s/\s*$//; $a }
+                ":unix_from:" => split(/^(\S+):\s*/m, $_));
 }
 __END__