mimecut.pl
changeset 16 0e1c0994309a
parent 15 9482d3366306
child 17 e9aa9cb9f61f
equal deleted inserted replaced
15:9482d3366306 16:0e1c0994309a
     1 #! /usr/bin/perl
     1 #! /usr/bin/perl
     2 # $Id$
     2 # $Id$
       
     3 # $URL$
       
     4 #
       
     5 # ** Just proof of concept ** to see if we really need to decode all the
       
     6 # mime parts.
       
     7 #
     3 
     8 
     4 use strict;
     9 use strict;
     5 use warnings;
    10 use warnings;
     6 use MIME::Parser;
    11 
     7 use MIME::Entity;
    12 use Fatal qw(:void select);
     8 use MIME::Head;
    13 use File::Temp qw(tempfile);
     9 use Getopt::Long;
    14 use if $ENV{DEBUG} => "Smart::Comments";
    10 use File::Basename;
       
    11 use FindBin qw($Bin);
    15 use FindBin qw($Bin);
    12 use if $ENV{DEBUG} => "Smart::Comments", $ENV{DEBUG};
       
    13 
    16 
    14 BEGIN {
    17 sub print_message(*@);
    15     delete @ENV{ "LANG", grep /^LC_/, keys %ENV };
    18 sub read_message();
       
    19 sub pass_mime($$);
       
    20 sub forward_to_boundary($*);
       
    21 sub read_header(*);
       
    22 
       
    23 #
       
    24 sub process($*;@);
       
    25 my $confdir = -f "$Bin/.build" ? $Bin : "/etc/$0";
       
    26 my @mimes;
       
    27 
       
    28 $SIG{__WARN__} = sub { print STDERR "### ", @_ };
       
    29 
       
    30 MAIN: {
       
    31 
       
    32 	open ( my $fh, "< $confdir/mimes.conf")
       
    33 		or warn "can't read config!\n";
       
    34 	my @mimes = map { chomp; $_ } grep !/(?:^\s*#|^\s*$)/, <$fh>;
       
    35 	
       
    36     # create an r/o tmp file containing the message  for sequential
       
    37     # processing and optional failback in face of some processing error
       
    38     my $message = read_message();
       
    39 
       
    40     # during processing everything is printed into some tmp file
       
    41     # - this way we can abort processing at any time and just send
       
    42     # the above temporary file down the river
       
    43     my $tmpout = tempfile();
       
    44     my $stdout = select $tmpout;
       
    45 
       
    46     # now we start processing but at the beginning - of course
       
    47     seek($message, 0, 0);
       
    48     process(\@mimes, $message, boundary => undef);
       
    49 
       
    50     # everything is done, probably some rest is still unprocessed (some
       
    51     # epilogue, but this shouldn't be a problem at all
       
    52     {
       
    53         local $/ = \10240;
       
    54         if ($tmpout) {
       
    55             seek($tmpout, 0, 0);
       
    56             select $stdout;
       
    57             print while <$tmpout>;
       
    58         }
       
    59         print while <$message>;
       
    60     }
       
    61 
       
    62     exit 0;
    16 }
    63 }
    17 
    64 
    18 my $ME      = basename $0;
    65 sub print_message(*@) {
    19 my $MEPID   = $$;
    66     my ($m, %arg) = @_;
    20 my $CONFDIR = -f "$Bin/.build" ? $Bin : "/etc/$0";
       
    21 my $LOGDIR  = '.';
       
    22 my $HELP    = <<EOF;
       
    23 Usage:
       
    24     $ME [options] < mail
       
    25     $ME [actions]
       
    26 
    67 
    27 Options:
    68     while (<$m>) {
    28     -d, --debug
    69         print;
    29         Output detailed information how the mail and its parts is parsed using
    70         last if $arg{to} and /^--\Q$arg{to}\E/;
    30         STDERR for output.
       
    31 
       
    32     -l, --logfile
       
    33         Writes the debug information to mimcut.log file instead of printing to
       
    34         STDERR.
       
    35 
       
    36     -t, --text
       
    37         With this mode only plain/text mimes will pass the cut.
       
    38 
       
    39     -f, --fake
       
    40         Do not output anything to STDOUT. (This enables debugging but can't be
       
    41         used with log and/or strain mode!)
       
    42 
       
    43     -s, --strain
       
    44         Will output the mail instantly and unchanged to STDOUT.
       
    45 
       
    46 Actions:
       
    47     -m, --mimes
       
    48         List each mime which is to be kept from mimes.conf.
       
    49 
       
    50     -v, --vips
       
    51         List allowed senders/RCPT's/CC's/BCC's who will get unchanged mail.
       
    52 
       
    53     -p, --pod
       
    54         In detail information about the way this script works.
       
    55 
       
    56     -h, --help
       
    57         Show this help screen and exit.
       
    58 EOF
       
    59 
       
    60 my $opt_debug;
       
    61 my $opt_log;
       
    62 my $opt_text;
       
    63 my $opt_fake;
       
    64 my $opt_strain;
       
    65 my $opt_mimes;
       
    66 my $opt_vips;
       
    67 my $opt_help;
       
    68 my $opt_pod;
       
    69 
       
    70 my $conf_mimes = "$CONFDIR/mimes.conf";
       
    71 my $conf_vips  = "$CONFDIR/vips.conf";
       
    72 my @vips;
       
    73 my @mimes;
       
    74 
       
    75 my $logfile;
       
    76 
       
    77 sub read_conf($);
       
    78 sub new_parser();
       
    79 sub get_mail_header($);
       
    80 sub new_mail(%);
       
    81 sub new_mail_send($);
       
    82 sub check_vip($%);
       
    83 sub check_multipart($$$$);
       
    84 sub check_part($$$$$);
       
    85 sub replace_part($$$);
       
    86 
       
    87 MAIN: {
       
    88     Getopt::Long::Configure("bundling");
       
    89     GetOptions("d|debug"  => \$opt_debug,
       
    90                "l|log"    => \$opt_log,
       
    91                "t|text"   => \$opt_text,
       
    92                "s|strain" => \$opt_strain,
       
    93                "f|fake"   => \$opt_fake,
       
    94                "m|mimes"  => \$opt_mimes,
       
    95                "v|vips"   => \$opt_vips,
       
    96                "h|help"   => \$opt_help,
       
    97                "p|pod"    => \$opt_pod,
       
    98     ) or die "$ME: try\n  $ME --help\n";
       
    99 
       
   100     if ($opt_help) {
       
   101         print $HELP and exit 0;
       
   102     }
       
   103     if ($opt_pod) {
       
   104         system("pod2usage -v 3 $0") and exit 0;
       
   105     }
       
   106 
       
   107     if ($opt_fake) { $opt_debug = 1 }
       
   108     if ($opt_log) {
       
   109         die "$ME: can't fake in log mode!\n"  if $opt_fake;
       
   110         die "$ME: can't debug in log mode!\n" if $opt_debug;
       
   111         open(STDERR, ">> $LOGDIR/$ME.log")
       
   112             or die "$ME: can't open logfile!\n";
       
   113         $opt_debug = 1;
       
   114     }
       
   115     elsif (!$opt_debug) { open(STDERR, ">/dev/null") }
       
   116 
       
   117     if ($opt_strain) {
       
   118         if ($opt_fake) {
       
   119             warn "$ME: can't fake in strain mode!\n";
       
   120             exit 0;
       
   121         }
       
   122         if ($opt_text) {
       
   123             warn "$ME: can't use text-only in strain mode!\n";
       
   124             exit 0;
       
   125         }
       
   126     }
       
   127 
       
   128     @mimes = read_conf($conf_mimes) unless $opt_text;
       
   129     @vips = read_conf($conf_vips);
       
   130 
       
   131     if ($opt_mimes) {
       
   132         local $" = ", ";
       
   133         print "mimes: @mimes\n";
       
   134         exit 0;
       
   135     }
       
   136     if ($opt_vips) {
       
   137         local $" = ", ";
       
   138         print "vips: @vips\n";
       
   139         exit 0;
       
   140     }
       
   141 
       
   142     die "$ME: no mail on stdin!\n$!" if (-z *STDIN);
       
   143 
       
   144     ###
       
   145 
       
   146     my $parser = new MIME::Parser;
       
   147     $parser->output_to_core(1);    # FIXME: was ist bei sehr großen Mails?
       
   148 
       
   149     my $mail   = $parser->parse(\*STDIN);
       
   150     my %header = get_mail_header($mail);
       
   151 
       
   152     warn "\n$ME\[$MEPID\]: @{[scalar localtime]}\n"
       
   153         . "<<$header{from}\n"
       
   154         . ">>$header{to}\n";
       
   155 
       
   156     if ($opt_strain) {
       
   157         warn "   STRAIN MODE\n";
       
   158         new_mail_send($mail);
       
   159         exit 0;
       
   160     }
       
   161 
       
   162     {
       
   163         #### checking vips: \@vips
       
   164         my $result;
       
   165         if (@vips and $result = check_vip(\@vips, %header)) {
       
   166             warn "   $result\n";
       
   167             new_mail_send($mail);
       
   168             exit 0;
       
   169         }
       
   170     }
       
   171 
       
   172     if ($header{mtype} =~ /multipart/) {
       
   173 
       
   174         my $hl1 = '-' x 32;
       
   175         my $hl2 = '-' x 8;
       
   176         my $hl3 = '-' x 36;
       
   177         my $hl  = '-' x 78;
       
   178 
       
   179         warn ",$hl1.$hl2.$hl3.\n";
       
   180         warn sprintf "| %-30s | %-6s | %-34s |\n", "part [subparts]",
       
   181             "status", "filename";
       
   182         warn "+$hl1+$hl2+$hl3+\n";
       
   183 
       
   184         my $mail_new = new_mail(%header);
       
   185         $mail_new = check_multipart(\@mimes, $mail, $mail_new, 0);
       
   186 
       
   187         warn "`$hl1'$hl2'$hl3'\n";
       
   188         new_mail_send($mail_new);
       
   189         exit 0;
       
   190 
       
   191     }
       
   192     else {
       
   193 
       
   194         warn "** SINGLEPART\n";
       
   195         new_mail_send($mail);
       
   196         exit 0;
       
   197     }
    71     }
   198 }
    72 }
   199 
    73 
   200 ###
    74 sub process($*;@) {
       
    75     my ($mimes, $m, %arg)    = @_;
       
    76     my ($header, %header) = read_header($m);
       
    77     my ($type, $boundary);
   201 
    78 
   202 sub read_conf($) {
    79     if ($header{"content-type"}) {
   203     my $conf = shift @_;
    80         ($type) = ($header{"content-type"} =~ /^([^;]*)/);
       
    81         (undef, $boundary)
       
    82             = ($header{"content-type"} =~ /boundary=(['"])(.*?)\1/);
       
    83         ### h{content-type}:  $header{"content-type"}
       
    84         ### type:	      $type
       
    85         ### bound:            $boundary
       
    86     }
   204 
    87 
   205     die "$ME: can't find $conf!\n" if (!-e $conf);
    88     $boundary ||= $arg{boundary};
   206 
    89 
   207     my $fh;
    90     if (not $type or pass_mime($type, $mimes)) {
   208     open($fh, "< $conf") or die "$ME: can't read $conf!\n";
    91         #warn "passing: " . ($type ? $type : "no mime type") . "\n";
       
    92         print $header;
       
    93         print_message($m, to => $boundary);
       
    94         return;
       
    95     }
   209 
    96 
   210     return map { chomp; $_ } grep !/(?:^\s*#|^\s*$)/, <$fh>;
    97     if ($type =~ m{^multipart/}) {
       
    98         #warn "forward to next multipart boundary: $boundary\n";
       
    99         print $header;
       
   100         print_message($m, to => $boundary);
       
   101 
       
   102         while (not eof($m)) {
       
   103             process($mimes, $m, boundary => $boundary);
       
   104         }
       
   105 
       
   106 	return;
       
   107     }
       
   108 
       
   109     #warn "removed: $type\n";
       
   110 
       
   111     my ($eol) = ($header =~ /(\s*)$/);
       
   112     $header =~ s/\s*$//;
       
   113     $header =~ s/^/-- /gm;
       
   114 
       
   115     print "Content-Type: text/plain" . $eol x 2
       
   116 	. "Content removed (" . localtime() . ")$eol"
       
   117 	. $header
       
   118 	. $eol;
       
   119 
       
   120     while (<$m>) {
       
   121 	if (/^--\Q$boundary\E/) {
       
   122 	    print;
       
   123 	    last;
       
   124 	}
       
   125     }
       
   126 
   211 }
   127 }
   212 
   128 
   213 sub get_mail_header($) {
   129 sub pass_mime($$) {
   214     my $mail = shift;
   130 	my ($type, $mimes) = @_;
   215 
   131     local $_ = $type;
   216     my %data;
   132 	my $re = join "|", map { $a = $_; $a =~ s/([^\da-z])/\\$1/gi; $a } @$mimes;
   217 
   133 	return m{$re};
   218     $data{mtype}    = $mail->mime_type;
       
   219     $data{preamble} = $mail->preamble || '';
       
   220     $data{epilogue} = $mail->epilogue || '';
       
   221     $data{date}     = $mail->head->get('Date') || '';
       
   222     $data{subject}  = $mail->head->get('Subject') || '';
       
   223     $data{from}     = $mail->head->get('From') || '';
       
   224     $data{to}       = $mail->head->get('To') || '';
       
   225     $data{cc}       = $mail->head->get('CC') || '';
       
   226     $data{bcc}      = $mail->head->get('BCC') || '';
       
   227 
       
   228     map { chomp } values %data;
       
   229 
       
   230     return (%data);
       
   231 }
   134 }
   232 
   135 
   233 sub new_mail(%) {
   136 sub read_message() {
   234     my %data = @_;
   137     my $tmp = tempfile();
   235     return
   138 
   236         MIME::Entity->build(Type    => $data{mtype},
   139     local $/ = \102400;
   237                             Date    => $data{date},
   140     print {$tmp} $_ while <>;
   238                             From    => $data{from},
   141     chmod 0400, $tmp or die "Can't fchmod on tmpfile: $!\n";
   239                             To      => $data{to},
   142 
   240                             CC      => $data{cc},
   143     return $tmp;
   241                             BCC     => $data{bcc},
       
   242                             Subject => $data{subject},
       
   243         );
       
   244 }
   144 }
   245 
   145 
   246 sub new_mail_send($) {
   146 # in:	current message file handle
   247     my $mail = shift @_;
   147 # out:	($orignal_header, %parsed_header)
   248     if (!$opt_fake) {
   148 sub read_header(*) {
   249         print "From $ME " . scalar localtime() . "\n";
   149     my ($msg) = @_;
   250         $mail->print;
   150     my $h = "";
   251     }
       
   252 }
       
   253 
   151 
   254 sub check_multipart($$$$) {
   152     while (<$msg>) {
   255     my ($mimes, $old, $new, $level) = @_;
   153         $h .= $_;
   256     my $parts_count = $old->parts;
   154         last if /^\s*$/m;
   257 
       
   258     warn sprintf "| %-30s | %-6s | %-34s |\n",
       
   259         " " x $level . $old->mime_type . " [$parts_count]", "", "";
       
   260 
       
   261     my @parts = $old->parts;
       
   262     foreach my $part (@parts) {
       
   263         my $mtype = $part->mime_type;
       
   264 
       
   265 		if ($mtype =~ /multipart/) {
       
   266 				$new = check_multipart($mimes, $part, $new, $level + 1);
       
   267 		}
       
   268 		else {
       
   269 				check_part($mimes, $part, $mtype, $new, $level);
       
   270 		}
       
   271     }
   155     }
   272 
   156 
   273     return $new;
   157     $_ = $h;    # unmodified header (excl. $from)
       
   158 
       
   159     ### $_
       
   160 
       
   161     s/\r?\n\s+(?=\S)/ /gm;    # continuation lines
       
   162     s/^(\S+):/\L$1:/gm;       # header fields to lower case
       
   163 
       
   164     return ($h,
       
   165             map { ($a = $_) =~ s/\s*$//; $a }
       
   166                 ":unix_from:" => split(/^(\S+):\s*/m, $_));
   274 }
   167 }
   275 
   168 __END__
   276 sub check_vip($%) {
   169 # vim:ts=4
   277     my ($vips, %data) = @_;
       
   278     my $re = join "|", map { $a = $_; $a =~ s/([^\da-z])/\\$1/gi; $a } @$vips;
       
   279     my @matched;
       
   280 
       
   281     foreach (qw(from to cc bcc)) {
       
   282         push @matched, "VIP $_" if $data{$_} =~ /$re/o;
       
   283     }
       
   284 
       
   285     return @matched if wantarray;
       
   286     return join ", ", @matched;
       
   287 }
       
   288 
       
   289 sub check_part($$$$$) {
       
   290     my ($mimes, $part, $mtype, $mail_new, $level) = @_;
       
   291     my $status = 'cut';
       
   292 
       
   293     my $re = join "|", map { $a = $_; $a =~ s/([^\da-z])/\\$1/gi; $a } @$mimes;
       
   294 
       
   295     my $disposition = ($part->get('Content-Disposition') =~ /filename="(.+)"/)
       
   296         and my $filename = $1
       
   297         if $part->get('Content-Disposition');
       
   298 
       
   299     $filename = 'n/a' unless $filename;
       
   300 
       
   301     if ($opt_text) {
       
   302         if   ($mtype =~ m[text/plain]) { $status = 'kept' }
       
   303         else                           { $status = 'cut' }
       
   304     }
       
   305     elsif ($re) {
       
   306         if ($mtype =~ /$re/) { $status = 'kept' }
       
   307     }
       
   308 
       
   309     if ($status eq 'kept') {
       
   310         $mail_new->add_part($part);
       
   311     }
       
   312     elsif ($status eq 'cut') { replace_part($part, $filename, $mail_new) }
       
   313 
       
   314     warn sprintf "| %-30s | %-6s | %34.34s |\n", 
       
   315 		" " x ($level+1) . $mtype, 
       
   316 		$status,
       
   317         $filename;
       
   318 }
       
   319 
       
   320 sub replace_part($$$) {
       
   321 
       
   322     # TODO	part ersetzen ohne boundary zu verlieren
       
   323     # 		kurze info zu mime/filetype oder so
       
   324 }
       
   325 
       
   326 =pod
       
   327 
       
   328 =head1 NAME
       
   329 
       
   330 mimecut -- entfernt oder ersetzt AnhE<auml>nge anhand MIME's
       
   331 
       
   332 =head1 SYNOPSIS
       
   333 
       
   334 =over
       
   335 
       
   336 =item B<mimecut> [OPTION] < Mail
       
   337 
       
   338 =item B<mimecut> [ACTION]
       
   339 
       
   340 =back
       
   341 
       
   342 =head1 DESCRIPTION
       
   343 
       
   344 Eine e-Mail, die E<uuml>ber STDIN an mimecut E<uuml>bergeben wird, wird auf
       
   345 Ihre Struktur untersucht. Ist sie keine multipart e-Mail erfolgt die Ausgabe
       
   346 sofort und ungeE<auml>ndert E<uuml>ber STDOUT.
       
   347 
       
   348 Ist die e-Mail multipart, werden die einzelnen parts, gegebenenfalls rekursiv
       
   349 durchlaufen und auf ihre mime-types hin analysiert. 
       
   350 mimecut benE<ouml>tigt zwei Konfigurationsdateien:
       
   351 
       
   352 =over
       
   353 
       
   354 =item B<mimes.conf>
       
   355 
       
   356 Hier sind die mime-types erfasst, welche in der e-Mail enthalten bleiben sollen.
       
   357 
       
   358 =item B<vips.conf>
       
   359 
       
   360 Die Liste der EmpfE<auml>nger/Sender/CC's/BCC's, welche ungekE<uuml>rzte e-Mails bekommen.
       
   361 
       
   362 =back
       
   363 
       
   364 Anhand der Konfiguration wird zunE<auml>chst der e-Mail header
       
   365 E<uuml>berprE<uuml>ft und falls keine VIP e-Mail vorliegt, werden die parts
       
   366 mit den zu entfernenden mimes abgetrennt.
       
   367 
       
   368 =head1 OPTIONS
       
   369 
       
   370 =over
       
   371 
       
   372 =item B<-d, --debug>
       
   373 
       
   374 Gibt einen Statusbericht auf STDERR aus.
       
   375 
       
   376 =item B<-l, --logfile>
       
   377 
       
   378 Schreibt das debug in oder hE<auml>ngt es an die Datei mimecut.log im Pfad von mimecut an.
       
   379 
       
   380 =item B<-t, --text>
       
   381 
       
   382 Entfernt alle AnhE<auml>nge mit einem anderen mimetype als plain/text.
       
   383 
       
   384 =item B<-f, --fake>
       
   385 
       
   386 Testet was passieren wE<uuml>rde, ohne jedoch eine e-Mail E<uuml>ber STDOUT auszugeben.
       
   387 (aktiviert automatisch B<-d>)
       
   388 
       
   389 =item B<-s, --strain>
       
   390 
       
   391 Gibt die e-Mail, ganz gleich VIP oder nicht, sofort und unverE<auml>ndert E<uuml>ber
       
   392 STDOUT aus
       
   393 
       
   394 =back
       
   395 
       
   396 =head1 ACTIONS
       
   397 
       
   398 =over
       
   399 
       
   400 =item B<-m, --mimes>
       
   401 
       
   402 Ausgabe der aus der mimes.conf eingelesenen MIME's.
       
   403 
       
   404 =item B<-v, --vips>
       
   405 
       
   406 Ausgabe der aus der vips.conf eingelesenen VIP-Adressen.
       
   407 
       
   408 =back
       
   409 
       
   410 =head1 OTHER
       
   411 
       
   412 =over
       
   413 
       
   414 =item B<-h, --help>
       
   415 
       
   416 Kurze Hilfe mit SYNOPSIS, OPTIONS & ACTIONS.
       
   417 
       
   418 =item B<-p, --pod>
       
   419 
       
   420 Diese Dokumentation.
       
   421 
       
   422 =back
       
   423 
       
   424 =head1 FILES
       
   425 
       
   426 =over
       
   427 
       
   428 =item B<mimes.conf>, B<vips.conf>, B<mimecut.log>
       
   429 
       
   430 =back
       
   431 
       
   432 =cut
       
   433 
       
   434 # vim:ft=perl:tw=78:ts=4