lib/Message/2822.pm
changeset 7 5f6309f60053
parent 6 caaa00b9165b
child 8 6276861aa7d4
--- a/lib/Message/2822.pm	Wed Nov 09 16:50:16 2011 +0100
+++ b/lib/Message/2822.pm	Thu Nov 10 13:54:01 2011 +0100
@@ -3,56 +3,128 @@
 use 5.010;
 use strict;
 use warnings;
+use Hash::Util qw(lock_keys);
 use IO::File;
 use if $ENV{DEBUG} => "Smart::Comments";
 
-my %DATA;
+my %DATA = (
+    fn => {},
+    fh => {},
+    header => {},
+    body_pos => {},
+);
+
+lock_keys %DATA;
+
 
 sub new {
     my $class = ref $_[0] ? ref shift : shift;
-    my $this = bless \(my $x) => $class;
+    my $self = bless \(my $x) => $class;
     my %arg = @_;
 
-    $DATA{$this}{file} = $arg{file};
-    $DATA{$this}{fh} = IO::File->new($DATA{$this}{file})
-	or die "Can't open $DATA{$this}{file}: $!\n";
+    $DATA{fn}{$self} = $arg{file};
+    $DATA{fh}{$self} = my $fh = IO::File->new($DATA{fn}{$self})
+	or die "Can't open $DATA{fn}{$self}: $!\n";
 
     local $/ = "";
-    my $_ = $DATA{$this}{fh}->getline();
+    $DATA{header}{$self} = <$fh>;
+    $DATA{body_pos}{$self} = tell($fh);
 
-    foreach (split /\n(?=\S)/, $_) {
-	my (undef, $k, $v) = split /(^(?:\S+:|(?i:from )))/, $_;
-	push @{$DATA{$this}{header_fields}}, $k;
-	$DATA{$this}{header}{$k} = $v;
-    }
-
-    return $this;
+    return $self;
 }
 
-sub header_fields {
-    my $this = shift;
-    return @{$DATA{$this}{header_fields}};
+sub __header_fields {
+    my $self = shift;
+    my %h;
+    @h{$DATA{header}{$self}
+	=~ /^(\S+?[: ])/mg} = ();
+    return keys %h;
 }
 
 sub header_content {
-    my $this = shift;
+    my $self = shift;
     my $field = shift;
 
-    my $_ = $DATA{$this}{header}{$field};
+    ### assert: $field =~ /[: ]$/
+
+    # FIXME: not sure if the space following the header field name
+    # is optional or required
+    return $DATA{header}{$self}
+	=~ /^$field\s+(.*?)(?=^\S)/imsg;
+}
 
-    if (not defined) {
-	($field) = grep /^$field$/i => $this->header_fields();
-	$_ = $DATA{$this}{header}{$field}
-    }
+sub orig_header {
+    my $self = shift;
+    my $fh = $DATA{fh}{$self};
+    seek($fh, 0, 0);
+    local $/ = "";
+    return <$fh>;
+}
 
-    /^\s*(.*?)\s*$/m;
-    return $1;
+sub orig_body {
+    my $self = shift;
+    my $fh = $DATA{fh}{$self};
+    seek($fh, $DATA{body_pos}{$self}, 0);
+    local $/ = undef;
+    return <$fh>;
 }
 
 sub DESTROY {
-    my $this = shift;
-    delete $DATA{$this};
+    my $self = shift;
+    foreach (keys %DATA) {
+	delete $DATA{$_}{$self}
+    }
 }
 
+1;
 
-1;
+__END__
+
+=head1 NAME
+
+  Message::2822 - simple parser for 2822 messages
+
+=head1 SYNOPSIS
+
+  use Message::2822;
+
+  my $msg = Message::2822->new(file => "mail.eml");
+
+  my @received = $msg->header_content("received:");
+  my @from_ = $msg->header_content("from ");
+  my @from = $msg->header_content("from:");
+
+=head1 DESCRIPTION
+
+This is a simple parser for RFC2822 messages. I'm sure, such parsers
+exists already about one million times. Here is yet another one.
+
+=head1 METHODS
+
+=over
+
+=item B<new>(file => I<file>)
+
+The construcor. The file is opened r/o and read. The file will not be
+closed until the object disappears.
+
+=item @list = B<header_content>(I<field>)
+
+Returns a list (*always* a list) with the contents of the specified
+header field. The I<field> has to include the colon ":" or space " ",
+since it is considered a part of the field name. I<field> is case
+insensitive!
+
+=item $scalar = $B<orig_header>()
+
+Returns the original header as it is.
+
+=item $body = B<orig_body>()
+
+Returns the original body as it is.
+
+=back
+
+=head1 AUTHOR 
+
+Heiko Schlittermann L<hs@schlittermann.de>