--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/imapami Tue May 25 00:23:55 2010 +0200
@@ -0,0 +1,743 @@
+#! /usr/bin/perl
+# © Heiko Schlittermann <hs@schlittermann.de>
+#
+# This is a „home brewed“ imap sync, almost for just one purpose,
+# migrating uw-imap boxes to dovecot in a very special environment. This
+# script may or may not be helpful for you.
+
+use strict;
+use warnings;
+
+use Getopt::Long;
+use File::Basename;
+use Pod::Usage;
+use Data::Dumper;
+use Date::Parse;
+use Digest::SHA1 qw/sha1_hex/;
+use POSIX qw/strftime/;
+use Smart::Comments -ENV;
+use Time::HiRes qw(gettimeofday tv_interval usleep);
+use IO::File;
+
+use lib dirname($0) . "/Mail-IMAPClient-3.24/lib";
+use Mail::IMAPClient 3.24;
+
+use lib dirname($0) . "/Unicode-IMAPUtf7-2.01/lib";
+use Unicode::IMAPUtf7 2.01;
+
+my $ME = basename $0;
+
+my ($o_host1, $o_host2) = (undef, undef);
+my ($o_port1, $o_port2) = (143, 143);
+my ($o_user1, $o_user2) = (undef, undef);
+my ($o_password1, $o_password2) = (undef, undef);
+my $o_prefix1 = "";
+my $o_sync = 0;
+my $o_diag = 0;
+my $o_folder = "";
+my $o_purge_dst = "";
+my $o_debugimap = 0;
+my $o_delete_from_src = 0;
+my $o_cleanup_dst = "";
+my $o_state_dir = "$ENV{HOME}/.$ME";
+my $sep;
+
+sub map_folders($$);
+sub sync_folders($$\%);
+sub message_info($$);
+sub transfer_message($$$$$);
+sub transferred_messages();
+sub transferred_bytes();
+sub fatal(@);
+sub utf7($);
+sub diag($$);
+sub report_dups($$);
+
+sub setup_reporting($);
+sub report(@);
+sub summary($);
+
+++$|;
+
+$SIG{INT} = sub { print "\n"; exit; };
+
+MAIN: {
+ GetOptions(
+ "src=s" => sub {
+ ($o_user1, $o_password1, $o_host1, $o_port1) =
+ "$_[1]:$o_port1" =~ /^(.*?):(.*?)@(.*?):(.*?)(:|$)/;
+ },
+ "dst=s" => sub {
+ ($o_user2, $o_password2, $o_host2, $o_port2) =
+ "$_[1]:$o_port2" =~ /^(.*?):(.*?)@(.*?):(.*?)(:|$)/;
+ },
+ "prefix1=s" => \$o_prefix1,
+ "sync!" => \$o_sync,
+ "debugimap=i" => \$o_debugimap,
+ "folder=s" => \$o_folder,
+ "purge-dst=s" => sub {
+ $_[1] =~ /^messages|all$/i or pod2usage;
+ $o_purge_dst = lc($_[1]);
+ },
+ "cleanup-dst=s" => sub {
+ $_[1] =~ /^per-folder|all$/i or pod2usage;
+ $o_cleanup_dst = lc($_[1]);
+ },
+ "delete-from-src!" => \$o_delete_from_src,
+ "diag!" => \$o_diag,
+ "state-dir=s" => \$o_state_dir,
+ "help" => sub { pod2usage(-verbose => 1, exit => 0) },
+ "man" => sub { pod2usage(-verbose => 2, exit => 0) },
+ ) or pod2usage();
+
+ fatal "For data protection reasons it's not possible to use --cleanup-dst=all and --folder=...\n"
+ if $o_cleanup_dst eq "all" and $o_folder;
+
+ -d $o_state_dir or mkdir $o_state_dir => 0777 or die "$ME: Can't create dir $o_state_dir: $!\n";
+
+ setup_reporting("$o_user1⇒$o_user2");
+
+ ### connecting to: $o_host1
+ my $imap1 = new Mail::IMAPClient(
+ Server => $o_host1,
+ Port => $o_port1,
+ User => $o_user1,
+ Password => $o_password1,
+ Debug => ($o_debugimap =~ /1/ ? 1 : 0),
+ Uid => 1,
+ ) or fatal "Can't connect to $o_host1 as $o_user1: $@";
+
+ ### connecting to: $o_host2
+ my $imap2 = new Mail::IMAPClient(
+ Server => $o_host2,
+ Port => $o_port2,
+ User => $o_user2,
+ Password => $o_password2,
+ Debug => ($o_debugimap =~ /2/ ? 1 : 0),
+ Uid => 1,
+ ) or fatal "Can't connect to $o_host2 as $o_user2: $@";
+
+ fatal "The separator is not '/' as I'd expect."
+ if ($sep = $imap1->separator) ne "/";
+
+ fatal "The separator is not the same on both sides."
+ if $sep ne $imap2->separator;
+
+ # now create a map from src folder name to destination folder name
+ # and if this is done, start the job
+
+ my %folders =
+ map_folders($o_prefix1, [grep { /$o_folder/ } "INBOX", $imap1->folders]);
+
+ ### %folders
+
+ sync_folders($imap1, $imap2, %folders);
+
+ report "_: done syncing\n";
+
+END {
+ summary($o_user1) if defined $o_user1;
+}
+
+ exit;
+
+}
+
+sub map_folders($$) {
+
+ my $prefix = shift;
+ my %folders;
+ @folders{ @{ $_[0] } } = ();
+
+ $prefix .= "/" if $prefix and not $prefix =~ /\/$/;
+
+ # inbox is special (case insensitive)
+ # all other are case sensitive in dovecot and
+ # probably in UW-IMAP too. We migrate the INBOX, but we migrate
+ # everything below Mail/ too, and there might be a Mail/inbox
+ # and we should not loose it. (But - checking with an UW-IMAP, it
+ # seems(!) that there the INBOX hides the Mail/(?i:Inbox) We could
+ # just ignore it, but I'd say, somebody will miss this folder(s).
+
+ my %map;
+
+ $map{INBOX} = "INBOX"
+ if @{ [delete @folders{ grep /^inbox$/i, keys %folders }] };
+
+ # remove everything else that does not match the prefix
+ # and then transform all names to names w/o the prefix
+ delete @folders{ grep !/^$prefix/, keys %folders };
+
+ foreach (sort keys %folders) {
+ my $f = $_;
+
+ s/^$prefix//;
+
+ if (/^inbox$/i) {
+ $_ = "Mail:$_";
+ while (exists $folders{"$prefix$_"}) {
+ s/:/::/;
+ }
+ }
+ elsif (s/\./ /g) {
+
+ # the rename operation should create a new
+ # folder name, if we would have a name clash
+ # already on the source!
+ while (exists $folders{"$prefix$_"}) {
+ if (/\d$/) {
+ s/(\d+)$/$1++/e;
+ next;
+ }
+ $_ .= " 1";
+ }
+ }
+ $map{$f} = $_;
+ }
+
+ return %map;
+}
+
+sub sync_folders($$\%) {
+ my ($i1, $i2) = (shift, shift);
+ my %map = %{ shift @_ };
+
+
+ # If purge-dst=all is requested we really remove the folders
+ # on the destination host. INBOX has to be handled on it's own,
+ # it is not deleteable
+ if ($o_purge_dst eq "all") {
+ if ($o_sync) {
+ foreach ($i2->folders) {
+ if (/^inbox$/i) {
+ report "_: purging $_ on destination\n";
+ $i2->select($_) or fatal "Can't select $_: $@\n";
+ $i2->delete_message($i2->messages);
+ $i2->expunge;
+ next;
+ }
+ report "_: removing $_ on destination\n";
+ $i2->unsubscribe($_);
+ $i2->delete($_);
+ }
+ }
+ else {
+ report "_: would purge ALL folders on dst\n";
+ }
+ }
+
+ # remove all folders we do not have in our map
+ # note: this option is not allowed when using the --folder option
+ if ($o_cleanup_dst eq "all") {
+ my %need;
+ @need{values %map} = ();
+ foreach (grep { not exists $need{$_} } $i2->folders) {
+ if (not $o_sync) {
+ report "_: would delete folder $_ from dst\n";
+ next;
+ }
+ if (/^inbox$/i) {
+ report "_: purging INBOX on destination\n";
+ $i2->select($_) or fatal "Can't select $_: $@\n";
+ $i2->delete_message($i2->messages);
+ $i2->expunge;
+ next;
+ }
+ report "_: removing $_ on destination\n";
+ $i2->unsubscribe($_);
+ $i2->delete($_);
+
+ }
+ }
+
+ FOLDER: foreach my $f1 (sort keys %map) {
+ ### current folder: $f1
+
+ report("I: not selectable: $f1\n"), next
+ if not $i1->selectable($f1);
+
+ if (!$i1->examine($f1)) {
+ report "E: can't select ($@): $f1\n";
+ next FOLDER;
+ }
+
+ my $f2 = $map{$f1};
+
+ my ($messages1, $messages2);
+ $messages1 = message_info($i1, $f1);
+
+ if ($messages1 =~ /^(.:.*)/) {
+ report "$1\n";
+ next FOLDER;
+ }
+
+ my $state = new State ("$o_state_dir/$o_user1" => $f2);
+ report_dupes($f1, $messages1);
+
+ # skip the folder if we didn't get a message-info hash
+ # the message_info() returns a string with additional info
+ report "_: from folder $f1";
+ report "(" . keys(%$messages1) . ") to $f2";
+
+ if (not $i2->exists(utf7($f2))) {
+ if (not $o_sync) {
+ report "E: missing folder: $f2\n";
+ next FOLDER;
+ }
+
+ $i2->create(utf7($f2))
+ or fatal "\nE: Can't create on destination ($@): $f2\n";
+ report "(created)";
+ $messages2 = undef;
+ }
+ else {
+ if ($o_sync) {
+ $i2->select(utf7($f2)) or fatal "Can't select $f2 on destination";
+ if ($o_purge_dst eq "messages" and (my @m = $i2->messages)) {
+ $i2->delete_message(@m);
+ $i2->expunge();
+ report "(purged " . @m . " messages)";
+ }
+ }
+ else {
+ $i2->examine(utf7($f2)) or fatal "Can't examine $f2 on destination";
+ if ($o_purge_dst eq "messages" and (my @m = $i2->messages)) {
+ report " (would purge " . @m . " messages)";
+ }
+ }
+
+ $messages2 = message_info($i2, utf7($f2));
+ report "(" . keys(%$messages2) . ")";
+ $state->seen("dst:$f2", values %$messages2);
+ }
+
+ # now $messages2 contains a list of messages found on the destination
+
+ $i2->subscribe(utf7($f2)) if $o_sync;
+
+ my (%transfer, %cleanup);
+
+ # we do only one way sync host1 -> host2
+ # so every message on host1 with no counterpart on host2
+ # will be migrated
+
+ %transfer = %$messages1;
+ delete @transfer{ keys %$messages2 };
+
+ if ($o_cleanup_dst eq "per-folder") {
+ %cleanup = %$messages2;
+ delete @cleanup{ keys %$messages1 };
+ }
+
+
+ report ": transfer(" . keys(%transfer) . ")"
+ . (%cleanup ? " delete(" . keys(%cleanup) . ")\n" : "\n");
+
+ diag(\%transfer, $messages2) if %transfer and $o_diag;
+
+ next FOLDER unless $o_sync;
+
+ if (%cleanup) {
+ $i2->select(utf7($f2));
+ report "_: cleanup: " . scalar(keys %cleanup) . " messages from $f2\n";
+ $i2->delete_message( map { @{$_->{ids}} } values %cleanup);
+ $i2->expunge();
+ }
+
+ next FOLDER if not %transfer;
+
+ my @transferred = ();
+ MESSAGE: foreach my $msg (
+ map { $transfer{$_} }
+ sort { $transfer{$a}{ids}[0] <=> $transfer{$b}{ids}[0] }
+ keys %transfer
+ )
+ {
+ report "I: transferring: "
+ . (@transferred + 1) . " of "
+ . scalar(keys %transfer)
+ . " $f1:$msg->{ids}[0] => $f2 ($msg->{imap}{'RFC822.SIZE'} bytes)";
+ eval {
+ transfer_message($i1, $i2, $f1, $f2, $msg);
+ };
+ if ($@) {
+ report "E: transfer failed: $f1 $msg->{ids}[0]: $@\n";
+ #$i1->Debug(1);
+ $i1->Status or $i1->connect or die $@;
+ $i1->examine($f1) or die $@;
+
+ #$i2->Debug(1);
+ $i2->Status or $i2->connect or die $@;
+ $i2->select($f2) or die $@;
+ next MESSAGE;
+ }
+
+ report " OK\n";
+ push @transferred, $msg->{ids}[0];
+ }
+
+ if ($o_delete_from_src) {
+ $i1->select($f1);
+ $i1->delete_message(@transferred);
+ $i1->expunge($f1);
+ }
+
+ # now we have the folders on both sides
+ # if the destination is empty, we can send just everything.
+ }
+}
+
+{
+ my $messages;
+ my $bytes;
+
+sub transfer_message($$$$$) {
+ my ($i1, $i2, $f1, $f2, $msg) = @_;
+
+ my $message_string = $msg->{imap}{"RFC822.HEADER"};
+ local $_ = $i1->body_string($msg->{ids}[0])
+ or die "Can't fetch body_string for $msg->{ids}[0]: $@\n"
+ . "-" x 72 . "\n"
+ . (join "" => map { s/^/\t/gm; $_ } $msg->{imap}{"RFC822.HEADER"})
+ . "-" x 72 . "\n";
+ $message_string .= $_;
+
+ $messages++;
+ $bytes += length($message_string);
+
+ return $i2->append_string(
+ utf7($f2),
+ $message_string,
+ $msg->{imap}{FLAGS},
+ $msg->{imap}{INTERNALDATE}
+ ) or fatal $@;
+}
+
+sub transferred_bytes() { defined $bytes ? $bytes : 0 };
+sub transferred_messages() { defined $messages ? $messages : 0 };
+
+}
+
+sub message_info($$) {
+ my @CHECKITEMS = qw/INTERNALDATE FLAGS RFC822.HEADER RFC822.SIZE/;
+ my $i = shift;
+ my $folder = shift;
+
+ my %message_info = ();
+
+ if ($i->messages == 0) {
+ return "I: empty: $folder";
+ }
+
+ my $message = $i->fetch_hash(@CHECKITEMS);
+
+ if ( keys %$message == 1
+ and (values %$message)[0]->{"RFC822.HEADER"} =~ /^subject:\s*\/home\//mi
+ and (values %$message)[0]->{"RFC822.HEADER"} !~ /^received:/i)
+ {
+ return "I: fake folder: $folder";
+ }
+
+ foreach my $id (keys %$message) {
+
+ # It seems that we've problems using the internal date as a
+ # checkitem - too many different formats and even timeshifts
+ # solution: transform it to unix timestamp and back
+
+ if (exists $message->{$id}{INTERNALDATE}) {
+
+ # 17-Jul-1996 02:44:25 -0700
+ $message->{$id}{INTERNALDATE} = strftime("%02d-%b-%Y %T %z",
+ localtime str2time($message->{$id}{INTERNALDATE}));
+ }
+
+ if (exists $message->{$id}{FLAGS}) {
+ $message->{$id}{FLAGS} =
+ join " " => sort grep { !/^\\recent$/i } split " ",
+ $message->{$id}{FLAGS};
+ }
+
+ my $digest = sha1_hex(@{ $message->{$id} }{@CHECKITEMS});
+ if (exists $message_info{$digest}) {
+ push @{ $message_info{$digest}{ids} }, $id;
+ }
+ else {
+ $message_info{$digest}{ids} = [$id];
+ $message_info{$digest}{imap} = $message->{$id};
+ }
+
+ # uuid - to uniquly identify the message, only based on the rfc822 header!
+ # we'll use this for remember already seen (transferred) message - they may
+ # change other items of the CHECKITEMS above
+ $message_info{$digest}{uuid} =
+ sha1_hex($message_info{$digest}{imap}{"RFC822.HEADER"});
+ $message_info{$digest}{h}{"message-id"}
+ = $message_info{$digest}{imap}{"RFC822.HEADER"} =~ /^message-id:\s*(<.*?>)\s*$/im ? $1 : "<>";
+ }
+
+ return \%message_info;
+}
+
+sub report_dupes($$) {
+ my $folder = shift;
+ my $mi = shift;
+
+ foreach (map { $mi->{$_} } keys %$mi) {
+ next if @{$_->{ids}} == 1;
+
+ my $mid = $_->{imap}{"RFC822.HEADER"} =~ /^message-id:\s*(.*?)\s*$/im
+ ? $1 : ("<" . sha1_hex($_->{imap}{"RFC822.HEADER"}) . "\@imapami>");
+
+ report "W: dup in $folder: " . (@{$_->{ids}} - 1) . " for $mid\n";
+
+ if ($o_diag) {
+ my $header = $_->{imap}{"RFC822.HEADER"};
+ $header =~ s/^/*\t/mg;
+ report "*: original header\n$header";
+ }
+ }
+
+
+
+}
+
+sub diag($$) {
+ my ($m1, $m2) = @_;
+
+ # in $m1 is the hash with messages missing on the destination
+ # $m2 contains everything we know about the destination
+
+ foreach my $m (keys %$m1) {
+ report "\n";
+
+ (my $msgid) =
+ ($m1->{$m}{imap}{"RFC822.HEADER"} =~ /^message-id:\s*(.*)$/im);
+ (my $other) = grep {
+ $m2->{$_}{imap}{"RFC822.HEADER"} =~ /^(?i:message-id:\s*)$msgid$/m
+ } keys %$m2;
+
+ report "_: diag for $msgid\n";
+
+ if (not $other) {
+ report "_: the message is really missing\n";
+ next;
+ }
+
+ foreach (qw/RFC822.SIZE RFC822.HEADER FLAGS INTERNALDATE/) {
+ next if $m1->{$m}{imap}{$_} eq $m2->{$other}{imap}{$_};
+ report "_: Mismatch on $_\n",
+ "\t$_ 1: $m1->{$m}{imap}{$_}\n",
+ "\t$_ 2: $m2->{$other}{imap}{$_}\n";
+ }
+ }
+
+}
+
+sub fatal(@) {
+ die "$ME:FATAL: ", @_, "\n"
+}
+
+sub utf7($) {
+ return $_[0] unless $_[0] =~ /(?:[\x80-\xff])|(&[^-])/;
+ return Unicode::IMAPUtf7->new->encode($_[0]);
+}
+
+{
+ my $last_line;
+ my $lines;
+ my %report;
+ my $user;
+ my $start;
+
+ sub setup_reporting($) {
+ $user = shift;
+ $last_line = "\n";
+ $lines = 0;
+ $start = [gettimeofday];
+ }
+
+ sub report(@) {
+ local $_ = join "", @_;
+
+ if (s/^(.:)/$1($user)/) {
+ print "\n" if $last_line !~ /\n$/;
+ }
+
+ if (/^(([IWE]):.*?)\n?$/m) {
+ push @{ $report{$2} }, $1;
+ }
+ $lines += tr/\n/\n/;
+
+ $last_line = $_;
+ print;
+ }
+
+ sub summary($) {
+ our $user = shift;
+ our ($text, $n, $r);
+ my %detail;
+
+ usleep 1; # avoids div by zero
+
+ print $_ = "= Summary for user \"$user\"\n";
+ print "= ", "=" x (length($_)- 3), "\n";
+ printf "= Run time : %5d s\n", tv_interval($start);
+ printf "= Transferred messages : %5d (%5.1f /s)\n", transferred_messages, transferred_messages() / tv_interval($start);
+ printf "= Transferred Data : %5d MiB (%5.1f MiB/s)\n", (transferred_bytes/2**20), (transferred_bytes/2**20) / tv_interval($start);
+
+ format =
+= @<<<<<<<<: @##### (@##.#)%
+$text, $n, $r
+.
+ $text = "Lines";
+ $n = $lines;
+ $r = 100;
+ write;
+
+ foreach (qw(I W E)) {
+ $text =
+ $_ eq "I" ? "Info"
+ : $_ eq "W" ? "Warnings"
+ : $_ eq "E" ? "Errors"
+ : "??";
+ $n = defined $report{$_} ? @{ $report{$_} } : 0;
+ $r = $lines ? $n / $lines * 100 : 0;
+ write;
+
+ if (defined $report{$_}) {
+ foreach my $r (@{$report{$_}}) {
+ $r =~ /^(.:.*?):/;
+ $detail{$_}->{$1}++;
+ }
+ }
+
+ }
+
+ print "=\n";
+ $~ = "details";
+ foreach my $level (grep { exists $detail{$_} } qw(I W E)) {
+ foreach (sort keys %{$detail{$level}}) {
+ printf "= %-60s: %5d\n",
+ $_, $detail{$level}->{$_};
+ }
+ }
+ }
+}
+
+{
+package State;
+
+ sub new {
+ my $class = shift;
+ my ($dir, $name) = @_;
+
+ $name =~ s/\//\./g;
+
+ my $self;
+
+ -d $dir or mkdir $dir => 0777 or die "Can't mkdir $dir: $!\n";
+ $self->{fh} = new IO::File "$dir/$name", "a+"
+ or die "Can't open >>$dir/$name: $!\n";
+
+ $self->{fh}->seek(0, 0) or die "Can't seek: $!";
+ while (defined($_ = $self->{fh}->getline)) {
+ my ($uuid, $mid) = (split " ", $_)[2, 3];
+ $self->{seen}{$uuid} = $mid;
+ }
+
+ return bless $self => $class;
+ }
+
+ sub seen {
+
+ my $self = shift;
+ my $folder = shift;
+
+
+ foreach my $msg (@_) {
+ next if $self->{seen}{"$msg->{uuid} $folder"};
+ $self->{fh}->print("seen @{[time]} $msg->{uuid} $msg->{h}{'message-id'}\n");
+ }
+ }
+}
+
+sub uniq(@) {
+ my %h;
+ @h{@_} = ();
+ keys %h;
+}
+
+__END__
+
+=head1 NAME
+
+ imapami - compare two imap servers
+
+=head1 SYNOPSIS
+
+ imapami [options]
+ imapami [-h|--help] [-m|--man]
+
+=head1 DESCRIPTION
+
+Most of the options are already preseeded for usage at MPIPKS.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--dst>=I<user>:I<password>@I<host>[:I<port>]
+
+=item B<--src>=I<user>:I<password>@I<host>[:I<port>]
+
+Describe source and destination. (no defaults)
+
+=item B<--prefix1>=I<prefix>
+
+Use this as the prefix for accessing the source folders. In other words,
+start searching below this folder. (Note: the INBOX may be outside of the
+prefix, but get's transferred too.) (default: none)
+
+=item B<--[no]sync>
+
+Sync messages. (default: off)
+
+=item B<--purge-dst>=I<messages|all>
+
+Purge the destination. If B<messages> is selected, the destination folder
+is cleaned just before the transfer (thus leaving folders untouched
+if the src counterpart is missing). If B<all> is selected, the B<complete>
+destination will be wiped before starting the syncronisation. (default: nothing)
+
+=item B<--cleanup-dst>=I<per-folder|all>
+
+Using this option we remove all messages from the destination if a counterpart in the
+source is missing. If B<messages> is selected, we do this only for folders
+that exist on the source. If B<all> is selected, we even remove folders missing on the source.
+Could be B<dangerous> if used on an empty src (thus after
+syncing with B<--delete-from-source).
+
+=item B<--[no]delete-from-source>
+
+If this option is selected, the messages transferred will be deleted
+from the source server. For performance reasons this will happen on a
+per folder basis. (default: no)
+
+=item B<--[no]diag>
+
+Print diagnostic messages if src and destination mismatch. (default: .. see the source)
+
+=item B<--[no]debugimap>
+
+Switch on IMAP debugging (default: off)
+
+=back
+
+=head1 DEBUGGING
+
+Beside the B<--debugimap> option you may set the environment variable C<Smart_Comments>
+to some true value to get information from the Smart::Comments used in the script.
+
+=cut
+
+# vim:sts=4 sw=4 aw ai sm: