# HG changeset patch # User Heiko Schlittermann # Date 1274739835 -7200 # Node ID feeeedd7fa602c0b19838a3f72083d9f3313ff7a code: added the current version. NOTE: still work in progress and not stable diff -r 000000000000 -r feeeedd7fa60 .perltidyrc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.perltidyrc Tue May 25 00:23:55 2010 +0200 @@ -0,0 +1,2 @@ +--paren-tightness=2 +--square-bracket-tightness=2 diff -r 000000000000 -r feeeedd7fa60 Mail-IMAPClient-3.24.diff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Mail-IMAPClient-3.24.diff Tue May 25 00:23:55 2010 +0200 @@ -0,0 +1,14 @@ +diff --git a/hs12/isync/Mail-IMAPClient-3.24/lib/Mail/IMAPClient.pm b/hs12/isync/Mail-IMAPClient-3.24/lib/Mail/IMAPClient.pm +--- a/hs12/isync/Mail-IMAPClient-3.24/lib/Mail/IMAPClient.pm ++++ b/hs12/isync/Mail-IMAPClient-3.24/lib/Mail/IMAPClient.pm +@@ -1192,8 +1192,8 @@ + + my $popped; + $popped = pop @$ref # (-: vi +- until ( $popped && $popped =~ /\)$CRLF$/o ) # (-: vi +- || !grep /\)$CRLF$/o, @$ref; ++ until ( $popped && $popped =~ /^\)$CRLF$/o ) # (-: vi ++ || !grep /^\)$CRLF$/o, @$ref; + + if ( $head =~ /BODY\[TEXT\]\s*$/i ) { # Next line is a literal + $string .= shift @$ref while @$ref; diff -r 000000000000 -r feeeedd7fa60 imapami --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/imapami Tue May 25 00:23:55 2010 +0200 @@ -0,0 +1,743 @@ +#! /usr/bin/perl +# © Heiko Schlittermann +# +# This is a „home brewed“ imap sync, almost for just one purpose, +# migrating uw-imap boxes to dovecot in a very special environment. This +# script may or may not be helpful for you. + +use strict; +use warnings; + +use Getopt::Long; +use File::Basename; +use Pod::Usage; +use Data::Dumper; +use Date::Parse; +use Digest::SHA1 qw/sha1_hex/; +use POSIX qw/strftime/; +use Smart::Comments -ENV; +use Time::HiRes qw(gettimeofday tv_interval usleep); +use IO::File; + +use lib dirname($0) . "/Mail-IMAPClient-3.24/lib"; +use Mail::IMAPClient 3.24; + +use lib dirname($0) . "/Unicode-IMAPUtf7-2.01/lib"; +use Unicode::IMAPUtf7 2.01; + +my $ME = basename $0; + +my ($o_host1, $o_host2) = (undef, undef); +my ($o_port1, $o_port2) = (143, 143); +my ($o_user1, $o_user2) = (undef, undef); +my ($o_password1, $o_password2) = (undef, undef); +my $o_prefix1 = ""; +my $o_sync = 0; +my $o_diag = 0; +my $o_folder = ""; +my $o_purge_dst = ""; +my $o_debugimap = 0; +my $o_delete_from_src = 0; +my $o_cleanup_dst = ""; +my $o_state_dir = "$ENV{HOME}/.$ME"; +my $sep; + +sub map_folders($$); +sub sync_folders($$\%); +sub message_info($$); +sub transfer_message($$$$$); +sub transferred_messages(); +sub transferred_bytes(); +sub fatal(@); +sub utf7($); +sub diag($$); +sub report_dups($$); + +sub setup_reporting($); +sub report(@); +sub summary($); + +++$|; + +$SIG{INT} = sub { print "\n"; exit; }; + +MAIN: { + GetOptions( + "src=s" => sub { + ($o_user1, $o_password1, $o_host1, $o_port1) = + "$_[1]:$o_port1" =~ /^(.*?):(.*?)@(.*?):(.*?)(:|$)/; + }, + "dst=s" => sub { + ($o_user2, $o_password2, $o_host2, $o_port2) = + "$_[1]:$o_port2" =~ /^(.*?):(.*?)@(.*?):(.*?)(:|$)/; + }, + "prefix1=s" => \$o_prefix1, + "sync!" => \$o_sync, + "debugimap=i" => \$o_debugimap, + "folder=s" => \$o_folder, + "purge-dst=s" => sub { + $_[1] =~ /^messages|all$/i or pod2usage; + $o_purge_dst = lc($_[1]); + }, + "cleanup-dst=s" => sub { + $_[1] =~ /^per-folder|all$/i or pod2usage; + $o_cleanup_dst = lc($_[1]); + }, + "delete-from-src!" => \$o_delete_from_src, + "diag!" => \$o_diag, + "state-dir=s" => \$o_state_dir, + "help" => sub { pod2usage(-verbose => 1, exit => 0) }, + "man" => sub { pod2usage(-verbose => 2, exit => 0) }, + ) or pod2usage(); + + fatal "For data protection reasons it's not possible to use --cleanup-dst=all and --folder=...\n" + if $o_cleanup_dst eq "all" and $o_folder; + + -d $o_state_dir or mkdir $o_state_dir => 0777 or die "$ME: Can't create dir $o_state_dir: $!\n"; + + setup_reporting("$o_user1⇒$o_user2"); + + ### connecting to: $o_host1 + my $imap1 = new Mail::IMAPClient( + Server => $o_host1, + Port => $o_port1, + User => $o_user1, + Password => $o_password1, + Debug => ($o_debugimap =~ /1/ ? 1 : 0), + Uid => 1, + ) or fatal "Can't connect to $o_host1 as $o_user1: $@"; + + ### connecting to: $o_host2 + my $imap2 = new Mail::IMAPClient( + Server => $o_host2, + Port => $o_port2, + User => $o_user2, + Password => $o_password2, + Debug => ($o_debugimap =~ /2/ ? 1 : 0), + Uid => 1, + ) or fatal "Can't connect to $o_host2 as $o_user2: $@"; + + fatal "The separator is not '/' as I'd expect." + if ($sep = $imap1->separator) ne "/"; + + fatal "The separator is not the same on both sides." + if $sep ne $imap2->separator; + + # now create a map from src folder name to destination folder name + # and if this is done, start the job + + my %folders = + map_folders($o_prefix1, [grep { /$o_folder/ } "INBOX", $imap1->folders]); + + ### %folders + + sync_folders($imap1, $imap2, %folders); + + report "_: done syncing\n"; + +END { + summary($o_user1) if defined $o_user1; +} + + exit; + +} + +sub map_folders($$) { + + my $prefix = shift; + my %folders; + @folders{ @{ $_[0] } } = (); + + $prefix .= "/" if $prefix and not $prefix =~ /\/$/; + + # inbox is special (case insensitive) + # all other are case sensitive in dovecot and + # probably in UW-IMAP too. We migrate the INBOX, but we migrate + # everything below Mail/ too, and there might be a Mail/inbox + # and we should not loose it. (But - checking with an UW-IMAP, it + # seems(!) that there the INBOX hides the Mail/(?i:Inbox) We could + # just ignore it, but I'd say, somebody will miss this folder(s). + + my %map; + + $map{INBOX} = "INBOX" + if @{ [delete @folders{ grep /^inbox$/i, keys %folders }] }; + + # remove everything else that does not match the prefix + # and then transform all names to names w/o the prefix + delete @folders{ grep !/^$prefix/, keys %folders }; + + foreach (sort keys %folders) { + my $f = $_; + + s/^$prefix//; + + if (/^inbox$/i) { + $_ = "Mail:$_"; + while (exists $folders{"$prefix$_"}) { + s/:/::/; + } + } + elsif (s/\./ /g) { + + # the rename operation should create a new + # folder name, if we would have a name clash + # already on the source! + while (exists $folders{"$prefix$_"}) { + if (/\d$/) { + s/(\d+)$/$1++/e; + next; + } + $_ .= " 1"; + } + } + $map{$f} = $_; + } + + return %map; +} + +sub sync_folders($$\%) { + my ($i1, $i2) = (shift, shift); + my %map = %{ shift @_ }; + + + # If purge-dst=all is requested we really remove the folders + # on the destination host. INBOX has to be handled on it's own, + # it is not deleteable + if ($o_purge_dst eq "all") { + if ($o_sync) { + foreach ($i2->folders) { + if (/^inbox$/i) { + report "_: purging $_ on destination\n"; + $i2->select($_) or fatal "Can't select $_: $@\n"; + $i2->delete_message($i2->messages); + $i2->expunge; + next; + } + report "_: removing $_ on destination\n"; + $i2->unsubscribe($_); + $i2->delete($_); + } + } + else { + report "_: would purge ALL folders on dst\n"; + } + } + + # remove all folders we do not have in our map + # note: this option is not allowed when using the --folder option + if ($o_cleanup_dst eq "all") { + my %need; + @need{values %map} = (); + foreach (grep { not exists $need{$_} } $i2->folders) { + if (not $o_sync) { + report "_: would delete folder $_ from dst\n"; + next; + } + if (/^inbox$/i) { + report "_: purging INBOX on destination\n"; + $i2->select($_) or fatal "Can't select $_: $@\n"; + $i2->delete_message($i2->messages); + $i2->expunge; + next; + } + report "_: removing $_ on destination\n"; + $i2->unsubscribe($_); + $i2->delete($_); + + } + } + + FOLDER: foreach my $f1 (sort keys %map) { + ### current folder: $f1 + + report("I: not selectable: $f1\n"), next + if not $i1->selectable($f1); + + if (!$i1->examine($f1)) { + report "E: can't select ($@): $f1\n"; + next FOLDER; + } + + my $f2 = $map{$f1}; + + my ($messages1, $messages2); + $messages1 = message_info($i1, $f1); + + if ($messages1 =~ /^(.:.*)/) { + report "$1\n"; + next FOLDER; + } + + my $state = new State ("$o_state_dir/$o_user1" => $f2); + report_dupes($f1, $messages1); + + # skip the folder if we didn't get a message-info hash + # the message_info() returns a string with additional info + report "_: from folder $f1"; + report "(" . keys(%$messages1) . ") to $f2"; + + if (not $i2->exists(utf7($f2))) { + if (not $o_sync) { + report "E: missing folder: $f2\n"; + next FOLDER; + } + + $i2->create(utf7($f2)) + or fatal "\nE: Can't create on destination ($@): $f2\n"; + report "(created)"; + $messages2 = undef; + } + else { + if ($o_sync) { + $i2->select(utf7($f2)) or fatal "Can't select $f2 on destination"; + if ($o_purge_dst eq "messages" and (my @m = $i2->messages)) { + $i2->delete_message(@m); + $i2->expunge(); + report "(purged " . @m . " messages)"; + } + } + else { + $i2->examine(utf7($f2)) or fatal "Can't examine $f2 on destination"; + if ($o_purge_dst eq "messages" and (my @m = $i2->messages)) { + report " (would purge " . @m . " messages)"; + } + } + + $messages2 = message_info($i2, utf7($f2)); + report "(" . keys(%$messages2) . ")"; + $state->seen("dst:$f2", values %$messages2); + } + + # now $messages2 contains a list of messages found on the destination + + $i2->subscribe(utf7($f2)) if $o_sync; + + my (%transfer, %cleanup); + + # we do only one way sync host1 -> host2 + # so every message on host1 with no counterpart on host2 + # will be migrated + + %transfer = %$messages1; + delete @transfer{ keys %$messages2 }; + + if ($o_cleanup_dst eq "per-folder") { + %cleanup = %$messages2; + delete @cleanup{ keys %$messages1 }; + } + + + report ": transfer(" . keys(%transfer) . ")" + . (%cleanup ? " delete(" . keys(%cleanup) . ")\n" : "\n"); + + diag(\%transfer, $messages2) if %transfer and $o_diag; + + next FOLDER unless $o_sync; + + if (%cleanup) { + $i2->select(utf7($f2)); + report "_: cleanup: " . scalar(keys %cleanup) . " messages from $f2\n"; + $i2->delete_message( map { @{$_->{ids}} } values %cleanup); + $i2->expunge(); + } + + next FOLDER if not %transfer; + + my @transferred = (); + MESSAGE: foreach my $msg ( + map { $transfer{$_} } + sort { $transfer{$a}{ids}[0] <=> $transfer{$b}{ids}[0] } + keys %transfer + ) + { + report "I: transferring: " + . (@transferred + 1) . " of " + . scalar(keys %transfer) + . " $f1:$msg->{ids}[0] => $f2 ($msg->{imap}{'RFC822.SIZE'} bytes)"; + eval { + transfer_message($i1, $i2, $f1, $f2, $msg); + }; + if ($@) { + report "E: transfer failed: $f1 $msg->{ids}[0]: $@\n"; + #$i1->Debug(1); + $i1->Status or $i1->connect or die $@; + $i1->examine($f1) or die $@; + + #$i2->Debug(1); + $i2->Status or $i2->connect or die $@; + $i2->select($f2) or die $@; + next MESSAGE; + } + + report " OK\n"; + push @transferred, $msg->{ids}[0]; + } + + if ($o_delete_from_src) { + $i1->select($f1); + $i1->delete_message(@transferred); + $i1->expunge($f1); + } + + # now we have the folders on both sides + # if the destination is empty, we can send just everything. + } +} + +{ + my $messages; + my $bytes; + +sub transfer_message($$$$$) { + my ($i1, $i2, $f1, $f2, $msg) = @_; + + my $message_string = $msg->{imap}{"RFC822.HEADER"}; + local $_ = $i1->body_string($msg->{ids}[0]) + or die "Can't fetch body_string for $msg->{ids}[0]: $@\n" + . "-" x 72 . "\n" + . (join "" => map { s/^/\t/gm; $_ } $msg->{imap}{"RFC822.HEADER"}) + . "-" x 72 . "\n"; + $message_string .= $_; + + $messages++; + $bytes += length($message_string); + + return $i2->append_string( + utf7($f2), + $message_string, + $msg->{imap}{FLAGS}, + $msg->{imap}{INTERNALDATE} + ) or fatal $@; +} + +sub transferred_bytes() { defined $bytes ? $bytes : 0 }; +sub transferred_messages() { defined $messages ? $messages : 0 }; + +} + +sub message_info($$) { + my @CHECKITEMS = qw/INTERNALDATE FLAGS RFC822.HEADER RFC822.SIZE/; + my $i = shift; + my $folder = shift; + + my %message_info = (); + + if ($i->messages == 0) { + return "I: empty: $folder"; + } + + my $message = $i->fetch_hash(@CHECKITEMS); + + if ( keys %$message == 1 + and (values %$message)[0]->{"RFC822.HEADER"} =~ /^subject:\s*\/home\//mi + and (values %$message)[0]->{"RFC822.HEADER"} !~ /^received:/i) + { + return "I: fake folder: $folder"; + } + + foreach my $id (keys %$message) { + + # It seems that we've problems using the internal date as a + # checkitem - too many different formats and even timeshifts + # solution: transform it to unix timestamp and back + + if (exists $message->{$id}{INTERNALDATE}) { + + # 17-Jul-1996 02:44:25 -0700 + $message->{$id}{INTERNALDATE} = strftime("%02d-%b-%Y %T %z", + localtime str2time($message->{$id}{INTERNALDATE})); + } + + if (exists $message->{$id}{FLAGS}) { + $message->{$id}{FLAGS} = + join " " => sort grep { !/^\\recent$/i } split " ", + $message->{$id}{FLAGS}; + } + + my $digest = sha1_hex(@{ $message->{$id} }{@CHECKITEMS}); + if (exists $message_info{$digest}) { + push @{ $message_info{$digest}{ids} }, $id; + } + else { + $message_info{$digest}{ids} = [$id]; + $message_info{$digest}{imap} = $message->{$id}; + } + + # uuid - to uniquly identify the message, only based on the rfc822 header! + # we'll use this for remember already seen (transferred) message - they may + # change other items of the CHECKITEMS above + $message_info{$digest}{uuid} = + sha1_hex($message_info{$digest}{imap}{"RFC822.HEADER"}); + $message_info{$digest}{h}{"message-id"} + = $message_info{$digest}{imap}{"RFC822.HEADER"} =~ /^message-id:\s*(<.*?>)\s*$/im ? $1 : "<>"; + } + + return \%message_info; +} + +sub report_dupes($$) { + my $folder = shift; + my $mi = shift; + + foreach (map { $mi->{$_} } keys %$mi) { + next if @{$_->{ids}} == 1; + + my $mid = $_->{imap}{"RFC822.HEADER"} =~ /^message-id:\s*(.*?)\s*$/im + ? $1 : ("<" . sha1_hex($_->{imap}{"RFC822.HEADER"}) . "\@imapami>"); + + report "W: dup in $folder: " . (@{$_->{ids}} - 1) . " for $mid\n"; + + if ($o_diag) { + my $header = $_->{imap}{"RFC822.HEADER"}; + $header =~ s/^/*\t/mg; + report "*: original header\n$header"; + } + } + + + +} + +sub diag($$) { + my ($m1, $m2) = @_; + + # in $m1 is the hash with messages missing on the destination + # $m2 contains everything we know about the destination + + foreach my $m (keys %$m1) { + report "\n"; + + (my $msgid) = + ($m1->{$m}{imap}{"RFC822.HEADER"} =~ /^message-id:\s*(.*)$/im); + (my $other) = grep { + $m2->{$_}{imap}{"RFC822.HEADER"} =~ /^(?i:message-id:\s*)$msgid$/m + } keys %$m2; + + report "_: diag for $msgid\n"; + + if (not $other) { + report "_: the message is really missing\n"; + next; + } + + foreach (qw/RFC822.SIZE RFC822.HEADER FLAGS INTERNALDATE/) { + next if $m1->{$m}{imap}{$_} eq $m2->{$other}{imap}{$_}; + report "_: Mismatch on $_\n", + "\t$_ 1: $m1->{$m}{imap}{$_}\n", + "\t$_ 2: $m2->{$other}{imap}{$_}\n"; + } + } + +} + +sub fatal(@) { + die "$ME:FATAL: ", @_, "\n" +} + +sub utf7($) { + return $_[0] unless $_[0] =~ /(?:[\x80-\xff])|(&[^-])/; + return Unicode::IMAPUtf7->new->encode($_[0]); +} + +{ + my $last_line; + my $lines; + my %report; + my $user; + my $start; + + sub setup_reporting($) { + $user = shift; + $last_line = "\n"; + $lines = 0; + $start = [gettimeofday]; + } + + sub report(@) { + local $_ = join "", @_; + + if (s/^(.:)/$1($user)/) { + print "\n" if $last_line !~ /\n$/; + } + + if (/^(([IWE]):.*?)\n?$/m) { + push @{ $report{$2} }, $1; + } + $lines += tr/\n/\n/; + + $last_line = $_; + print; + } + + sub summary($) { + our $user = shift; + our ($text, $n, $r); + my %detail; + + usleep 1; # avoids div by zero + + print $_ = "= Summary for user \"$user\"\n"; + print "= ", "=" x (length($_)- 3), "\n"; + printf "= Run time : %5d s\n", tv_interval($start); + printf "= Transferred messages : %5d (%5.1f /s)\n", transferred_messages, transferred_messages() / tv_interval($start); + printf "= Transferred Data : %5d MiB (%5.1f MiB/s)\n", (transferred_bytes/2**20), (transferred_bytes/2**20) / tv_interval($start); + + format = += @<<<<<<<<: @##### (@##.#)% +$text, $n, $r +. + $text = "Lines"; + $n = $lines; + $r = 100; + write; + + foreach (qw(I W E)) { + $text = + $_ eq "I" ? "Info" + : $_ eq "W" ? "Warnings" + : $_ eq "E" ? "Errors" + : "??"; + $n = defined $report{$_} ? @{ $report{$_} } : 0; + $r = $lines ? $n / $lines * 100 : 0; + write; + + if (defined $report{$_}) { + foreach my $r (@{$report{$_}}) { + $r =~ /^(.:.*?):/; + $detail{$_}->{$1}++; + } + } + + } + + print "=\n"; + $~ = "details"; + foreach my $level (grep { exists $detail{$_} } qw(I W E)) { + foreach (sort keys %{$detail{$level}}) { + printf "= %-60s: %5d\n", + $_, $detail{$level}->{$_}; + } + } + } +} + +{ +package State; + + sub new { + my $class = shift; + my ($dir, $name) = @_; + + $name =~ s/\//\./g; + + my $self; + + -d $dir or mkdir $dir => 0777 or die "Can't mkdir $dir: $!\n"; + $self->{fh} = new IO::File "$dir/$name", "a+" + or die "Can't open >>$dir/$name: $!\n"; + + $self->{fh}->seek(0, 0) or die "Can't seek: $!"; + while (defined($_ = $self->{fh}->getline)) { + my ($uuid, $mid) = (split " ", $_)[2, 3]; + $self->{seen}{$uuid} = $mid; + } + + return bless $self => $class; + } + + sub seen { + + my $self = shift; + my $folder = shift; + + + foreach my $msg (@_) { + next if $self->{seen}{"$msg->{uuid} $folder"}; + $self->{fh}->print("seen @{[time]} $msg->{uuid} $msg->{h}{'message-id'}\n"); + } + } +} + +sub uniq(@) { + my %h; + @h{@_} = (); + keys %h; +} + +__END__ + +=head1 NAME + + imapami - compare two imap servers + +=head1 SYNOPSIS + + imapami [options] + imapami [-h|--help] [-m|--man] + +=head1 DESCRIPTION + +Most of the options are already preseeded for usage at MPIPKS. + +=head1 OPTIONS + +=over + +=item B<--dst>=I:I@I[:I] + +=item B<--src>=I:I@I[:I] + +Describe source and destination. (no defaults) + +=item B<--prefix1>=I + +Use this as the prefix for accessing the source folders. In other words, +start searching below this folder. (Note: the INBOX may be outside of the +prefix, but get's transferred too.) (default: none) + +=item B<--[no]sync> + +Sync messages. (default: off) + +=item B<--purge-dst>=I + +Purge the destination. If B is selected, the destination folder +is cleaned just before the transfer (thus leaving folders untouched +if the src counterpart is missing). If B is selected, the B +destination will be wiped before starting the syncronisation. (default: nothing) + +=item B<--cleanup-dst>=I + +Using this option we remove all messages from the destination if a counterpart in the +source is missing. If B is selected, we do this only for folders +that exist on the source. If B is selected, we even remove folders missing on the source. +Could be B if used on an empty src (thus after +syncing with B<--delete-from-source). + +=item B<--[no]delete-from-source> + +If this option is selected, the messages transferred will be deleted +from the source server. For performance reasons this will happen on a +per folder basis. (default: no) + +=item B<--[no]diag> + +Print diagnostic messages if src and destination mismatch. (default: .. see the source) + +=item B<--[no]debugimap> + +Switch on IMAP debugging (default: off) + +=back + +=head1 DEBUGGING + +Beside the B<--debugimap> option you may set the environment variable C +to some true value to get information from the Smart::Comments used in the script. + +=cut + +# vim:sts=4 sw=4 aw ai sm: