--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cleaner Fri Jul 22 17:06:09 2011 +0200
@@ -0,0 +1,159 @@
+#! /usr/bin/perl
+
+use 5.010;
+use strict;
+use warnings;
+use Pod::Usage;
+use Hash::Util qw(lock_keys);
+use File::Find;
+use File::Temp;
+use DB_File;
+use File::Basename;
+use autodie qw(:all);
+use Cwd qw(abs_path);
+
+use Getopt::Long;
+
+my %o = (
+ dry => undef,
+ verbose => undef,
+ check => undef,
+); lock_keys(%o);
+
+MAIN: {
+ GetOptions(
+ "n|dry!" => \$o{dry},
+ "v|verbose!" => \$o{verbose},
+ "c|check" => \$o{check},
+ "h|help" => sub { pod2usage(-verbose => 1, -exit 0) },
+ "m|man" => sub { pod2usage(-verbose => 2, -exit 0,
+ -noperldoc => system("perldoc -V 1>/dev/null
+ 2>&1"))},
+ ) and @ARGV or pod2usage;
+ my $dir = shift;
+ my $tmp = File::Temp->new;
+
+ # load the index files, remember the latest
+ # timestamp we see
+ my (%inuse, @idx);
+ #tie %idx, "DB_File" => $tmp->filename;
+
+ find(sub {
+ (-f) and (-M > 0) or return;
+ verbose("idx: $File::Find::name");
+ push @idx, abs_path $_;
+ foreach my $f (get_file_list($_)) {
+ push @{$inuse{$f}} => $#idx;
+ }
+ }, "$dir/idx");
+
+ verbose("indexed: ".scalar(keys %inuse)." files");
+
+ # simple "forward" check: existence of mentioned files
+ if ($o{check}) {
+ my $total = scalar keys %inuse;
+ my $done = 0;
+ local $SIG{ALRM} = sub {
+ say sprintf "done %5.1f%% (%*d of $total)"
+ => 100 * $done/$total, length($total), $done;
+ alarm 5;
+ };
+ $SIG{ALRM}->();
+ while (my ($f, $i) = each %inuse) {
+ ++$done;
+ next if -f "$dir/data/$f"
+ or -f "$dir/data/$f.gz";
+ say "missing $f from\n",
+ join "-\t" => "", map { "$_\n" } @idx[@$i];
+ }
+ $SIG{ALRM}->();
+ alarm 0;
+ exit 0;
+ }
+
+ # full check and probably cleaning: all files, not mentioned
+ # in some index will be purged
+# my (%file);
+#- find(sub {
+#- (-f) and (-M > 0) or return;
+#- $File::Find::name =~ s/^$dir\/data\///;
+#- $file{$_} = $_;
+#- }, "$dir/data");
+#-
+#- verbose("file system: ".scalar(keys %file)." files");
+#- exit 0;
+
+ # ok, now go through all the data files and remove
+ # files not mentioned in some index, but never remove
+ # files created after the cleaner started
+ find(sub {
+ (-f) and (-M > 0) or return;
+
+ # cut away the first part of the filename and
+ # some optional extension
+ $File::Find::name = abs_path $File::Find::name;
+ (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/;
+ exists $inuse{$rn} and return;
+
+ if ($o{dry}) {
+ verbose("(unlinking) $File::Find::name");
+ return;
+ }
+
+ verbose("unlinking $File::Find::name");
+ unlink $File::Find::name;
+
+ }, "$dir/data");
+
+}
+
+sub verbose { say @_ if $o{verbose} }
+
+sub get_file_list {
+ my ($list) = @_;
+ my @files = ();
+
+ open(my $fh => $list);
+ while (<$fh>) {
+ push @files, (split)[2];
+ }
+ return grep /^[a-z\d.\/]+$/ => @files;
+}
+
+
+__END__
+
+=head1 NAME
+
+ cleaner - cleans the imager data directory
+
+=head1 SYNOPSIS
+
+ cleaner [options] {directory}
+
+=head1 DESCRIPTION
+
+This tool loads all the index files from I<directory>F</idx/>
+and purges all not mentioned files below I<directory>F</data/>.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-c>|B<--check>
+
+Check (and exit) if nothing is missing.
+
+=item B<-n>|B<--dry>
+
+Do nothing, just print what should be removed. (default: off)
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+The short and longer help.
+
+=back
+
+=cut
--- a/imager Thu Jul 21 00:20:10 2011 +0200
+++ b/imager Fri Jul 22 17:06:09 2011 +0200
@@ -10,27 +10,32 @@
use File::Basename;
use File::Temp;
use Sys::Hostname;
-use IO::Compress::Gzip qw(gzip $GzipError);
+use IO::Compress::Gzip qw(gzip $GzipError :level :strategy);
+use Hash::Util qw(lock_keys);
use Getopt::Long;
use Pod::Usage;
use constant KiB => 1024;
use constant MiB => 1024 * KiB;
use constant GiB => 1024 * MiB;
-use constant BLOCKSIZE => 8 * MiB;
+use constant BLOCKSIZE => 1 * MiB;
+use constant NOW => time();
+use constant DATETIME => strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime(NOW));
sub get_devsize;
sub get_devname;
$SIG{INT} = sub { die "Got INT\n" };
-my $compress = 0;
+my %o = (
+ compress => undef,
+ verbose => undef,
+); lock_keys(%o);
my $NOW = time();
-my $DATETIME = strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime($NOW));
MAIN: {
- my ($dev, $dir);
+ my ($src, $dst);
my $idx = "{DIR}/idx/{HOSTNAME}/{DEVICE}/";
my $data = "{DIR}/data";
@@ -38,35 +43,36 @@
GetOptions(
"h|help" => sub { pod2usage(-verbose => 1, exit => 0) },
- "m|man" => sub { pod2usage(-verbose => 2, exit => 0,
+ "m|man" => sub { pod2usage(-verbose => 2, exit => 0,
-noperldoc => system("perldoc -V >/dev/null 2>&1"))
- }
+ },
+ "z|compress:i" => sub { $o{compress} = $_[1] ? $_[1] : Z_BEST_SPEED },
) and @ARGV == 2 or pod2usage;
- ($dev, $dir) = @ARGV;
+ ($src, $dst) = @ARGV;
foreach ($idx, $data) {
- s/{DIR}/$dir/g;
+ s/{DIR}/$dst/g;
s/{HOSTNAME}/hostname/eg;
- s/{DEVICE}/get_devname($dev)/eg;
+ s/{DEVICE}/get_devname($src)/eg;
}
- $size = get_devsize($dev);
+ $size = get_devsize($src);
- -d $dir or die "$0: $dir: $!\n";
- mkpath($data, $idx);
+ -d $dst or die "$0: $dst: $!\n";
+ mkpath([$data, $idx]);
my $index = File::Temp->new(DIR => $idx);
print {$index} <<__EOT;
# imager
format: 1
-filesystem: $dev
+filesystem: $src
blocksize: @{[BLOCKSIZE]}
devsize: $size
-timestamp: $NOW
-datetime: $DATETIME
+timestamp: @{[NOW]}
+datetime: @{[DATETIME]}
__EOT
- open(my $in => $dev);
+ open(my $in => $src);
binmode($in);
local $/ = \(my $bs = BLOCKSIZE);
local $| = 1;
@@ -77,19 +83,29 @@
);
while (my $buffer = <$in>) {
- my $cs = md5_hex($buffer);
- (my $file = $cs) =~ s/(..)(..)(.*)/$1\/$2\/$3/g;
- $file .= ".gz" if $compress;
+ my ($file, $ext, $cs);
+ $file = $cs = md5_hex($buffer);
+# $file =~ s/((..)(..).*)/$2\/$3\/$1/g;
+ $file =~ s/(?<fn>(?<prefix>...).*)/$+{prefix}\/$+{fn}/g;
+ $ext = $o{compress} ? ".gz" : "";
+ # the extension we do not put into the index
my $log = sprintf "%6d %s %s" => ($.-1), $cs, $file;
- if (!-e "$data/$file") {
- mkpath dirname("$data/$file");
- open(my $out, ">$data/$file");
+ if (not (-e "$data/$file" or -e "$data/$file$ext")) {
+ mkpath dirname("$data/$file$ext");
+ my $out = File::Temp->new(TEMPLATE => ".XXXXXXX", DIR => dirname("$data/$file$ext"));
binmode($out);
- if ($compress) { gzip \$buffer => $out or die $GzipError }
+ if ($o{compress}) {
+ gzip(\$buffer => $out,
+ -Minimal => 1,
+ -Level => Z_BEST_SPEED,
+ -Strategy => Z_FILTERED)
+ or die $GzipError
+ }
else { print {$out} $buffer }
close($out);
+ rename($out => "$data/$file$ext");
$log .= " *";
$stats{written}++;
}
@@ -109,7 +125,7 @@
say "# SAVINGS "
. sprintf "%3d%%" => 100 * ($stats{skipped}/($stats{written}+$stats{skipped}));
- rename $index->filename => "$idx/$DATETIME";
+ rename $index->filename => "$idx/".DATETIME;
close $index;
}
@@ -142,9 +158,29 @@
=head1 DESCRIPTION
This tool creates a snapshot of a blockdevice.
+Just call it like
+
+ imager /dev/sda1 /media/backup
+
+This will create F</media/backup/{data,idx}>, if not already existing.
+The index (blocklist) goes to
+I<destination>F</idx/>I<hostname>F</>I<devicename>. The data goes to
+I<destination>/F<data/>.
=head1 OPTIONS
-Currently there are no useful options.
+=over
+
+=item B<-z>[I<level>]|B<--compress>[=I<level>]
+
+Use compression when writing the blocks to disk. (default: off)
+
+=item B<-h>|B<--help>
+
+=item B<-m>|B<--man>
+
+The short and longer help.
+
+=back
=cut