# HG changeset patch # User Heiko Schlittermann (JUMPER) # Date 1311347169 -7200 # Node ID fb2455a007a7de208d43b790204f5eb0cf6d5313 # Parent 910cff13054185379769dc80e5ba0ef4afa54068 friday evening diff -r 910cff130541 -r fb2455a007a7 cleaner --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cleaner Fri Jul 22 17:06:09 2011 +0200 @@ -0,0 +1,159 @@ +#! /usr/bin/perl + +use 5.010; +use strict; +use warnings; +use Pod::Usage; +use Hash::Util qw(lock_keys); +use File::Find; +use File::Temp; +use DB_File; +use File::Basename; +use autodie qw(:all); +use Cwd qw(abs_path); + +use Getopt::Long; + +my %o = ( + dry => undef, + verbose => undef, + check => undef, +); lock_keys(%o); + +MAIN: { + GetOptions( + "n|dry!" => \$o{dry}, + "v|verbose!" => \$o{verbose}, + "c|check" => \$o{check}, + "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, + "m|man" => sub { pod2usage(-verbose => 2, -exit 0, + -noperldoc => system("perldoc -V 1>/dev/null + 2>&1"))}, + ) and @ARGV or pod2usage; + my $dir = shift; + my $tmp = File::Temp->new; + + # load the index files, remember the latest + # timestamp we see + my (%inuse, @idx); + #tie %idx, "DB_File" => $tmp->filename; + + find(sub { + (-f) and (-M > 0) or return; + verbose("idx: $File::Find::name"); + push @idx, abs_path $_; + foreach my $f (get_file_list($_)) { + push @{$inuse{$f}} => $#idx; + } + }, "$dir/idx"); + + verbose("indexed: ".scalar(keys %inuse)." files"); + + # simple "forward" check: existence of mentioned files + if ($o{check}) { + my $total = scalar keys %inuse; + my $done = 0; + local $SIG{ALRM} = sub { + say sprintf "done %5.1f%% (%*d of $total)" + => 100 * $done/$total, length($total), $done; + alarm 5; + }; + $SIG{ALRM}->(); + while (my ($f, $i) = each %inuse) { + ++$done; + next if -f "$dir/data/$f" + or -f "$dir/data/$f.gz"; + say "missing $f from\n", + join "-\t" => "", map { "$_\n" } @idx[@$i]; + } + $SIG{ALRM}->(); + alarm 0; + exit 0; + } + + # full check and probably cleaning: all files, not mentioned + # in some index will be purged +# my (%file); +#- find(sub { +#- (-f) and (-M > 0) or return; +#- $File::Find::name =~ s/^$dir\/data\///; +#- $file{$_} = $_; +#- }, "$dir/data"); +#- +#- verbose("file system: ".scalar(keys %file)." files"); +#- exit 0; + + # ok, now go through all the data files and remove + # files not mentioned in some index, but never remove + # files created after the cleaner started + find(sub { + (-f) and (-M > 0) or return; + + # cut away the first part of the filename and + # some optional extension + $File::Find::name = abs_path $File::Find::name; + (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; + exists $inuse{$rn} and return; + + if ($o{dry}) { + verbose("(unlinking) $File::Find::name"); + return; + } + + verbose("unlinking $File::Find::name"); + unlink $File::Find::name; + + }, "$dir/data"); + +} + +sub verbose { say @_ if $o{verbose} } + +sub get_file_list { + my ($list) = @_; + my @files = (); + + open(my $fh => $list); + while (<$fh>) { + push @files, (split)[2]; + } + return grep /^[a-z\d.\/]+$/ => @files; +} + + +__END__ + +=head1 NAME + + cleaner - cleans the imager data directory + +=head1 SYNOPSIS + + cleaner [options] {directory} + +=head1 DESCRIPTION + +This tool loads all the index files from IF +and purges all not mentioned files below IF. + +=head1 OPTIONS + +=over + +=item B<-c>|B<--check> + +Check (and exit) if nothing is missing. + +=item B<-n>|B<--dry> + +Do nothing, just print what should be removed. (default: off) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The short and longer help. + +=back + +=cut diff -r 910cff130541 -r fb2455a007a7 imager --- a/imager Thu Jul 21 00:20:10 2011 +0200 +++ b/imager Fri Jul 22 17:06:09 2011 +0200 @@ -10,27 +10,32 @@ use File::Basename; use File::Temp; use Sys::Hostname; -use IO::Compress::Gzip qw(gzip $GzipError); +use IO::Compress::Gzip qw(gzip $GzipError :level :strategy); +use Hash::Util qw(lock_keys); use Getopt::Long; use Pod::Usage; use constant KiB => 1024; use constant MiB => 1024 * KiB; use constant GiB => 1024 * MiB; -use constant BLOCKSIZE => 8 * MiB; +use constant BLOCKSIZE => 1 * MiB; +use constant NOW => time(); +use constant DATETIME => strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime(NOW)); sub get_devsize; sub get_devname; $SIG{INT} = sub { die "Got INT\n" }; -my $compress = 0; +my %o = ( + compress => undef, + verbose => undef, +); lock_keys(%o); my $NOW = time(); -my $DATETIME = strftime("%Y-%m-%dT%H:%M:%SZ" => gmtime($NOW)); MAIN: { - my ($dev, $dir); + my ($src, $dst); my $idx = "{DIR}/idx/{HOSTNAME}/{DEVICE}/"; my $data = "{DIR}/data"; @@ -38,35 +43,36 @@ GetOptions( "h|help" => sub { pod2usage(-verbose => 1, exit => 0) }, - "m|man" => sub { pod2usage(-verbose => 2, exit => 0, + "m|man" => sub { pod2usage(-verbose => 2, exit => 0, -noperldoc => system("perldoc -V >/dev/null 2>&1")) - } + }, + "z|compress:i" => sub { $o{compress} = $_[1] ? $_[1] : Z_BEST_SPEED }, ) and @ARGV == 2 or pod2usage; - ($dev, $dir) = @ARGV; + ($src, $dst) = @ARGV; foreach ($idx, $data) { - s/{DIR}/$dir/g; + s/{DIR}/$dst/g; s/{HOSTNAME}/hostname/eg; - s/{DEVICE}/get_devname($dev)/eg; + s/{DEVICE}/get_devname($src)/eg; } - $size = get_devsize($dev); + $size = get_devsize($src); - -d $dir or die "$0: $dir: $!\n"; - mkpath($data, $idx); + -d $dst or die "$0: $dst: $!\n"; + mkpath([$data, $idx]); my $index = File::Temp->new(DIR => $idx); print {$index} <<__EOT; # imager format: 1 -filesystem: $dev +filesystem: $src blocksize: @{[BLOCKSIZE]} devsize: $size -timestamp: $NOW -datetime: $DATETIME +timestamp: @{[NOW]} +datetime: @{[DATETIME]} __EOT - open(my $in => $dev); + open(my $in => $src); binmode($in); local $/ = \(my $bs = BLOCKSIZE); local $| = 1; @@ -77,19 +83,29 @@ ); while (my $buffer = <$in>) { - my $cs = md5_hex($buffer); - (my $file = $cs) =~ s/(..)(..)(.*)/$1\/$2\/$3/g; - $file .= ".gz" if $compress; + my ($file, $ext, $cs); + $file = $cs = md5_hex($buffer); +# $file =~ s/((..)(..).*)/$2\/$3\/$1/g; + $file =~ s/(?(?...).*)/$+{prefix}\/$+{fn}/g; + $ext = $o{compress} ? ".gz" : ""; + # the extension we do not put into the index my $log = sprintf "%6d %s %s" => ($.-1), $cs, $file; - if (!-e "$data/$file") { - mkpath dirname("$data/$file"); - open(my $out, ">$data/$file"); + if (not (-e "$data/$file" or -e "$data/$file$ext")) { + mkpath dirname("$data/$file$ext"); + my $out = File::Temp->new(TEMPLATE => ".XXXXXXX", DIR => dirname("$data/$file$ext")); binmode($out); - if ($compress) { gzip \$buffer => $out or die $GzipError } + if ($o{compress}) { + gzip(\$buffer => $out, + -Minimal => 1, + -Level => Z_BEST_SPEED, + -Strategy => Z_FILTERED) + or die $GzipError + } else { print {$out} $buffer } close($out); + rename($out => "$data/$file$ext"); $log .= " *"; $stats{written}++; } @@ -109,7 +125,7 @@ say "# SAVINGS " . sprintf "%3d%%" => 100 * ($stats{skipped}/($stats{written}+$stats{skipped})); - rename $index->filename => "$idx/$DATETIME"; + rename $index->filename => "$idx/".DATETIME; close $index; } @@ -142,9 +158,29 @@ =head1 DESCRIPTION This tool creates a snapshot of a blockdevice. +Just call it like + + imager /dev/sda1 /media/backup + +This will create F, if not already existing. +The index (blocklist) goes to +IFIFI. The data goes to +I/F. =head1 OPTIONS -Currently there are no useful options. +=over + +=item B<-z>[I]|B<--compress>[=I] + +Use compression when writing the blocks to disk. (default: off) + +=item B<-h>|B<--help> + +=item B<-m>|B<--man> + +The short and longer help. + +=back =cut