equal
deleted
inserted
replaced
5 use warnings; |
5 use warnings; |
6 use Pod::Usage; |
6 use Pod::Usage; |
7 use Hash::Util qw(lock_keys); |
7 use Hash::Util qw(lock_keys); |
8 use File::Find; |
8 use File::Find; |
9 use File::Temp; |
9 use File::Temp; |
10 use DB_File; |
10 use Digest::MD5 qw(md5_hex); |
11 use File::Basename; |
11 use File::Basename; |
12 use autodie qw(:all); |
12 use autodie qw(:all); |
13 use Cwd qw(abs_path); |
13 use Cwd qw(abs_path); |
|
14 use IO::Compress::Gzip qw(&gzip $GzipError Z_BEST_SPEED); |
|
15 use IO::Uncompress::Gunzip qw(&gunzip $GunzipError); |
14 |
16 |
15 use Getopt::Long; |
17 use Getopt::Long; |
|
18 use constant CIPHER => "aes-128-cbc"; |
16 sub get_block_list; |
19 sub get_block_list; |
17 sub purge_unused; |
20 sub purge_unused; |
18 sub check_images; |
21 sub check_images; |
19 |
22 |
20 my %o = ( |
23 my %o = ( |
21 yes => undef, |
24 yes => undef, |
22 verbose => 1, |
25 verbose => 1, |
23 check => undef, |
26 checksum => undef, |
|
27 pass => undef, |
24 ); |
28 ); |
25 lock_keys(%o); |
29 lock_keys(%o); |
26 |
30 |
27 MAIN: { |
31 MAIN: { |
28 Getopt::Long::Configure qw(Bundling); |
32 Getopt::Long::Configure qw(Bundling); |
29 GetOptions( |
33 GetOptions( |
30 "y|yes!" => \$o{yes}, |
34 "y|yes!" => \$o{yes}, |
31 "v|verbose!" => \$o{verbose}, |
35 "v|verbose!" => \$o{verbose}, |
32 "c|check" => \$o{check}, |
36 "c|checksum" => \$o{checksum}, |
|
37 "p|pass" => \$o{pass}, |
33 "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, |
38 "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, |
34 "m|man" => sub { |
39 "m|man" => sub { |
35 pod2usage( |
40 pod2usage( |
36 -verbose => 2, |
41 -verbose => 2, |
37 -exit => 0, |
42 -exit => 0, |
43 }, |
48 }, |
44 ) |
49 ) |
45 and @ARGV |
50 and @ARGV |
46 or pod2usage; |
51 or pod2usage; |
47 my $dir = shift; |
52 my $dir = shift; |
48 |
53 |
49 while (1) { |
54 for (my $pass = 1 ; 1 ; ++$pass) { |
50 my %block = get_block_list($dir); |
55 my %block = get_block_list($dir); |
51 |
56 |
52 verbose("# reading index files"); |
57 verbose("# reading index files"); |
53 verbose("# indexed: " |
58 verbose("# indexed: " |
54 . scalar(@{ $block{""} // [] }) |
59 . scalar(@{ $block{""} // [] }) |
55 . " images with " |
60 . " images with " |
56 . (grep !/^\.idx$/ => keys(%block)) |
61 . (grep !/^\.idx$/ => keys(%block)) |
57 . " blocks"); |
62 . " blocks"); |
58 |
63 |
59 purge_unused($dir => %block); |
64 my $subpass = 0; |
60 check_images($dir => %block) and last; |
65 purge_unused($pass => ++$subpass, $dir => %block); |
61 |
66 check_images($pass => ++$subpass, $dir => %block) and last; |
62 verbose("# STARTING OVER!"); |
67 |
|
68 verbose("# STARTING OVER!"); |
63 } |
69 } |
64 } |
70 } |
65 |
71 |
66 sub verbose { say @_ if $o{verbose} } |
72 sub verbose { say @_ if $o{verbose} } |
67 |
73 |
91 ); |
97 ); |
92 return %block; |
98 return %block; |
93 } |
99 } |
94 |
100 |
95 sub purge_unused { |
101 sub purge_unused { |
96 my ($dir, %block) = @_; |
102 my ($pass, $subpass, $dir, %block) = @_; |
97 my ($total, $done, $t0); |
103 my ($total, $done, $t0); |
98 state $subpass = -1; |
104 |
99 |
105 verbose("# pass $pass.$subpass - checking for unused blocks"); |
100 verbose("# pass 1.@{[++$subpass]} - checking for unused blocks"); |
|
101 verbose("# estimating file count"); |
106 verbose("# estimating file count"); |
102 |
107 |
103 # calculate the number of files we expect |
108 # calculate the number of files we expect |
104 find( |
109 find( |
105 sub { |
110 sub { |
118 $t0 = time; |
123 $t0 = time; |
119 local $SIG{ALRM} = sub { |
124 local $SIG{ALRM} = sub { |
120 return alarm 1 if not $done; |
125 return alarm 1 if not $done; |
121 my $speed = $done / (time - $t0 + 1); |
126 my $speed = $done / (time - $t0 + 1); |
122 verbose sprintf |
127 verbose sprintf |
123 "# pass 1.$subpass done %5.1f%% | %25s (%*d of %d blocks)", |
128 "# pass $pass.$subpass done %5.1f%% | %25s (%*d of %d blocks)", |
124 100 * ($done / $total), |
129 100 * ($done / $total), |
125 scalar(localtime $t0 + $total / $speed), length($total) => $done, |
130 scalar(localtime $t0 + $total / $speed), length($total) => $done, |
126 $total; |
131 $total; |
127 alarm 5; |
132 alarm 5; |
128 }; |
133 }; |
180 } |
185 } |
181 |
186 |
182 } |
187 } |
183 |
188 |
184 sub check_images { |
189 sub check_images { |
185 my ($dir, %block) = @_; |
190 my ($pass, $subpass, $dir, %block) = @_; |
186 |
191 |
187 my $total = grep { $_ ne "" } keys(%block); |
192 my $total = grep { $_ ne "" } keys(%block); |
188 my $done = 0; |
193 my $done = 0; |
189 my $t0 = time; |
194 my $t0 = time; |
190 |
195 |
191 state $subpass = -1; |
196 verbose("# pass $pass.$subpass - checking image completeness"); |
192 verbose("# pass 2.@{[++$subpass]} - checking image completeness"); |
|
193 |
197 |
194 # progress |
198 # progress |
195 local $SIG{ALRM} = sub { |
199 local $SIG{ALRM} = sub { |
196 return alarm 1 if not $done; |
200 return alarm 1 if not $done; |
197 my $speed = $done / (time - $t0 + 1); |
201 my $speed = $done / (time - $t0 + 1); |
198 verbose sprintf |
202 verbose sprintf |
199 "# pass 2.$subpass done %5.1f%% | %25s (%*d of %d blocks)", |
203 "# pass $pass.$subpass done %5.1f%% | %25s (%*d of %d blocks)", |
200 100 * $done / $total, |
204 100 * $done / $total, |
201 scalar(localtime $t0 + $total / $speed), length($total) => $done, |
205 scalar(localtime $t0 + $total / $speed), length($total) => $done, |
202 $total; |
206 $total; |
203 alarm 5; |
207 alarm 5; |
204 }; |
208 }; |
208 foreach my $k (keys %block) { |
212 foreach my $k (keys %block) { |
209 my $i = $block{$k}; |
213 my $i = $block{$k}; |
210 next if $k eq ""; |
214 next if $k eq ""; |
211 ++$done; |
215 ++$done; |
212 |
216 |
213 next |
217 my ($file) = |
214 if -f "$dir/data/$k" |
218 grep { -f } |
215 or -f "$dir/data/$k.gz" |
219 map { "$dir/data/$_" } ($k, "$k.gz", "$k.x", "$k.x.gz", "$k.gz.x"); |
216 or -f "$dir/data/$k.x" |
220 |
217 or -f "$dir/data/$k.x.gz" |
221 if (not $file) { |
218 or -f "$dir/data/$k.gz.x"; |
222 say "missing $k @$i"; |
219 say "missing $k @$i"; |
223 @invalid{@$i} = (); |
|
224 next; |
|
225 } |
|
226 |
|
227 next if not $o{checksum}; |
|
228 |
|
229 # checking the checksum |
|
230 my $buffer; |
|
231 given ($file) { |
|
232 when (/\.gz\.x$/) { |
|
233 open( |
|
234 my $fh => |
|
235 "openssl @{[CIPHER]} -d -pass $o{pass} -in $file|"); |
|
236 local $/ = undef; |
|
237 gunzip($fh => \$buffer) or die $GunzipError; |
|
238 } |
|
239 when (/\.gz$/) { gunzip($file => \$buffer) or die $GunzipError } |
|
240 when (/\.x$/) { |
|
241 open( |
|
242 my $fh => |
|
243 "openssl @{[CIPHER]} -d -pass $o{pass} -in $file|"); |
|
244 local $/ = undef; |
|
245 $buffer = <$fh>; |
|
246 } |
|
247 default { open(my $fh => $file); local $/ = undef; $buffer = <$fh> } |
|
248 } |
|
249 |
|
250 next if md5_hex($buffer) eq basename($file, qw(.gz .x .gz.x)); |
|
251 say "wrong checksum for $file\n"; |
220 @invalid{@$i} = (); |
252 @invalid{@$i} = (); |
|
253 |
221 } |
254 } |
222 $SIG{ALRM}->(); |
255 $SIG{ALRM}->(); |
223 alarm 0; |
256 alarm 0; |
224 |
257 |
225 # invalid now contains the numbers of the idx files beiing |
258 # invalid now contains the numbers of the idx files beiing |
266 |
299 |
267 =head1 OPTIONS |
300 =head1 OPTIONS |
268 |
301 |
269 =over |
302 =over |
270 |
303 |
|
304 =item B<-c>|B<--checksum> |
|
305 |
|
306 Read all block files and check their checksum. (default: off) |
|
307 |
|
308 =item B<-p>|B<--pass> I<pass> |
|
309 |
|
310 In case you're using encrypted blocks, the param is passed to |
|
311 C<openssl>s C<-pass> option. (default: unset) |
|
312 |
271 =item B<-v>|B<-->[no]B<verbose> |
313 =item B<-v>|B<-->[no]B<verbose> |
272 |
314 |
273 Generate more output about what's going on. (default: on) |
315 Generate more output about what's going on. (default: on) |
274 |
316 |
275 =item B<-y>|B<--yes> |
317 =item B<-y>|B<--yes> |