1 #! /usr/bin/perl |
|
2 |
|
3 use 5.010; |
|
4 use strict; |
|
5 use warnings; |
|
6 use Pod::Usage; |
|
7 use Hash::Util qw(lock_keys); |
|
8 use File::Find; |
|
9 use File::Temp; |
|
10 use Digest::MD5 qw(md5_hex); |
|
11 use File::Basename; |
|
12 use autodie qw(:all); |
|
13 use Cwd qw(abs_path); |
|
14 use Imager; |
|
15 |
|
16 use Getopt::Long; |
|
17 use constant CIPHER => "aes-128-cbc"; |
|
18 sub get_block_list; |
|
19 sub purge_unused; |
|
20 sub check_images; |
|
21 |
|
22 our %o = ( |
|
23 yes => undef, |
|
24 verbose => 1, |
|
25 checksum => undef, |
|
26 pass => undef, |
|
27 ); |
|
28 lock_keys(%o); |
|
29 |
|
30 MAIN: { |
|
31 Getopt::Long::Configure qw(Bundling); |
|
32 GetOptions( |
|
33 "y|yes!" => \$o{yes}, |
|
34 "v|verbose!" => \$o{verbose}, |
|
35 "c|checksum" => \$o{checksum}, |
|
36 "p|pass" => \$o{pass}, |
|
37 "h|help" => sub { pod2usage(-verbose => 1, -exit => 0) }, |
|
38 "m|man" => sub { |
|
39 pod2usage( |
|
40 -verbose => 2, |
|
41 -exit => 0, |
|
42 -noperldoc => system( |
|
43 "perldoc -V 1>/dev/null |
|
44 2>&1" |
|
45 ) |
|
46 ); |
|
47 }, |
|
48 ) |
|
49 and @ARGV |
|
50 or pod2usage; |
|
51 my $dir = shift; |
|
52 |
|
53 for (my $pass = 1 ; 1 ; ++$pass) { |
|
54 verbose("# reading index files"); |
|
55 my %block = get_block_list($dir); |
|
56 verbose("# indexed: " |
|
57 . scalar(@{ $block{""} // [] }) |
|
58 . " images with " |
|
59 . (grep !/^\.idx$/ => keys(%block)) |
|
60 . " blocks"); |
|
61 |
|
62 my $subpass = 0; |
|
63 purge_unused($pass => ++$subpass, $dir => %block); |
|
64 check_images($pass => ++$subpass, $dir => %block) and last; |
|
65 |
|
66 verbose("# STARTING OVER!"); |
|
67 } |
|
68 } |
|
69 |
|
70 sub verbose { say @_ if $o{verbose} } |
|
71 |
|
72 sub get_file_list { |
|
73 my ($list) = @_; |
|
74 my @files = (); |
|
75 |
|
76 open(my $fh => $list); |
|
77 while (<$fh>) { |
|
78 push @files, (split)[2]; |
|
79 } |
|
80 return grep /^[a-z\d.\/]+$/ => @files; |
|
81 } |
|
82 |
|
83 sub get_block_list { |
|
84 my $dir = shift; |
|
85 my %block; |
|
86 find( |
|
87 sub { |
|
88 (-f) or return; # we need to include the tmp files! |
|
89 push @{ $block{""} }, abs_path $_; |
|
90 foreach my $f (get_file_list($_)) { |
|
91 push @{ $block{$f} } => $#{ $block{""} }; |
|
92 } |
|
93 }, |
|
94 "$dir/idx" |
|
95 ); |
|
96 return %block; |
|
97 } |
|
98 |
|
99 sub purge_unused { |
|
100 my ($pass, $subpass, $dir, %block) = @_; |
|
101 my ($total, $done, $t0); |
|
102 |
|
103 verbose("# pass $pass.$subpass - checking for unused blocks"); |
|
104 verbose("# estimating file count"); |
|
105 |
|
106 # calculate the number of files we expect |
|
107 find( |
|
108 sub { |
|
109 -d or return; |
|
110 opendir(my $dh => $_); |
|
111 map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh; |
|
112 closedir($dh); |
|
113 $File::Find::prune = |
|
114 $_ =~ /^[\d[a-f]{3}$/; # FIXME should be configurable |
|
115 }, |
|
116 "$dir/data" |
|
117 ); |
|
118 verbose("# got $total blocks/files"); |
|
119 |
|
120 # progress |
|
121 $t0 = time; |
|
122 local $SIG{ALRM} = sub { |
|
123 return alarm 1 if not $done; |
|
124 my $speed = $done / (time - $t0 + 1); |
|
125 verbose sprintf |
|
126 "# pass $pass.$subpass done %5.1f%% | %25s (%*d of %d blocks)", |
|
127 100 * ($done / $total), |
|
128 scalar(localtime $t0 + $total / $speed), length($total) => $done, |
|
129 $total; |
|
130 alarm 5; |
|
131 }; |
|
132 $SIG{ALRM}->(); |
|
133 |
|
134 my @unused; |
|
135 find( |
|
136 sub { |
|
137 $done++ if -f; |
|
138 (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks |
|
139 |
|
140 # we don't need uncompressed files if an compressed version |
|
141 # exists |
|
142 unlink $_ and return if -f "$_.gz"; |
|
143 unlink "$_.x" and return if -f "$_.x.gz"; |
|
144 |
|
145 # the next step we can't do, because it can happen that |
|
146 # the restorer does not know about a password |
|
147 #unlink "$_.gz.x" and return if -f "$_.gz"; |
|
148 |
|
149 # cut away the first part of the filename and |
|
150 # some optional extension |
|
151 (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; |
|
152 return if exists $block{$rn}; |
|
153 push @unused => abs_path $_; |
|
154 return; |
|
155 |
|
156 }, |
|
157 "$dir/data" |
|
158 ); |
|
159 $SIG{ALRM}->(); |
|
160 alarm 0; |
|
161 |
|
162 return if not @unused; |
|
163 |
|
164 say sprintf "found %d (%.1f%%) unused files", |
|
165 0 + @unused, |
|
166 100 * (@unused / $total); |
|
167 |
|
168 if ($o{yes}) { |
|
169 verbose("# deleting " . @unused . " files"); |
|
170 unlink @unused; |
|
171 return; |
|
172 } |
|
173 |
|
174 if (-t) { |
|
175 while (1) { |
|
176 print "delete? [y/N/v]: "; |
|
177 given (<STDIN>) { |
|
178 when (/^y(?:es)?$/i) { unlink @unused; last } |
|
179 when (/^v/) { say join "\n", @unused; next } |
|
180 default { last } |
|
181 } |
|
182 } |
|
183 } |
|
184 |
|
185 } |
|
186 |
|
187 sub check_images { |
|
188 my ($pass, $subpass, $dir, %block) = @_; |
|
189 |
|
190 my $total = grep { $_ ne "" } keys(%block); |
|
191 my $done = 0; |
|
192 my $t0 = time; |
|
193 |
|
194 verbose("# pass $pass.$subpass - checking image completeness"); |
|
195 |
|
196 # progress |
|
197 local $SIG{ALRM} = sub { |
|
198 return alarm 1 if not $done; |
|
199 my $speed = $done / (time - $t0 + 1); |
|
200 verbose sprintf |
|
201 "# pass $pass.$subpass done %5.1f%% | %25s (%*d of %d blocks)", |
|
202 100 * $done / $total, |
|
203 scalar(localtime $t0 + $total / $speed), length($total) => $done, |
|
204 $total; |
|
205 alarm 5; |
|
206 }; |
|
207 $SIG{ALRM}->(); |
|
208 |
|
209 my %invalid; |
|
210 foreach my $k (keys %block) { |
|
211 state %checked; |
|
212 my $i = $block{$k}; |
|
213 next if $k eq ""; |
|
214 ++$done; |
|
215 |
|
216 my ($file) = |
|
217 grep { -f } |
|
218 map { "$dir/data/$_" } ($k, "$k.gz", "$k.x", "$k.x.gz", "$k.gz.x"); |
|
219 |
|
220 if (not $file) { |
|
221 say "missing $k @$i"; |
|
222 @invalid{@$i} = (); |
|
223 next; |
|
224 } |
|
225 |
|
226 next if not $o{checksum}; |
|
227 next if $checked{$file}; |
|
228 |
|
229 # checking the checksum |
|
230 Imager::get_block($file => \my $buffer); |
|
231 |
|
232 if (md5_hex($buffer) ne basename($file, qw(.gz .x .gz.x))) { |
|
233 say "wrong checksum for $file $k @$i\n"; |
|
234 @invalid{@$i} = (); |
|
235 next; |
|
236 } |
|
237 |
|
238 $checked{$file} = 1; |
|
239 } |
|
240 $SIG{ALRM}->(); |
|
241 alarm 0; |
|
242 |
|
243 # invalid now contains the numbers of the idx files beiing |
|
244 # invalid |
|
245 my @invalid = sort @{ $block{""} }[keys %invalid]; |
|
246 |
|
247 return 1 if not @invalid; |
|
248 |
|
249 say sprintf "found %d (%.1f%%) invalid images:", |
|
250 0 + @invalid, |
|
251 100 * (@invalid / $total); |
|
252 |
|
253 if ($o{yes}) { |
|
254 unlink @invalid; |
|
255 return undef; |
|
256 } |
|
257 |
|
258 while (-t) { |
|
259 print "delete? [y/N/v] "; |
|
260 given (<STDIN>) { |
|
261 when (/^y(?:es)?$/i) { unlink @invalid; return undef } |
|
262 when (/^v/i) { say join "\n" => @invalid; next } |
|
263 default { last } |
|
264 } |
|
265 } |
|
266 |
|
267 return 1; |
|
268 } |
|
269 __END__ |
|
270 |
|
271 =head1 NAME |
|
272 |
|
273 imager.check - checks the imager data and index files |
|
274 |
|
275 =head1 SYNOPSIS |
|
276 |
|
277 imager.check [options] {directory} |
|
278 |
|
279 =head1 DESCRIPTION |
|
280 |
|
281 This tool loads all the index files from I<directory>F</idx/>, |
|
282 checks if all mentioned files are existing and optionally purges |
|
283 unreferenced files. |
|
284 |
|
285 =head1 OPTIONS |
|
286 |
|
287 =over |
|
288 |
|
289 =item B<-c>|B<--checksum> |
|
290 |
|
291 Read all block files and check their checksum. (default: off) |
|
292 |
|
293 =item B<-p>|B<--pass> I<pass> |
|
294 |
|
295 In case you're using encrypted blocks, the param is passed to |
|
296 C<openssl>s C<-pass> option. (default: unset) |
|
297 |
|
298 =item B<-v>|B<-->[no]B<verbose> |
|
299 |
|
300 Generate more output about what's going on. (default: on) |
|
301 |
|
302 =item B<-y>|B<--yes> |
|
303 |
|
304 Assume "yes" for all questions (dangerous!). (default: no) |
|
305 |
|
306 =item B<-h>|B<--help> |
|
307 |
|
308 =item B<-m>|B<--man> |
|
309 |
|
310 The short and longer help. |
|
311 |
|
312 =back |
|
313 |
|
314 =cut |
|