|
1 #! /usr/bin/perl |
|
2 |
|
3 use 5.010; |
|
4 use strict; |
|
5 use warnings; |
|
6 use Pod::Usage; |
|
7 use Hash::Util qw(lock_keys); |
|
8 use File::Find; |
|
9 use File::Temp; |
|
10 use DB_File; |
|
11 use File::Basename; |
|
12 use autodie qw(:all); |
|
13 use Cwd qw(abs_path); |
|
14 |
|
15 use Getopt::Long; |
|
16 sub get_block_list; |
|
17 sub purge_unused; |
|
18 sub check_images; |
|
19 |
|
20 my %o = ( |
|
21 yes => undef, |
|
22 verbose => undef, |
|
23 check => undef, |
|
24 ); |
|
25 lock_keys(%o); |
|
26 |
|
27 MAIN: { |
|
28 Getopt::Long::Configure qw(Bundling); |
|
29 GetOptions( |
|
30 "y|yes!" => \$o{yes}, |
|
31 "v|verbose!" => \$o{verbose}, |
|
32 "c|check" => \$o{check}, |
|
33 "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, |
|
34 "m|man" => sub { |
|
35 pod2usage( |
|
36 -verbose => 2, |
|
37 -exit 0, |
|
38 -noperldoc => system( |
|
39 "perldoc -V 1>/dev/null |
|
40 2>&1" |
|
41 ) |
|
42 ); |
|
43 }, |
|
44 ) |
|
45 and @ARGV |
|
46 or pod2usage; |
|
47 my $dir = shift; |
|
48 my $tmp = File::Temp->new; |
|
49 |
|
50 # load the index files, remember the latest |
|
51 # timestamp we see |
|
52 #tie %idx, "DB_File" => $tmp->filename; |
|
53 my %block = get_block_list($dir); |
|
54 |
|
55 verbose("# indexed: " |
|
56 . scalar(@{ $block{""} // [] }) |
|
57 . " images with " |
|
58 . (grep !/^\.idx$/ => keys(%block)) |
|
59 . " blocks"); |
|
60 |
|
61 purge_unused($dir => %block); |
|
62 check_images($dir => %block); |
|
63 } |
|
64 |
|
65 sub verbose { say @_ if $o{verbose} } |
|
66 |
|
67 sub get_file_list { |
|
68 my ($list) = @_; |
|
69 my @files = (); |
|
70 |
|
71 open(my $fh => $list); |
|
72 while (<$fh>) { |
|
73 push @files, (split)[2]; |
|
74 } |
|
75 return grep /^[a-z\d.\/]+$/ => @files; |
|
76 } |
|
77 |
|
78 sub get_block_list { |
|
79 my $dir = shift; |
|
80 my %block; |
|
81 find( |
|
82 sub { |
|
83 (-f) or return; # we need to include the tmp files! |
|
84 push @{ $block{""} }, abs_path $_; |
|
85 foreach my $f (get_file_list($_)) { |
|
86 push @{ $block{$f} } => $#{ $block{""} }; |
|
87 } |
|
88 }, |
|
89 "$dir/idx" |
|
90 ); |
|
91 return %block; |
|
92 } |
|
93 |
|
94 sub purge_unused { |
|
95 my ($dir, %block) = @_; |
|
96 |
|
97 my ($total, $done); |
|
98 verbose("# pass 1 - checking for unused blocks"); |
|
99 verbose("# pass 1 - estimating file count"); |
|
100 |
|
101 # calculate the number of files we expect |
|
102 find( |
|
103 sub { |
|
104 -d or return; |
|
105 opendir(my $dh => $_); |
|
106 map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh; |
|
107 closedir($dh); |
|
108 }, |
|
109 "$dir/data" |
|
110 ); |
|
111 |
|
112 # progress |
|
113 local $SIG{ALRM} = sub { |
|
114 return alarm 1 if not $done; |
|
115 my $speed = $done / (time - $^T + 1); |
|
116 verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", |
|
117 100 * ($done / $total), |
|
118 scalar(localtime($^T + $speed * ($total - $done))), |
|
119 length($total) => $done, |
|
120 $total; |
|
121 alarm 5; |
|
122 }; |
|
123 $SIG{ALRM}->(); |
|
124 |
|
125 my @unused; |
|
126 find( |
|
127 sub { |
|
128 $done++ if -f; |
|
129 (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks |
|
130 |
|
131 # we don't need uncompressed files if an compressed version |
|
132 # exists |
|
133 unlink $_ and return if -f "$_.gz"; |
|
134 |
|
135 # cut away the first part of the filename and |
|
136 # some optional extension |
|
137 (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; |
|
138 exists $block{$rn} and return; |
|
139 push @unused, abs_path $File::Find::name; |
|
140 return; |
|
141 |
|
142 }, |
|
143 "$dir/data" |
|
144 ); |
|
145 $SIG{ALRM}->(); |
|
146 alarm 0; |
|
147 |
|
148 return if not @unused; |
|
149 |
|
150 say sprintf "found %d (%.1f%%) unused files", |
|
151 0 + @unused, |
|
152 100 * (@unused / $total); |
|
153 |
|
154 if ($o{yes}) { |
|
155 verbose("# deleting " . @unused . " files"); |
|
156 unlink @unused; |
|
157 return; |
|
158 } |
|
159 |
|
160 if (-t) { |
|
161 while (1) { |
|
162 print "delete? [y/N/v]: "; |
|
163 given (<STDIN>) { |
|
164 when (/^y(?:es)?$/i) { unlink @unused; last } |
|
165 when (/^v/) { say join "\n", @unused; next } |
|
166 default { last } |
|
167 } |
|
168 } |
|
169 } |
|
170 |
|
171 } |
|
172 |
|
173 sub check_images { |
|
174 my ($dir, %block) = @_; |
|
175 |
|
176 my $total = grep { $_ ne "" } keys(%block); |
|
177 my $done = 0; |
|
178 |
|
179 verbose("# pass 2 - checking image completeness"); |
|
180 |
|
181 # progress |
|
182 local $SIG{ALRM} = sub { |
|
183 return alarm 1 if not $done; |
|
184 my $speed = $done / (time - $^T + 1); |
|
185 verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", |
|
186 100 * $done / $total, |
|
187 scalar(localtime($^T + ($total - $done) * $speed)), |
|
188 length($total) => $done, |
|
189 $total; |
|
190 alarm 5; |
|
191 }; |
|
192 $SIG{ALRM}->(); |
|
193 |
|
194 my %invalid; |
|
195 foreach my $k (keys %block) { |
|
196 my $i = $block{$k}; |
|
197 next if $k eq ""; |
|
198 ++$done; |
|
199 |
|
200 next |
|
201 if -f "$dir/data/$k" |
|
202 or -f "$dir/data/$k.gz"; |
|
203 say "missing $k @$i"; |
|
204 @invalid{@$i} = (); |
|
205 } |
|
206 $SIG{ALRM}->(); |
|
207 alarm 0; |
|
208 |
|
209 # invalid now contains the numbers of the idx files beiing |
|
210 # invalid |
|
211 my @invalid = sort @{ $block{""} }[keys %invalid]; |
|
212 |
|
213 return if not @invalid; |
|
214 |
|
215 say sprintf "found %d (%.1f%%) invalid images:", |
|
216 0 + @invalid, |
|
217 100 * (@invalid / $total); |
|
218 |
|
219 if ($o{yes}) { |
|
220 unlink @invalid; |
|
221 return; |
|
222 } |
|
223 |
|
224 while (-t) { |
|
225 print "delete? [y/N/v] "; |
|
226 given (<STDIN>) { |
|
227 when (/^y(?:es)?$/i) { unlink @invalid; last } |
|
228 when (/^v/i) { say join "\n" => @invalid; next } |
|
229 default { last } |
|
230 } |
|
231 } |
|
232 } |
|
233 __END__ |
|
234 |
|
235 =head1 NAME |
|
236 |
|
237 imager.check - checks the imager data and index files |
|
238 |
|
239 =head1 SYNOPSIS |
|
240 |
|
241 imager.check [options] {directory} |
|
242 |
|
243 =head1 DESCRIPTION |
|
244 |
|
245 This tool loads all the index files from I<directory>F</idx/>, |
|
246 checks if all mentioned files are existing and optionally purges |
|
247 unreferenced files. |
|
248 |
|
249 =head1 OPTIONS |
|
250 |
|
251 =over |
|
252 |
|
253 =item B<-y>|B<--yes> |
|
254 |
|
255 Assume "yes" for all questions (dangerous!). (default: no) |
|
256 |
|
257 =item B<-h>|B<--help> |
|
258 |
|
259 =item B<-m>|B<--man> |
|
260 |
|
261 The short and longer help. |
|
262 |
|
263 =back |
|
264 |
|
265 =cut |