16 sub get_block_list; |
16 sub get_block_list; |
17 sub purge_unused; |
17 sub purge_unused; |
18 sub check_images; |
18 sub check_images; |
19 |
19 |
20 my %o = ( |
20 my %o = ( |
21 yes => undef, |
21 yes => undef, |
22 verbose => undef, |
22 verbose => undef, |
23 check => undef, |
23 check => undef, |
24 ); lock_keys(%o); |
24 ); |
|
25 lock_keys(%o); |
25 |
26 |
26 MAIN: { |
27 MAIN: { |
27 Getopt::Long::Configure qw(Bundling); |
28 Getopt::Long::Configure qw(Bundling); |
28 GetOptions( |
29 GetOptions( |
29 "y|yes!" => \$o{yes}, |
30 "y|yes!" => \$o{yes}, |
30 "v|verbose!" => \$o{verbose}, |
31 "v|verbose!" => \$o{verbose}, |
31 "c|check" => \$o{check}, |
32 "c|check" => \$o{check}, |
32 "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, |
33 "h|help" => sub { pod2usage(-verbose => 1, -exit 0) }, |
33 "m|man" => sub { pod2usage(-verbose => 2, -exit 0, |
34 "m|man" => sub { |
34 -noperldoc => system("perldoc -V 1>/dev/null |
35 pod2usage( |
35 2>&1"))}, |
36 -verbose => 2, |
36 ) and @ARGV or pod2usage; |
37 -exit 0, |
|
38 -noperldoc => system( |
|
39 "perldoc -V 1>/dev/null |
|
40 2>&1" |
|
41 ) |
|
42 ); |
|
43 }, |
|
44 ) |
|
45 and @ARGV |
|
46 or pod2usage; |
37 my $dir = shift; |
47 my $dir = shift; |
38 my $tmp = File::Temp->new; |
48 my $tmp = File::Temp->new; |
39 |
49 |
40 # load the index files, remember the latest |
50 # load the index files, remember the latest |
41 # timestamp we see |
51 # timestamp we see |
42 #tie %idx, "DB_File" => $tmp->filename; |
52 #tie %idx, "DB_File" => $tmp->filename; |
43 my %block = get_block_list($dir); |
53 my %block = get_block_list($dir); |
44 |
54 |
45 verbose("# indexed: " |
55 verbose("# indexed: " |
46 . scalar(@{$block{""}//[]}) . " images with " |
56 . scalar(@{ $block{""} // [] }) |
47 . (grep !/^\.idx$/ => keys(%block))." blocks"); |
57 . " images with " |
|
58 . (grep !/^\.idx$/ => keys(%block)) |
|
59 . " blocks"); |
48 |
60 |
49 purge_unused($dir => %block); |
61 purge_unused($dir => %block); |
50 check_images($dir => %block); |
62 check_images($dir => %block); |
51 } |
63 } |
52 |
64 |
56 my ($list) = @_; |
68 my ($list) = @_; |
57 my @files = (); |
69 my @files = (); |
58 |
70 |
59 open(my $fh => $list); |
71 open(my $fh => $list); |
60 while (<$fh>) { |
72 while (<$fh>) { |
61 push @files, (split)[2]; |
73 push @files, (split)[2]; |
62 } |
74 } |
63 return grep /^[a-z\d.\/]+$/ => @files; |
75 return grep /^[a-z\d.\/]+$/ => @files; |
64 } |
76 } |
65 |
77 |
66 sub get_block_list { |
78 sub get_block_list { |
67 my $dir = shift; |
79 my $dir = shift; |
68 my %block; |
80 my %block; |
69 find(sub { |
81 find( |
70 (-f) or return; # we need to include the tmp files! |
82 sub { |
71 push @{$block{""}}, abs_path $_; |
83 (-f) or return; # we need to include the tmp files! |
72 foreach my $f (get_file_list($_)) { |
84 push @{ $block{""} }, abs_path $_; |
73 push @{$block{$f}} => $#{$block{""}}; |
85 foreach my $f (get_file_list($_)) { |
74 } |
86 push @{ $block{$f} } => $#{ $block{""} }; |
75 }, "$dir/idx"); |
87 } |
|
88 }, |
|
89 "$dir/idx" |
|
90 ); |
76 return %block; |
91 return %block; |
77 } |
92 } |
78 |
93 |
79 sub purge_unused { |
94 sub purge_unused { |
80 my ($dir, %block) = @_; |
95 my ($dir, %block) = @_; |
82 my ($total, $done); |
97 my ($total, $done); |
83 verbose("# pass 1 - checking for unused blocks"); |
98 verbose("# pass 1 - checking for unused blocks"); |
84 verbose("# pass 1 - estimating file count"); |
99 verbose("# pass 1 - estimating file count"); |
85 |
100 |
86 # calculate the number of files we expect |
101 # calculate the number of files we expect |
87 find(sub { |
102 find( |
88 -d or return; |
103 sub { |
89 opendir(my $dh => $_); |
104 -d or return; |
90 map { $total++ if not $_ ~~ [qw<. ..>] and length > 8} readdir $dh; |
105 opendir(my $dh => $_); |
91 closedir($dh); |
106 map { $total++ if not $_ ~~ [qw<. ..>] and length > 8 } readdir $dh; |
92 }, "$dir/data"); |
107 closedir($dh); |
93 |
108 }, |
|
109 "$dir/data" |
|
110 ); |
94 |
111 |
95 # progress |
112 # progress |
96 local $SIG{ALRM} = sub { |
113 local $SIG{ALRM} = sub { |
97 return alarm 1 if not $done; |
114 return alarm 1 if not $done; |
98 my $speed = $done / (time - $^T + 1); |
115 my $speed = $done / (time - $^T + 1); |
99 verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", |
116 verbose sprintf "# pass 1 done %5.1f%% | %25s (%*d of %d blocks)", |
100 100 * ($done/$total), |
117 100 * ($done / $total), |
101 scalar(localtime($^T + $speed * ($total - $done))), |
118 scalar(localtime($^T + $speed * ($total - $done))), |
102 length($total) => $done, |
119 length($total) => $done, |
103 $total; |
120 $total; |
104 alarm 5; |
121 alarm 5; |
105 }; |
122 }; |
106 $SIG{ALRM}->(); |
123 $SIG{ALRM}->(); |
107 |
124 |
108 my @unused; |
125 my @unused; |
109 find(sub { |
126 find( |
110 $done++ if -f; |
127 sub { |
111 (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks |
128 $done++ if -f; |
112 |
129 (-f _) and ((-M _) > 0) or return; # don't process the fresh blocks |
113 # we don't need uncompressed files if an compressed version |
130 |
114 # exists |
131 # we don't need uncompressed files if an compressed version |
115 unlink $_ and return if -f "$_.gz"; |
132 # exists |
116 |
133 unlink $_ and return if -f "$_.gz"; |
117 # cut away the first part of the filename and |
134 |
118 # some optional extension |
135 # cut away the first part of the filename and |
119 (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; |
136 # some optional extension |
120 exists $block{$rn} and return; |
137 (my $rn = $File::Find::name) =~ s/^$dir\/data\/(.*?)(?:\..+)?$/$1/; |
121 push @unused, abs_path $File::Find::name; |
138 exists $block{$rn} and return; |
122 return; |
139 push @unused, abs_path $File::Find::name; |
123 |
140 return; |
124 }, "$dir/data"); |
141 |
|
142 }, |
|
143 "$dir/data" |
|
144 ); |
125 $SIG{ALRM}->(); |
145 $SIG{ALRM}->(); |
126 alarm 0; |
146 alarm 0; |
127 |
147 |
128 return if not @unused; |
148 return if not @unused; |
129 |
149 |
130 say sprintf "found %d (%.1f%%) unused files", |
150 say sprintf "found %d (%.1f%%) unused files", |
131 0+@unused, |
151 0 + @unused, |
132 100 * (@unused/$total); |
152 100 * (@unused / $total); |
133 |
153 |
134 if ($o{yes}) { |
154 if ($o{yes}) { |
135 verbose("# deleting ".@unused." files"); |
155 verbose("# deleting " . @unused . " files"); |
136 unlink @unused; |
156 unlink @unused; |
137 return; |
157 return; |
138 } |
158 } |
139 |
159 |
140 if (-t) { |
160 if (-t) { |
141 while(1) { |
161 while (1) { |
142 print "delete? [y/N/v]: "; |
162 print "delete? [y/N/v]: "; |
143 given (<STDIN>) { |
163 given (<STDIN>) { |
144 when (/^y(?:es)?$/i) { unlink @unused; last } |
164 when (/^y(?:es)?$/i) { unlink @unused; last } |
145 when (/^v/) { say join "\n", @unused; next } |
165 when (/^v/) { say join "\n", @unused; next } |
146 default { last } |
166 default { last } |
147 } |
167 } |
148 } |
168 } |
149 } |
169 } |
150 |
170 |
151 } |
171 } |
152 |
172 |
153 sub check_images { |
173 sub check_images { |
158 |
178 |
159 verbose("# pass 2 - checking image completeness"); |
179 verbose("# pass 2 - checking image completeness"); |
160 |
180 |
161 # progress |
181 # progress |
162 local $SIG{ALRM} = sub { |
182 local $SIG{ALRM} = sub { |
163 return alarm 1 if not $done; |
183 return alarm 1 if not $done; |
164 my $speed = $done / (time - $^T + 1); |
184 my $speed = $done / (time - $^T + 1); |
165 verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", |
185 verbose sprintf "# pass 2 done %5.1f%% | %25s (%*d of %d blocks)", |
166 100 * $done/$total, |
186 100 * $done / $total, |
167 scalar(localtime($^T + ($total - $done) * $speed)), |
187 scalar(localtime($^T + ($total - $done) * $speed)), |
168 length($total) => $done, |
188 length($total) => $done, |
169 $total; |
189 $total; |
170 alarm 5; |
190 alarm 5; |
171 }; |
191 }; |
172 $SIG{ALRM}->(); |
192 $SIG{ALRM}->(); |
173 |
193 |
174 my %invalid; |
194 my %invalid; |
175 foreach my $k (keys %block) { |
195 foreach my $k (keys %block) { |
176 my $i = $block{$k}; |
196 my $i = $block{$k}; |
177 next if $k eq ""; |
197 next if $k eq ""; |
178 ++$done; |
198 ++$done; |
179 |
199 |
180 next if -f "$dir/data/$k" |
200 next |
181 or -f "$dir/data/$k.gz"; |
201 if -f "$dir/data/$k" |
182 say "missing $k @$i"; |
202 or -f "$dir/data/$k.gz"; |
183 @invalid{@$i} = (); |
203 say "missing $k @$i"; |
|
204 @invalid{@$i} = (); |
184 } |
205 } |
185 $SIG{ALRM}->(); |
206 $SIG{ALRM}->(); |
186 alarm 0; |
207 alarm 0; |
187 |
208 |
188 # invalid now contains the numbers of the idx files beiing |
209 # invalid now contains the numbers of the idx files beiing |
189 # invalid |
210 # invalid |
190 my @invalid = sort @{$block{""}}[keys %invalid]; |
211 my @invalid = sort @{ $block{""} }[keys %invalid]; |
191 |
212 |
192 return if not @invalid; |
213 return if not @invalid; |
193 |
214 |
194 say sprintf "found %d (%.1f%%) invalid images:", |
215 say sprintf "found %d (%.1f%%) invalid images:", |
195 0+@invalid, |
216 0 + @invalid, |
196 100 * (@invalid/$total); |
217 100 * (@invalid / $total); |
197 |
218 |
198 if ($o{yes}) { |
219 if ($o{yes}) { |
199 unlink @invalid; |
220 unlink @invalid; |
200 return; |
221 return; |
201 } |
222 } |
202 |
223 |
203 while (-t) { |
224 while (-t) { |
204 print "delete? [y/N/v] "; |
225 print "delete? [y/N/v] "; |
205 given (<STDIN>) { |
226 given (<STDIN>) { |
206 when (/^y(?:es)?$/i) { unlink @invalid; last } |
227 when (/^y(?:es)?$/i) { unlink @invalid; last } |
207 when (/^v/i) { say join "\n" => @invalid; next } |
228 when (/^v/i) { say join "\n" => @invalid; next } |
208 default { last } |
229 default { last } |
209 } |
230 } |
210 } |
231 } |
211 } |
232 } |
212 __END__ |
233 __END__ |
213 |
234 |
214 =head1 NAME |
235 =head1 NAME |