#! /usr/bin/perl
# (c) 2008 Heiko Schlittermann
# simple charset converter, using iconv

use strict;
use warnings;
use CGI qw(:all *table);
use CGI::Carp qw(fatalsToBrowser);
use File::Temp qw(tempfile);
use File::Basename;

# Run with a neutral locale: force LANG to "C" and drop every inherited
# LC_* setting from the environment.
$ENV{LANG} = "C";
delete @ENV{grep /^LC_/, keys %ENV};

my $ME       = basename $0;                 # script name, also the prefix of our tmp files
my $CHARSETS = [qw(cp437 cp850 latin1)];    # source charsets offered in the forms

# Request mode switches (submit buttons / link flags).
my $_ok       = param(".ok");
my $_redo     = param(".redo");
my $_download = param(".download");

# Request data.
my $charset  = param("charset");
my $filename = param("filename");
my $outfile  = param("outfile");
my $infile   = param("infile");
my $src      = param("src");

# Sanitize the tmp file names sent by the client and map them into /tmp.
# The names are later handed to open(), so only characters from a fixed
# whitelist are kept (letters, digits, "_", "." and "-"); that is enough for
# the "in.XXXXXX" / "out.XXXXXX" names this script hands out, assuming
# File::Temp fills the X's with word characters only. A parameter that is
# missing, or empty after cleaning, stays undefined instead of turning into
# the bogus (but true) path "/tmp/$ME.".
for my $name ($infile, $outfile) {
    next if not defined $name;
    $name =~ s{[^A-Za-z0-9_.-]}{}g;
    $name = length($name) ? "/tmp/$ME.$name" : undef;
}

# Page furniture shared by all HTML responses: the inline stylesheet text
# (currently empty), the argument list passed to start_html(), and the
# right-aligned footer.
my $STYLE  = "";
my @HEADER = do {
    my %inline_css = (-code => $STYLE);
    (
        -title    => "UTF8-Konverter",
        -style    => \%inline_css,
        -encoding => "utf-8",
    );
};
my $FOOTER = div({ -align => "right" }, "Scripting © 2008 Heiko Schlittermann");

# Housekeeping at exit: delete every /tmp/$ME.* file whose -M age is
# greater than one day.
END {
    my @stale = grep { -M $_ > 1 } glob "/tmp/$ME.*";
    unlink @stale;
}

# Show the source of this script: "src=html" sends the output of
# "perltidy --html --st", any other value of "src" sends the file itself as
# text/plain.
if (defined $src) {
    if ($src eq "html") {

        # List-form pipe open: no shell is involved, so the script path is
        # passed to perltidy as a single argument whatever it contains.
        open(my $this, "-|", "perltidy", "--html", "--st", $0)
          or die "perltidy $0: $!";
        print header(-charset => "utf-8"),
          <$this>;
        exit 0;
    }

    open(my $this, "<", $0) or die "$0: $!";
    print header(-charset => "utf-8", -type => "text/plain"),
      <$this>;
    exit 0;
}

# Download after the preview: send the converted tmp file named by
# "outfile" as an attachment called "filename".
if ($_download && $outfile && $filename) {

    # Three-argument open: $outfile comes from a request parameter, and with
    # the two-argument form a trailing "|" would run it as a command.
    open(my $fh, "<", $outfile) or do {
        my $err = $!;    # keep the reason before other calls can change $!
        print header(-charset => "utf-8"),
          start_html(@HEADER),
          h1("Download"),
          "Sorry, Downloadfile $outfile: $err",
          end_html;
        exit 0;
    };

    # The attachment name is client-supplied too; drop line breaks and
    # double quotes so it cannot break out of the Content-Disposition value.
    (my $attachment = $filename) =~ tr/\r\n"//d;

    print header(
        -type       => "application/octet-stream",
        -attachment => $attachment
      ),
      <$fh>;
    exit 0;
}

# Upload and preview: convert the freshly uploaded file (".ok") or the
# previously saved upload (".redo") from $charset to UTF-8, store the result
# in a tmp file and show a preview plus a download link.
if (($_ok || $_redo) && ($filename && $charset)) {

    my $in;
    my $download;
    if ($_ok) {    # first time, copy the upload to some tmp file
        ($in, $infile) = tempfile("/tmp/$ME.in.XXXXXX");
        local $/ = \10240;    # copy in fixed-size records of 10240 bytes
        print $in $_ while <$filename>;    # $filename doubles as the upload handle
        close($in) or die "$infile: $!";
        close($filename);

        ($download, $outfile) = tempfile("/tmp/$ME.out.XXXXXX");
    }
    elsif ($_redo) {    # redo - we read our saved original file
        die "redo: infile/outfile parameter missing"
          unless defined $infile and defined $outfile;

        # we can recycle the outfile
        open($download, ">", $outfile) or die "$outfile: $!";
    }

    open($in, "<", $infile) or die "$infile: $!";

    require Text::Iconv;
    my $converter = Text::Iconv->new($charset, "utf-8");

    # Formerly assigned on every loop pass; once is enough.
    # NOTE(review): nothing in this block visibly depends on it - confirm
    # whether it is still needed.
    $ENV{LC_CTYPE} = "de_DE.UTF-8";

    # Only &, < and > need escaping inside <pre>; done by hand so the
    # converted UTF-8 bytes are left untouched.
    my %entity_for = ("&" => "&amp;", "<" => "&lt;", ">" => "&gt;");

    my @lines;       # preview lines, already HTML-escaped
    my $last = 0;    # number of the last line that went into the preview
    while (my $line = <$in>) {
        $line =~ s/\r?\n$//;    # normalise CRLF / LF line ends to "\n"
        my $utf8 = $converter->convert("$line\n");
        die "Konvertierungsproblem in Zeile $." if not defined $utf8;
        print {$download} $utf8;

        # Preview: always the last line; besides that at most 30 entries,
        # taken from the first three lines and from lines containing a
        # character that is neither a-z, 0-9, punctuation nor whitespace.
        if (eof($in)
            || (@lines < 30
                and ($. < 4 || $utf8 =~ /[^a-z0-9[:punct:][:space:]]{1,3}/i)))
        {
            # mark skipped lines with a row of dots (capped at 80)
            push @lines, " : ", substr("." x ($. - $last), 0, 80), "\n"
              if $. - $last > 1;

            (my $shown = $utf8) =~ s/([&<>])/$entity_for{$1}/g;

            # the text is passed as an argument, never as part of the
            # format, so a "%" in the file cannot act as a conversion
            push @lines, sprintf("%4d: %s", $., $shown);
            $last = $.;
        }
    }
    close($download) or die "$outfile: $!";

    # Now work out what the file should be called after the conversion. This
    # cannot be done earlier, because up to here $filename also serves as the
    # upload file handle, which we cannot simply modify.
    $filename =~ y/\\/\//;
    $filename = basename $filename;
    $filename =~ s/(\..+?)$/.utf8$1/
      or $filename .= ".utf8";

    # shorten the names of the tmp files
    for my $tmp ($infile, $outfile) {
        $tmp =~ s{/tmp/\Q$ME\E\.}{};
    }

    # URL-escape the query values; a file name containing a blank, "&", ";"
    # or "#" would otherwise break the download link.
    my $download_url =
        url(-query => 0)
      . "?.download=1;outfile=" . CGI::Util::escape($outfile)
      . ";filename=" . CGI::Util::escape($filename);

    print header(-charset => "utf-8"),
      start_html(@HEADER),
      div({-align => "right"}, "[ " . a({-href => url(-query => 0)}, "home") . " ]"),
      h3("Preview"),
      start_form,
      "von: ",
      popup_menu(-name => "charset", -values => $CHARSETS, -onChange => "submit()"),
      submit(-name => ".redo", -value => "Los!"),
      hidden(-name => "filename", -value => $filename),
      hidden(-name => "infile",   -value => $infile),
      hidden(-name => "outfile",  -value => $outfile),
      hidden(-name => ".redo",    -value => 1),
      end_form,
      pre("\n", @lines),
      br,
      a({-href => $download_url}, "download " . escapeHTML($filename)),
      hr,
      $FOOTER,
      end_html;
    exit 0;
}

# Nothing submitted yet (no ".ok" or no file): show the upload form.
# The page is assembled piece by piece in the same order in which the CGI
# functions were called before.
if (not($_ok && $filename)) {
    my @page = (header(-charset => "utf-8"), start_html(@HEADER));
    push @page, h1("Heute schon den Ötzi nach UTF-8 konvertiert?");

    my @upload_box = (legend("Upload"), start_multipart_form());
    push @upload_box,
      table(
        Tr(td("File"),        td(filefield(-name => "filename"))),
        Tr(td("Zeichensatz"), td(popup_menu(-name => "charset", -values => $CHARSETS))),
        Tr(td(),              td(submit(-name => ".ok", -value => "Los!"))),
      );
    push @upload_box, end_multipart_form();

    push @page, fieldset(@upload_box), $FOOTER, end_html();
    print @page;
    exit 0;
}

# Reached only when none of the branches above has exited: report an
# internal error together with a dump of the request parameters.
print header(), start_html(), h1("Internal Error."), Dump(), end_html();