8 use CGI::Carp qw(fatalsToBrowser); |
8 use CGI::Carp qw(fatalsToBrowser); |
9 use File::Temp qw(tempfile); |
9 use File::Temp qw(tempfile); |
10 use File::Basename; |
10 use File::Basename; |
11 |
11 |
12 $ENV{LANG} = "C"; |
12 $ENV{LANG} = "C"; |
13 delete @ENV{grep /^LC_/, keys %ENV}; |
13 delete @ENV{ grep /^LC_/, keys %ENV }; |
14 |
14 |
15 my $ME = basename $0; |
15 my $ME = basename $0; |
16 my $CHARSETS = [qw(cp437 cp850 latin1)]; |
16 my $CHARSETS = [qw(cp437 cp850 latin1)]; |
17 |
17 |
18 my $_ok = param(".ok"); |
18 my $_ok = param(".ok"); |
19 my $_redo = param(".redo"); |
19 my $_redo = param(".redo"); |
20 my $_download = param(".download"); |
20 my $_download = param(".download"); |
21 |
21 |
22 my $charset = param("charset"); |
22 my $charset = param("charset"); |
23 my $filename = param("filename"); |
23 my $filename = param("filename"); |
24 my $outfile = param("outfile"); |
24 my $outfile = param("outfile"); |
25 my $infile = param("infile"); |
25 my $infile = param("infile"); |
26 my $src = param("src"); |
26 my $src = param("src"); |
27 |
27 |
28 # sanitize the filenames |
28 # sanitize the filenames |
29 map { s{[/;&<>]}{}g; $_ = "/tmp/$ME.$_" } ($infile, $outfile); |
29 map { s{[/;&<>]}{}g; $_ = "/tmp/$ME.$_" } ($infile, $outfile); |
30 |
30 |
31 my $STYLE = ""; |
31 my $STYLE = ""; |
32 my @HEADER = ( |
32 my @HEADER = ( |
33 -title => "UTF8-Konverter", |
33 -title => "UTF8-Konverter", |
34 -style => {-code => $STYLE}, |
34 -style => { -code => $STYLE }, |
35 -encoding => "utf-8", |
35 -encoding => "utf-8", |
36 ); |
36 ); |
37 my $FOOTER = div({-align => "right"}, "Scripting © 2008 Heiko Schlittermann"); |
37 my $FOOTER = |
|
38 div({ -align => "right" }, "Scripting © 2008 Heiko Schlittermann"); |
38 |
39 |
39 END { |
40 END { |
40 unlink grep { -M > 1 } glob "/tmp/$ME.*"; |
41 unlink grep { -M > 1 } glob "/tmp/$ME.*"; |
41 } |
42 } |
42 |
43 |
43 # Quelltext anzeigen |
44 # Quelltext anzeigen |
44 if (defined $src) { |
45 if (defined $src) { |
45 if ($src eq "html") { |
46 if ($src eq "html") { |
46 open(my $this, "perltidy --html --st $0|"); |
47 open(my $this, "perltidy --html --st $0|"); |
47 print header(-charset => "utf-8"), |
48 print header(-charset => "utf-8"), <$this>; |
48 <$this>; |
49 exit 0; |
49 exit 0; |
50 } |
50 } |
|
51 |
51 |
52 open(my $this, $0); |
52 open(my $this, $0); |
53 print header(-charset => "utf-8", -type => "text/plain"), |
53 print header(-charset => "utf-8", -type => "text/plain"), <$this>; |
54 <$this>; |
54 exit 0; |
55 exit 0; |
|
56 } |
55 } |
57 |
56 |
58 # download nach preview |
57 # download nach preview |
59 if ($_download && $outfile && $filename) { |
58 if ($_download && $outfile && $filename) { |
60 |
59 |
61 open(my $fh, $outfile) or do { |
60 open(my $fh, $outfile) or do { |
62 print header(-charset => "utf-8"), |
61 print header(-charset => "utf-8"), |
63 start_html(@HEADER), |
62 start_html(@HEADER), |
64 h1("Download"), |
63 h1("Download"), |
65 "Sorry, Downloadfile $outfile: $!", |
64 "Sorry, Downloadfile $outfile: $!", |
66 end_html; |
65 end_html; |
67 exit 0; |
66 exit 0; |
68 }; |
67 }; |
69 |
68 |
70 print header(-type => "application/octet-stream", |
69 print header( |
71 -attachment => $filename), |
70 -type => "application/octet-stream", |
72 <$fh>; |
71 -attachment => $filename |
73 exit 0; |
72 ), |
74 |
73 <$fh>; |
|
74 exit 0; |
75 |
75 |
76 } |
76 } |
77 |
|
78 |
77 |
79 # upload und preview |
78 # upload und preview |
80 if (($_ok || $_redo) && ($filename && $charset)) { |
79 if (($_ok || $_redo) && ($filename && $charset)) { |
81 |
80 |
82 my $in; |
81 my $in; |
83 my $download; |
82 my $download; |
84 if ($_ok) { # first time, copy the upload to some tmp file |
83 if ($_ok) { # first time, copy the upload to some tmp file |
85 ($in, $infile) = tempfile("/tmp/$ME.in.XXXXXX"); |
84 ($in, $infile) = tempfile("/tmp/$ME.in.XXXXXX"); |
86 local $/ = \10240; |
85 local $/ = \10240; |
87 print $in $_ while <$filename>; |
86 print $in $_ while <$filename>; |
88 close($in); |
87 close($in); |
89 close($filename); |
88 close($filename); |
90 |
89 |
91 ($download, $outfile) = tempfile("/tmp/$ME.out.XXXXXX"); |
90 ($download, $outfile) = tempfile("/tmp/$ME.out.XXXXXX"); |
92 } |
91 } |
93 elsif ($_redo) { # redo - we read our saved orignal file |
92 elsif ($_redo) { # redo - we read our saved orignal file |
94 # we can recycle the outfile |
93 # we can recycle the outfile |
95 open($download, ">$outfile"); |
94 open($download, ">$outfile"); |
96 } |
95 } |
97 |
96 |
98 open($in, $infile) or die "$infile: $!"; |
97 open($in, $infile) or die "$infile: $!"; |
99 |
98 |
|
99 require Text::Iconv; |
|
100 my $converter = new Text::Iconv $charset => "utf-8"; |
100 |
101 |
101 require Text::Iconv; |
102 my @lines; |
102 my $converter = new Text::Iconv $charset => "utf-8"; |
103 my $last = 0; |
|
104 while (<$in>) { |
|
105 s/\r?\n$//; |
|
106 $_ = $converter->convert("$_\n"); |
|
107 die "Konvertierungsproblem in Zeile $." if not defined $_; |
|
108 print $download $_; |
|
109 $ENV{LC_CTYPE} = "de_DE.UTF-8"; |
|
110 if (eof |
|
111 || (@lines < 30 and ($. < 4 || /[^a-z0-9[:punct:][:space:]]{1,3}/i)) |
|
112 ) |
|
113 { |
|
114 push @lines, " : ", substr("." x ($. - $last), 0, 80), "\n" |
|
115 if $. - $last > 1; |
|
116 push @lines, sprintf "%4d: $_", $.; |
|
117 $last = $.; |
|
118 } |
|
119 } |
103 |
120 |
104 my @lines; |
121 # jetzt mal den Filenamen ermitteln, wie er nach der Konversion aussehen soll |
105 my $last = 0; |
122 # das können wir nicht vorher machen, weil $file da auch noch ein Filehandle |
106 while (<$in>) { |
123 # darstellt, welches wir so einfach nicht modifizieren können |
107 s/\r?\n$//; |
124 $filename =~ y/\\/\//; |
108 $_ = $converter->convert("$_\n"); |
125 $filename = basename $filename; |
109 die "Konvertierungsproblem in Zeile $." if not defined $_; |
126 $filename =~ s/(\..+?)$/.utf8$1/ |
110 print $download $_; |
127 or $filename .= ".utf8"; |
111 $ENV{LC_CTYPE} = "de_DE.UTF-8"; |
|
112 if (eof || (@lines < 30 and ($. < 4 || /[^a-z0-9[:punct:][:space:]]{1,3}/i))) { |
|
113 push @lines, " : ", substr("." x ($. - $last), 0, 80), "\n" if $. - $last > 1; |
|
114 push @lines, sprintf "%4d: $_", $.; |
|
115 $last = $. |
|
116 } |
|
117 } |
|
118 |
128 |
119 # jetzt mal den Filenamen ermitteln, wie er nach der Konversion aussehen soll |
129 # die Namen der tmp-Files kürzen |
120 # das können wir nicht vorher machen, weil $file da auch noch ein Filehandle |
130 map { s/\/tmp\/\Q$ME\E\.// } $infile, $outfile; |
121 # darstellt, welches wir so einfach nicht modifizieren können |
|
122 $filename =~ y/\\/\//; |
|
123 $filename = basename $filename; |
|
124 $filename =~ s/(\..+?)$/.utf8$1/ |
|
125 or $filename .= ".utf8"; |
|
126 |
131 |
127 # die Namen der tmp-Files kürzen |
132 print header(-charset => "utf-8"), start_html(@HEADER), |
128 map { s/\/tmp\/\Q$ME\E\.// } $infile, $outfile; |
133 div({ -align => "right" }, |
129 |
134 "[ " . a({ -href => url(-query => 0) }, "home") . " ]"), |
130 print header(-charset => "utf-8"), |
135 h3("Preview"), start_form, "von: ", |
131 start_html(@HEADER), |
136 popup_menu( |
132 div({-align => "right"}, "[ " . a({-href => url(-query => 0)}, "home") . " ]"), |
137 -name => "charset", |
133 h3("Preview"), |
138 -values => $CHARSETS, |
134 start_form, |
139 -onChange => "submit()" |
135 "von: ", |
140 ), |
136 popup_menu(-name => "charset", -values => $CHARSETS, -onChange => "submit()"), |
141 submit(-name => ".redo", -value => "Los!"), |
137 submit(-name => ".redo", -value => "Los!"), |
142 hidden(-name => "filename", -value => $filename), |
138 hidden(-name => "filename", -value => $filename), |
143 hidden(-name => "infile", -value => $infile), |
139 hidden(-name => "infile", -value => $infile), |
144 hidden(-name => "outfile", -value => $outfile), |
140 hidden(-name => "outfile", -value => $outfile), |
145 hidden(-name => ".redo", -value => 1), end_form, pre("\n", @lines), br, |
141 hidden(-name => ".redo", -value => 1), |
146 a( |
142 end_form, |
147 { |
143 pre( "\n", @lines ), |
148 -href => url(-query => 0) |
144 br, |
149 . "?.download=1;outfile=$outfile;filename=$filename" |
145 a({-href => url(-query => 0) . "?.download=1;outfile=$outfile;filename=$filename"}, |
150 }, |
146 "download " . escapeHTML($filename)), |
151 "download " . escapeHTML($filename) |
147 hr, |
152 ), |
148 $FOOTER, |
153 hr, |
149 end_html; |
154 $FOOTER, |
150 exit 0; |
155 end_html; |
|
156 exit 0; |
151 } |
157 } |
152 |
158 |
153 # noch nichts angegeben |
159 # noch nichts angegeben |
154 if (!$_ok || !$filename) { |
160 if (!$_ok || !$filename) { |
155 print header(-charset => "utf-8"), |
161 print header(-charset => "utf-8"), |
156 start_html(@HEADER), |
162 start_html(@HEADER), h1("Heute schon den Ötzi nach UTF-8 konvertiert?"), |
157 h1("Heute schon den Ötzi nach UTF-8 konvertiert?"), |
163 fieldset( |
158 fieldset( |
164 legend("Upload"), |
159 legend("Upload"), |
165 start_multipart_form, |
160 start_multipart_form, |
166 table( |
161 table( |
167 Tr(td("File"), td(filefield(-name => "filename"))), |
162 Tr(td("File"), td(filefield(-name => "filename"))), |
168 Tr( |
163 Tr(td("Zeichensatz"), td(popup_menu(-name => "charset", -values => $CHARSETS))), |
169 td("Zeichensatz"), |
164 Tr(td(), td(submit(-name => ".ok", -value => "Los!"))), |
170 td(popup_menu(-name => "charset", -values => $CHARSETS)) |
165 ), |
171 ), |
166 end_multipart_form, |
172 Tr(td(), td(submit(-name => ".ok", -value => "Los!"))), |
167 ), |
173 ), |
168 $FOOTER, |
174 end_multipart_form, |
169 end_html; |
175 ), |
170 exit 0; |
176 $FOOTER, |
|
177 end_html; |
|
178 exit 0; |
171 } |
179 } |
172 |
180 |
173 print header, start_html, h1("Internal Error."), Dump, end_html; |
181 print header, start_html, h1("Internal Error."), Dump, end_html; |