iconv.pl
changeset 0 a74d3272e263
child 1 137256f711af
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/iconv.pl	Sat Mar 07 13:36:54 2009 +0100
@@ -0,0 +1,173 @@
+#! /usr/bin/perl
+# (c) 2008 Heiko Schlittermann
+# simple charset converter, using iconv
+
+use strict;
+use warnings;
+use CGI qw(:all *table);
+use CGI::Carp qw(fatalsToBrowser);
+use File::Temp qw(tempfile);
+use File::Basename;
+
+# Run with a neutral locale: force LANG to "C" and drop every LC_*
+# setting inherited from the environment.
+$ENV{LANG} = 'C';
+delete $ENV{$_} for grep { /^LC_/ } keys %ENV;
+
+# Script name without its directory part; prefix of all our tmp files.
+my $ME = basename($0);
+# Source charsets offered in the selection menus.
+my $CHARSETS = ['cp437', 'cp850', 'latin1'];
+
+# Mode switches: ".ok" (upload form submitted), ".redo" (preview form
+# submitted again), ".download" (download link followed).
+my $_ok       = scalar param('.ok');
+my $_redo     = scalar param('.redo');
+my $_download = scalar param('.download');
+
+# Payload parameters; each one is a single value or undef.
+my $charset  = scalar param('charset');
+my $filename = scalar param('filename');
+my $outfile  = scalar param('outfile');
+my $infile   = scalar param('infile');
+my $src      = scalar param('src');
+
+# Sanitize the client-supplied tmp file names.  They are handed to open()
+# later on, so keep only the characters tempfile() can generate for our
+# templates ("in.XXXXXX" / "out.XXXXXX").  That removes path separators
+# as well as shell and open()-mode metacharacters like "|", "`" or "$".
+# The result is anchored below /tmp with the script name as prefix.
+# A parameter that was not sent stays undefined instead of turning into
+# the always-true string "/tmp/$ME.".
+for ($infile, $outfile) {
+	next unless defined $_;
+	s{[^A-Za-z0-9_.-]}{}g;
+	$_ = "/tmp/$ME.$_";
+}
+
+# Inline CSS for the pages (currently none).
+my $STYLE = q{};
+# Argument list for start_html(), shared by all generated pages.
+my @HEADER = (
+	-title    => "UTF8-Konverter",
+	-style    => { -code => $STYLE },
+	-encoding => "utf-8",
+);
+# Right-aligned copyright line placed at the bottom of the pages.
+my $FOOTER = div(
+	{ -align => "right" },
+	"Scripting &copy; 2008 Heiko Schlittermann",
+);
+
+# At exit, remove those of our tmp files whose -M age (days since the
+# last modification, measured from script start) is greater than one.
+END {
+	for my $stale (glob "/tmp/$ME.*") {
+		unlink $stale if -M $stale > 1;
+	}
+}
+
+# Show our own source code: "src=html" renders it through perltidy,
+# any other value of "src" delivers it as plain text.
+if (defined $src) {
+	if ($src eq "html") {
+		# List form of the pipe open: no shell is involved, so the
+		# script path can never be misread as shell syntax.
+		open(my $this, "-|", "perltidy", "--html", "--st", $0)
+			or die "perltidy $0: $!";
+		print header(-charset => "utf-8"),
+			<$this>;
+		exit 0;
+	}
+
+	# explicit read mode, and fail loudly instead of sending an empty page
+	open(my $this, "<", $0) or die "$0: $!";
+	print header(-charset => "utf-8", -type => "text/plain"),
+		<$this>;
+	exit 0;
+}
+
+# Download after the preview: deliver the converted file as attachment.
+if ($_download && $outfile && $filename) {
+
+	# Three-argument open(): the client-supplied name is only ever used
+	# as a plain file to read, never as a mode string or a command
+	# (a trailing "|" would otherwise start a process).
+	open(my $fh, "<", $outfile) or do {
+		# the message ends up in HTML, so escape it
+		print header(-charset => "utf-8"),
+			start_html(@HEADER),
+			h1("Download"),
+			escapeHTML("Sorry, Downloadfile $outfile: $!"),
+			end_html;
+		exit 0;
+	};
+
+	# $filename is the name suggested to the browser for saving
+	print header(-type => "application/octet-stream",
+		-attachment => $filename),
+		<$fh>;
+	exit 0;
+}
+
+
+# Upload and preview: convert the file to UTF-8, store the result for a
+# later download and show a selection of the converted lines.
+if (($_ok || $_redo) && ($filename && $charset)) {
+
+	my $in;
+	my $download;
+	if ($_ok) {	# first time, copy the upload to some tmp file
+		($in, $infile) = tempfile("/tmp/$ME.in.XXXXXX");
+		local $/ = \10240;	# read the upload in records of 10240 bytes
+		print $in $_ while <$filename>;
+		# buffered write errors only show up when closing
+		close($in) or die "$infile: $!";
+		close($filename);
+
+		($download, $outfile) = tempfile("/tmp/$ME.out.XXXXXX");
+	}
+	elsif ($_redo) {	# redo - we read our saved original file
+		# A three-argument open() with an undefined name would quietly
+		# create an anonymous tmp file, so insist on both names.
+		die "redo: infile/outfile missing\n"
+			unless defined $infile and defined $outfile;
+		# we can recycle the outfile
+		open($download, ">", $outfile) or die "$outfile: $!";
+	}
+
+	# explicit read mode: the name can never act as pipe or mode string
+	open($in, "<", $infile) or die "$infile: $!";
+
+	require Text::Iconv;
+	my $converter = Text::Iconv->new($charset, "utf-8");
+
+	# Used to be assigned again for every input line; once is enough.
+	# NOTE(review): nothing visible in this file reads LC_CTYPE
+	# afterwards -- confirm whether it is still needed.
+	$ENV{LC_CTYPE} = "de_DE.UTF-8";
+
+	# minimal HTML escaping for the preview lines
+	my %entity = ("&" => "&amp;", "<" => "&lt;", ">" => "&gt;");
+
+	my @lines;
+	my $last = 0;	# number of the line most recently added to the preview
+	while (<$in>) {
+		s/\r?\n$//;
+		$_ = $converter->convert("$_\n");
+		die "Konvertierungsproblem in Zeile $." if not defined $_;
+		print $download $_;
+		# Preview the final line and, while @lines holds fewer than 30
+		# elements, lines 1-3 plus every line containing a character
+		# outside a-z, 0-9, punctuation and whitespace.
+		if (eof($in) || (@lines < 30 and ($. < 4 || /[^a-z0-9[:punct:][:space:]]{1,3}/i))) {
+			# a row of dots (80 at most) stands for the skipped lines
+			push @lines, "    : ", substr("." x ($. - $last), 0, 80), "\n"  if $. - $last > 1;
+			# The text is data, not a format string: pass it as an
+			# argument to sprintf() and escape it for the <pre> block.
+			(my $shown = $_) =~ s/([&<>])/$entity{$1}/g;
+			push @lines, sprintf("%4d: %s", $., $shown);
+			$last = $.;
+		}
+	}
+	close($in);
+	# make sure the converted file is completely on disk
+	close($download) or die "$outfile: $!";
+
+	# Now determine the file name as it should look after the conversion.
+	# This cannot happen earlier, because until here $filename also serves
+	# as the upload file handle, which we cannot simply modify.
+	$filename =~ y/\\/\//;
+	$filename = basename $filename;
+	$filename =~ s/(\..+?)$/.utf8$1/
+		or $filename .= ".utf8";
+
+	# shorten the names of the tmp files
+	s/\/tmp\/\Q$ME\E\.// for $infile, $outfile;
+
+	# URL-encode the values, file names may contain blanks, ";", "&", ...
+	my $download_url = url(-query => 0)
+		. "?.download=1;outfile=" . CGI::Util::escape($outfile)
+		. ";filename=" . CGI::Util::escape($filename);
+
+	print header(-charset => "utf-8"),
+		start_html(@HEADER),
+		div({-align => "right"}, "[ " . a({-href => url(-query => 0)}, "home") . " ]"),
+		h3("Preview"),
+		start_form,
+			"von: ",
+			popup_menu(-name => "charset", -values => $CHARSETS, -onChange => "submit()"),
+			submit(-name => ".redo", -value => "Los!"),
+			hidden(-name => "filename", -value => $filename),
+			hidden(-name => "infile", -value => $infile),
+			hidden(-name => "outfile", -value => $outfile),
+			hidden(-name => ".redo", -value => 1),
+		end_form,
+		pre( "\n", @lines ),
+		br,
+		a({-href => $download_url},
+			"download " . escapeHTML($filename)),
+		hr,
+		$FOOTER,
+		end_html;
+	exit 0;
+}
+
+# Nothing usable submitted yet: present the upload form.
+unless ($_ok && $filename) {
+	# Collect the page parts in output order, then emit them in one go.
+	my @page = (
+		header(-charset => "utf-8"),
+		start_html(@HEADER),
+		h1("Heute schon den Ötzi nach UTF-8 konvertiert?"),
+	);
+	my @upload = (
+		legend("Upload"),
+		start_multipart_form,
+		table(
+			Tr(td("File"), td(filefield(-name => "filename"))),
+			Tr(td("Zeichensatz"), td(popup_menu(-name => "charset", -values => $CHARSETS))),
+			Tr(td(), td(submit(-name => ".ok", -value => "Los!"))),
+		),
+		end_multipart_form,
+	);
+	push @page, fieldset(@upload), $FOOTER, end_html;
+	print @page;
+	exit 0;
+}
+
+# Only reached when ".ok" and "filename" are set but "charset" is not:
+# none of the branches above handled the request, so dump it.
+print header(), start_html(), h1("Internal Error."), Dump(), end_html();