iconv.pl
changeset 0 a74d3272e263
child 1 137256f711af
equal deleted inserted replaced
-1:000000000000 0:a74d3272e263
       
     1 #! /usr/bin/perl
       
     2 # (c) 2008 Heiko Schlittermann
       
     3 # simple charset converter, using iconv
       
     4 
       
     5 use strict;
       
     6 use warnings;
       
     7 use CGI qw(:all *table);
       
     8 use CGI::Carp qw(fatalsToBrowser);
       
     9 use File::Temp qw(tempfile);
       
    10 use File::Basename;
       
    11 
       
    12 $ENV{LANG} = "C";
       
    13 delete @ENV{grep /^LC_/, keys %ENV};
       
    14 
       
    15 my $ME = basename $0;
       
    16 my $CHARSETS = [qw(cp437 cp850 latin1)];
       
    17 
       
    18 my $_ok = param(".ok");
       
    19 my $_redo = param(".redo");
       
    20 my $_download = param(".download");
       
    21 
       
    22 my $charset = param("charset");
       
    23 my $filename = param("filename");
       
    24 my $outfile = param("outfile");
       
    25 my $infile = param("infile");
       
    26 my $src = param("src");
       
    27 
       
    28 # sanitize the filenames
       
    29 map { s{[/;&<>]}{}g; $_ = "/tmp/$ME.$_" } ($infile, $outfile);
       
    30 
       
    31 my $STYLE = "";
       
    32 my @HEADER = (
       
    33 	-title => "UTF8-Konverter",
       
    34 	-style => {-code => $STYLE}, 
       
    35 	-encoding => "utf-8",
       
    36 );
       
    37 my $FOOTER = div({-align => "right"}, "Scripting &copy; 2008 Heiko Schlittermann");
       
    38 
       
    39 END {
       
    40 	unlink grep { -M > 1 } glob "/tmp/$ME.*";
       
    41 }
       
    42 
       
    43 # Quelltext anzeigen
       
    44 if (defined $src) {
       
    45 	if ($src eq "html") {
       
    46 		open(my $this, "perltidy --html --st $0|");
       
    47 		print header(-charset => "utf-8"),
       
    48 			<$this>;
       
    49 		exit 0;
       
    50 	}
       
    51 
       
    52 	open(my $this, $0);
       
    53 	print header(-charset => "utf-8", -type => "text/plain"),
       
    54 		<$this>;
       
    55 	exit 0;
       
    56 }
       
    57 
       
    58 # download nach preview
       
    59 if ($_download && $outfile && $filename) {
       
    60 
       
    61 	open(my $fh, $outfile) or do {
       
    62 		print header(-charset => "utf-8"),
       
    63 			start_html(@HEADER),
       
    64 			h1("Download"),
       
    65 			"Sorry, Downloadfile $outfile: $!",
       
    66 			end_html;
       
    67 		exit 0;
       
    68 	};
       
    69 
       
    70 	print header(-type => "application/octet-stream",
       
    71 		-attachment => $filename),
       
    72 		<$fh>;
       
    73 	exit 0;
       
    74 
       
    75 
       
    76 }
       
    77 
       
    78 
       
    79 # upload und preview
       
    80 if (($_ok || $_redo) && ($filename && $charset)) {
       
    81 
       
    82 	my $in;
       
    83 	my $download;
       
    84 	if ($_ok) {	# first time, copy the upload to some tmp file
       
    85 		($in, $infile) = tempfile("/tmp/$ME.in.XXXXXX");
       
    86 		local $/ = \10240;
       
    87 		print $in $_ while <$filename>;
       
    88 		close($in);
       
    89 		close($filename);
       
    90 
       
    91 		($download, $outfile) = tempfile("/tmp/$ME.out.XXXXXX");
       
    92 	}
       
    93 	elsif ($_redo) {	# redo - we read our saved orignal file
       
    94 		# we can recycle the outfile
       
    95 		open($download, ">$outfile");
       
    96 	}
       
    97 
       
    98 	open($in, $infile) or die "$infile: $!";
       
    99 
       
   100 
       
   101 	require Text::Iconv;
       
   102 	my $converter = new Text::Iconv $charset => "utf-8";
       
   103 
       
   104 	my @lines;
       
   105 	my $last = 0;
       
   106 	while (<$in>) {
       
   107 		s/\r?\n$//;
       
   108 		$_ = $converter->convert("$_\n");
       
   109 		die "Konvertierungsproblem in Zeile $." if not defined $_;
       
   110 		print $download $_;
       
   111 		$ENV{LC_CTYPE} = "de_DE.UTF-8";
       
   112 		if (eof || (@lines < 30 and ($. < 4 || /[^a-z0-9[:punct:][:space:]]{1,3}/i))) {
       
   113 			push @lines, "    : ", substr("." x ($. - $last), 0, 80), "\n"  if $. - $last > 1;
       
   114 			push @lines, sprintf "%4d: $_", $.;
       
   115 			$last = $.
       
   116 		}
       
   117 	}
       
   118 
       
   119 	# jetzt mal den Filenamen ermitteln, wie er nach der Konversion aussehen soll
       
   120 	# das können wir nicht vorher machen, weil $file da auch noch ein Filehandle 
       
   121 	# darstellt, welches wir so einfach nicht modifizieren können
       
   122 	$filename =~ y/\\/\//;
       
   123 	$filename = basename $filename;
       
   124 	$filename =~ s/(\..+?)$/.utf8$1/
       
   125 		or $filename .= ".utf8";
       
   126 
       
   127 	# die Namen der tmp-Files kürzen
       
   128 	map { s/\/tmp\/\Q$ME\E\.// } $infile, $outfile;
       
   129 
       
   130 	print header(-charset => "utf-8"),
       
   131 		start_html(@HEADER),
       
   132 		div({-align => "right"}, "[ " . a({-href => url(-query => 0)}, "home") . " ]"),
       
   133 		h3("Preview"),
       
   134 		start_form,
       
   135 			"von: ",
       
   136 			popup_menu(-name => "charset", -values => $CHARSETS, -onChange => "submit()"),
       
   137 			submit(-name => ".redo", -value => "Los!"),
       
   138 			hidden(-name => "filename", -value => $filename),
       
   139 			hidden(-name => "infile", -value => $infile),
       
   140 			hidden(-name => "outfile", -value => $outfile),
       
   141 			hidden(-name => ".redo", -value => 1),
       
   142 		end_form,
       
   143 		pre( "\n", @lines ),
       
   144 		br,
       
   145 		a({-href => url(-query => 0) . "?.download=1;outfile=$outfile;filename=$filename"}, 
       
   146 			"download " . escapeHTML($filename)),
       
   147 		hr,
       
   148 		$FOOTER,
       
   149 		end_html;
       
   150 	exit 0;
       
   151 }
       
   152 
       
   153 # noch nichts angegeben
       
   154 if (!$_ok || !$filename) {
       
   155 	print header(-charset => "utf-8"),
       
   156 		start_html(@HEADER),
       
   157 		h1("Heute schon den Ötzi nach UTF-8 konvertiert?"),
       
   158 		fieldset(
       
   159 			legend("Upload"),
       
   160 			start_multipart_form,
       
   161 			table(
       
   162 				Tr(td("File"), td(filefield(-name => "filename"))),
       
   163 				Tr(td("Zeichensatz"), td(popup_menu(-name => "charset", -values => $CHARSETS))),
       
   164 				Tr(td(), td(submit(-name => ".ok", -value => "Los!"))),
       
   165 			),
       
   166 			end_multipart_form,
       
   167 		),
       
   168 		$FOOTER,
       
   169 		end_html;
       
   170 	exit 0;
       
   171 }
       
   172 
       
   173 print header, start_html, h1("Internal Error."), Dump, end_html;