iconv.pl
changeset 1 137256f711af
parent 0 a74d3272e263
child 2 f2cb4b66de94
equal deleted inserted replaced
0:a74d3272e263 1:137256f711af
     8 use CGI::Carp qw(fatalsToBrowser);
     8 use CGI::Carp qw(fatalsToBrowser);
     9 use File::Temp qw(tempfile);
     9 use File::Temp qw(tempfile);
    10 use File::Basename;
    10 use File::Basename;
    11 
    11 
    12 $ENV{LANG} = "C";
    12 $ENV{LANG} = "C";
    13 delete @ENV{grep /^LC_/, keys %ENV};
    13 delete @ENV{ grep /^LC_/, keys %ENV };
    14 
    14 
    15 my $ME = basename $0;
    15 my $ME       = basename $0;
    16 my $CHARSETS = [qw(cp437 cp850 latin1)];
    16 my $CHARSETS = [qw(cp437 cp850 latin1)];
    17 
    17 
    18 my $_ok = param(".ok");
    18 my $_ok       = param(".ok");
    19 my $_redo = param(".redo");
    19 my $_redo     = param(".redo");
    20 my $_download = param(".download");
    20 my $_download = param(".download");
    21 
    21 
    22 my $charset = param("charset");
    22 my $charset  = param("charset");
    23 my $filename = param("filename");
    23 my $filename = param("filename");
    24 my $outfile = param("outfile");
    24 my $outfile  = param("outfile");
    25 my $infile = param("infile");
    25 my $infile   = param("infile");
    26 my $src = param("src");
    26 my $src      = param("src");
    27 
    27 
    28 # sanitize the filenames
    28 # sanitize the filenames
    29 map { s{[/;&<>]}{}g; $_ = "/tmp/$ME.$_" } ($infile, $outfile);
    29 map { s{[/;&<>]}{}g; $_ = "/tmp/$ME.$_" } ($infile, $outfile);
    30 
    30 
    31 my $STYLE = "";
    31 my $STYLE  = "";
    32 my @HEADER = (
    32 my @HEADER = (
    33 	-title => "UTF8-Konverter",
    33     -title    => "UTF8-Konverter",
    34 	-style => {-code => $STYLE}, 
    34     -style    => { -code => $STYLE },
    35 	-encoding => "utf-8",
    35     -encoding => "utf-8",
    36 );
    36 );
    37 my $FOOTER = div({-align => "right"}, "Scripting &copy; 2008 Heiko Schlittermann");
    37 my $FOOTER =
       
    38   div({ -align => "right" }, "Scripting &copy; 2008 Heiko Schlittermann");
    38 
    39 
    39 END {
    40 END {
    40 	unlink grep { -M > 1 } glob "/tmp/$ME.*";
    41     unlink grep { -M > 1 } glob "/tmp/$ME.*";
    41 }
    42 }
    42 
    43 
    43 # Quelltext anzeigen
    44 # Quelltext anzeigen
    44 if (defined $src) {
    45 if (defined $src) {
    45 	if ($src eq "html") {
    46     if ($src eq "html") {
    46 		open(my $this, "perltidy --html --st $0|");
    47         open(my $this, "perltidy --html --st $0|");
    47 		print header(-charset => "utf-8"),
    48         print header(-charset => "utf-8"), <$this>;
    48 			<$this>;
    49         exit 0;
    49 		exit 0;
    50     }
    50 	}
       
    51 
    51 
    52 	open(my $this, $0);
    52     open(my $this, $0);
    53 	print header(-charset => "utf-8", -type => "text/plain"),
    53     print header(-charset => "utf-8", -type => "text/plain"), <$this>;
    54 		<$this>;
    54     exit 0;
    55 	exit 0;
       
    56 }
    55 }
    57 
    56 
    58 # download nach preview
    57 # download nach preview
    59 if ($_download && $outfile && $filename) {
    58 if ($_download && $outfile && $filename) {
    60 
    59 
    61 	open(my $fh, $outfile) or do {
    60     open(my $fh, $outfile) or do {
    62 		print header(-charset => "utf-8"),
    61         print header(-charset => "utf-8"),
    63 			start_html(@HEADER),
    62           start_html(@HEADER),
    64 			h1("Download"),
    63           h1("Download"),
    65 			"Sorry, Downloadfile $outfile: $!",
    64           "Sorry, Downloadfile $outfile: $!",
    66 			end_html;
    65           end_html;
    67 		exit 0;
    66         exit 0;
    68 	};
    67     };
    69 
    68 
    70 	print header(-type => "application/octet-stream",
    69     print header(
    71 		-attachment => $filename),
    70         -type       => "application/octet-stream",
    72 		<$fh>;
    71         -attachment => $filename
    73 	exit 0;
    72       ),
    74 
    73       <$fh>;
       
    74     exit 0;
    75 
    75 
    76 }
    76 }
    77 
       
    78 
    77 
    79 # upload und preview
    78 # upload und preview
    80 if (($_ok || $_redo) && ($filename && $charset)) {
    79 if (($_ok || $_redo) && ($filename && $charset)) {
    81 
    80 
    82 	my $in;
    81     my $in;
    83 	my $download;
    82     my $download;
    84 	if ($_ok) {	# first time, copy the upload to some tmp file
    83     if ($_ok) {    # first time, copy the upload to some tmp file
    85 		($in, $infile) = tempfile("/tmp/$ME.in.XXXXXX");
    84         ($in, $infile) = tempfile("/tmp/$ME.in.XXXXXX");
    86 		local $/ = \10240;
    85         local $/ = \10240;
    87 		print $in $_ while <$filename>;
    86         print $in $_ while <$filename>;
    88 		close($in);
    87         close($in);
    89 		close($filename);
    88         close($filename);
    90 
    89 
    91 		($download, $outfile) = tempfile("/tmp/$ME.out.XXXXXX");
    90         ($download, $outfile) = tempfile("/tmp/$ME.out.XXXXXX");
    92 	}
    91     }
    93 	elsif ($_redo) {	# redo - we read our saved orignal file
    92     elsif ($_redo) {    # redo - we read our saved orignal file
    94 		# we can recycle the outfile
    93                         # we can recycle the outfile
    95 		open($download, ">$outfile");
    94         open($download, ">$outfile");
    96 	}
    95     }
    97 
    96 
    98 	open($in, $infile) or die "$infile: $!";
    97     open($in, $infile) or die "$infile: $!";
    99 
    98 
       
    99     require Text::Iconv;
       
   100     my $converter = new Text::Iconv $charset => "utf-8";
   100 
   101 
   101 	require Text::Iconv;
   102     my @lines;
   102 	my $converter = new Text::Iconv $charset => "utf-8";
   103     my $last = 0;
       
   104     while (<$in>) {
       
   105         s/\r?\n$//;
       
   106         $_ = $converter->convert("$_\n");
       
   107         die "Konvertierungsproblem in Zeile $." if not defined $_;
       
   108         print $download $_;
       
   109         $ENV{LC_CTYPE} = "de_DE.UTF-8";
       
   110         if (eof
       
   111             || (@lines < 30 and ($. < 4 || /[^a-z0-9[:punct:][:space:]]{1,3}/i))
       
   112           )
       
   113         {
       
   114             push @lines, "    : ", substr("." x ($. - $last), 0, 80), "\n"
       
   115               if $. - $last > 1;
       
   116             push @lines, sprintf "%4d: $_", $.;
       
   117             $last = $.;
       
   118         }
       
   119     }
   103 
   120 
   104 	my @lines;
   121    # jetzt mal den Filenamen ermitteln, wie er nach der Konversion aussehen soll
   105 	my $last = 0;
   122    # das können wir nicht vorher machen, weil $file da auch noch ein Filehandle
   106 	while (<$in>) {
   123    # darstellt, welches wir so einfach nicht modifizieren können
   107 		s/\r?\n$//;
   124     $filename =~ y/\\/\//;
   108 		$_ = $converter->convert("$_\n");
   125     $filename = basename $filename;
   109 		die "Konvertierungsproblem in Zeile $." if not defined $_;
   126     $filename =~ s/(\..+?)$/.utf8$1/
   110 		print $download $_;
   127       or $filename .= ".utf8";
   111 		$ENV{LC_CTYPE} = "de_DE.UTF-8";
       
   112 		if (eof || (@lines < 30 and ($. < 4 || /[^a-z0-9[:punct:][:space:]]{1,3}/i))) {
       
   113 			push @lines, "    : ", substr("." x ($. - $last), 0, 80), "\n"  if $. - $last > 1;
       
   114 			push @lines, sprintf "%4d: $_", $.;
       
   115 			$last = $.
       
   116 		}
       
   117 	}
       
   118 
   128 
   119 	# jetzt mal den Filenamen ermitteln, wie er nach der Konversion aussehen soll
   129     # die Namen der tmp-Files kürzen
   120 	# das können wir nicht vorher machen, weil $file da auch noch ein Filehandle 
   130     map { s/\/tmp\/\Q$ME\E\.// } $infile, $outfile;
   121 	# darstellt, welches wir so einfach nicht modifizieren können
       
   122 	$filename =~ y/\\/\//;
       
   123 	$filename = basename $filename;
       
   124 	$filename =~ s/(\..+?)$/.utf8$1/
       
   125 		or $filename .= ".utf8";
       
   126 
   131 
   127 	# die Namen der tmp-Files kürzen
   132     print header(-charset => "utf-8"), start_html(@HEADER),
   128 	map { s/\/tmp\/\Q$ME\E\.// } $infile, $outfile;
   133       div({ -align => "right" },
   129 
   134         "[ " . a({ -href => url(-query => 0) }, "home") . " ]"),
   130 	print header(-charset => "utf-8"),
   135       h3("Preview"), start_form, "von: ",
   131 		start_html(@HEADER),
   136       popup_menu(
   132 		div({-align => "right"}, "[ " . a({-href => url(-query => 0)}, "home") . " ]"),
   137         -name     => "charset",
   133 		h3("Preview"),
   138         -values   => $CHARSETS,
   134 		start_form,
   139         -onChange => "submit()"
   135 			"von: ",
   140       ),
   136 			popup_menu(-name => "charset", -values => $CHARSETS, -onChange => "submit()"),
   141       submit(-name => ".redo", -value => "Los!"),
   137 			submit(-name => ".redo", -value => "Los!"),
   142       hidden(-name => "filename", -value => $filename),
   138 			hidden(-name => "filename", -value => $filename),
   143       hidden(-name => "infile",   -value => $infile),
   139 			hidden(-name => "infile", -value => $infile),
   144       hidden(-name => "outfile",  -value => $outfile),
   140 			hidden(-name => "outfile", -value => $outfile),
   145       hidden(-name => ".redo",    -value => 1), end_form, pre("\n", @lines), br,
   141 			hidden(-name => ".redo", -value => 1),
   146       a(
   142 		end_form,
   147         {
   143 		pre( "\n", @lines ),
   148             -href => url(-query => 0)
   144 		br,
   149               . "?.download=1;outfile=$outfile;filename=$filename"
   145 		a({-href => url(-query => 0) . "?.download=1;outfile=$outfile;filename=$filename"}, 
   150         },
   146 			"download " . escapeHTML($filename)),
   151         "download " . escapeHTML($filename)
   147 		hr,
   152       ),
   148 		$FOOTER,
   153       hr,
   149 		end_html;
   154       $FOOTER,
   150 	exit 0;
   155       end_html;
       
   156     exit 0;
   151 }
   157 }
   152 
   158 
   153 # noch nichts angegeben
   159 # noch nichts angegeben
   154 if (!$_ok || !$filename) {
   160 if (!$_ok || !$filename) {
   155 	print header(-charset => "utf-8"),
   161     print header(-charset => "utf-8"),
   156 		start_html(@HEADER),
   162       start_html(@HEADER), h1("Heute schon den Ötzi nach UTF-8 konvertiert?"),
   157 		h1("Heute schon den Ötzi nach UTF-8 konvertiert?"),
   163       fieldset(
   158 		fieldset(
   164         legend("Upload"),
   159 			legend("Upload"),
   165         start_multipart_form,
   160 			start_multipart_form,
   166         table(
   161 			table(
   167             Tr(td("File"), td(filefield(-name => "filename"))),
   162 				Tr(td("File"), td(filefield(-name => "filename"))),
   168             Tr(
   163 				Tr(td("Zeichensatz"), td(popup_menu(-name => "charset", -values => $CHARSETS))),
   169                 td("Zeichensatz"),
   164 				Tr(td(), td(submit(-name => ".ok", -value => "Los!"))),
   170                 td(popup_menu(-name => "charset", -values => $CHARSETS))
   165 			),
   171             ),
   166 			end_multipart_form,
   172             Tr(td(), td(submit(-name => ".ok", -value => "Los!"))),
   167 		),
   173         ),
   168 		$FOOTER,
   174         end_multipart_form,
   169 		end_html;
   175       ),
   170 	exit 0;
   176       $FOOTER,
       
   177       end_html;
       
   178     exit 0;
   171 }
   179 }
   172 
   180 
   173 print header, start_html, h1("Internal Error."), Dump, end_html;
   181 print header, start_html, h1("Internal Error."), Dump, end_html;