#!/usr/bin/perl -w
#
# This is a test script used to explore encodings.
# It generates a series of pages that can be sent to the browser
# to test the transformations used by the TinyMCEPlugin. Specifically:
# 1. Formulate page in a character encoding. The page contains a textarea
#    that contains text that makes sense in the selected encoding.
# 2. Send that page to the client
#    Visually verify that the rendered page reflects the encoded text.
# 3. Click the 'Run Test 1' button. Client compiles an XHR based on the
#    content of the textarea and POSTs it back to the server.
# 4. Server (this script) receives content and decodes it to a state where
#    TWiki could use it. This is done in the 'text2html' section.
# 5. A suitably encoded response is sent to the client, which sets a
#    DIV.innerHTML with the response (this is how TMCE works)
# 6. Client JS now compiles a second request, using the content of the DIV
#    (which is pure HTML). This is sent to the 'html2text' section. The
#    response is shown in the final textarea.
# This is a true and accurate reflection of the process used for WYSIWYG.
#
use strict;
use Encode;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use File::Spec;
use HTML::Entities;
#use locale;

# Skeleton of the served HTML test page; %TOKEN% placeholders are
# substituted before it is printed.
my $htmlPageBody = <<'HERE';
To use the script you must understand the following terms:
You are also advised to read the encodings help page on TWiki.org. If you want, you can just type into the textarea and it will work on whatever you type.
.innerHTML
Visually verify that the resulting
characters are the same as those in the first textarea. Run Test 1 Warning - if you are using IE6, it is incredibly slow. Be patient!
.innerHTML
of the DIV built in Test 1 back
to the server in a html2text request. The server does whatever processing is
specified
for 'html2text' and sends the result back to the client. The client adds this
new text to the textarea below. Run Test 2
%SAVEFILENAME%
HERE

my $q = CGI->new();    # was indirect-object syntax 'new CGI'

# 'action' selects which stage of the round trip to exercise; the default
# simply (re)generates the test page.
my $action = $q->param('action') || 'update';

# Emulate a TWiki installation configured for the requested charset.
$TWiki::cfg{Site}{CharSet} = $q->param('charset');
$TWiki::cfg{Site}{CharSet} = 'iso-8859-1'
  unless defined $TWiki::cfg{Site}{CharSet};

# CGI.pm defaults to iso-8859-1; only override for other charsets.
# (Dropped the useless /o modifier - the pattern has no interpolation.)
if ($TWiki::cfg{Site}{CharSet} !~ /^iso-?8859-?1$/i) {
    CGI::charset($TWiki::cfg{Site}{CharSet});
}

# Inclusive range of code points to put in the generated test string;
# defaults cover printable ASCII.
my $firstchar = $q->param('firstchar');
$firstchar = 32 unless defined $firstchar;
my $lastchar = $q->param('lastchar');
$lastchar = 126 unless defined $lastchar;

# Create a unicode string. This is stored by perl using wide characters.
my @test;
for (my $i = $firstchar; $i <= $lastchar; $i++) {
    push(@test, $i . ":" . chr($i));
}
my $text = join(' ', @test) . ".";

# Convert the unicode string to the selected encoding. We use an FB_PERLQQ
# to defuse a string unencodeable in the current charset.
my $encoded_text =
  Encode::encode($TWiki::cfg{Site}{CharSet}, $text, Encode::FB_PERLQQ);

# Decode the string again for using in tests. The string should now be what
# ends up in a topic after a TWiki edit using that charset.
$text = Encode::decode($TWiki::cfg{Site}{CharSet}, $encoded_text, Encode::FB_PERLQQ);

# Mapping high-bit characters from unicode back to iso-8859-1
# (a.k.a Windows 1252 a.k.a "ANSI") - http://www.alanwood.net/demos/ansi.html
my %unicode2ANSI = (
    chr(8364) => chr(128),    # EURO SIGN
    chr(8218) => chr(130),    # SINGLE LOW-9 QUOTATION MARK
    chr(402)  => chr(131),    # LATIN SMALL LETTER F WITH HOOK
    chr(8222) => chr(132),    # DOUBLE LOW-9 QUOTATION MARK
    chr(8230) => chr(133),    # HORIZONTAL ELLIPSIS
    chr(8224) => chr(134),    # DAGGER
    chr(8225) => chr(135),    # DOUBLE DAGGER
    chr(710)  => chr(136),    # MODIFIER LETTER CIRCUMFLEX ACCENT
    chr(8240) => chr(137),    # PER MILLE SIGN
    chr(352)  => chr(138),    # LATIN CAPITAL LETTER S WITH CARON
    chr(8249) => chr(139),    # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    chr(338)  => chr(140),    # LATIN CAPITAL LIGATURE OE
    chr(381)  => chr(142),    # LATIN CAPITAL LETTER Z WITH CARON
    chr(8216) => chr(145),    # LEFT SINGLE QUOTATION MARK
    chr(8217) => chr(146),    # RIGHT SINGLE QUOTATION MARK
    chr(8220) => chr(147),    # LEFT DOUBLE QUOTATION MARK
    chr(8221) => chr(148),    # RIGHT DOUBLE QUOTATION MARK
    chr(8226) => chr(149),    # BULLET
    chr(8211) => chr(150),    # EN DASH
    chr(8212) => chr(151),    # EM DASH
    chr(732)  => chr(152),    # SMALL TILDE
    chr(8482) => chr(153),    # TRADE MARK SIGN
    chr(353)  => chr(154),    # LATIN SMALL LETTER S WITH CARON
    chr(8250) => chr(155),    # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    chr(339)  => chr(156),    # LATIN SMALL LIGATURE OE
    chr(382)  => chr(158),    # LATIN SMALL LETTER Z WITH CARON
    chr(376)  => chr(159),    # LATIN CAPITAL LETTER Y WITH DIAERESIS
);

# Reverse mapping
my %ANSI2Unicode = map { $unicode2ANSI{$_} => $_ } keys %unicode2ANSI;

# Pre-joined character-class bodies for the s/// transforms below; none
# of these characters is a regex metacharacter, so no quotemeta needed.
my $unicode2ANSIChars = join('', keys %unicode2ANSI);
my $ANSI2UnicodeChars = join('', keys %ANSI2Unicode);

=pod

---++ RESTParameter2SiteCharSet($text)

Text that is taken from a web page and added to the parameters of an XHR
by JavaScript is UTF-8 encoded. This is because UTF-8 is the default
encoding for XML, which XHR was designed to transport. This function is
used to decode such parameters to the currently selected TWiki site
character set.

Note that this transform is not as simple as an Encode::from_to, as a
number of unicode code points must be remapped for certain encodings.
=cut

sub RESTParameter2SiteCharSet {
    my ($text) = @_;

    # XHR parameters arrive as UTF-8 bytes; get perl wide characters first.
    $text = Encode::decode_utf8($text, Encode::FB_PERLQQ);
    if (Encode::resolve_alias($TWiki::cfg{Site}{CharSet}) eq 'iso-8859-1') {
        # Map unicode back to iso-8859 high-bit chars
        $text =~ s/([$unicode2ANSIChars])/$unicode2ANSI{$1}/ge;
    }
    # Re-encode into the site charset, which is what TWiki works with.
    $text = Encode::encode(
        $TWiki::cfg{Site}{CharSet}, $text, Encode::FB_PERLQQ);
    return $text;
}

=pod

---++ siteCharSet2RESTResult($text)

Text that is taken from a web page and added to the parameters of an XHR
by JavaScript is UTF-8 encoded. This is because UTF-8 is the default
encoding for XML, which XHR was designed to transport. For usefulness in
Javascript the response to an XHR should also be UTF-8 encoded.

Note that this transform is not as simple as an Encode::from_to, as a
number of unicode code points must be remapped for certain encodings.

=cut

sub siteCharSet2RESTResult {
    my ($text) = @_;

    $text = Encode::decode(
        $TWiki::cfg{Site}{CharSet}, $text, Encode::FB_PERLQQ);
    if (Encode::resolve_alias($TWiki::cfg{Site}{CharSet}) eq 'iso-8859-1') {
        # Map iso-8859 high-bit chars forward to unicode
        $text =~ s/([$ANSI2UnicodeChars])/$ANSI2Unicode{$1}/ge;
    }
    $text = Encode::encode_utf8($text);
    return $text;
}

# Emit $t as a UTF-8 plain-text response with a byte-accurate
# Content-length, then terminate the script. Both REST-style handlers
# below shared this identical epilogue, so it is factored out here.
sub sendRESTResponse {
    my ($t) = @_;
    print "Content-type: text/plain;charset=UTF-8\r\n";
    my $len;
    {
        # Content-length must count bytes, not (possibly wide) characters.
        use bytes;
        $len = length($t);
    }
    print "Content-length: ", $len, "\r\n";
    print "\r\n";
    print $t;
    exit 0;
}

# Equivalent of the TML2HTML REST handler: decode the posted text into
# the site charset, then encode the reply for the XHR.
if ($action eq 'text2html') {
    # Force scalar context so a multi-valued CGI param can't shift args.
    my $t = RESTParameter2SiteCharSet(scalar $q->param('text'));

    # This is the string that TWiki requires.
    $t = siteCharSet2RESTResult($t);
    sendRESTResponse($t);
}

# Converting from HTML back to the textarea. Equivalent of the
# HTML2TML REST handler, and the save script.
if ($action eq 'html2text') {
    my $t = RESTParameter2SiteCharSet(scalar $q->param('text'));

    # This is the string that TWiki requires.
    $t = siteCharSet2RESTResult($t);
    sendRESTResponse($t);
}

# No REST action requested: build and serve the test page itself.
my $page = $htmlPageBody;
$page =~ s/%CHARSETS%/join(', ', Encode->encodings(":all"))/ge;
$page =~ s/%FIRSTCHAR%/$firstchar/g;
$page =~ s/%LASTCHAR%/$lastchar/g;
$page =~ s/%CHARSET%/$TWiki::cfg{Site}{CharSet}/gs;

# The text is encoded, so should be exactly what is stored on disk
# in TWiki.
$page =~ s/%TEXT%/$encoded_text/gs;

if ($action eq 'save') {
    my $data = $q->param('text');

    # This is a form submission, so the text should already be
    # encoded in the site charset.
    my $dir      = File::Spec->tmpdir();
    my $savefile = "$dir/encodings_test";

    # 3-arg open with a lexical handle (was bareword 2-arg open);
    # failure is reported in the rendered page rather than dying.
    if (open(my $fh, '>', $savefile)) {
        print $fh $data;
        close($fh);
        $page =~ s/%SAVEFILENAME%/$savefile/;
    }
    else {
        $page =~ s/%SAVEFILENAME%/Failed: $!/;
    }
    $page =~ s/%SAVEDTEXT%/$data/;
}

print "Content-type: text/html\r\n\r\n";
print $page;

1;