On Tue, Mar 26, 2002 at 09:07:25AM +0900, Dan Kogai wrote:
> Encode hackers (Especially Autrijius)
Autrijus.
> * rename gb2312 to gb2312-raw, ksc5601 to ksc5601-raw
> * and alias gb2312 and ksc5601 to euc-(cn|kr)
I agree. :)
> I know it's technically wrong but perl opts more for practical than
> technical....
Well, at least almost every other program (hc, iconv, mozilla...) does
that anyway.
Also, please don't forget to apply the following patch to HZ.pm, which
does s/gb2312/gb2312-raw/ and also cleans up the code a little.
/Autrijus/
--- HZ.pm.old Tue Mar 26 11:43:54 2002
+++ HZ.pm Tue Mar 26 11:50:52 2002
@@ -1,13 +1,12 @@
package Encode::CN::HZ;
use strict;
-no warnings 'redefine'; # to quell the "use Encode" below
use vars qw($VERSION);
$VERSION = do { my @r = (q$Revision: 0.92 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r,
@r };
+use Encode ();
use Encode::CN;
-use Encode qw|encode decode|;
use base 'Encode::Encoding';
# HZ is but escaped GB, so we implement it with the
@@ -20,7 +19,7 @@
sub decode
{
my ($obj,$str,$chk) = @_;
- my $gb = Encode::find_encoding('gb2312');
+ my $gb = Encode::find_encoding('gb2312-raw');
$str =~ s{~ # starting tilde
(?:
@@ -44,7 +43,7 @@
:
(defined $2) ? $gb->decode($2, $chk) # decode the characters
:
- '' # '' on ~\n and invalid escape
+ '' # ~\n and invalid escape = ''
}egx;
return $str;
@@ -54,38 +53,38 @@
{
my ($obj,$str,$chk) = @_;
my ($out, $in_gb);
- my $gb = Encode::find_encoding('gb2312');
+ my $gb = Encode::find_encoding('gb2312-raw');
$str =~ s/~/~~/g;
- # XXX: Since CHECK and partial decoding has not been implemented yet,
+ # XXX: Since CHECK and partial decoding has not been implemented yet,
# we'll use a very crude way to test for GB2312ness.
for my $index (0 .. length($str) - 1) {
no warnings 'utf8';
my $char = substr($str, $index, 1);
- my $try = $gb->encode($char); # try encode this char
+ my $try = $gb->encode($char); # try to encode this character
- if (defined($try)) { # is a GB character
+ if (defined($try)) { # is a GB character:
if ($in_gb) {
- $out .= $try; # in GB mode - just append it
+ $out .= $try; # in GB mode - just append it
}
else {
- $out .= "~{$try"; # enter GB mode, then append it
- $in_gb = 1;
+ $in_gb = 1; # enter GB mode, then append it
+ $out .= "~{$try";
}
- }
+ } # not a GB character:
elsif ($in_gb) {
- $out .= "~}$char"; # leave GB mode, then append it
- $in_gb = 0;
+ $in_gb = 0; # leave GB mode, then append it
+ $out .= "~}$char";
}
else {
- $out .= $char; # not in GB mode - just append it
+ $out .= $char; # not in GB mode - just append it
}
}
- $out .= '~}' if $in_gb; # add closing brace as needed
+ $out .= '~}' if $in_gb; # add closing brace if needed
return $out;
}
msg00908/pgp00000.pgp
Description: PGP signature
