Change 34867 by [EMAIL PROTECTED] on 2008/11/17 11:57:20
Subject: [perl #58428][PATCH] Unicode::UCD::charinfo() does not work on
21 Han codepoints
From: [EMAIL PROTECTED]
Date: Sun, 31 Aug 2008 11:35:45 +0200 (CEST)
Message-Id: <[EMAIL PROTECTED]>
Affected files ...
... //depot/perl/lib/Unicode/UCD.pm#41 edit
Differences ...
==== //depot/perl/lib/Unicode/UCD.pm#41 (text) ====
Index: perl/lib/Unicode/UCD.pm
--- perl/lib/Unicode/UCD.pm#40~31237~ 2007-05-18 15:01:51.000000000 -0700
+++ perl/lib/Unicode/UCD.pm 2008-11-17 03:57:20.000000000 -0800
@@ -3,7 +3,7 @@
use strict;
use warnings;
-our $VERSION = '0.25';
+our $VERSION = '0.26';
use Storable qw(dclone);
@@ -174,13 +174,41 @@
return sprintf("CJK UNIFIED IDEOGRAPH-%04X", shift);
}
+my %first_last = (
+ 'CJK Ideograph Extension A' => [ 0x3400, 0x4DB5 ],
+ 'CJK Ideograph' => [ 0x4E00, 0x9FA5 ],
+ 'CJK Ideograph Extension B' => [ 0x20000, 0x2A6D6 ],
+);
+
+get_charinfo_ranges();
+
+sub get_charinfo_ranges {
+ my @blocks = keys %first_last;
+
+ my $fh;
+ openunicode( \$fh, 'UnicodeData.txt' );
+ if( defined $fh ){
+ while( my $line = <$fh> ){
+ next unless $line =~ /(?:First|Last)/;
+ if( grep{ $line =~ /[^;]+;<$_\s*,\s*(?:First|Last)>/ [EMAIL
PROTECTED] ){
+ my ($number,$block,$type);
+ ($number,$block) = split /;/, $line;
+ $block =~ s/<|>//g;
+ ($block,$type) = split /, /, $block;
+ my $index = $type eq 'First' ? 0 : 1;
+ $first_last{ $block }->[$index] = hex $number;
+ }
+ }
+ }
+}
+
my @CharinfoRanges = (
# block name
# [ first, last, coderef to name, coderef to decompose ],
# CJK Ideographs Extension A
- [ 0x3400, 0x4DB5, \&han_charname, undef ],
+ [ @{ $first_last{'CJK Ideograph Extension A'} }, \&han_charname,
undef ],
# CJK Ideographs
- [ 0x4E00, 0x9FA5, \&han_charname, undef ],
+ [ @{ $first_last{'CJK Ideograph'} }, \&han_charname,
undef ],
# Hangul Syllables
[ 0xAC00, 0xD7A3, $hasHangulUtil ? \&getHangulName : \&hangul_charname,
\&hangul_decomp ],
# Non-Private Use High Surrogates
@@ -192,7 +220,7 @@
# The Private Use Area
[ 0xE000, 0xF8FF, undef, undef ],
# CJK Ideographs Extension B
- [ 0x20000, 0x2A6D6, \&han_charname, undef ],
+ [ @{ $first_last{'CJK Ideograph Extension B'} }, \&han_charname,
undef ],
# Plane 15 Private Use Area
[ 0xF0000, 0xFFFFD, undef, undef ],
# Plane 16 Private Use Area
End of Patch.