Change 34867 by [EMAIL PROTECTED] on 2008/11/17 11:57:20

        Subject: [perl #58428][PATCH] Unicode::UCD::charinfo() does not work on 
21 Han codepoints
        From: [EMAIL PROTECTED]
        Date: Sun, 31 Aug 2008 11:35:45 +0200 (CEST)
        Message-Id: <[EMAIL PROTECTED]>

Affected files ...

... //depot/perl/lib/Unicode/UCD.pm#41 edit

Differences ...

==== //depot/perl/lib/Unicode/UCD.pm#41 (text) ====
Index: perl/lib/Unicode/UCD.pm
--- perl/lib/Unicode/UCD.pm#40~31237~   2007-05-18 15:01:51.000000000 -0700
+++ perl/lib/Unicode/UCD.pm     2008-11-17 03:57:20.000000000 -0800
@@ -3,7 +3,7 @@
 use strict;
 use warnings;
 
-our $VERSION = '0.25';
+our $VERSION = '0.26';
 
 use Storable qw(dclone);
 
@@ -174,13 +174,41 @@
     return sprintf("CJK UNIFIED IDEOGRAPH-%04X", shift);
 }
 
+my %first_last = (
+   'CJK Ideograph Extension A' => [ 0x3400,   0x4DB5   ],
+   'CJK Ideograph'             => [ 0x4E00,   0x9FA5   ],
+   'CJK Ideograph Extension B' => [ 0x20000,  0x2A6D6  ],
+);
+
+get_charinfo_ranges();
+
+sub get_charinfo_ranges {
+   my @blocks = keys %first_last;
+   
+   my $fh;
+   openunicode( \$fh, 'UnicodeData.txt' );
+   if( defined $fh ){
+      while( my $line = <$fh> ){
+         next unless $line =~ /(?:First|Last)/;
+         if( grep{ $line =~ /[^;]+;<$_\s*,\s*(?:First|Last)>/ [EMAIL 
PROTECTED] ){
+            my ($number,$block,$type);
+            ($number,$block) = split /;/, $line;
+            $block =~ s/<|>//g;
+            ($block,$type) = split /, /, $block;
+            my $index = $type eq 'First' ? 0 : 1;
+            $first_last{ $block }->[$index] = hex $number;
+         }
+      }
+   }
+}
+
 my @CharinfoRanges = (
 # block name
 # [ first, last, coderef to name, coderef to decompose ],
 # CJK Ideographs Extension A
-  [ 0x3400,   0x4DB5,   \&han_charname,   undef  ],
+  [ @{ $first_last{'CJK Ideograph Extension A'} },        \&han_charname,   
undef  ],
 # CJK Ideographs
-  [ 0x4E00,   0x9FA5,   \&han_charname,   undef  ],
+  [ @{ $first_last{'CJK Ideograph'} },                    \&han_charname,   
undef  ],
 # Hangul Syllables
   [ 0xAC00,   0xD7A3,   $hasHangulUtil ? \&getHangulName : \&hangul_charname,  
\&hangul_decomp ],
 # Non-Private Use High Surrogates
@@ -192,7 +220,7 @@
 # The Private Use Area
   [ 0xE000,   0xF8FF,   undef,   undef  ],
 # CJK Ideographs Extension B
-  [ 0x20000,  0x2A6D6,  \&han_charname,   undef  ],
+  [ @{ $first_last{'CJK Ideograph Extension B'} },        \&han_charname,   
undef  ],
 # Plane 15 Private Use Area
   [ 0xF0000,  0xFFFFD,  undef,   undef  ],
 # Plane 16 Private Use Area
End of Patch.

Reply via email to