reassign 320406 libunicode-map-perl 0.112-8 tags 320406 patch forwarded 320406 http://rt.cpan.org/NoAuth/Bug.html?id=16734 thanks
> Unicode::MapUTF8 fails to handle one byte BIG5 characters properly; Hi, this bug is actually in Unicode::Map, which Unicode::MapUTF8 uses. The BIG5 map distributed in Unicode::Map 0.112 (Map/EASTASIA/BIG5.map) is missing the characters 0-127 (0x00-0x7F), which are identical to the corresponding ASCII characters. The actual error is in the original input file, currently at < ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT >. Since that file is now considered obsolete by the Unicode Consortium, I suppose they are not interested in updating it. I have re-reported the bug in the upstream CPAN bug tracker against the correct package. The URL is < http://rt.cpan.org/NoAuth/Bug.html?id=16734 >. The attached patch fixes the problem by modifying the binary mapping file at package build time. It also includes a new test that checks for the correct BIG5 behaviour. We cannot include the original input file, modified or not, since its header explicitly forbids redistributing it to third parties. In a strict sense, this means the character maps licensed in this way are non-free, since their sources (as in "preferred form for making modifications") are not distributable. As it's relatively easy to modify the binary maps as well, I suppose this is not a critical issue. Cheers, -- Niko Tyni [EMAIL PROTECTED]
diff -urN libunicode-map-perl-0.112/debian/fix-big5 libunicode-map-perl-0.112-big5/debian/fix-big5 --- libunicode-map-perl-0.112/debian/fix-big5 1970-01-01 02:00:00.000000000 +0200 +++ libunicode-map-perl-0.112-big5/debian/fix-big5 2005-12-26 20:42:27.790460730 +0200 @@ -0,0 +1,33 @@ +#!/bin/sh +# Insert the US-ASCII compatible characters into the BIG5 binary map, +# since the original source file (currently +# ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT +# ) doesn't include them. Note that this depends on the binary map format. +# Hopefully by the time it is changed, this problem has been fixed upstream. +# +# See http://bugs.debian.org/320406 +# +# Copyright Niko Tyni <[EMAIL PROTECTED]> 2005 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of either: +# +# a) the GNU General Public License as published by the Free Software +# Foundation; either version 1, or (at your option) any later +# version, or +# +# b) the "Artistic License" which comes with Perl. + +set -e + +IN=Map/EASTASIA/BIG5.map +DD=/bin/dd +PF=/usr/bin/printf + +$DD if=$IN bs=1 count=12 # header, 12 bytes +$PF "\x0\x8\x0" # partial key-value mappings +$PF "\x8\x1\x10\x1" # input 1 char of 8-bits at a time, output 1 char of 16 bits +$PF "\x80\x0\x80\x0\x0" # 128 characters starting at 0x00 -> 128 chars starting at 0x0000 +$PF "\x0\x0\x0" # end of submap +$DD if=$IN bs=1 skip=12 # rest of the file + diff -urN libunicode-map-perl-0.112/debian/rules libunicode-map-perl-0.112-big5/debian/rules --- libunicode-map-perl-0.112/debian/rules 2005-12-26 20:15:47.328299427 +0200 +++ libunicode-map-perl-0.112-big5/debian/rules 2005-12-26 19:33:57.176463414 +0200 @@ -18,10 +18,11 @@ dh_testroot [ ! -f Makefile ] || $(MAKE) realclean dh_clean - rm -f build-stamp install-stamp + [ ! 
-f fix-big5-stamp ] || mv debian/BIG5.map.dist Map/EASTASIA/BIG5.map + rm -f build-stamp install-stamp fix-big5-stamp build: build-stamp -build-stamp: +build-stamp: fix-big5 dh_testdir perl Makefile.PL INSTALLDIRS=vendor $(MAKE) OPTIMIZE="-O2 -g -Wall" @@ -37,6 +38,15 @@ $(MAKE) install PREFIX=$(PWD)/$(TMP_DIR)/usr touch install-stamp +# fix the BIG5 map on the fly +fix-big5: fix-big5-stamp +fix-big5-stamp: + dh_testdir + debian/fix-big5 > debian/BIG5.map.new + mv Map/EASTASIA/BIG5.map debian/BIG5.map.dist + mv debian/BIG5.map.new Map/EASTASIA/BIG5.map + touch fix-big5-stamp + binary-indep: binary-arch: build install @@ -58,6 +68,6 @@ binary: binary-indep binary-arch -.PHONY: clean build install binary-indep binary-arch binary +.PHONY: clean build install binary-indep binary-arch binary fix-big5 ## ---------------------------------------------------------------------- diff -urN libunicode-map-perl-0.112/t/map.t libunicode-map-perl-0.112-big5/t/map.t --- libunicode-map-perl-0.112/t/map.t 2001-01-07 23:51:18.000000000 +0200 +++ libunicode-map-perl-0.112-big5/t/map.t 2005-12-26 19:33:20.566730033 +0200 @@ -6,7 +6,7 @@ # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) -BEGIN { $| = 1; print "1..5\n"; } +BEGIN { $| = 1; print "1..6\n"; } END {print "not ok 1\n" unless $loaded;} use Unicode::Map; $loaded = 1; @@ -27,6 +27,7 @@ ["GB2312", "n->m: GB2312 (GB2312-80^8080 + ISO8859-1)"], ["DEVANAGA", "n->m: DEVANAGA"], ["EUC_JP", "n->m: EUC-JP"], + ["BIG5", "n->m: BIG5"], ); { @@ -133,6 +134,21 @@ return testMapping ( "APPLE-DEVANAGA", $_locale, $_unicode ); } +sub BIG5 { + my $_locale = + "\xA5\x40" + ."\xA5\x41" + ."\x30" + ." " + ; + my $_unicode = + "\x4E\x16" + ."\x4E\x15" + ."\x00\x30\x00\x20\x00\x20" + ; + return testMapping ( "BIG5", $_locale, $_unicode ); +} + sub testMapping { my ( $charsetId, $txtLocale, $txtUnicode ) = @_; return 0 if ! ( my $Map = new Unicode::Map($charsetId) );

