Re: Updating src/share/locale/ctype with NetBSD

2013-03-20 Thread Stefan Sperling
On Tue, Mar 19, 2013 at 09:44:37PM -0500, Vladimir Támara Patiño wrote:
> Attached -- includes patch for Spanish-speaking countries.

I don't think it makes sense to manually update the en_US.UTF-8 file.
Rather, I think we should have a script which generates en_US.UTF-8 from
the Unicode Character Database XML files published by unicode.org
(see http://www.unicode.org/Public/6.2.0/ucdxml/).
As a first step, such a script would generate an en_US.UTF-8 file
containing the same character definition blocks as the current file,
possibly containing corrections within these blocks. In a later step we
could think about adding more blocks if doing so would be useful. 
A suitable scripting language would be Perl since Perl exists in
the base system and supports XML parsing.

Your diff is adding ctype definitions for character sets which our
libc doesn't support, such as euc-jp. I don't see any reason to add
such files as long as libc doesn't support the corresponding encodings.
I don't even see much reason to add support for additional encodings 
unless you can provide a convincing reason to do so. UTF-8 should cover
most, if not all, needs.



Updating src/share/locale/ctype with NetBSD

2013-03-19 Thread Vladimir Támara Patiño

Attached -- includes patch for Spanish-speaking countries.

--
Dios, gracias por tu amor infinito.
--  
 Vladimir Támara Patiño.  http://vtamara.pasosdeJesus.org/

 http://www.pasosdejesus.org/dominio_publico_colombia.html

diff -ruN -x obj -x CVS src53orig/share/locale/ctype/Makefile 
src/share/locale/ctype/Makefile
--- src53orig/share/locale/ctype/Makefile   Wed Jul 20 13:26:08 2011
+++ src/share/locale/ctype/Makefile Tue Mar 19 20:34:47 2013
@@ -80,15 +80,18 @@
 LOCALES += en_US.UTF-8
  LOCALESRC_en_US.UTF-8 = en_US.UTF-8
 
-LOCALES += es_ES.ISO8859-1
- LOCALESRC_es_ES.ISO8859-1 = en_US.ISO_8859-1
+#http://en.wikipedia.org/wiki/List_of_countries_where_Spanish_is_an_official_language
+ES_COUNTRIES= AR BO CH CO CR CU DO EC ES GQ GT HN MX NI PA PE PR PY SV UY VE 
+ES_ENCODINGS= ISO8859-1 ISO8859-15 UTF-8
+.for c in ${ES_COUNTRIES}
+LOCALES += es_${c}.UTF-8
+LOCALESRC_es_${c}.UTF-8 = en_US.UTF-8
+LOCALES += es_${c}.ISO8859-1
+LOCALESRC_es_${c}.ISO8859-1 = en_US.ISO_8859-1
+LOCALES += es_${c}.ISO8859-15
+LOCALESRC_es_${c}.ISO8859-15 = en_US.DIS_8859-15
+.endfor
 
-LOCALES += es_ES.ISO8859-15
- LOCALESRC_es_ES.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += es_ES.UTF-8
- LOCALESRC_es_ES.UTF-8 = en_US.UTF-8
-
 LOCALES += fa_IR.UTF-8
  LOCALESRC_fa_IR.UTF-8 = en_US.UTF-8
 
@@ -191,6 +194,9 @@
 #LOCALES += ko_KR.eucKR
 # LOCALESRC_ko_KR.eucKR = ko_KR.eucKR
 
+LOCALES += kk_KZ.PT154
+ LOCALESRC_kk_KZ.PT154 = kk_KZ.PT154
+
 LOCALES += ko_KR.UTF-8
  LOCALESRC_ko_KR.UTF-8 = en_US.UTF-8
 
@@ -263,9 +269,13 @@
 LOCALES += sv_SE.UTF-8
  LOCALESRC_sv_SE.UTF-8 = en_US.UTF-8
 
+LOCALES += tr_TR.ISO8859-9
+ LOCALESRC_tr_TR.ISO8859-9 = tr_TR.ISO8859-9
+
 LOCALES += uk_UA.KOI8-U
  LOCALESRC_uk_UA.KOI8-U = uk_UA.KOI8-U
 
+
 #LOCALES += zh_CN.eucCN
 # LOCALESRC_zh_CN.eucCN = zh_CN.eucCN
 
@@ -274,6 +284,9 @@
 
 LOCALES += zh_CN.UTF-8
  LOCALESRC_zh_CN.UTF-8 = en_US.UTF-8
+
+LOCALES += zh_HK.Big5-HKSCS
+ LOCALESRC_zh_HK.Big5-HKSCS = zh_HK.Big5-HKSCS
 
 #LOCALES += zh_TW.Big5
 # LOCALESRC_zh_TW.Big5 = zh_TW.BIG5
diff -ruN -x obj -x CVS src53orig/share/locale/ctype/charset/JISX0201-right 
src/share/locale/ctype/charset/JISX0201-right
--- src53orig/share/locale/ctype/charset/JISX0201-right Sun Aug  7 05:03:45 2005
+++ src/share/locale/ctype/charset/JISX0201-right   Tue Mar 19 19:05:40 2013
@@ -1,12 +1,13 @@
-/* $NetBSD: JISX0201-right,v 1.1 2000/12/30 02:33:20 itojun Exp $  */
+/* $NetBSD: JISX0201-right,v 1.2 2006/04/11 18:45:03 tnozaki Exp $ */
 
 /*
  * JIS X0201 right Code Set (ESC ( I)
  */
 CHARSET"(I"
 
+PUNCT  0x0021 - 0x0025
 SPECIAL0x0021 - 0x005f
-PHONOGRAM  0x0021 - 0x005f
+PHONOGRAM  0x0026 - 0x005f
 CONTROL0x0004 - 0x0017 0x001b - 0x001f
 CONTROL0x0060 - 0x007e
 SWIDTH10x0021 - 0x005f
diff -ruN -x obj -x CVS src53orig/share/locale/ctype/charset/JISX0208-1978 
src/share/locale/ctype/charset/JISX0208-1978
--- src53orig/share/locale/ctype/charset/JISX0208-1978  Sun Aug  7 05:03:45 2005
+++ src/share/locale/ctype/charset/JISX0208-1978Tue Mar 19 19:06:00 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: JISX0208-1978,v 1.1 2000/12/30 02:33:20 itojun Exp $   */
+/* $NetBSD: JISX0208-1978,v 1.2 2006/04/11 18:45:03 tnozaki Exp $  */
 
 /*
  * JIS X0208-1978 - (ESC $ ( @ or ESC $ @)
@@ -7,6 +7,8 @@
 CHARSET"$(@"
 
 SPACE  0x2121
+BLANK  0x2121
+PRINT  0x2121
 PHONOGRAM  0x213c
 SPECIAL0x2122 - 0x217e
 PUNCT  0x2122 - 0x2178 /* A few too many in here... */
diff -ruN -x obj -x CVS src53orig/share/locale/ctype/charset/JISX0208-1983 
src/share/locale/ctype/charset/JISX0208-1983
--- src53orig/share/locale/ctype/charset/JISX0208-1983  Sun Aug  7 05:03:45 2005
+++ src/share/locale/ctype/charset/JISX0208-1983Tue Mar 19 19:06:11 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: JISX0208-1983,v 1.1 2000/12/30 02:33:20 itojun Exp $   */
+/* $NetBSD: JISX0208-1983,v 1.2 2006/04/11 18:45:03 tnozaki Exp $  */
 
 /*
  * JIS X0208-1983 - (ESC $ ( B or ESC $ B)
@@ -6,6 +6,8 @@
 CHARSET"$(B"
 
 SPACE  0x2121
+BLANK  0x2121
+PRINT  0x2121
 PHONOGRAM  0x213c
 SPECIAL0x2122 - 0x217e
 PUNCT  0x2122 - 0x2178 /* A few too many in here... */
diff -ruN -x obj -x CVS src53orig/share/locale/ctype/en_US.UTF-8.src 
src/share/locale/ctype/en_US.UTF-8.src
--- src53orig/share/locale/ctype/en_US.UTF-8.srcSat Feb  9 14:27:36 2013
+++ src/share/locale/ctype/en_US.UTF-8.src  Tue Mar 19 19:07:05 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: en_US.UTF-8.src,v 1.4 2005/02/10 18:12:42 tnozaki Exp $
*/
+/* $NetBSD: en_US.UTF-8.src,v 1.5 2012/08/08 18:40:37 tnozaki Exp $
*/
 /* $FreeBSD: /repoman/r/ncvs/src/share/mklocale/UTF-8.src,v 1.1 2004/03/27 
08:14:14 tjr Exp $  */
 
 /*
@@ -491,9 +491,9 @@
  * U+0300 - U+036F : Combining Diacritical Marks
  */
 
-GRAPH 0x0300 - 0x034e  0x0350 - 0x