Author: atsushi
Date: 2005-06-15 01:59:02 -0400 (Wed, 15 Jun 2005)
New Revision: 46012

Modified:
   branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
   branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
Log:
2005-06-15  Atsushi Enomoto  <[EMAIL PROTECTED]>

        * create-mscompat-collation-table.cs : some fixes on primary category
          07 (miscellaneous symbols and punctuations).



Modified: branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
===================================================================
--- branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog      2005-06-15 05:56:46 UTC (rev 46011)
+++ branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog      2005-06-15 05:59:02 UTC (rev 46012)
@@ -1,3 +1,8 @@
+2005-06-15  Atsushi Enomoto  <[EMAIL PROTECTED]>
+
+       * create-mscompat-collation-table.cs : some fixes on primary category
+         07 (miscellaneous symbols and punctuations).
+
 2005-06-14  Atsushi Enomoto  <[EMAIL PROTECTED]>
 
        * create-mscompat-collation-table.cs : more mapping fix on numbers,

Modified: branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
===================================================================
--- branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs     2005-06-15 05:56:46 UTC (rev 46011)
+++ branches/atsushi/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs     2005-06-15 05:59:02 UTC (rev 46012)
@@ -57,6 +57,7 @@
                const int DecompositionCircle = 0xF;
                const int DecompositionSquare = 0x10;
                const int DecompositionCompat = 0x11;
+               const int DecompositionCanonical = 0x12;
 
                TextWriter Result = Console.Out;
 
@@ -595,6 +596,8 @@
                                        throw new Exception ("Support NFKD type : " + decomp);
                                }
                        }
+                       else
+                               decompType [cp] = DecompositionCanonical;
                        decomp = idx < 0 ? decomp : decomp.Substring (decomp.IndexOf ('>') + 2);
                        if (decomp.Length > 0) {
 
@@ -1036,32 +1039,12 @@
                        #endregion
 
 
-                       #region ASCII non-alphanumeric + 3001, 3002 // 07
-                       // non-alphanumeric ASCII except for: + - < = > '
-                       for (int i = 0x21; i < 0x7F; i++) {
-                               if (Char.IsLetterOrDigit ((char) i)
-                                       || "+-<=>'".IndexOf ((char) i) >= 0)
-                                       continue; // they are not added here.
-                                       AddCharMapGroup2 ((char) i, 0x7, 1, 0);
-                               // Insert 3001 after ',' and 3002 after '.'
-                               if (i == 0x2C)
-                                       AddCharMapGroup2 ('\u3001', 0x7, 1, 0);
-                               else if (i == 0x2E)
-                                       AddCharMapGroup2 ('\u3002', 0x7, 1, 0);
-                               else if (i == 0x3A)
-                                       AddCharMap ('\uFE30', 0x7, 1, 0);
-                       }
-                       #endregion
-
-
-                       // FIXME: for 07 xx we need more love.
-
                        // FIXME: 08 should be more complete.
                        fillIndex [0x8] = 2;
                        for (int cp = 0; cp < char.MaxValue; cp++)
                                if (Char.GetUnicodeCategory ((char) cp) ==
                                        UnicodeCategory.MathSymbol)
-                                       AddCharMapGroup2 ((char) cp, 0x8, 1, 0);
+                                       AddCharMapGroup ((char) cp, 0x8, 1, 0);
 
                        // FIXME: implement 09
 
@@ -1073,13 +1056,13 @@
                                uc = Char.GetUnicodeCategory ((char) cp);
                                if (uc == UnicodeCategory.CurrencySymbol &&
                                        cp != '$')
-                                       AddCharMapGroup2 ((char) cp, 0xA, 1, 0);
+                                       AddCharMapGroup ((char) cp, 0xA, 1, 0);
                        }
                        // byte other symbols
                        for (int cp = 0; cp < 0x100; cp++) {
                                uc = Char.GetUnicodeCategory ((char) cp);
                                if (uc == UnicodeCategory.OtherSymbol)
-                                       AddCharMapGroup2 ((char) cp, 0xA, 1, 0);
+                                       AddCharMapGroup ((char) cp, 0xA, 1, 0);
                        }
                        #endregion
 
@@ -1522,22 +1505,24 @@
                        diacritical [0x64A] = 0x7; // Yaa'
 
 
-                       for (int i = 0; i < 0x10000; i++) {
-                               switch (map [i].Category) {
+                       for (int i = 0; i < char.MaxValue; i++) {
+                               byte mod = 0;
+                               byte cat = map [i].Category;
+                               switch (cat) {
                                case 0xE: // Latin diacritics
                                case 0x22: // Japanese: circled characters
-                                       map [i] = new CharMapEntry (
-                                               map [i].Category,
-                                               map [i].Level1,
-                                               diacritical [i]);
+                                       mod = diacritical [i];
                                        break;
                                case 0x13: // Arabic
                                        if (diacritical [i] == 0)
-                                               // default by 8
-                                               diacritical [i] = 0x8;
-                                       map [i] = new CharMapEntry (0xE, map [i].Level1, diacritical [i]);
+                                               mod = 0x8; // default for arabic
                                        break;
                                }
+                               if (0x52 <= cat && cat <= 0x7F) // Hangul
+                                       mod = diacritical [i];
+                               if (mod > 0)
+                                       map [i] = new CharMapEntry (
+                                               cat, map [i].Level1, mod);
                        }
                        #endregion
 
@@ -1679,6 +1664,59 @@
                                AddCharMap (c, 0xFF, 0);
                        #endregion
 
+                       #region 07 - ASCII non-alphanumeric + 3001, 3002 // 07
+                       // non-alphanumeric ASCII except for: + - < = > '
+                       for (int i = 0x21; i < 0x7F; i++) {
+                               if (Char.IsLetterOrDigit ((char) i)
+                                       || "+-<=>'".IndexOf ((char) i) >= 0)
+                                       continue; // they are not added here.
+                                       AddCharMapGroup2 ((char) i, 0x7, 1, 0);
+                               // Insert 3001 after ',' and 3002 after '.'
+                               if (i == 0x2C)
+                                       AddCharMapGroup2 ('\u3001', 0x7, 1, 0);
+                               else if (i == 0x2E) {
+                                       fillIndex [0x7]--;
+                                       AddCharMapGroup2 ('\u3002', 0x7, 1, 0);
+                               }
+                               else if (i == 0x3A)
+                                       AddCharMap ('\uFE30', 0x7, 1, 0);
+                       }
+                       #endregion
+
+                       #region 07 - Punctuations and something else
+                       for (int i = 0xA0; i < char.MaxValue; i++) {
+                               if (IsIgnorable (i))
+                                       continue;
+
+                               // SPECIAL CASES:
+                               switch (i) {
+                               case 0xAB: // 08
+                               case 0x2329: // 09
+                               case 0x232A: // 09
+                                       continue;
+                               }
+
+                               switch (Char.GetUnicodeCategory ((char) i)) {
+                               case UnicodeCategory.OtherPunctuation:
+                               case UnicodeCategory.ClosePunctuation:
+                               case UnicodeCategory.InitialQuotePunctuation:
+                               case UnicodeCategory.FinalQuotePunctuation:
+                               case UnicodeCategory.ModifierSymbol:
+                                       // SPECIAL CASES: // 0xA
+                                       if (0x2020 <= i && i <= 0x2042)
+                                               continue;
+                                       AddCharMapGroup ((char) i, 0x7, 1, 0);
+                                       break;
+                               default:
+                                       if (i == 0xA6) // SPECIAL CASE. FIXME: why?
+                                               goto case UnicodeCategory.OtherPunctuation;
+                                       break;
+                               }
+                       }
+                       #endregion
+
+                       // FIXME: for 07 xx we need more love.
+
                        // Characters w/ diacritical marks (NFKD)
                        for (int i = 0; i <= char.MaxValue; i++) {
                                if (map [i].Defined || IsIgnorable (i))
@@ -1767,34 +1805,33 @@
                        // <small> updates index
                        c2 = ToSmallForm (c);
                        if (c2 != c)
-                               AddCharMapGroup2 (c2, category, updateCount, level2);
+                               AddCharMapGroup (c2, category, updateCount, level2);
                        c2 = Char.ToLower (c, CultureInfo.InvariantCulture);
                        if (c2 != c && !map [(int) c2].Defined)
                                AddLetterMapCore (c2, category, 0, level2);
                        bool doUpdate = true;
-                       if (!map [c].Defined)
-                               AddCharMapGroup2 (c, category, 0, level2);
+                       if (IsIgnorable ((int) c) || map [(int) c].Defined)
+                               doUpdate = false;
                        else
-                               doUpdate = false;
+                               AddCharMapGroup (c, category, 0, level2);
                        if (doUpdate)
                                fillIndex [category] += updateCount;
                }
 
-               private void AddCharMap (char c, byte category, byte increment)
+               private bool AddCharMap (char c, byte category, byte increment)
                {
-                       AddCharMap (c, category, increment, 0);
+                       return AddCharMap (c, category, increment, 0);
                }
                
-               private void AddCharMap (char c, byte category, byte increment, byte alt)
+               private bool AddCharMap (char c, byte category, byte increment, byte alt)
                {
-                       if (IsIgnorable ((int) c) || map [(int) c].Defined) {
-                               return; // do nothing
-                       }
-
+                       if (IsIgnorable ((int) c) || map [(int) c].Defined)
+                               return false; // do nothing
                        map [(int) c] = new CharMapEntry (category,
                                category == 1 ? alt : fillIndex [category],
                                category == 1 ? fillIndex [category] : alt);
                        fillIndex [category] += increment;
+                       return true;
                }
 
                private void AddCharMapGroupTail (char c, byte category, byte updateCount)
@@ -1822,18 +1859,21 @@
                //      (vertical +)
                //
                // level2 is fixed (does not increase).
-                       int [] sameWeightItems = new int [] {
-                               0, // canonically compatible
-                               DecompositionFraction,
-                               DecompositionFull,
-                               DecompositionSuper,
-                               DecompositionSub,
-                               DecompositionCircle,
-                               DecompositionWide,
-                               DecompositionNarrow,
-                               };
-               private void AddCharMapGroup2 (char c, byte category, byte updateCount, byte level2)
+               int [] sameWeightItems = new int [] {
+                       0, // canonically compatible
+                       DecompositionFraction,
+                       DecompositionFull,
+                       DecompositionSuper,
+                       DecompositionSub,
+                       DecompositionCircle,
+                       DecompositionWide,
+                       DecompositionNarrow,
+                       };
+               private void AddCharMapGroup (char c, byte category, byte updateCount, byte level2)
                {
+if (map [(int) c].Defined)
+return;
+
                        char small = char.MinValue;
                        char vertical = char.MinValue;
                        Hashtable nfkd = (Hashtable) nfkdMap [(int) c];
@@ -1897,7 +1937,7 @@
                        Hashtable nfkd = (Hashtable) nfkdMap [(int) c];
                        if (nfkd == null)
                                return;
-                       for (byte weight = 0; weight <= 17; weight++) {
+                       for (byte weight = 0; weight <= 0x12; weight++) {
                                object wv = nfkd [weight];
                                if (wv == null)
                                        continue;
@@ -1920,30 +1960,47 @@
                        }
                }
 
-               // note that level2 is fixed
-               // different order than AddCharMapGroup2()
-               private void AddCharMapGroup (char c, byte category, byte updateCount, byte level2)
+               // For now it is only for 0x7 category.
+               private void AddCharMapGroup2 (char c, byte category, byte updateCount, byte level2)
                {
-/*
+                       char small = char.MinValue;
+                       char vertical = char.MinValue;
+                       Hashtable nfkd = (Hashtable) nfkdMap [(int) c];
+                       if (nfkd != null) {
+                               object smv = nfkd [(byte) DecompositionSmall];
+                               if (smv != null)
+                                       small = (char) ((int) smv);
+                               object vv = nfkd [(byte) DecompositionVertical];
+                               if (vv != null)
+                                       vertical = (char) ((int) vv);
+                       }
+
+                       // <small> updates index
+                       if (small != char.MinValue)
+                               // SPECIAL CASE excluded (FIXME: why?)
+                               if (small != '\u2024')
+                                       AddCharMap (small, category, updateCount);
+
                        // itself
                        AddCharMap (c, category, updateCount, level2);
 
-                       Hashtable nfkd = (Hashtable) nfkdMap [(int) c];
-                       if (nfkd == null)
-                               return;
-
-                       // Here type of i must be byte since the constants
-                       // are stored as byte.
-                       for (byte i = 1; i <= 17; i++) {
-                               if (nfkd.ContainsKey (i)) {
-                                       int cp = (int) nfkd [i];
-                                       if (decompLength [cp] == 1) {
-                                               AddCharMapGroup ((char) cp, category, updateCount, level2);
+                       // Since nfkdMap is problematic to have two or more
+                       // NFKD to an identical character, here I iterate all.
+                       for (int c2 = 0; c2 < char.MaxValue; c2++) {
+                               if (decompLength [c2] == 1 &&
+                                       (int) (decompValues [decompIndex [c2]]) == (int) c) {
+                                       switch (decompType [c2]) {
+                                       case DecompositionCompat:
+                                               AddCharMap ((char) c2, category, updateCount, level2);
+                                               break;
                                        }
                                }
                        }
-*/
-                       AddCharMapGroup2 (c, category, updateCount, level2);
+
+                       if (vertical != char.MinValue)
+                               // SPECIAL CASE excluded (FIXME: why?)
+                               if (vertical != '\uFE33' && vertical != '\uFE34')
+                                       AddCharMap (vertical, category, updateCount, level2);
                }
 
                char ToFullWidth (char c)

_______________________________________________
Mono-patches maillist  -  [email protected]
http://lists.ximian.com/mailman/listinfo/mono-patches

Reply via email to