Author: atsushi
Date: 2005-08-03 08:24:02 -0400 (Wed, 03 Aug 2005)
New Revision: 47953

Modified:
   trunk/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
   trunk/mcs/class/corlib/Mono.Globalization.Unicode/Normalization.cs
   trunk/mcs/class/corlib/Mono.Globalization.Unicode/NormalizationTableUtil.cs
   trunk/mcs/class/corlib/Mono.Globalization.Unicode/create-normalization-source.cs
Log:
2005-08-03  Atsushi Enomoto  <[EMAIL PROTECTED]>

        * create-normalization-source.cs,
          NormalizationTableUtil.cs,
          Normalization.cs : several bugfixes on index miscomputation.
          Renamed using aliases (csc will bork). Primary combine safety is now
          computed during UnicodeData.txt parse.
          Maximum NFKD length was 18, not 4 (U+FDFA).



Modified: trunk/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
===================================================================
--- trunk/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog 2005-08-03 11:39:01 UTC (rev 47952)
+++ trunk/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog 2005-08-03 12:24:02 UTC (rev 47953)
@@ -1,3 +1,12 @@
+2005-08-03  Atsushi Enomoto  <[EMAIL PROTECTED]>
+
+       * create-normalization-source.cs,
+         NormalizationTableUtil.cs,
+         Normalization.cs : several bugfixes on index miscomputation.
+         Renamed using aliases (csc will bork). Primary combine safety is now
+         computed during UnicodeData.txt parse.
+         Maximum NFKD length was 18, not 4 (U+FDFA).
+
 2005-08-02  Atsushi Enomoto  <[EMAIL PROTECTED]>
 
        * managed-collation.patch : added Normalization support.

Modified: trunk/mcs/class/corlib/Mono.Globalization.Unicode/Normalization.cs
===================================================================
--- trunk/mcs/class/corlib/Mono.Globalization.Unicode/Normalization.cs  2005-08-03 11:39:01 UTC (rev 47952)
+++ trunk/mcs/class/corlib/Mono.Globalization.Unicode/Normalization.cs  2005-08-03 12:24:02 UTC (rev 47953)
@@ -3,7 +3,7 @@
 using System.Text;
 using System.Runtime.CompilerServices;
 
-using Util = Mono.Globalization.Unicode.NormalizationTableUtil;
+using NUtil = Mono.Globalization.Unicode.NormalizationTableUtil;
 
 namespace Mono.Globalization.Unicode
 {
@@ -22,7 +22,7 @@
                public const int NoNfkc = 16;
                public const int MaybeNfkc = 32;
                public const int FullCompositionExclusion = 64;
-               public const int IsSafe = 128;
+               public const int IsUnsafe = 128;
 //             public const int ExpandOnNfd = 256;
 //             public const int ExpandOnNfc = 512;
 //             public const int ExpandOnNfkd = 1024;
@@ -30,35 +30,36 @@
 
                static uint PropValue (int cp)
                {
-                       return props [Util.PropIdx (cp)];
+                       return props [NUtil.PropIdx (cp)];
                }
 
                static int CharMapIdx (int cp)
                {
-                       return charMapIndex [Util.MapIdx (cp)];
+                       return charMapIndex [NUtil.MapIdx (cp)];
                }
 
-               static int GetComposedStringLength (int mapIdx)
+               static int GetComposedStringLength (int ch)
                {
-                       int i = mapIdx;
+                       int start = charMapIndex [NUtil.MapIdx (ch)];
+                       int i = start;
                        while (mappedChars [i] != 0)
                                i++;
-                       return i - mapIdx;
+                       return i - start;
                }
 
                static byte GetCombiningClass (int c)
                {
-                       return combiningClass [Util.Combining.ToIndex (c)];
+                       return combiningClass [NUtil.Combining.ToIndex (c)];
                }
 
                static int GetPrimaryCompositeFromMapIndex (int src)
                {
-                       return mapIdxToComposite [Util.MapIndexes.ToIndex 
(src)];
+                       return mapIdxToComposite [NUtil.Composite.ToIndex 
(src)];
                }
 
                static short GetPrimaryCompositeHelperIndex (int cp)
                {
-                       return helperIndex [Util.Helper.ToIndex (cp)];
+                       return helperIndex [NUtil.Helper.ToIndex (cp)];
                }
 
                static int GetPrimaryCompositeCharIndex (object chars, int 
start, int charsLength)
@@ -119,6 +120,13 @@
                        return null;
                }
 
+               private static bool CanBePrimaryComposite (int i)
+               {
+                       if (i >= 0x3400 && i <= 0x9FBB)
+                               return GetPrimaryCompositeHelperIndex (i) != 0;
+                       return (PropValue (i) & IsUnsafe) != 0;
+               }
+
                private static void Combine (StringBuilder sb, int start, int 
checkType)
                {
                        for (int i = start; i < sb.Length; i++) {
@@ -135,40 +143,43 @@
                                }
 
                                int cur = i;
-                               // FIXME: It should use IsUnsafe flag.
                                // FIXME: It should check "blocked" too
                                for (;i >= 0; i--)
-                                       if (QuickCheck (sb [i], checkType) == 
NormalizationCheck.Yes)
+                                       if (!CanBePrimaryComposite ((int) sb 
[i]))
                                                break;
                                i++;
-
                                // Now i is the "starter"
-
-                               int ch = 0;
                                int idx = 0;
                                for (; i < cur; i++) {
-                                       idx = GetPrimaryComposite (sb, (int) sb 
[i], sb.Length, i, ref ch);
+                                       idx = GetPrimaryCompositeMapIndex (sb, 
(int) sb [i], sb.Length, i);
                                        if (idx > 0)
                                                break;
                                }
-                               if (idx == 0)
+                               if (idx == 0) {
+                                       i = cur;
                                        continue;
-                               sb.Remove (i, GetComposedStringLength (idx));
-                               sb.Insert (i, (char) ch);
-                               i--; // apply recursively
+                               }
+                               int ch = GetPrimaryCompositeFromMapIndex (idx);
+                               int len = GetComposedStringLength (ch);
+                               if (ch == 0 || len == 0) {
+                                       // FIXME: this actually happens
+                                       // throw new SystemException ("Internal 
error: should not happen.");
+                                       i = cur;
+                                       continue;
+                               }
+                               sb.Remove (i, len);
+                               sb.Insert (i, (char) ch); // always single 
character
+                               i = cur - 1; // apply recursively
                        }
                }
 
-               static int GetPrimaryComposite (object o, int cur, int length, 
int bufferPos, ref int ch)
+               static int GetPrimaryCompositeMapIndex (object o, int cur, int 
length, int bufferPos)
                {
                        if ((PropValue (cur) & FullCompositionExclusion) != 0)
                                return 0;
                        if (GetCombiningClass (cur) != 0)
                                return 0; // not a starter
-                       int idx = GetPrimaryCompositeCharIndex (o, bufferPos, 
length);
-                       if (idx == 0)
-                               return 0;
-                       return GetPrimaryCompositeFromMapIndex (idx);
+                       return GetPrimaryCompositeCharIndex (o, bufferPos, 
length);
                }
 
                static string Decompose (string source, int checkType)
@@ -230,7 +241,7 @@
                                sb = new StringBuilder (s.Length + 100);
                        sb.Append (s, start, i - start);
                        if (buf == null)
-                               buf = new int [5];
+                               buf = new int [19];
                        GetCanonical (s [i], buf, 0);
                        for (int x = 0; ; x++) {
                                if (buf [x] == 0)
@@ -311,23 +322,30 @@
 
                public static bool IsNormalized (string source, int type)
                {
-                       int prevCC = -1;
+//                     int prevCC = -1;
                        for (int i = 0; i < source.Length; i++) {
-                               int cc = GetCombiningClass (source [i]);
-                               if (cc != 0 && cc < prevCC)
-                                       return false;
-                               prevCC = cc;
+//                             int cc = GetCombiningClass (source [i]);
+//                             if (cc != 0 && cc < prevCC)
+//                                     return false;
+//                             prevCC = cc;
                                switch (QuickCheck (source [i], type)) {
                                case NormalizationCheck.Yes:
                                        break;
                                case NormalizationCheck.No:
                                        return false;
                                case NormalizationCheck.Maybe:
-                                       int ch = 0;
-                                       if (GetPrimaryComposite (source,
-                                               source [i], source.Length,
-                                               i, ref ch) != 0)
-                                               return false;
+                                       // partly copied from Combine()
+                                       int cur = i;
+                                       // FIXME: It should check "blocked" too
+                                       for (;i >= 0; i--)
+                                               if (!CanBePrimaryComposite 
((int) source [i]))
+                                                       break;
+                                       i++;
+                                       // Now i is the "starter"
+                                       for (; i < cur; i++) {
+                                               if 
(GetPrimaryCompositeCharIndex (source, i, source.Length) != 0)
+                                                       return false;
+                                       }
                                        break;
                                }
                        }

Modified: trunk/mcs/class/corlib/Mono.Globalization.Unicode/NormalizationTableUtil.cs
===================================================================
--- trunk/mcs/class/corlib/Mono.Globalization.Unicode/NormalizationTableUtil.cs 2005-08-03 11:39:01 UTC (rev 47952)
+++ trunk/mcs/class/corlib/Mono.Globalization.Unicode/NormalizationTableUtil.cs 2005-08-03 12:24:02 UTC (rev 47953)
@@ -9,7 +9,7 @@
                public static readonly CodePointIndexer prop;
                public static readonly CodePointIndexer map;
                public static readonly CodePointIndexer Combining;
-               public static readonly CodePointIndexer MapIndexes;
+               public static readonly CodePointIndexer Composite;
                public static readonly CodePointIndexer Helper;
 
                static NormalizationTableUtil ()
@@ -52,10 +52,12 @@
                                0x3030, 0x30A0, 0xA810, 0xFB20, 0xFE30,
 //                             0x10A40, 0x1D1B0, 0x1D250
                                };
-                       int [] mapIndexStarts = new int [] {
+                       // since mapToCompositeIndex only holds canonical
+                       // mappings, those indexes could be still shorten.
+                       int [] compositeStarts = new int [] {
                                0x480, 0x1450, 0x16D0
                                };
-                       int [] mapIndexEnds = new int [] {
+                       int [] compositeEnds = new int [] {
                                0x10C0, 0x15D0, 0x2190
                                };
                        int [] helperStarts = new int [] {
@@ -75,8 +77,8 @@
                        map = new CodePointIndexer (mapStarts, mapEnds, 0, 0);
                        Combining = new CodePointIndexer (combiningStarts,
                                combiningEnds, 0, 0);
-                       MapIndexes = new CodePointIndexer (mapIndexStarts,
-                               mapIndexEnds, 0, 0);
+                       Composite = new CodePointIndexer (compositeStarts,
+                               compositeEnds, 0, 0);
                        Helper = new CodePointIndexer (helperStarts, helperEnds,
                                0, 0);
                }

Modified: trunk/mcs/class/corlib/Mono.Globalization.Unicode/create-normalization-source.cs
===================================================================
--- trunk/mcs/class/corlib/Mono.Globalization.Unicode/create-normalization-source.cs    2005-08-03 11:39:01 UTC (rev 47952)
+++ trunk/mcs/class/corlib/Mono.Globalization.Unicode/create-normalization-source.cs    2005-08-03 12:24:02 UTC (rev 47953)
@@ -80,7 +80,6 @@
                        } catch (Exception ex) {
                                throw new InvalidOperationException ("Internal 
error at line " + lineCount + " : " + ex);
                        }
-                       ComputeSafety ();
                        CompressUCD ();
                        Serialize ();
                        ProcessCombiningClass ();
@@ -148,7 +147,7 @@
                        COut.WriteLine ("0};");
                        CSOut.WriteLine ("};");
 
-                       ushort [] mapIndexes = new ushort [0x2600];
+                       ushort [] mapIndexes = new ushort [char.MaxValue + 1];
 
                        // GetPrimaryCompositeFromMapIndex ()
                        int currentIndex = -1;
@@ -157,11 +156,12 @@
                                        continue;
                                if (!m.IsCanonical)
                                        continue;
-                               mapIndexes [m.MapIndex] = (ushort) m.CodePoint;
+                               // FIXME: why this shift happens?
+                               mapIndexes [m.MapIndex - 2] = (ushort) 
m.CodePoint;
                                currentIndex = m.MapIndex;
                        }
 
-                       mapIndexes = CodePointIndexer.CompressArray 
(mapIndexes, typeof (ushort), NUtil.MapIndexes) as ushort [];
+                       mapIndexes = CodePointIndexer.CompressArray 
(mapIndexes, typeof (ushort), NUtil.Composite) as ushort [];
 
                        COut.WriteLine ("static const guint16 mapIdxToComposite 
[] = {");
                        CSOut.WriteLine ("static ushort [] mapIdxToCompositeArr 
= new ushort [] {");
@@ -173,7 +173,7 @@
                                        CSOut.Write ("0x{0:X04},", value);
                                COut.Write ("{0},", value);
                                if (i % 16 == 15) {
-                                       CSOut.WriteLine (" // {0:X04}", 
NUtil.MapIndexes.ToCodePoint (i - 15));
+                                       CSOut.WriteLine (" // {0:X04}", 
NUtil.Composite.ToCodePoint (i - 15));
                                        COut.WriteLine ();
                                }
                        }
@@ -242,17 +242,6 @@
                        }
                }
 
-               private void ComputeSafety ()
-               {
-                       foreach (int i in mappedChars) {
-                               if (i == 0 || i > char.MaxValue)
-                                       continue;
-                               if (0x3400 <= i && i <= 0xA000)
-                                       continue;
-                               SetProp (i, -1, IsUnsafe);
-                       }
-               }
-
                private void CompressUCD ()
                {
                        mappings.Sort (comparer);
@@ -322,6 +311,7 @@
                                if (combiningCategory.Length > 0)
                                        mappedCharsValue = canon.Substring 
(combiningCategory.Length + 2).Trim ();
                                if (mappedCharsValue.Length > 0) {
+                                       int start = mappedCharCount;
                                        mappings.Add (new CharMapping (cp,
                                                mappedCharCount, 
                                                combiningCategory.Length == 0));
@@ -330,12 +320,26 @@
                                                AddMappedChars (cp,
                                                        int.Parse (v, 
NumberStyles.HexNumber));
                                        AddMappedChars (cp, 0);
+                                       // For canonical composite, set IsUnsafe
+                                       if (combiningCategory == "") {
+                                               for (int ca = start; ca < 
mappedCharCount - 1; ca++)
+                                                       FillUnsafe (mappedChars 
[ca]);
+                                       }
                                }
                        }
                        if (reader != Console.In)
                                reader.Close ();
                }
 
+               private void FillUnsafe (int i)
+               {
+                       if (i < 0 || i > char.MaxValue)
+                               return;
+                       if (0x3400 <= i && i <= 0x9FBB)
+                               return;
+                       SetProp (i, -1, IsUnsafe);
+               }
+
                private void AddMappedChars (int cp, int cv)
                {
                        if (mappedCharCount == mappedChars.Length) {

_______________________________________________
Mono-patches maillist  -  [email protected]
http://lists.ximian.com/mailman/listinfo/mono-patches

Reply via email to