Author: atsushi
Date: 2005-08-03 08:24:02 -0400 (Wed, 03 Aug 2005)
New Revision: 47953
Modified:
trunk/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
trunk/mcs/class/corlib/Mono.Globalization.Unicode/Normalization.cs
trunk/mcs/class/corlib/Mono.Globalization.Unicode/NormalizationTableUtil.cs
trunk/mcs/class/corlib/Mono.Globalization.Unicode/create-normalization-source.cs
Log:
2005-08-03 Atsushi Enomoto <[EMAIL PROTECTED]>
* create-normalization-source.cs,
NormalizationTableUtil.cs,
Normalization.cs : several bug fixes for index miscomputation.
Renamed the `using` alias from Util to NUtil (csc would otherwise break).
Primary combine safety is now computed while parsing UnicodeData.txt.
Maximum NFKD length is 18, not 4 (U+FDFA).
Modified: trunk/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
===================================================================
--- trunk/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog 2005-08-03
11:39:01 UTC (rev 47952)
+++ trunk/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog 2005-08-03
12:24:02 UTC (rev 47953)
@@ -1,3 +1,12 @@
+2005-08-03 Atsushi Enomoto <[EMAIL PROTECTED]>
+
+ * create-normalization-source.cs,
+ NormalizationTableUtil.cs,
+ Normalization.cs : several bugfixes on index miscomputation.
+ Renamed using aliases (csc will bork). Primary combine safety is now
+ computed during UnicodeData.txt parse.
+ Maximum NFKD length was 18, not 4 (U+FDFA).
+
2005-08-02 Atsushi Enomoto <[EMAIL PROTECTED]>
* managed-collation.patch : added Normalization support.
Modified: trunk/mcs/class/corlib/Mono.Globalization.Unicode/Normalization.cs
===================================================================
--- trunk/mcs/class/corlib/Mono.Globalization.Unicode/Normalization.cs
2005-08-03 11:39:01 UTC (rev 47952)
+++ trunk/mcs/class/corlib/Mono.Globalization.Unicode/Normalization.cs
2005-08-03 12:24:02 UTC (rev 47953)
@@ -3,7 +3,7 @@
using System.Text;
using System.Runtime.CompilerServices;
-using Util = Mono.Globalization.Unicode.NormalizationTableUtil;
+using NUtil = Mono.Globalization.Unicode.NormalizationTableUtil;
namespace Mono.Globalization.Unicode
{
@@ -22,7 +22,7 @@
public const int NoNfkc = 16;
public const int MaybeNfkc = 32;
public const int FullCompositionExclusion = 64;
- public const int IsSafe = 128;
+ public const int IsUnsafe = 128;
// public const int ExpandOnNfd = 256;
// public const int ExpandOnNfc = 512;
// public const int ExpandOnNfkd = 1024;
@@ -30,35 +30,36 @@
static uint PropValue (int cp)
{
- return props [Util.PropIdx (cp)];
+ return props [NUtil.PropIdx (cp)];
}
static int CharMapIdx (int cp)
{
- return charMapIndex [Util.MapIdx (cp)];
+ return charMapIndex [NUtil.MapIdx (cp)];
}
- static int GetComposedStringLength (int mapIdx)
+ static int GetComposedStringLength (int ch)
{
- int i = mapIdx;
+ int start = charMapIndex [NUtil.MapIdx (ch)];
+ int i = start;
while (mappedChars [i] != 0)
i++;
- return i - mapIdx;
+ return i - start;
}
static byte GetCombiningClass (int c)
{
- return combiningClass [Util.Combining.ToIndex (c)];
+ return combiningClass [NUtil.Combining.ToIndex (c)];
}
static int GetPrimaryCompositeFromMapIndex (int src)
{
- return mapIdxToComposite [Util.MapIndexes.ToIndex
(src)];
+ return mapIdxToComposite [NUtil.Composite.ToIndex
(src)];
}
static short GetPrimaryCompositeHelperIndex (int cp)
{
- return helperIndex [Util.Helper.ToIndex (cp)];
+ return helperIndex [NUtil.Helper.ToIndex (cp)];
}
static int GetPrimaryCompositeCharIndex (object chars, int
start, int charsLength)
@@ -119,6 +120,13 @@
return null;
}
+ private static bool CanBePrimaryComposite (int i)
+ {
+ if (i >= 0x3400 && i <= 0x9FBB)
+ return GetPrimaryCompositeHelperIndex (i) != 0;
+ return (PropValue (i) & IsUnsafe) != 0;
+ }
+
private static void Combine (StringBuilder sb, int start, int
checkType)
{
for (int i = start; i < sb.Length; i++) {
@@ -135,40 +143,43 @@
}
int cur = i;
- // FIXME: It should use IsUnsafe flag.
// FIXME: It should check "blocked" too
for (;i >= 0; i--)
- if (QuickCheck (sb [i], checkType) ==
NormalizationCheck.Yes)
+ if (!CanBePrimaryComposite ((int) sb
[i]))
break;
i++;
-
// Now i is the "starter"
-
- int ch = 0;
int idx = 0;
for (; i < cur; i++) {
- idx = GetPrimaryComposite (sb, (int) sb
[i], sb.Length, i, ref ch);
+ idx = GetPrimaryCompositeMapIndex (sb,
(int) sb [i], sb.Length, i);
if (idx > 0)
break;
}
- if (idx == 0)
+ if (idx == 0) {
+ i = cur;
continue;
- sb.Remove (i, GetComposedStringLength (idx));
- sb.Insert (i, (char) ch);
- i--; // apply recursively
+ }
+ int ch = GetPrimaryCompositeFromMapIndex (idx);
+ int len = GetComposedStringLength (ch);
+ if (ch == 0 || len == 0) {
+ // FIXME: this actually happens
+ // throw new SystemException ("Internal
error: should not happen.");
+ i = cur;
+ continue;
+ }
+ sb.Remove (i, len);
+ sb.Insert (i, (char) ch); // always single
character
+ i = cur - 1; // apply recursively
}
}
- static int GetPrimaryComposite (object o, int cur, int length,
int bufferPos, ref int ch)
+ static int GetPrimaryCompositeMapIndex (object o, int cur, int
length, int bufferPos)
{
if ((PropValue (cur) & FullCompositionExclusion) != 0)
return 0;
if (GetCombiningClass (cur) != 0)
return 0; // not a starter
- int idx = GetPrimaryCompositeCharIndex (o, bufferPos,
length);
- if (idx == 0)
- return 0;
- return GetPrimaryCompositeFromMapIndex (idx);
+ return GetPrimaryCompositeCharIndex (o, bufferPos,
length);
}
static string Decompose (string source, int checkType)
@@ -230,7 +241,7 @@
sb = new StringBuilder (s.Length + 100);
sb.Append (s, start, i - start);
if (buf == null)
- buf = new int [5];
+ buf = new int [19];
GetCanonical (s [i], buf, 0);
for (int x = 0; ; x++) {
if (buf [x] == 0)
@@ -311,23 +322,30 @@
public static bool IsNormalized (string source, int type)
{
- int prevCC = -1;
+// int prevCC = -1;
for (int i = 0; i < source.Length; i++) {
- int cc = GetCombiningClass (source [i]);
- if (cc != 0 && cc < prevCC)
- return false;
- prevCC = cc;
+// int cc = GetCombiningClass (source [i]);
+// if (cc != 0 && cc < prevCC)
+// return false;
+// prevCC = cc;
switch (QuickCheck (source [i], type)) {
case NormalizationCheck.Yes:
break;
case NormalizationCheck.No:
return false;
case NormalizationCheck.Maybe:
- int ch = 0;
- if (GetPrimaryComposite (source,
- source [i], source.Length,
- i, ref ch) != 0)
- return false;
+ // partly copied from Combine()
+ int cur = i;
+ // FIXME: It should check "blocked" too
+ for (;i >= 0; i--)
+ if (!CanBePrimaryComposite
((int) source [i]))
+ break;
+ i++;
+ // Now i is the "starter"
+ for (; i < cur; i++) {
+ if
(GetPrimaryCompositeCharIndex (source, i, source.Length) != 0)
+ return false;
+ }
break;
}
}
Modified:
trunk/mcs/class/corlib/Mono.Globalization.Unicode/NormalizationTableUtil.cs
===================================================================
--- trunk/mcs/class/corlib/Mono.Globalization.Unicode/NormalizationTableUtil.cs
2005-08-03 11:39:01 UTC (rev 47952)
+++ trunk/mcs/class/corlib/Mono.Globalization.Unicode/NormalizationTableUtil.cs
2005-08-03 12:24:02 UTC (rev 47953)
@@ -9,7 +9,7 @@
public static readonly CodePointIndexer prop;
public static readonly CodePointIndexer map;
public static readonly CodePointIndexer Combining;
- public static readonly CodePointIndexer MapIndexes;
+ public static readonly CodePointIndexer Composite;
public static readonly CodePointIndexer Helper;
static NormalizationTableUtil ()
@@ -52,10 +52,12 @@
0x3030, 0x30A0, 0xA810, 0xFB20, 0xFE30,
// 0x10A40, 0x1D1B0, 0x1D250
};
- int [] mapIndexStarts = new int [] {
+ // since mapToCompositeIndex only holds canonical
+ // mappings, those indexes could be still shorten.
+ int [] compositeStarts = new int [] {
0x480, 0x1450, 0x16D0
};
- int [] mapIndexEnds = new int [] {
+ int [] compositeEnds = new int [] {
0x10C0, 0x15D0, 0x2190
};
int [] helperStarts = new int [] {
@@ -75,8 +77,8 @@
map = new CodePointIndexer (mapStarts, mapEnds, 0, 0);
Combining = new CodePointIndexer (combiningStarts,
combiningEnds, 0, 0);
- MapIndexes = new CodePointIndexer (mapIndexStarts,
- mapIndexEnds, 0, 0);
+ Composite = new CodePointIndexer (compositeStarts,
+ compositeEnds, 0, 0);
Helper = new CodePointIndexer (helperStarts, helperEnds,
0, 0);
}
Modified:
trunk/mcs/class/corlib/Mono.Globalization.Unicode/create-normalization-source.cs
===================================================================
---
trunk/mcs/class/corlib/Mono.Globalization.Unicode/create-normalization-source.cs
2005-08-03 11:39:01 UTC (rev 47952)
+++
trunk/mcs/class/corlib/Mono.Globalization.Unicode/create-normalization-source.cs
2005-08-03 12:24:02 UTC (rev 47953)
@@ -80,7 +80,6 @@
} catch (Exception ex) {
throw new InvalidOperationException ("Internal
error at line " + lineCount + " : " + ex);
}
- ComputeSafety ();
CompressUCD ();
Serialize ();
ProcessCombiningClass ();
@@ -148,7 +147,7 @@
COut.WriteLine ("0};");
CSOut.WriteLine ("};");
- ushort [] mapIndexes = new ushort [0x2600];
+ ushort [] mapIndexes = new ushort [char.MaxValue + 1];
// GetPrimaryCompositeFromMapIndex ()
int currentIndex = -1;
@@ -157,11 +156,12 @@
continue;
if (!m.IsCanonical)
continue;
- mapIndexes [m.MapIndex] = (ushort) m.CodePoint;
+ // FIXME: why this shift happens?
+ mapIndexes [m.MapIndex - 2] = (ushort)
m.CodePoint;
currentIndex = m.MapIndex;
}
- mapIndexes = CodePointIndexer.CompressArray
(mapIndexes, typeof (ushort), NUtil.MapIndexes) as ushort [];
+ mapIndexes = CodePointIndexer.CompressArray
(mapIndexes, typeof (ushort), NUtil.Composite) as ushort [];
COut.WriteLine ("static const guint16 mapIdxToComposite
[] = {");
CSOut.WriteLine ("static ushort [] mapIdxToCompositeArr
= new ushort [] {");
@@ -173,7 +173,7 @@
CSOut.Write ("0x{0:X04},", value);
COut.Write ("{0},", value);
if (i % 16 == 15) {
- CSOut.WriteLine (" // {0:X04}",
NUtil.MapIndexes.ToCodePoint (i - 15));
+ CSOut.WriteLine (" // {0:X04}",
NUtil.Composite.ToCodePoint (i - 15));
COut.WriteLine ();
}
}
@@ -242,17 +242,6 @@
}
}
- private void ComputeSafety ()
- {
- foreach (int i in mappedChars) {
- if (i == 0 || i > char.MaxValue)
- continue;
- if (0x3400 <= i && i <= 0xA000)
- continue;
- SetProp (i, -1, IsUnsafe);
- }
- }
-
private void CompressUCD ()
{
mappings.Sort (comparer);
@@ -322,6 +311,7 @@
if (combiningCategory.Length > 0)
mappedCharsValue = canon.Substring
(combiningCategory.Length + 2).Trim ();
if (mappedCharsValue.Length > 0) {
+ int start = mappedCharCount;
mappings.Add (new CharMapping (cp,
mappedCharCount,
combiningCategory.Length == 0));
@@ -330,12 +320,26 @@
AddMappedChars (cp,
int.Parse (v,
NumberStyles.HexNumber));
AddMappedChars (cp, 0);
+ // For canonical composite, set IsUnsafe
+ if (combiningCategory == "") {
+ for (int ca = start; ca <
mappedCharCount - 1; ca++)
+ FillUnsafe (mappedChars
[ca]);
+ }
}
}
if (reader != Console.In)
reader.Close ();
}
+ private void FillUnsafe (int i)
+ {
+ if (i < 0 || i > char.MaxValue)
+ return;
+ if (0x3400 <= i && i <= 0x9FBB)
+ return;
+ SetProp (i, -1, IsUnsafe);
+ }
+
private void AddMappedChars (int cp, int cv)
{
if (mappedCharCount == mappedChars.Length) {
_______________________________________________
Mono-patches maillist - [email protected]
http://lists.ximian.com/mailman/listinfo/mono-patches