Author: pmichaud
Date: Sat Dec 20 06:17:50 2008
New Revision: 34143

Modified:
   trunk/src/charset/unicode.c

Log:
[core]:  Adjust .CCLASS_WORD and .CCLASS_ALPHABETIC to not throw exceptions
on Unicode strings when ICU isn't present.  This is a big cheat --
it only handles codepoints below U+02B0 -- but it's a lot better than
getting the exception and allows a wider range of programs to run.



Modified: trunk/src/charset/unicode.c
==============================================================================
--- trunk/src/charset/unicode.c (original)
+++ trunk/src/charset/unicode.c Sat Dec 20 06:17:50 2008
@@ -745,6 +745,13 @@
     if (flags == enum_cclass_any)
         return 1;
 
+    /* All codepoints from U+0100 to U+02AF are alphabetic, so we
+     * cheat on the WORD and ALPHABETIC properties to include these
+     * (and incorrectly exclude everything from U+02B0 up).  This is a
+     * stopgap until ICU is everywhere, or we have better non-ICU Unicode support. */
+    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
+        return (codepoint < 0x2b0);
+
     if (flags & enum_cclass_whitespace) {
         /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
         switch (codepoint) {

Reply via email to