Author: pmichaud
Date: Sat Dec 20 06:17:50 2008
New Revision: 34143
Modified:
trunk/src/charset/unicode.c
Log:
[core]: Adjust .CCLASS_WORD and .CCLASS_ALPHABETIC so they no longer throw
exceptions on Unicode strings when ICU isn't present. This is a big cheat --
it only handles codepoints below U+02B0 -- but it's a lot better than
getting the exception, and it allows a wider range of programs to run.
Modified: trunk/src/charset/unicode.c
==============================================================================
--- trunk/src/charset/unicode.c (original)
+++ trunk/src/charset/unicode.c Sat Dec 20 06:17:50 2008
@@ -745,6 +745,13 @@
if (flags == enum_cclass_any)
return 1;
+ /* All codepoints from u+0100 to u+02af are alphabetic, so we
+ * cheat on the WORD and ALPHABETIC properties to include these
+ * (and incorrectly exclude all others). This is a stopgap until
+ * ICU is everywhere, or we have better non-ICU unicode support. */
+ if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
+ return (codepoint < 0x2b0);
+
if (flags & enum_cclass_whitespace) {
/* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
switch (codepoint) {