Author: leo
Date: Thu Nov 3 11:43:59 2005
New Revision: 9757
Modified:
trunk/charset/unicode.c
trunk/t/op/string_cclass.t
Log:
use the iso-8859-1 cclass table for codepoints < 256; fix find_not_cclass;
more verbose errors in test
Modified: trunk/charset/unicode.c
==============================================================================
--- trunk/charset/unicode.c (original)
+++ trunk/charset/unicode.c Thu Nov 3 11:43:59 2005
@@ -264,22 +264,23 @@ is_cclass(Interp *interpreter, PARROT_CC
if (offset >= source_string->strlen)
return 0;
codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
+ if (codepoint >= 256) {
#if PARROT_HAS_ICU
- for (mask = enum_cclass_uppercase;
- mask <= enum_cclass_word ; mask <<= 1) {
- bit = mask & flags;
- if (!bit)
- continue;
- if (is_foo(interpreter, codepoint, bit))
- return 1;
- }
- return 0;
+ for (mask = enum_cclass_uppercase;
+ mask <= enum_cclass_word ; mask <<= 1) {
+ bit = mask & flags;
+ if (!bit)
+ continue;
+ if (is_foo(interpreter, codepoint, bit))
+ return 1;
+ }
+ return 0;
#else
- if (codepoint >= 256)
real_exception(interpreter, NULL, E_LibraryNotLoadedError,
"no ICU lib loaded");
- return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
#endif
+ }
+ return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
}
static INTVAL
@@ -296,23 +297,26 @@ find_cclass(Interp *interpreter, PARROT_
end = source_string->strlen < end ? source_string->strlen : end;
for (; pos < end; ++pos) {
codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, pos);
+ if (codepoint >= 256) {
#if PARROT_HAS_ICU
- for (mask = enum_cclass_uppercase;
- mask <= enum_cclass_word ; mask <<= 1) {
- bit = mask & flags;
- if (!bit)
- continue;
- if (is_foo(interpreter, codepoint, bit))
- return pos;
- }
+ for (mask = enum_cclass_uppercase;
+ mask <= enum_cclass_word ; mask <<= 1) {
+ bit = mask & flags;
+ if (!bit)
+ continue;
+ if (is_foo(interpreter, codepoint, bit))
+ return pos;
+ }
#else
- if (codepoint >= 256)
real_exception(interpreter, NULL, E_LibraryNotLoadedError,
"no ICU lib loaded");
- if ((Parrot_iso_8859_1_typetable[codepoint] & flags) != 0) {
- return pos;
- }
#endif
+ }
+ else {
+ if (Parrot_iso_8859_1_typetable[codepoint] & flags) {
+ return pos;
+ }
+ }
}
return end;
}
@@ -330,24 +334,27 @@ find_not_cclass(Interp *interpreter, PAR
assert(source_string != 0);
end = source_string->strlen < end ? source_string->strlen : end;
for (; pos < end; ++pos) {
- codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, pos);
+ codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, pos);
+ if (codepoint >= 256) {
#if PARROT_HAS_ICU
- for (mask = enum_cclass_uppercase;
- mask <= enum_cclass_word ; mask <<= 1) {
- bit = mask & flags;
- if (!bit)
- continue;
- if (!is_foo(interpreter, codepoint, bit))
- return pos;
- }
+ for (mask = enum_cclass_uppercase;
+ mask <= enum_cclass_word ; mask <<= 1) {
+ bit = mask & flags;
+ if (!bit)
+ continue;
+ if (!is_foo(interpreter, codepoint, bit))
+ return pos;
+ }
#else
- if (codepoint >= 256)
real_exception(interpreter, NULL, E_LibraryNotLoadedError,
"no ICU lib loaded");
- if ((Parrot_iso_8859_1_typetable[codepoint] & flags) != 0) {
- return pos;
- }
#endif
+ }
+ else {
+ if (!(Parrot_iso_8859_1_typetable[codepoint] & flags)) {
+ return pos;
+ }
+ }
}
return end;
}
Modified: trunk/t/op/string_cclass.t
==============================================================================
--- trunk/t/op/string_cclass.t (original)
+++ trunk/t/op/string_cclass.t Thu Nov 3 11:43:59 2005
@@ -324,6 +324,14 @@ pir_output_is(<<"CODE", <<'OUT', "unicod
loop:
result = is_cclass .CCLASS_WHITESPACE, s, i
print result
+ if result goto ok
+ \$S0 = s[i]
+ \$I0 = ord \$S0
+ \$P0 = new .ResizablePMCArray
+ push \$P0, \$I0
+ \$S0 = sprintf "\\nchar %#x not reported as ws\\n", \$P0
+ print \$S0
+ok:
inc i
if i < len goto loop
print "\\n"