andrei Thu Jun 29 12:32:01 2006 UTC
Modified files:
/php-src/ext/unicode unicode_iterators.c
Log:
Try to make combining sequences work. Not entirely successful.
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.29&r2=1.30&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.29 php-src/ext/unicode/unicode_iterators.c:1.30
--- php-src/ext/unicode/unicode_iterators.c:1.29 Wed Jun 28 15:28:55 2006
+++ php-src/ext/unicode/unicode_iterators.c Thu Jun 29 12:32:00 2006
@@ -14,7 +14,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: unicode_iterators.c,v 1.29 2006/06/28 15:28:55 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.30 2006/06/29 12:32:00 andrei Exp $ */
/*
* TODO
@@ -187,21 +187,30 @@
static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC)
{
- if (flags & ITER_REVERSE) {
- return (object->u.cs.end > 0);
- } else {
- return (object->u.cs.end <= object->text_len);
- }
+ return (object->u.cs.start != UBRK_DONE);
}
static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC)
{
- uint32_t length = object->u.cs.end - object->u.cs.start;
- if (length+1 > object->current_alloc) {
- object->current_alloc = length+1;
- Z_USTRVAL_P(object->current) =
eurealloc(Z_USTRVAL_P(object->current), object->current_alloc);
+ uint32_t length;
+ UChar *start;
+
+ if (object->u.cs.start == UBRK_DONE || object->u.cs.end == UBRK_DONE) {
+ length = 0;
+ } else {
+ if (flags & ITER_REVERSE) {
+ start = object->text + object->u.cs.end;
+ } else {
+ start = object->text + object->u.cs.start;
+ }
+ length = abs(object->u.cs.end - object->u.cs.start);
+ if (length+1 > object->current_alloc) {
+ object->current_alloc = length+1;
+ Z_USTRVAL_P(object->current) =
eurealloc(Z_USTRVAL_P(object->current), object->current_alloc);
+ }
+ u_memcpy(Z_USTRVAL_P(object->current), start, length);
}
- u_memcpy(Z_USTRVAL_P(object->current), object->text +
object->u.cs.start, length);
+
Z_USTRVAL_P(object->current)[length] = 0;
Z_USTRLEN_P(object->current) = length;
}
@@ -213,7 +222,11 @@
static int text_iter_cs_offset(text_iter_obj* object, long flags TSRMLS_DC)
{
- return object->u.cs.start_cp_offset;
+ if (flags & ITER_REVERSE) {
+ return object->u.cs.end_cp_offset;
+ } else {
+ return object->u.cs.start_cp_offset;
+ }
}
static void text_iter_cs_next(text_iter_obj* object, long flags TSRMLS_DC)
@@ -221,21 +234,31 @@
UChar32 cp;
int32_t tmp, tmp2;
- if (text_iter_cs_valid(object, flags TSRMLS_CC)) {
- if (flags & ITER_REVERSE) {
- object->u.cs.end = object->u.cs.start;
- object->u.cs.end_cp_offset =
object->u.cs.start_cp_offset;
- U16_PREV(object->text, 0, object->u.cs.start, cp);
- object->u.cs.start_cp_offset--;
+ if (object->u.cs.start == UBRK_DONE) {
+ return;
+ }
+
+ object->u.cs.start = object->u.cs.end;
+ object->u.cs.start_cp_offset = object->u.cs.end_cp_offset;
+ if (flags & ITER_REVERSE) {
+ if (object->u.cs.end == 0) {
+ object->u.cs.end = UBRK_DONE;
+ object->u.cs.end_cp_offset = UBRK_DONE;
+ } else {
+ U16_PREV(object->text, 0, object->u.cs.end, cp);
+ object->u.cs.end_cp_offset--;
if (u_getCombiningClass(cp) != 0) {
do {
- U16_PREV(object->text, 0,
object->u.cs.start, cp);
- object->u.cs.start_cp_offset--;
- } while (object->u.cs.start > 0 &&
u_getCombiningClass(cp) != 0);
+ U16_PREV(object->text, 0,
object->u.cs.end, cp);
+ object->u.cs.end_cp_offset--;
+ } while (object->u.cs.end > 0 &&
u_getCombiningClass(cp) != 0);
}
+ }
+ } else {
+ if (object->u.cs.end == object->text_len) {
+ object->u.cs.end = UBRK_DONE;
+ object->u.cs.end_cp_offset = UBRK_DONE;
} else {
- object->u.cs.start = object->u.cs.end;
- object->u.cs.start_cp_offset =
object->u.cs.end_cp_offset;
U16_NEXT(object->text, object->u.cs.end,
object->text_len, cp);
object->u.cs.end_cp_offset++;
if (u_getCombiningClass(cp) == 0) {
@@ -253,8 +276,8 @@
}
}
}
- object->u.cs.index++;
}
+ object->u.cs.index++;
}
static void text_iter_cs_rewind(text_iter_obj *object, long flags TSRMLS_DC)
@@ -281,15 +304,11 @@
};
-/* UBreakIterator Character Ops */
+/* UBreakIterator Ops */
static int text_iter_brk_valid(text_iter_obj* object, long flags TSRMLS_DC)
{
- if (flags & ITER_REVERSE) {
- return (object->u.brk.bound != UBRK_DONE);
- } else {
- return (object->u.brk.bound != UBRK_DONE);
- }
+ return (object->u.brk.bound != UBRK_DONE);
}
static void text_iter_brk_current(text_iter_obj* object, long flags TSRMLS_DC)
@@ -297,22 +316,26 @@
UChar *start;
int32_t length = -1;
- if (flags & ITER_REVERSE) {
- if (object->u.brk.next == object->u.brk.bound) {
- object->u.brk.next =
ubrk_preceding(object->u.brk.n_iter, object->u.brk.bound);
- }
- start = object->text + object->u.brk.next;
- } else {
- if (object->u.brk.next == object->u.brk.bound) {
- object->u.brk.next =
ubrk_following(object->u.brk.n_iter, object->u.brk.bound);
+ if (object->u.brk.bound != UBRK_DONE) {
+ if (flags & ITER_REVERSE) {
+ if (object->u.brk.next == object->u.brk.bound) {
+ object->u.brk.next =
ubrk_preceding(object->u.brk.n_iter, object->u.brk.bound);
+ }
+ start = object->text + object->u.brk.next;
+ } else {
+ if (object->u.brk.next == object->u.brk.bound) {
+ object->u.brk.next =
ubrk_following(object->u.brk.n_iter, object->u.brk.bound);
+ }
+ start = object->text + object->u.brk.bound;
}
- start = object->text + object->u.brk.bound;
- }
- if (object->u.brk.next == UBRK_DONE) {
- length = 0;
+ if (object->u.brk.next == UBRK_DONE) {
+ length = 0;
+ } else {
+ length = abs(object->u.brk.next - object->u.brk.bound);
+ }
} else {
- length = abs(object->u.brk.next - object->u.brk.bound);
+ length = 0;
}
if (length != 0) {
@@ -350,7 +373,7 @@
object->u.brk.next = object->u.brk.bound;
if (object->u.brk.bound != UBRK_DONE) {
if (tmp - object->u.brk.bound > 1) {
- object->u.brk.cp_offset -=
u_countChar32(object->text, tmp - object->u.brk.bound);
+ object->u.brk.cp_offset -=
u_countChar32(object->text + object->u.brk.bound, tmp - object->u.brk.bound);
} else {
object->u.brk.cp_offset--;
}
@@ -362,7 +385,7 @@
object->u.brk.next = object->u.brk.bound;
if (object->u.brk.bound != UBRK_DONE) {
if (object->u.brk.bound - tmp > 1) {
- object->u.brk.cp_offset +=
u_countChar32(object->text, object->u.brk.bound - tmp);
+ object->u.brk.cp_offset +=
u_countChar32(object->text + tmp, object->u.brk.bound - tmp);
} else {
object->u.brk.cp_offset++;
}
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php