andrei Tue Feb 7 00:13:54 2006 UTC Modified files: /php-src/ext/unicode unicode_iterators.c Log: Implement combining sequences support in TextIterator. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.14&r2=1.15&diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.14 php-src/ext/unicode/unicode_iterators.c:1.15 --- php-src/ext/unicode/unicode_iterators.c:1.14 Mon Feb 6 22:58:10 2006 +++ php-src/ext/unicode/unicode_iterators.c Tue Feb 7 00:13:54 2006 @@ -14,7 +14,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: unicode_iterators.c,v 1.14 2006/02/06 22:58:10 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.15 2006/02/07 00:13:54 andrei Exp $ */ /* * TODO @@ -45,14 +45,20 @@ uint32_t text_len; text_iter_type type; zval* current; + size_t current_alloc; union { struct { - int32_t index; - int32_t offset; + uint32_t index; + uint32_t offset; } cp; struct { - int32_t index; + uint32_t index; } cu; + struct { + uint32_t index; + uint32_t start; + uint32_t end; + } cs; } u; } text_iter_obj; @@ -153,9 +159,74 @@ text_iter_cp_rewind, }; -static text_iter_ops* iter_ops[2] = { +/* Combining sequence ops */ + +static int text_iter_cs_valid(text_iter_obj* object TSRMLS_DC) +{ + return (object->u.cs.end <= object->text_len); +} + +static void text_iter_cs_current(text_iter_obj* object TSRMLS_DC) +{ + uint32_t length = object->u.cs.end - object->u.cs.start; + if (length > object->current_alloc) { + object->current_alloc = length+1; + Z_USTRVAL_P(object->current) = eurealloc(Z_USTRVAL_P(object->current), object->current_alloc); + } + u_memcpy(Z_USTRVAL_P(object->current), object->text + object->u.cs.start, length); + Z_USTRVAL_P(object->current)[length] = 0; + Z_USTRLEN_P(object->current) = length; +} + +static int text_iter_cs_key(text_iter_obj* object TSRMLS_DC) +{ + return object->u.cs.index; +} + +static void text_iter_cs_next(text_iter_obj* object TSRMLS_DC) +{ + UChar32 cp; + uint32_t end; + + object->u.cs.start = object->u.cs.end; + U16_NEXT(object->text, object->u.cs.end, object->text_len, cp); + if (u_getCombiningClass(cp) == 0) { + end = object->u.cs.end; + while (end < object->text_len) { + U16_NEXT(object->text, end, object->text_len, cp); + if (u_getCombiningClass(cp) == 0) { + break; + } else { + object->u.cs.end = end; + } + } + } + object->u.cs.index++; +} + +static void text_iter_cs_rewind(text_iter_obj *object TSRMLS_DC) +{ + object->u.cs.start = 0; + object->u.cs.end = 0; + text_iter_cs_next(object TSRMLS_CC); /* find first sequence */ + object->u.cs.index = 0; /* because _next increments index */ +} + +static text_iter_ops text_iter_cs_ops = { + text_iter_cs_valid, + text_iter_cs_current, + text_iter_cs_key, + text_iter_cs_next, + text_iter_cs_rewind, +}; + + +/* Ops array */ + +static text_iter_ops* iter_ops[] = { &text_iter_cu_ops, &text_iter_cp_ops, + &text_iter_cs_ops, }; /* Iterator Funcs */ @@ -270,6 +341,7 @@ intern->type = ITER_CODE_POINT; MAKE_STD_ZVAL(intern->current); /* pre-allocate buffer for codepoint */ + intern->current_alloc = 3; Z_USTRVAL_P(intern->current) = eumalloc(3); Z_USTRVAL_P(intern->current)[0] = 0; Z_TYPE_P(intern->current) = IS_UNICODE;
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php