andrei Fri Jul 7 22:34:46 2006 UTC Modified files: /php-src/ext/unicode unicode_iterators.c Log: Implement TextIterator::isBoundary() for break iterators. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.31&r2=1.32&diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.31 php-src/ext/unicode/unicode_iterators.c:1.32 --- php-src/ext/unicode/unicode_iterators.c:1.31 Fri Jul 7 21:41:18 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Jul 7 22:34:46 2006 @@ -14,7 +14,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: unicode_iterators.c,v 1.31 2006/07/07 21:41:18 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.32 2006/07/07 22:34:46 andrei Exp $ */ /* * TODO @@ -89,6 +89,7 @@ void (*next) (text_iter_obj* object, long flags TSRMLS_DC); void (*rewind) (text_iter_obj* object, long flags TSRMLS_DC); void (*following) (text_iter_obj* object, int32_t offset, long flags TSRMLS_DC); + zend_bool (*isBoundary)(text_iter_obj* object, int32_t offset, long flags TSRMLS_DC); } text_iter_ops; enum UBreakIteratorType brk_type_map[] = { @@ -478,6 +479,71 @@ } } +static zend_bool text_iter_brk_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) +{ + int32_t k, tmp; + UBool result; + + if (offset < 0) { + offset = 0; + } + + /* + * On invalid iterator we always want to start looking for the code unit + * offset from the beginning of the string. + */ + if (object->u.brk.cp_offset == UBRK_DONE) { + object->u.brk.cp_offset = 0; + object->u.brk.bound = 0; + } + + /* + * Try to locate the code unit position relative to the last known codepoint + * offset. + */ + k = tmp = object->u.brk.bound; + if (offset > object->u.brk.cp_offset) { + U16_FWD_N(object->text, k, object->text_len, offset - object->u.brk.cp_offset); + } else { + U16_BACK_N(object->text, 0, k, object->u.brk.cp_offset - offset); + } + + result = ubrk_isBoundary(object->u.brk.iter, k); + + object->u.brk.bound = ubrk_current(object->u.brk.iter); + object->u.brk.next = object->u.brk.bound; + + /* + * If boundary is the same one as where we were at before, simply return. + */ + if (object->u.brk.bound == tmp) { + return result; + } + + /* + * Adjust the internal codepoint offset based on how far we've moved. + */ + if (object->u.brk.bound != UBRK_DONE) { + if (object->u.brk.bound > tmp) { + if (object->u.brk.bound - tmp > 1) { + object->u.brk.cp_offset += u_countChar32(object->text + tmp, object->u.brk.bound - tmp); + } else { + object->u.brk.cp_offset++; + } + } else { + if (tmp - object->u.brk.bound > 1) { + object->u.brk.cp_offset -= u_countChar32(object->text + object->u.brk.bound, tmp - object->u.brk.bound); + } else { + object->u.brk.cp_offset--; + } + } + } else { + object->u.brk.cp_offset = UBRK_DONE; + } + + return result; +} + static text_iter_ops text_iter_brk_ops = { text_iter_brk_valid, text_iter_brk_current, @@ -486,6 +552,7 @@ text_iter_brk_next, text_iter_brk_rewind, text_iter_brk_following, + text_iter_brk_isBoundary, }; @@ -778,6 +845,23 @@ iter_ops[intern->type]->following(intern, offset, flags TSRMLS_CC); RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC)); } + +PHP_METHOD(TextIterator, isBoundary) +{ + long flags, offset; + zval *object = getThis(); + text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &offset) == FAILURE) { + return; + } + + /* + * ReverseTextIterator will behave the same as the normal one. + */ + RETURN_BOOL(iter_ops[intern->type]->isBoundary(intern, offset, flags TSRMLS_CC)); +} + static zend_function_entry text_iterator_funcs[] = { PHP_ME(TextIterator, __construct, NULL, ZEND_ACC_PUBLIC) @@ -794,6 +878,7 @@ PHP_ME(TextIterator, last, NULL, ZEND_ACC_PUBLIC) PHP_ME(TextIterator, following, NULL, ZEND_ACC_PUBLIC) PHP_ME(TextIterator, preceding, NULL, ZEND_ACC_PUBLIC) + PHP_ME(TextIterator, isBoundary, NULL, ZEND_ACC_PUBLIC) PHP_MALIAS(TextIterator, first, rewind, NULL, ZEND_ACC_PUBLIC) {NULL, NULL, NULL}
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php