andrei Wed Jun 28 14:12:14 2006 UTC Modified files: /php-src/ext/unicode unicode.c unicode_iterators.c Log: Rework the break-iterator-based implementation to make it more generic for forward and backward iterators and to return current elements properly.
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode.c?r1=1.38&r2=1.39&diff_format=u Index: php-src/ext/unicode/unicode.c diff -u php-src/ext/unicode/unicode.c:1.38 php-src/ext/unicode/unicode.c:1.39 --- php-src/ext/unicode/unicode.c:1.38 Wed Jun 21 20:17:21 2006 +++ php-src/ext/unicode/unicode.c Wed Jun 28 14:12:14 2006 @@ -15,7 +15,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: unicode.c,v 1.38 2006/06/21 20:17:21 andrei Exp $ */ +/* $Id: unicode.c,v 1.39 2006/06/28 14:12:14 andrei Exp $ */ #include "php_unicode.h" #include "zend_unicode.h" @@ -362,7 +362,6 @@ PHP_FE(char_enum_types, NULL) /* text transformation functions */ - PHP_FE(str_transliterate, NULL) { NULL, NULL, NULL } http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.26&r2=1.27&diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.26 php-src/ext/unicode/unicode_iterators.c:1.27 --- php-src/ext/unicode/unicode_iterators.c:1.26 Sat Jun 24 21:57:14 2006 +++ php-src/ext/unicode/unicode_iterators.c Wed Jun 28 14:12:14 2006 @@ -14,7 +14,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: unicode_iterators.c,v 1.26 2006/06/24 21:57:14 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.27 2006/06/28 14:12:14 andrei Exp $ */ /* * TODO @@ -65,11 +65,12 @@ int32_t end_cp_offset; } cs; struct { - int32_t start; - int32_t end; + int32_t bound; + int32_t next; int32_t index; int32_t cp_offset; UBreakIterator *iter; + UBreakIterator *n_iter; } brk; } u; zend_object_iterator iter; @@ -277,27 +278,41 @@ static int text_iter_brk_char_valid(text_iter_obj* object, long flags TSRMLS_DC) { if (flags & ITER_REVERSE) { - return (object->u.brk.start != UBRK_DONE); + return (object->u.brk.bound != UBRK_DONE); } else { - return (object->u.brk.end != UBRK_DONE); + return (object->u.brk.bound != UBRK_DONE); } } static void text_iter_brk_char_current(text_iter_obj* 
object, long flags TSRMLS_DC) { - uint32_t length; - int32_t start = object->u.brk.start; - int32_t end = object->u.brk.end; + UChar *start; + int32_t length = -1; + + if (flags & ITER_REVERSE) { + if (object->u.brk.next == object->u.brk.bound) { + object->u.brk.next = ubrk_preceding(object->u.brk.n_iter, object->u.brk.bound); + } + start = object->text + object->u.brk.next; + } else { + if (object->u.brk.next == object->u.brk.bound) { + object->u.brk.next = ubrk_following(object->u.brk.n_iter, object->u.brk.bound); + } + start = object->text + object->u.brk.bound; + } + + if (object->u.brk.next == UBRK_DONE) { + length = 0; + } else { + length = abs(object->u.brk.next - object->u.brk.bound); + } - if (start != UBRK_DONE && end != UBRK_DONE) { - length = end - start; + if (length != 0) { if (length+1 > object->current_alloc) { object->current_alloc = length+1; Z_USTRVAL_P(object->current) = eurealloc(Z_USTRVAL_P(object->current), object->current_alloc); } - u_memcpy(Z_USTRVAL_P(object->current), object->text + start, length); - } else { - length = 0; + u_memcpy(Z_USTRVAL_P(object->current), start, length); } Z_USTRVAL_P(object->current)[length] = 0; @@ -316,43 +331,49 @@ static void text_iter_brk_char_next(text_iter_obj* object, long flags TSRMLS_DC) { - if (text_iter_brk_char_valid(object, flags TSRMLS_CC)) { - if (flags & ITER_REVERSE) { - object->u.brk.end = object->u.brk.start; - object->u.brk.start = ubrk_previous(object->u.brk.iter); - if (object->u.brk.end - object->u.brk.start > 1) { - object->u.brk.cp_offset -= u_countChar32(object->text, object->u.brk.end - object->u.brk.start); + int32_t tmp = object->u.brk.bound; + + if (object->u.brk.bound == UBRK_DONE) { + return; + } + + if (flags & ITER_REVERSE) { + object->u.brk.bound = ubrk_previous(object->u.brk.iter); + object->u.brk.next = object->u.brk.bound; + if (object->u.brk.bound != UBRK_DONE) { + if (tmp - object->u.brk.bound > 1) { + object->u.brk.cp_offset -= u_countChar32(object->text, tmp - 
object->u.brk.bound); } else { object->u.brk.cp_offset--; } - if (object->u.brk.start == UBRK_DONE) { - object->u.brk.end = UBRK_DONE; - } } else { - if (object->u.brk.end - object->u.brk.start > 1) { - object->u.brk.cp_offset += u_countChar32(object->text, object->u.brk.end - object->u.brk.start); + object->u.brk.cp_offset = UBRK_DONE; + } + } else { + object->u.brk.bound = ubrk_next(object->u.brk.iter); + object->u.brk.next = object->u.brk.bound; + if (object->u.brk.bound != UBRK_DONE) { + if (object->u.brk.bound - tmp > 1) { + object->u.brk.cp_offset += u_countChar32(object->text, object->u.brk.bound - tmp); } else { object->u.brk.cp_offset++; } - object->u.brk.start = object->u.brk.end; - object->u.brk.end = ubrk_next(object->u.brk.iter); - if (object->u.brk.end == UBRK_DONE) { - object->u.brk.start = UBRK_DONE; - } + } else { + object->u.brk.cp_offset = UBRK_DONE; } - object->u.brk.index++; } + object->u.brk.index++; } static void text_iter_brk_char_rewind(text_iter_obj *object, long flags TSRMLS_DC) { if (flags & ITER_REVERSE) { - object->u.brk.end = ubrk_last(object->u.brk.iter); - object->u.brk.start = ubrk_previous(object->u.brk.iter); - object->u.brk.cp_offset = u_countChar32(object->text, object->u.brk.start); + object->u.brk.bound = ubrk_last(object->u.brk.iter); + object->u.brk.next = ubrk_last(object->u.brk.n_iter); + object->u.brk.cp_offset = u_countChar32(object->text, object->u.brk.bound); } else { - object->u.brk.start = ubrk_first(object->u.brk.iter); - object->u.brk.end = ubrk_next(object->u.brk.iter); + object->u.brk.bound = ubrk_first(object->u.brk.iter); + object->u.brk.next = ubrk_first(object->u.brk.n_iter); object->u.brk.cp_offset = 0; } object->u.brk.index = 0; @@ -465,8 +486,13 @@ if (intern->text) { efree(intern->text); } - if (intern->type > ITER_CHARACTER && intern->u.brk.iter) { - ubrk_close(intern->u.brk.iter); + if (intern->type > ITER_CHARACTER) { + if (intern->u.brk.iter) { + ubrk_close(intern->u.brk.iter); + } + if 
(intern->u.brk.n_iter) { + ubrk_close(intern->u.brk.n_iter); + } } zval_ptr_dtor(&intern->current); efree(object); @@ -535,9 +561,11 @@ if (intern->type >= ITER_CHARACTER && intern->type < ITER_TYPE_LAST) { UErrorCode status = U_ZERO_ERROR; + UErrorCode status2 = U_ZERO_ERROR; locale = locale ? locale : UG(default_locale); intern->u.brk.iter = ubrk_open(brk_type_map[intern->type - ITER_CHARACTER], locale, text, text_len, &status); - if (!U_SUCCESS(status)) { + intern->u.brk.n_iter = ubrk_open(brk_type_map[intern->type - ITER_CHARACTER], locale, text, text_len, &status); + if (!U_SUCCESS(status) || !U_SUCCESS(status2)) { php_error(E_RECOVERABLE_ERROR, "Could not create UBreakIterator for '%s' locale: %s", locale, u_errorName(status)); return; } @@ -561,11 +589,7 @@ text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); iter_ops[intern->type]->next(intern, intern->flags TSRMLS_CC); - if (iter_ops[intern->type]->valid(intern, intern->flags TSRMLS_CC)) { - RETURN_LONG(iter_ops[intern->type]->offset(intern, intern->flags TSRMLS_CC)); - } else { - RETURN_LONG((long)UBRK_DONE); - } + RETURN_LONG(iter_ops[intern->type]->offset(intern, intern->flags TSRMLS_CC)); } PHP_METHOD(TextIterator, key) @@ -593,6 +617,17 @@ RETURN_LONG(iter_ops[intern->type]->offset(intern, intern->flags TSRMLS_CC)); } +PHP_METHOD(TextIterator, last) +{ + long flags; + zval *object = getThis(); + text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + + flags = intern->flags ^ ITER_REVERSE; + iter_ops[intern->type]->rewind(intern, flags TSRMLS_CC); + RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC)); +} + PHP_METHOD(TextIterator, offset) { zval *object = getThis(); @@ -607,13 +642,9 @@ zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); - flags = intern->flags | ITER_REVERSE; + flags = intern->flags ^ ITER_REVERSE; iter_ops[intern->type]->next(intern, 
flags TSRMLS_CC); - if (iter_ops[intern->type]->valid(intern, flags TSRMLS_CC)) { - RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC)); - } else { - RETURN_LONG((long)UBRK_DONE); - } + RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC)); } static zend_function_entry text_iterator_funcs[] = { @@ -628,6 +659,9 @@ PHP_ME(TextIterator, offset, NULL, ZEND_ACC_PUBLIC) PHP_ME(TextIterator, previous, NULL, ZEND_ACC_PUBLIC) + PHP_ME(TextIterator, last, NULL, ZEND_ACC_PUBLIC) + + PHP_MALIAS(TextIterator, first, rewind, NULL, ZEND_ACC_PUBLIC) {NULL, NULL, NULL} };
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php