andrei          Wed Jun 28 14:12:14 2006 UTC

  Modified files:              
    /php-src/ext/unicode        unicode.c unicode_iterators.c 
  Log:
  Rework break iterator based implementation to make it more generic for
  forward and backwards iterators and to return current elements properly.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode.c?r1=1.38&r2=1.39&diff_format=u
Index: php-src/ext/unicode/unicode.c
diff -u php-src/ext/unicode/unicode.c:1.38 php-src/ext/unicode/unicode.c:1.39
--- php-src/ext/unicode/unicode.c:1.38  Wed Jun 21 20:17:21 2006
+++ php-src/ext/unicode/unicode.c       Wed Jun 28 14:12:14 2006
@@ -15,7 +15,7 @@
   +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode.c,v 1.38 2006/06/21 20:17:21 andrei Exp $ */ 
+/* $Id: unicode.c,v 1.39 2006/06/28 14:12:14 andrei Exp $ */ 
 
 #include "php_unicode.h"
 #include "zend_unicode.h"
@@ -362,7 +362,6 @@
        PHP_FE(char_enum_types, NULL)
 
        /* text transformation functions */
-
        PHP_FE(str_transliterate, NULL)
 
        { NULL, NULL, NULL }
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.26&r2=1.27&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.26 php-src/ext/unicode/unicode_iterators.c:1.27
--- php-src/ext/unicode/unicode_iterators.c:1.26        Sat Jun 24 21:57:14 2006
+++ php-src/ext/unicode/unicode_iterators.c     Wed Jun 28 14:12:14 2006
@@ -14,7 +14,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode_iterators.c,v 1.26 2006/06/24 21:57:14 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.27 2006/06/28 14:12:14 andrei Exp $ */
 
 /*
  * TODO
@@ -65,11 +65,12 @@
                        int32_t end_cp_offset;
                } cs;
                struct {
-                       int32_t start;
-                       int32_t end;
+                       int32_t bound;
+                       int32_t next;
                        int32_t index;
                        int32_t cp_offset;
                        UBreakIterator *iter;
+                       UBreakIterator *n_iter;
                } brk;
        } u;
        zend_object_iterator iter;
@@ -277,27 +278,41 @@
 static int text_iter_brk_char_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
        if (flags & ITER_REVERSE) {
-               return (object->u.brk.start != UBRK_DONE);
+               return (object->u.brk.bound != UBRK_DONE);
        } else {
-               return (object->u.brk.end != UBRK_DONE);
+               return (object->u.brk.bound != UBRK_DONE);
        }
 }
 
 static void text_iter_brk_char_current(text_iter_obj* object, long flags TSRMLS_DC)
 {
-       uint32_t length;
-       int32_t start = object->u.brk.start;
-       int32_t end = object->u.brk.end;
+       UChar *start;
+       int32_t length = -1;
+
+       if (flags & ITER_REVERSE) {
+               if (object->u.brk.next == object->u.brk.bound) {
+                       object->u.brk.next = ubrk_preceding(object->u.brk.n_iter, object->u.brk.bound);
+               }
+               start = object->text + object->u.brk.next;
+       } else {
+               if (object->u.brk.next == object->u.brk.bound) {
+                       object->u.brk.next = ubrk_following(object->u.brk.n_iter, object->u.brk.bound);
+               }
+               start = object->text + object->u.brk.bound;
+       }
+
+       if (object->u.brk.next == UBRK_DONE) {
+               length = 0;
+       } else {
+               length = abs(object->u.brk.next - object->u.brk.bound);
+       }
 
-       if (start != UBRK_DONE && end != UBRK_DONE) {
-               length = end - start;
+       if (length != 0) {
                if (length+1 > object->current_alloc) {
                        object->current_alloc = length+1;
                        Z_USTRVAL_P(object->current) = eurealloc(Z_USTRVAL_P(object->current), object->current_alloc);
                }
-               u_memcpy(Z_USTRVAL_P(object->current), object->text + start, length);
-       } else {
-               length = 0;
+               u_memcpy(Z_USTRVAL_P(object->current), start, length);
        }
 
        Z_USTRVAL_P(object->current)[length] = 0;
@@ -316,43 +331,49 @@
 
 static void text_iter_brk_char_next(text_iter_obj* object, long flags TSRMLS_DC)
 {
-       if (text_iter_brk_char_valid(object, flags TSRMLS_CC)) {
-               if (flags & ITER_REVERSE) {
-                       object->u.brk.end = object->u.brk.start;
-                       object->u.brk.start = ubrk_previous(object->u.brk.iter);
-                       if (object->u.brk.end - object->u.brk.start > 1) {
-                               object->u.brk.cp_offset -= u_countChar32(object->text, object->u.brk.end - object->u.brk.start);
+       int32_t tmp = object->u.brk.bound;
+
+       if (object->u.brk.bound == UBRK_DONE) {
+               return;
+       }
+
+       if (flags & ITER_REVERSE) {
+               object->u.brk.bound = ubrk_previous(object->u.brk.iter);
+               object->u.brk.next  = object->u.brk.bound;
+               if (object->u.brk.bound != UBRK_DONE) {
+                       if (tmp - object->u.brk.bound > 1) {
+                               object->u.brk.cp_offset -= u_countChar32(object->text, tmp - object->u.brk.bound);
                        } else {
                                object->u.brk.cp_offset--;
                        }
-                       if (object->u.brk.start == UBRK_DONE) {
-                               object->u.brk.end = UBRK_DONE;
-                       }
                } else {
-                       if (object->u.brk.end - object->u.brk.start > 1) {
-                               object->u.brk.cp_offset += u_countChar32(object->text, object->u.brk.end - object->u.brk.start);
+                       object->u.brk.cp_offset = UBRK_DONE;
+               }
+       } else {
+               object->u.brk.bound = ubrk_next(object->u.brk.iter);
+               object->u.brk.next  = object->u.brk.bound;
+               if (object->u.brk.bound != UBRK_DONE) {
+                       if (object->u.brk.bound - tmp > 1) {
+                               object->u.brk.cp_offset += u_countChar32(object->text, object->u.brk.bound - tmp);
                        } else {
                                object->u.brk.cp_offset++;
                        }
-                       object->u.brk.start = object->u.brk.end;
-                       object->u.brk.end = ubrk_next(object->u.brk.iter);
-                       if (object->u.brk.end == UBRK_DONE) {
-                               object->u.brk.start = UBRK_DONE;
-                       }
+               } else {
+                       object->u.brk.cp_offset = UBRK_DONE;
                }
-               object->u.brk.index++;
        }
+       object->u.brk.index++;
 }
 
 static void text_iter_brk_char_rewind(text_iter_obj *object, long flags TSRMLS_DC)
 {
        if (flags & ITER_REVERSE) {
-               object->u.brk.end       = ubrk_last(object->u.brk.iter);
-               object->u.brk.start     = ubrk_previous(object->u.brk.iter);
-               object->u.brk.cp_offset = u_countChar32(object->text, object->u.brk.start);
+               object->u.brk.bound     = ubrk_last(object->u.brk.iter);
+               object->u.brk.next              = ubrk_last(object->u.brk.n_iter);
+               object->u.brk.cp_offset = u_countChar32(object->text, object->u.brk.bound);
        } else {
-               object->u.brk.start     = ubrk_first(object->u.brk.iter);
-               object->u.brk.end       = ubrk_next(object->u.brk.iter);
+               object->u.brk.bound     = ubrk_first(object->u.brk.iter);
+               object->u.brk.next              = ubrk_first(object->u.brk.n_iter);
                object->u.brk.cp_offset = 0;
        }
        object->u.brk.index = 0;
@@ -465,8 +486,13 @@
        if (intern->text) {
                efree(intern->text);
        }
-       if (intern->type > ITER_CHARACTER && intern->u.brk.iter) {
-               ubrk_close(intern->u.brk.iter);
+       if (intern->type > ITER_CHARACTER) {
+               if (intern->u.brk.iter) {
+                       ubrk_close(intern->u.brk.iter);
+               }
+               if (intern->u.brk.n_iter) {
+                       ubrk_close(intern->u.brk.n_iter);
+               }
        }
        zval_ptr_dtor(&intern->current);
        efree(object);
@@ -535,9 +561,11 @@
 
        if (intern->type >= ITER_CHARACTER && intern->type < ITER_TYPE_LAST) {
                UErrorCode status = U_ZERO_ERROR;
+               UErrorCode status2 = U_ZERO_ERROR;
                locale = locale ? locale : UG(default_locale);
                intern->u.brk.iter = ubrk_open(brk_type_map[intern->type - ITER_CHARACTER], locale, text, text_len, &status);
-               if (!U_SUCCESS(status)) {
+               intern->u.brk.n_iter = ubrk_open(brk_type_map[intern->type - ITER_CHARACTER], locale, text, text_len, &status);
+               if (!U_SUCCESS(status) || !U_SUCCESS(status2)) {
                        php_error(E_RECOVERABLE_ERROR, "Could not create UBreakIterator for '%s' locale: %s", locale, u_errorName(status));
                        return;
                }
@@ -561,11 +589,7 @@
        text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC);
 
        iter_ops[intern->type]->next(intern, intern->flags TSRMLS_CC);
-       if (iter_ops[intern->type]->valid(intern, intern->flags TSRMLS_CC)) {
-               RETURN_LONG(iter_ops[intern->type]->offset(intern, intern->flags TSRMLS_CC));
-       } else {
-               RETURN_LONG((long)UBRK_DONE);
-       }
+       RETURN_LONG(iter_ops[intern->type]->offset(intern, intern->flags TSRMLS_CC));
 }
 
 PHP_METHOD(TextIterator, key)
@@ -593,6 +617,17 @@
        RETURN_LONG(iter_ops[intern->type]->offset(intern, intern->flags TSRMLS_CC));
 }
 
+PHP_METHOD(TextIterator, last)
+{
+       long flags;
+       zval *object = getThis();
+       text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC);
+
+       flags = intern->flags ^ ITER_REVERSE;
+       iter_ops[intern->type]->rewind(intern, flags TSRMLS_CC);
+       RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC));
+}
+
 PHP_METHOD(TextIterator, offset)
 {
        zval *object = getThis();
@@ -607,13 +642,9 @@
        zval *object = getThis();
        text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC);
 
-       flags = intern->flags | ITER_REVERSE;
+       flags = intern->flags ^ ITER_REVERSE;
        iter_ops[intern->type]->next(intern, flags TSRMLS_CC);
-       if (iter_ops[intern->type]->valid(intern, flags TSRMLS_CC)) {
-               RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC));
-       } else {
-               RETURN_LONG((long)UBRK_DONE);
-       }
+       RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC));
 }
 
 static zend_function_entry text_iterator_funcs[] = {
@@ -628,6 +659,9 @@
 
        PHP_ME(TextIterator, offset,      NULL, ZEND_ACC_PUBLIC)
        PHP_ME(TextIterator, previous,    NULL, ZEND_ACC_PUBLIC)
+       PHP_ME(TextIterator, last,                NULL, ZEND_ACC_PUBLIC)
+
+       PHP_MALIAS(TextIterator, first, rewind, NULL, ZEND_ACC_PUBLIC)
        {NULL, NULL, NULL}
 };
 

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to