andrei          Fri Jul  7 22:34:46 2006 UTC

  Modified files:              
    /php-src/ext/unicode        unicode_iterators.c 
  Log:
  Implement TextIterator::isBoundary() for break iterators.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.31&r2=1.32&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.31 
php-src/ext/unicode/unicode_iterators.c:1.32
--- php-src/ext/unicode/unicode_iterators.c:1.31        Fri Jul  7 21:41:18 2006
+++ php-src/ext/unicode/unicode_iterators.c     Fri Jul  7 22:34:46 2006
@@ -14,7 +14,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode_iterators.c,v 1.31 2006/07/07 21:41:18 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.32 2006/07/07 22:34:46 andrei Exp $ */
 
 /*
  * TODO
@@ -89,6 +89,7 @@
        void (*next)      (text_iter_obj* object, long flags TSRMLS_DC);
        void (*rewind)    (text_iter_obj* object, long flags TSRMLS_DC);
        void (*following) (text_iter_obj* object, int32_t offset, long flags 
TSRMLS_DC);
+       zend_bool (*isBoundary)(text_iter_obj* object, int32_t offset, long 
flags TSRMLS_DC);
 } text_iter_ops;
 
 enum UBreakIteratorType brk_type_map[] = {
@@ -478,6 +479,71 @@
        }
 }
 
+/*
+ * Break-iterator handler for isBoundary(): asks ICU whether the code point
+ * offset `offset` lies on a boundary, then brings the cached code unit
+ * boundary and code point offset back in line with the ICU iterator.
+ * Negative offsets are treated as 0; `flags` is not consulted here.
+ */
+static zend_bool text_iter_brk_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC)
+{
+       int32_t cu_pos, prev_bound;
+       UBool is_bound;
+
+       offset = (offset < 0) ? 0 : offset;
+
+       /* An invalid iterator restarts its bookkeeping at the string start. */
+       if (object->u.brk.cp_offset == UBRK_DONE) {
+               object->u.brk.bound     = 0;
+               object->u.brk.cp_offset = 0;
+       }
+
+       /*
+        * Turn the requested code point offset into a code unit index by
+        * walking from the last known boundary rather than from the start.
+        */
+       prev_bound = object->u.brk.bound;
+       cu_pos     = prev_bound;
+       if (offset <= object->u.brk.cp_offset) {
+               U16_BACK_N(object->text, 0, cu_pos, object->u.brk.cp_offset - offset);
+       } else {
+               U16_FWD_N(object->text, cu_pos, object->text_len, offset - object->u.brk.cp_offset);
+       }
+
+       is_bound = ubrk_isBoundary(object->u.brk.iter, cu_pos);
+
+       /* The query may have moved the ICU iterator, so re-read its position. */
+       object->u.brk.bound = ubrk_current(object->u.brk.iter);
+       object->u.brk.next  = object->u.brk.bound;
+
+       /* Same boundary as before: the code point offset is still accurate. */
+       if (object->u.brk.bound == prev_bound) {
+               return is_bound;
+       }
+
+       /* ICU reported UBRK_DONE: mark the code point offset as invalid too. */
+       if (object->u.brk.bound == UBRK_DONE) {
+               object->u.brk.cp_offset = UBRK_DONE;
+               return is_bound;
+       }
+
+       /*
+        * Shift the code point offset by the number of code points between the
+        * old and new boundaries; a one code unit move counts as one code point.
+        */
+       if (object->u.brk.bound < prev_bound) {
+               int32_t span = prev_bound - object->u.brk.bound;
+
+               object->u.brk.cp_offset -= (span > 1) ? u_countChar32(object->text + object->u.brk.bound, span) : 1;
+       } else {
+               int32_t span = object->u.brk.bound - prev_bound;
+
+               object->u.brk.cp_offset += (span > 1) ? u_countChar32(object->text + prev_bound, span) : 1;
+       }
+
+       return is_bound;
+}
+
 static text_iter_ops text_iter_brk_ops = {
        text_iter_brk_valid,
        text_iter_brk_current,
@@ -486,6 +552,7 @@
        text_iter_brk_next,
        text_iter_brk_rewind,
        text_iter_brk_following,
+       text_iter_brk_isBoundary,
 };
 
 
@@ -778,6 +845,23 @@
        iter_ops[intern->type]->following(intern, offset, flags TSRMLS_CC);
        RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC));
 }
+
+/* bool TextIterator::isBoundary(int offset): asks the iterator type's isBoundary handler about offset. */
+PHP_METHOD(TextIterator, isBoundary)
+{
+       long flags = 0, offset; /* flags must not reach the handler uninitialized */
+       zval *object = getThis();
+       text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC);
+
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &offset) == FAILURE) {
+               return;
+       }
+
+       /* ReverseTextIterator will behave the same as the normal one; flags stays 0 (no bits set). */
+       /* NOTE(review): if sibling methods derive flags from intern, mirror that here - TODO confirm. */
+       RETURN_BOOL(iter_ops[intern->type]->isBoundary(intern, offset, flags TSRMLS_CC));
+}
+
 static zend_function_entry text_iterator_funcs[] = {
 
        PHP_ME(TextIterator, __construct, NULL, ZEND_ACC_PUBLIC)
@@ -794,6 +878,7 @@
        PHP_ME(TextIterator, last,                NULL, ZEND_ACC_PUBLIC)
        PHP_ME(TextIterator, following,   NULL, ZEND_ACC_PUBLIC)
        PHP_ME(TextIterator, preceding,   NULL, ZEND_ACC_PUBLIC)
+       PHP_ME(TextIterator, isBoundary,  NULL, ZEND_ACC_PUBLIC)
 
        PHP_MALIAS(TextIterator, first, rewind, NULL, ZEND_ACC_PUBLIC)
        {NULL, NULL, NULL}

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to