andrei          Mon Jul 10 20:14:12 2006 UTC

  Modified files:              
    /php-src/ext/unicode        unicode_iterators.c 
  Log:
  Fix combining sequence iterators for forward and backward movement.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.34&r2=1.35&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.34 
php-src/ext/unicode/unicode_iterators.c:1.35
--- php-src/ext/unicode/unicode_iterators.c:1.34        Sat Jul  8 18:46:24 2006
+++ php-src/ext/unicode/unicode_iterators.c     Mon Jul 10 20:14:12 2006
@@ -14,7 +14,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode_iterators.c,v 1.34 2006/07/08 18:46:24 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.35 2006/07/10 20:14:12 andrei Exp $ */
 
 /*
  * TODO
@@ -306,6 +306,59 @@
 
 /* Combining sequence ops */
 
+static void text_iter_helper_move(zend_bool forward, UChar *text, int32_t 
text_len, int32_t *offset, int32_t *cp_offset)
+{
+       UChar32 cp;
+       int32_t tmp, tmp2;
+
+       if (*offset == UBRK_DONE) {
+               return;
+       }
+
+       if (forward) {
+               if (*offset == text_len) {
+                       *offset    = UBRK_DONE;
+                       *cp_offset = UBRK_DONE;
+               } else {
+                       U16_NEXT(text, (*offset), text_len, cp);
+                       (*cp_offset)++;
+
+                       if (u_getCombiningClass(cp) == 0) {
+                               tmp = *offset;
+                               tmp2 = *cp_offset;
+                               /*
+                                * At the end of the string cp will be 0 
because of the
+                                * terminating NULL, so combining class will be 
0 as well.
+                                */
+                               while (tmp < text_len) {
+                                       U16_NEXT(text, tmp, text_len, cp);
+                                       tmp2++;
+                                       if (u_getCombiningClass(cp) == 0) {
+                                               break;
+                                       } else {
+                                               *offset    = tmp;
+                                               *cp_offset = tmp2;
+                                       }
+                               }
+                       }
+               }
+       } else {
+               if (*offset == 0) {
+                       *offset    = UBRK_DONE;
+                       *cp_offset = UBRK_DONE;
+               } else {
+                       U16_PREV(text, 0, (*offset), cp);
+                       (*cp_offset)--;
+                       if (u_getCombiningClass(cp) != 0) {
+                               do {
+                                       U16_PREV(text, 0, (*offset), cp);
+                                       (*cp_offset)--;
+                               } while (*offset > 0 && u_getCombiningClass(cp) 
!= 0);
+                       }
+               }
+       }
+}
+
 static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
        return (object->u.cs.start != UBRK_DONE);
@@ -313,25 +366,41 @@
 
 static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC)
 {
-       uint32_t length;
        UChar *start;
+       int32_t length = -1;
 
-       if (object->u.cs.start == UBRK_DONE || object->u.cs.end == UBRK_DONE) {
-               length = 0;
-       } else {
+       if (object->u.cs.start != UBRK_DONE) {
                if (flags & ITER_REVERSE) {
+                       if (object->u.cs.end == object->u.cs.start) {
+                               text_iter_helper_move(0, object->text, 
object->text_len,
+                                                                         
&object->u.cs.start, &object->u.cs.start_cp_offset);
+                       }
                        start = object->text + object->u.cs.end;
                } else {
+                       if (object->u.cs.end == object->u.cs.start) {
+                               text_iter_helper_move(1, object->text, 
object->text_len,
+                                                                         
&object->u.cs.end, &object->u.cs.end_cp_offset);
+                       }
                        start = object->text + object->u.cs.start;
                }
-               length = abs(object->u.cs.end - object->u.cs.start);
+
+               if (object->u.cs.end == UBRK_DONE) {
+                       length = 0;
+               } else {
+                       length = abs(object->u.cs.end - object->u.cs.start);
+               }
+       } else {
+               length = 0;
+       }
+   
+       if (length != 0) {
                if (length+1 > object->current_alloc) {
                        object->current_alloc = length+1;
                        Z_USTRVAL_P(object->current) = 
eurealloc(Z_USTRVAL_P(object->current), object->current_alloc);
                }
                u_memcpy(Z_USTRVAL_P(object->current), start, length);
        }
-   
+
        Z_USTRVAL_P(object->current)[length] = 0;
        Z_USTRLEN_P(object->current) = length;
 }
@@ -343,60 +412,23 @@
 
 static int text_iter_cs_offset(text_iter_obj* object, long flags TSRMLS_DC)
 {
-       if (flags & ITER_REVERSE) {
-               return object->u.cs.end_cp_offset;
-       } else {
-               return object->u.cs.start_cp_offset;
-       }
+       return object->u.cs.start_cp_offset;
 }
 
 static void text_iter_cs_next(text_iter_obj* object, long flags TSRMLS_DC)
 {
-       UChar32 cp;
-       int32_t tmp, tmp2;
-
        if (object->u.cs.start == UBRK_DONE) {
                return;
        }
 
-       object->u.cs.start = object->u.cs.end;
-       object->u.cs.start_cp_offset = object->u.cs.end_cp_offset;
        if (flags & ITER_REVERSE) {
-               if (object->u.cs.end == 0) {
-                       object->u.cs.end = UBRK_DONE;
-                       object->u.cs.end_cp_offset = UBRK_DONE;
-               } else {
-                       U16_PREV(object->text, 0, object->u.cs.end, cp);
-                       object->u.cs.end_cp_offset--;
-                       if (u_getCombiningClass(cp) != 0) {
-                               do {
-                                       U16_PREV(object->text, 0, 
object->u.cs.end, cp);
-                                       object->u.cs.end_cp_offset--;
-                               } while (object->u.cs.end > 0 && 
u_getCombiningClass(cp) != 0);
-                       }
-               }
+               text_iter_helper_move(0, object->text, object->text_len,
+                                                         &object->u.cs.start, 
&object->u.cs.start_cp_offset);
+               object->u.cs.end = object->u.cs.start;
        } else {
-               if (object->u.cs.end == object->text_len) {
-                       object->u.cs.end = UBRK_DONE;
-                       object->u.cs.end_cp_offset = UBRK_DONE;
-               } else {
-                       U16_NEXT(object->text, object->u.cs.end, 
object->text_len, cp);
-                       object->u.cs.end_cp_offset++;
-                       if (u_getCombiningClass(cp) == 0) {
-                               tmp = object->u.cs.end;
-                               tmp2 = object->u.cs.end_cp_offset;
-                               while (tmp < object->text_len) {
-                                       U16_NEXT(object->text, tmp, 
object->text_len, cp);
-                                       tmp2++;
-                                       if (u_getCombiningClass(cp) == 0) {
-                                               break;
-                                       } else {
-                                               object->u.cs.end = tmp;
-                                               object->u.cs.end_cp_offset = 
tmp2;
-                                       }
-                               }
-                       }
-               }
+               text_iter_helper_move(1, object->text, object->text_len,
+                                                         &object->u.cs.start, 
&object->u.cs.start_cp_offset);
+               object->u.cs.end = object->u.cs.start;
        }
        object->u.cs.index++;
 }
@@ -411,7 +443,6 @@
                object->u.cs.start = object->u.cs.end = 0;
                object->u.cs.start_cp_offset = object->u.cs.end_cp_offset = 0;
        }
-       text_iter_cs_next(object, flags TSRMLS_CC); /* find first sequence */
        object->u.cs.index = 0; /* because _next increments index */
 }
 

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to