andrei          Thu Jun 29 12:32:01 2006 UTC

  Modified files:              
    /php-src/ext/unicode        unicode_iterators.c 
  Log:
  Try to make combining sequences work. Not entirely successful.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.29&r2=1.30&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.29 
php-src/ext/unicode/unicode_iterators.c:1.30
--- php-src/ext/unicode/unicode_iterators.c:1.29        Wed Jun 28 15:28:55 2006
+++ php-src/ext/unicode/unicode_iterators.c     Thu Jun 29 12:32:00 2006
@@ -14,7 +14,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode_iterators.c,v 1.29 2006/06/28 15:28:55 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.30 2006/06/29 12:32:00 andrei Exp $ */
 
 /*
  * TODO
@@ -187,21 +187,30 @@
 
 static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
-       if (flags & ITER_REVERSE) {
-               return (object->u.cs.end > 0);
-       } else {
-               return (object->u.cs.end <= object->text_len);
-       }
+       return (object->u.cs.start != UBRK_DONE);
 }
 
 static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC)
 {
-       uint32_t length = object->u.cs.end - object->u.cs.start;
-       if (length+1 > object->current_alloc) {
-               object->current_alloc = length+1;
-               Z_USTRVAL_P(object->current) = 
eurealloc(Z_USTRVAL_P(object->current), object->current_alloc);
+       uint32_t length;
+       UChar *start;
+
+       if (object->u.cs.start == UBRK_DONE || object->u.cs.end == UBRK_DONE) {
+               length = 0;
+       } else {
+               if (flags & ITER_REVERSE) {
+                       start = object->text + object->u.cs.end;
+               } else {
+                       start = object->text + object->u.cs.start;
+               }
+               length = abs(object->u.cs.end - object->u.cs.start);
+               if (length+1 > object->current_alloc) {
+                       object->current_alloc = length+1;
+                       Z_USTRVAL_P(object->current) = 
eurealloc(Z_USTRVAL_P(object->current), object->current_alloc);
+               }
+               u_memcpy(Z_USTRVAL_P(object->current), start, length);
        }
-       u_memcpy(Z_USTRVAL_P(object->current), object->text + 
object->u.cs.start, length);
+   
        Z_USTRVAL_P(object->current)[length] = 0;
        Z_USTRLEN_P(object->current) = length;
 }
@@ -213,7 +222,11 @@
 
 static int text_iter_cs_offset(text_iter_obj* object, long flags TSRMLS_DC)
 {
-       return object->u.cs.start_cp_offset;
+       if (flags & ITER_REVERSE) {
+               return object->u.cs.end_cp_offset;
+       } else {
+               return object->u.cs.start_cp_offset;
+       }
 }
 
 static void text_iter_cs_next(text_iter_obj* object, long flags TSRMLS_DC)
@@ -221,21 +234,31 @@
        UChar32 cp;
        int32_t tmp, tmp2;
 
-       if (text_iter_cs_valid(object, flags TSRMLS_CC)) {
-               if (flags & ITER_REVERSE) {
-                       object->u.cs.end = object->u.cs.start;
-                       object->u.cs.end_cp_offset = 
object->u.cs.start_cp_offset;
-                       U16_PREV(object->text, 0, object->u.cs.start, cp);
-                       object->u.cs.start_cp_offset--;
+       if (object->u.cs.start == UBRK_DONE) {
+               return;
+       }
+
+       object->u.cs.start = object->u.cs.end;
+       object->u.cs.start_cp_offset = object->u.cs.end_cp_offset;
+       if (flags & ITER_REVERSE) {
+               if (object->u.cs.end == 0) {
+                       object->u.cs.end = UBRK_DONE;
+                       object->u.cs.end_cp_offset = UBRK_DONE;
+               } else {
+                       U16_PREV(object->text, 0, object->u.cs.end, cp);
+                       object->u.cs.end_cp_offset--;
                        if (u_getCombiningClass(cp) != 0) {
                                do {
-                                       U16_PREV(object->text, 0, 
object->u.cs.start, cp);
-                                       object->u.cs.start_cp_offset--;
-                               } while (object->u.cs.start > 0 && 
u_getCombiningClass(cp) != 0);
+                                       U16_PREV(object->text, 0, 
object->u.cs.end, cp);
+                                       object->u.cs.end_cp_offset--;
+                               } while (object->u.cs.end > 0 && 
u_getCombiningClass(cp) != 0);
                        }
+               }
+       } else {
+               if (object->u.cs.end == object->text_len) {
+                       object->u.cs.end = UBRK_DONE;
+                       object->u.cs.end_cp_offset = UBRK_DONE;
                } else {
-                       object->u.cs.start = object->u.cs.end;
-                       object->u.cs.start_cp_offset = 
object->u.cs.end_cp_offset;
                        U16_NEXT(object->text, object->u.cs.end, 
object->text_len, cp);
                        object->u.cs.end_cp_offset++;
                        if (u_getCombiningClass(cp) == 0) {
@@ -253,8 +276,8 @@
                                }
                        }
                }
-               object->u.cs.index++;
        }
+       object->u.cs.index++;
 }
 
 static void text_iter_cs_rewind(text_iter_obj *object, long flags TSRMLS_DC)
@@ -281,15 +304,11 @@
 };
 
 
-/* UBreakIterator Character Ops */
+/* UBreakIterator Ops */
 
 static int text_iter_brk_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
-       if (flags & ITER_REVERSE) {
-               return (object->u.brk.bound != UBRK_DONE);
-       } else {
-               return (object->u.brk.bound != UBRK_DONE);
-       }
+       return (object->u.brk.bound != UBRK_DONE);
 }
 
 static void text_iter_brk_current(text_iter_obj* object, long flags TSRMLS_DC)
@@ -297,22 +316,26 @@
        UChar *start;
        int32_t length = -1;
 
-       if (flags & ITER_REVERSE) {
-               if (object->u.brk.next == object->u.brk.bound) {
-                       object->u.brk.next = 
ubrk_preceding(object->u.brk.n_iter, object->u.brk.bound);
-               }
-               start = object->text + object->u.brk.next;
-       } else {
-               if (object->u.brk.next == object->u.brk.bound) {
-                       object->u.brk.next = 
ubrk_following(object->u.brk.n_iter, object->u.brk.bound);
+       if (object->u.brk.bound != UBRK_DONE) {
+               if (flags & ITER_REVERSE) {
+                       if (object->u.brk.next == object->u.brk.bound) {
+                               object->u.brk.next = 
ubrk_preceding(object->u.brk.n_iter, object->u.brk.bound);
+                       }
+                       start = object->text + object->u.brk.next;
+               } else {
+                       if (object->u.brk.next == object->u.brk.bound) {
+                               object->u.brk.next = 
ubrk_following(object->u.brk.n_iter, object->u.brk.bound);
+                       }
+                       start = object->text + object->u.brk.bound;
                }
-               start = object->text + object->u.brk.bound;
-       }
 
-       if (object->u.brk.next == UBRK_DONE) {
-               length = 0;
+               if (object->u.brk.next == UBRK_DONE) {
+                       length = 0;
+               } else {
+                       length = abs(object->u.brk.next - object->u.brk.bound);
+               }
        } else {
-               length = abs(object->u.brk.next - object->u.brk.bound);
+               length = 0;
        }
 
        if (length != 0) {
@@ -350,7 +373,7 @@
                object->u.brk.next  = object->u.brk.bound;
                if (object->u.brk.bound != UBRK_DONE) {
                        if (tmp - object->u.brk.bound > 1) {
-                               object->u.brk.cp_offset -= 
u_countChar32(object->text, tmp - object->u.brk.bound);
+                               object->u.brk.cp_offset -= 
u_countChar32(object->text + object->u.brk.bound, tmp - object->u.brk.bound);
                        } else {
                                object->u.brk.cp_offset--;
                        }
@@ -362,7 +385,7 @@
                object->u.brk.next  = object->u.brk.bound;
                if (object->u.brk.bound != UBRK_DONE) {
                        if (object->u.brk.bound - tmp > 1) {
-                               object->u.brk.cp_offset += 
u_countChar32(object->text, object->u.brk.bound - tmp);
+                               object->u.brk.cp_offset += 
u_countChar32(object->text + tmp, object->u.brk.bound - tmp);
                        } else {
                                object->u.brk.cp_offset++;
                        }

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to