[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-12 Thread Andrei Zmievski
andrei  Tue Jul 11 17:59:47 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Make next() and previous() take optional step parameter and optimize
  return value usage.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.41&r2=1.42&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.41 
php-src/ext/unicode/unicode_iterators.c:1.42
--- php-src/ext/unicode/unicode_iterators.c:1.41 Tue Jul 11 17:48:14 2006
+++ php-src/ext/unicode/unicode_iterators.c Tue Jul 11 17:59:46 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.41 2006/07/11 17:48:14 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.42 2006/07/11 17:59:46 andrei Exp $ */
 
 /*
  * TODO
@@ -1035,11 +1035,25 @@
 
 PHP_METHOD(TextIterator, next)
 {
+   long i, step = 1;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
-   iter_ops[intern-type]-next(intern, intern-flags TSRMLS_CC);
-   RETURN_LONG(iter_ops[intern-type]-offset(intern, intern-flags 
TSRMLS_CC));
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, |l, step) == 
FAILURE) {
+   return;
+   }
+
+   if (step = 0) {
+   step = 1;
+   }
+
+   for (i = 0; i  step; i++) {
+   iter_ops[intern-type]-next(intern, intern-flags TSRMLS_CC);
+   }
+
+   if (return_value_used) {
+   RETURN_LONG(iter_ops[intern-type]-offset(intern, 
intern-flags TSRMLS_CC));
+   }
 }
 
 PHP_METHOD(TextIterator, key)
@@ -1088,13 +1102,26 @@
 
 PHP_METHOD(TextIterator, previous)
 {
-   long flags;
+   long flags, i, step = 1;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, |l, step) == 
FAILURE) {
+   return;
+   }
+
+   if (step = 0) {
+   step = 1;
+   }
flags = intern-flags ^ ITER_REVERSE;
-   iter_ops[intern-type]-next(intern, flags TSRMLS_CC);
-   RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC));
+
+   for (i = 0; i  step; i++) {
+   iter_ops[intern-type]-next(intern, flags TSRMLS_CC);
+   }
+
+   if (return_value_used) {
+   RETURN_LONG(iter_ops[intern-type]-offset(intern, flags 
TSRMLS_CC));
+   }
 }
 
 PHP_METHOD(TextIterator, following)

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-12 Thread Andrei Zmievski
andrei  Tue Jul 11 20:51:18 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Protos.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.43&r2=1.44&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.43 
php-src/ext/unicode/unicode_iterators.c:1.44
--- php-src/ext/unicode/unicode_iterators.c:1.43 Tue Jul 11 19:43:08 2006
+++ php-src/ext/unicode/unicode_iterators.c Tue Jul 11 20:51:18 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.43 2006/07/11 19:43:08 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.44 2006/07/11 20:51:18 andrei Exp $ */
 
 /*
  * TODO
@@ -985,6 +985,8 @@
return retval;
 }
 
+/* {{{ proto void TextIterator::__construct(unicode text [, int flags = 
TextIterator::CODEPOINT [, string locale ]]) U
+   TextIterator constructor */
 PHP_METHOD(TextIterator, __construct)
 {
UChar *text;
@@ -1032,16 +1034,26 @@
 
iter_ops[intern-type]-rewind(intern, intern-flags TSRMLS_CC);
 }
+/* }}} */
 
+/* {{{ proto unicode TextIterator::current() U
+   Returns the element at the current boundary */
 PHP_METHOD(TextIterator, current)
 {
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) {
+   return;
+   }
+
iter_ops[intern-type]-current(intern, intern-flags TSRMLS_CC);
RETURN_UNICODEL(Z_USTRVAL_P(intern-current), 
Z_USTRLEN_P(intern-current), 1);
 }
+/* }}} */
 
+/* {{{ proto int TextIterator::next([int n]) U
+   Advances to the n'th text boundary following the current one and returns 
its offset */
 PHP_METHOD(TextIterator, next)
 {
long i, step = 1;
@@ -1066,29 +1078,50 @@
RETURN_LONG(cp_offset);
}
 }
+/* }}} */
 
+/* {{{ proto int TextIterator::key() U
+   Returns the number boundaries iterated through */
 PHP_METHOD(TextIterator, key)
 {
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) {
+   return;
+   }
+
RETURN_LONG(iter_ops[intern-type]-key(intern, intern-flags 
TSRMLS_CC));
 }
+/* }}} */
 
+/* {{{ proto bool TextIterator::valid() U
+   Determines validity of the iterator */
 PHP_METHOD(TextIterator, valid)
 {
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) {
+   return;
+   }
+
RETURN_BOOL(iter_ops[intern-type]-valid(intern, intern-flags 
TSRMLS_CC));
 }
+/* }}} */
 
+/* {{{ proto int TextIterator::first() U
+   Positions iterator at the first character in the text and returns the 
offset */
 PHP_METHOD(TextIterator, rewind)
 {
int32_t cp_offset;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) {
+   return;
+   }
+
iter_ops[intern-type]-rewind(intern, intern-flags TSRMLS_CC);
 
if (return_value_used) {
@@ -1096,7 +1129,10 @@
RETURN_LONG(cp_offset);
}
 }
+/* }}} */
 
+/* {{{ proto int TextIterator::last() U
+   Positions iterator beyond the last character in the text and returns the 
offset */
 PHP_METHOD(TextIterator, last)
 {
long flags;
@@ -1104,6 +1140,10 @@
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) {
+   return;
+   }
+
flags = intern-flags ^ ITER_REVERSE;
iter_ops[intern-type]-rewind(intern, flags TSRMLS_CC);
 
@@ -1112,17 +1152,27 @@
RETURN_LONG(cp_offset);
}
 }
+/* }}} */
 
+/* {{{ proto int TextIterator::offset() U
+   Returns the offset of the current text boundary */
 PHP_METHOD(TextIterator, offset)
 {
int32_t cp_offset;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) {
+   return;
+   }
+
iter_ops[intern-type]-offset(intern, intern-flags, cp_offset 
TSRMLS_CC);
RETURN_LONG(cp_offset);
 }
+/* }}} */
 
+/* {{{ proto int TextIterator::previous([int n]) U
+   Advances to the n'th text boundary preceding the current one and returns 
its offset */
 PHP_METHOD(TextIterator, previous)
 {
long 

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-11 Thread Andrei Zmievski
andrei  Tue Jul 11 16:20:21 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Use object flags.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.39&r2=1.40&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.39 
php-src/ext/unicode/unicode_iterators.c:1.40
--- php-src/ext/unicode/unicode_iterators.c:1.39 Mon Jul 10 23:19:05 2006
+++ php-src/ext/unicode/unicode_iterators.c Tue Jul 11 16:20:21 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.39 2006/07/10 23:19:05 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.40 2006/07/11 16:20:21 andrei Exp $ */
 
 /*
  * TODO
@@ -1132,7 +1132,7 @@
 
 PHP_METHOD(TextIterator, isBoundary)
 {
-   long flags, offset;
+   long offset;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
@@ -1143,7 +1143,7 @@
/*
 * ReverseTextIterator will behave the same as the normal one.
 */
-   RETURN_BOOL(iter_ops[intern-type]-isBoundary(intern, offset, flags 
TSRMLS_CC));
+   RETURN_BOOL(iter_ops[intern-type]-isBoundary(intern, offset, 
intern-flags TSRMLS_CC));
 }
 
 PHP_METHOD(TextIterator, getAvailableLocales)

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-11 Thread Andrei Zmievski
andrei  Tue Jul 11 19:43:09 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Implement getAll() that can be used to get all the pieces defined by the
  boundaries.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.42&r2=1.43&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.42 
php-src/ext/unicode/unicode_iterators.c:1.43
--- php-src/ext/unicode/unicode_iterators.c:1.42 Tue Jul 11 17:59:46 2006
+++ php-src/ext/unicode/unicode_iterators.c Tue Jul 11 19:43:08 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.42 2006/07/11 17:59:46 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.43 2006/07/11 19:43:08 andrei Exp $ */
 
 /*
  * TODO
@@ -85,7 +85,7 @@
int  (*valid) (text_iter_obj* object, long flags TSRMLS_DC);
void (*current)   (text_iter_obj* object, long flags TSRMLS_DC);
int  (*key)   (text_iter_obj* object, long flags TSRMLS_DC);
-   int  (*offset)(text_iter_obj* object, long flags TSRMLS_DC);
+   int  (*offset)(text_iter_obj* object, long flags, int32_t 
*cp_offset TSRMLS_DC);
void (*next)  (text_iter_obj* object, long flags TSRMLS_DC);
void (*rewind)(text_iter_obj* object, long flags TSRMLS_DC);
void (*following) (text_iter_obj* object, int32_t offset, long flags 
TSRMLS_DC);
@@ -144,9 +144,12 @@
return object-u.cp.index;
 }
 
-static int text_iter_cp_offset(text_iter_obj* object, long flags TSRMLS_DC)
+static int text_iter_cp_offset(text_iter_obj* object, long flags, int32_t 
*cp_offset TSRMLS_DC)
 {
-   return object-u.cp.cp_offset;
+   if (cp_offset) {
+   *cp_offset = object-u.cp.cp_offset;
+   }
+   return object-u.cp.offset;
 }
 
 static void text_iter_cp_next(text_iter_obj* object, long flags TSRMLS_DC)
@@ -427,9 +430,12 @@
return object-u.cs.index;
 }
 
-static int text_iter_cs_offset(text_iter_obj* object, long flags TSRMLS_DC)
+static int text_iter_cs_offset(text_iter_obj* object, long flags, int32_t 
*cp_offset TSRMLS_DC)
 {
-   return object-u.cs.start_cp_offset;
+   if (cp_offset) {
+   *cp_offset = object-u.cs.start_cp_offset;
+   }
+   return object-u.cs.start;
 }
 
 static void text_iter_cs_next(text_iter_obj* object, long flags TSRMLS_DC)
@@ -639,9 +645,12 @@
return object-u.brk.index;
 }
 
-static int text_iter_brk_offset(text_iter_obj* object, long flags TSRMLS_DC)
+static int text_iter_brk_offset(text_iter_obj* object, long flags, int32_t 
*cp_offset TSRMLS_DC)
 {
-   return object-u.brk.cp_offset;
+   if (cp_offset) {
+   *cp_offset = object-u.brk.cp_offset;
+   }
+   return object-u.brk.bound;
 }
 
 static void text_iter_brk_next(text_iter_obj* object, long flags TSRMLS_DC)
@@ -1036,6 +1045,7 @@
 PHP_METHOD(TextIterator, next)
 {
long i, step = 1;
+   int32_t cp_offset;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
@@ -1052,7 +1062,8 @@
}
 
if (return_value_used) {
-   RETURN_LONG(iter_ops[intern-type]-offset(intern, 
intern-flags TSRMLS_CC));
+   iter_ops[intern-type]-offset(intern, intern-flags, 
cp_offset TSRMLS_CC);
+   RETURN_LONG(cp_offset);
}
 }
 
@@ -1074,35 +1085,48 @@
 
 PHP_METHOD(TextIterator, rewind)
 {
+   int32_t cp_offset;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
iter_ops[intern-type]-rewind(intern, intern-flags TSRMLS_CC);
-   RETURN_LONG(iter_ops[intern-type]-offset(intern, intern-flags 
TSRMLS_CC));
+
+   if (return_value_used) {
+   iter_ops[intern-type]-offset(intern, intern-flags, 
cp_offset TSRMLS_CC);
+   RETURN_LONG(cp_offset);
+   }
 }
 
 PHP_METHOD(TextIterator, last)
 {
long flags;
+   int32_t cp_offset;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
flags = intern-flags ^ ITER_REVERSE;
iter_ops[intern-type]-rewind(intern, flags TSRMLS_CC);
-   RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC));
+
+   if (return_value_used) {
+   iter_ops[intern-type]-offset(intern, flags, cp_offset 
TSRMLS_CC);
+   RETURN_LONG(cp_offset);
+   }
 }
 
 PHP_METHOD(TextIterator, offset)
 {
+   int32_t cp_offset;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
-   RETURN_LONG(iter_ops[intern-type]-offset(intern, intern-flags 
TSRMLS_CC));
+   iter_ops[intern-type]-offset(intern, 

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-10 Thread Andrei Zmievski
andrei  Mon Jul 10 20:14:12 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Fix combining sequence iterators for forward and backward movement.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.34&r2=1.35&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.34 
php-src/ext/unicode/unicode_iterators.c:1.35
--- php-src/ext/unicode/unicode_iterators.c:1.34 Sat Jul  8 18:46:24 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 20:14:12 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.34 2006/07/08 18:46:24 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.35 2006/07/10 20:14:12 andrei Exp $ */
 
 /*
  * TODO
@@ -306,6 +306,59 @@
 
 /* Combining sequence ops */
 
+static void text_iter_helper_move(zend_bool forward, UChar *text, int32_t 
text_len, int32_t *offset, int32_t *cp_offset)
+{
+   UChar32 cp;
+   int32_t tmp, tmp2;
+
+   if (*offset == UBRK_DONE) {
+   return;
+   }
+
+   if (forward) {
+   if (*offset == text_len) {
+   *offset= UBRK_DONE;
+   *cp_offset = UBRK_DONE;
+   } else {
+   U16_NEXT(text, (*offset), text_len, cp);
+   (*cp_offset)++;
+
+   if (u_getCombiningClass(cp) == 0) {
+   tmp = *offset;
+   tmp2 = *cp_offset;
+   /*
+* At the end of the string cp will be 0 
because of the NULL
+* terminating NULL, so combining class will be 
0 as well.
+*/
+   while (tmp  text_len) {
+   U16_NEXT(text, tmp, text_len, cp);
+   tmp2++;
+   if (u_getCombiningClass(cp) == 0) {
+   break;
+   } else {
+   *offset= tmp;
+   *cp_offset = tmp2;
+   }
+   }
+   }
+   }
+   } else {
+   if (*offset == 0) {
+   *offset= UBRK_DONE;
+   *cp_offset = UBRK_DONE;
+   } else {
+   U16_PREV(text, 0, (*offset), cp);
+   (*cp_offset)--;
+   if (u_getCombiningClass(cp) != 0) {
+   do {
+   U16_PREV(text, 0, (*offset), cp);
+   (*cp_offset)--;
+   } while (*offset  0  u_getCombiningClass(cp) 
!= 0);
+   }
+   }
+   }
+}
+
 static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
return (object-u.cs.start != UBRK_DONE);
@@ -313,25 +366,41 @@
 
 static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   uint32_t length;
UChar *start;
+   int32_t length = -1;
 
-   if (object-u.cs.start == UBRK_DONE || object-u.cs.end == UBRK_DONE) {
-   length = 0;
-   } else {
+   if (object-u.cs.start != UBRK_DONE) {
if (flags  ITER_REVERSE) {
+   if (object-u.cs.end == object-u.cs.start) {
+   text_iter_helper_move(0, object-text, 
object-text_len,
+ 
object-u.cs.start, object-u.cs.start_cp_offset);
+   }
start = object-text + object-u.cs.end;
} else {
+   if (object-u.cs.end == object-u.cs.start) {
+   text_iter_helper_move(1, object-text, 
object-text_len,
+ 
object-u.cs.end, object-u.cs.end_cp_offset);
+   }
start = object-text + object-u.cs.start;
}
-   length = abs(object-u.cs.end - object-u.cs.start);
+
+   if (object-u.cs.end == UBRK_DONE) {
+   length = 0;
+   } else {
+   length = abs(object-u.cs.end - object-u.cs.start);
+   }
+   } else {
+   length = 0;
+   }
+   
+   if (length != 0) {
if (length+1  object-current_alloc) {
object-current_alloc = length+1;
Z_USTRVAL_P(object-current) = 

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-10 Thread Andrei Zmievski
andrei  Mon Jul 10 21:18:01 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Implement following() for combining sequences.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.35&r2=1.36&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.35 
php-src/ext/unicode/unicode_iterators.c:1.36
--- php-src/ext/unicode/unicode_iterators.c:1.35 Mon Jul 10 20:14:12 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 21:18:01 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.35 2006/07/10 20:14:12 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.36 2006/07/10 21:18:01 andrei Exp $ */
 
 /*
  * TODO
@@ -424,12 +424,12 @@
if (flags  ITER_REVERSE) {
text_iter_helper_move(0, object-text, object-text_len,
  object-u.cs.start, 
object-u.cs.start_cp_offset);
-   object-u.cs.end = object-u.cs.start;
} else {
text_iter_helper_move(1, object-text, object-text_len,
  object-u.cs.start, 
object-u.cs.start_cp_offset);
-   object-u.cs.end = object-u.cs.start;
}
+   object-u.cs.end = object-u.cs.start;
+   object-u.cs.end_cp_offset = object-u.cs.start_cp_offset;
object-u.cs.index++;
 }
 
@@ -446,6 +446,64 @@
object-u.cs.index = 0; /* because _next increments index */
 }
 
+static void text_iter_cs_following(text_iter_obj *object, int32_t offset, long 
flags TSRMLS_DC)
+{
+   int32_t k, tmp;
+
+   if (offset  0) {
+   offset = 0;
+   }
+
+   /*
+* On invalid iterator we always want to start looking for the code unit
+* offset from the beginning of the string.
+*/
+   if (object-u.cs.start_cp_offset == UBRK_DONE) {
+   object-u.cs.start_cp_offset = 0;
+   object-u.cs.start = 0;
+   }
+
+   /*
+* Try to locate the code unit position relative to the last known 
codepoint
+* offset.
+*/
+   k = object-u.cs.start;
+   if (offset  object-u.cs.start_cp_offset) {
+   U16_FWD_N(object-text, k, object-text_len, offset - 
object-u.cs.start_cp_offset);
+   } else {
+   U16_BACK_N(object-text, 0, k, object-u.cs.start_cp_offset - 
offset);
+   }
+
+   /*
+* Locate the actual boundary.
+*/
+   if (flags  ITER_REVERSE) {
+   /*
+* If offset was at or beyond the length of text, we need to 
adjust it
+* to the number of codepoints in the text.
+*/
+   if (k == object-text_len) {
+   offset = u_countChar32(object-text, object-text_len);
+   }
+   text_iter_helper_move(0, object-text, object-text_len, k, 
offset);
+   } else {
+   text_iter_helper_move(1, object-text, object-text_len, k, 
offset);
+   }
+
+   if (k == object-u.cs.start) {
+   return;
+   }
+
+   object-u.cs.start = k;
+   object-u.cs.start_cp_offset = offset;
+   object-u.cs.end = object-u.cs.start;
+}
+
+static zend_bool text_iter_cs_isBoundary(text_iter_obj *object, int32_t 
offset, long flags TSRMLS_DC)
+{
+   return 1;
+}
+
 static text_iter_ops text_iter_cs_ops = {
text_iter_cs_valid,
text_iter_cs_current,
@@ -453,6 +511,8 @@
text_iter_cs_offset,
text_iter_cs_next,
text_iter_cs_rewind,
+   text_iter_cs_following,
+   text_iter_cs_isBoundary,
 };
 
 
@@ -598,7 +658,6 @@
} else {
object-u.brk.bound = ubrk_following(object-u.brk.iter, k);
}
-   object-u.brk.next  = object-u.brk.bound;
 
/*
 * If boundary is the same one as where we were at before, simply 
return.
@@ -607,6 +666,8 @@
return;
}
 
+   object-u.brk.next  = object-u.brk.bound;
+
/*
 * Adjust the internal codepoint offset based on how far we've moved.
 */

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-10 Thread Andrei Zmievski
andrei  Mon Jul 10 21:42:25 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Implement isBoundary() for combining sequences.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.36&r2=1.37&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.36 
php-src/ext/unicode/unicode_iterators.c:1.37
--- php-src/ext/unicode/unicode_iterators.c:1.36 Mon Jul 10 21:18:01 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 21:42:25 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.36 2006/07/10 21:18:01 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.37 2006/07/10 21:42:25 andrei Exp $ */
 
 /*
  * TODO
@@ -448,7 +448,7 @@
 
 static void text_iter_cs_following(text_iter_obj *object, int32_t offset, long 
flags TSRMLS_DC)
 {
-   int32_t k, tmp;
+   int32_t k;
 
if (offset  0) {
offset = 0;
@@ -501,7 +501,54 @@
 
 static zend_bool text_iter_cs_isBoundary(text_iter_obj *object, int32_t 
offset, long flags TSRMLS_DC)
 {
-   return 1;
+   UChar32 cp;
+   int32_t k, tmp;
+   zend_bool result;
+
+   if (offset  0) {
+   offset = 0;
+   }
+
+   /*
+* On invalid iterator we always want to start looking for the code unit
+* offset from the beginning of the string.
+*/
+   if (object-u.cs.start_cp_offset == UBRK_DONE) {
+   object-u.cs.start_cp_offset = 0;
+   object-u.cs.start = 0;
+   }
+
+   /*
+* Try to locate the code unit position relative to the last known 
codepoint
+* offset.
+*/
+   k = object-u.cs.start;
+   if (offset  object-u.cs.start_cp_offset) {
+   U16_FWD_N(object-text, k, object-text_len, offset - 
object-u.cs.start_cp_offset);
+   } else {
+   U16_BACK_N(object-text, 0, k, object-u.cs.start_cp_offset - 
offset);
+   }
+
+   /* end of the text is always a boundary */
+   if (k == object-text_len) {
+   offset = u_countChar32(object-text, object-text_len);
+   result = 1;
+   } else {
+   /* if the next codepoint is a base character, it's a boundary */
+   tmp = k;
+   U16_NEXT(object-text, tmp, object-text_len, cp);
+   result = (u_getCombiningClass(cp) == 0);
+   }
+
+   if (k == object-u.cs.start) {
+   return result;
+   }
+
+   object-u.cs.start = k;
+   object-u.cs.start_cp_offset = offset;
+   object-u.cs.end = object-u.cs.start;
+
+   return result;
 }
 
 static text_iter_ops text_iter_cs_ops = {

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-10 Thread Andrei Zmievski
andrei  Mon Jul 10 22:12:47 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Fix validity checks.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.37&r2=1.38&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.37 
php-src/ext/unicode/unicode_iterators.c:1.38
--- php-src/ext/unicode/unicode_iterators.c:1.37 Mon Jul 10 21:42:25 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 22:12:47 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.37 2006/07/10 21:42:25 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.38 2006/07/10 22:12:47 andrei Exp $ */
 
 /*
  * TODO
@@ -107,7 +107,15 @@
 
 static int text_iter_cp_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   return (object-u.cp.offset != UBRK_DONE);
+   if (object-u.cp.offset == UBRK_DONE) {
+   return 0;
+   }
+
+   if (flags  ITER_REVERSE) {
+   return (object-u.cp.offset != 0);
+   } else {
+   return (object-u.cp.offset != object-text_len);
+   }
 }
 
 static void text_iter_cp_current(text_iter_obj* object, long flags TSRMLS_DC)
@@ -144,24 +152,26 @@
 
 static void text_iter_cp_next(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   if (text_iter_cp_valid(object, flags TSRMLS_CC)) {
-   if (flags  ITER_REVERSE) {
-   U16_BACK_1(object-text, 0, object-u.cp.offset);
-   if (object-u.cp.offset = object-text_len) {
-   object-u.cp.cp_offset--;
-   } else {
-   object-u.cp.offset = object-u.cp.cp_offset = 
UBRK_DONE;
-   }
+   if (object-u.cp.offset == UBRK_DONE) {
+   return;
+   }
+
+   if (flags  ITER_REVERSE) {
+   U16_BACK_1(object-text, 0, object-u.cp.offset);
+   if (object-u.cp.offset = object-text_len) {
+   object-u.cp.cp_offset--;
} else {
-   U16_FWD_1(object-text, object-u.cp.offset, 
object-text_len);
-   if (object-u.cp.offset = object-text_len) {
-   object-u.cp.cp_offset++;
-   } else {
-   object-u.cp.offset = object-u.cp.cp_offset = 
UBRK_DONE;
-   }
+   object-u.cp.offset = object-u.cp.cp_offset = 
UBRK_DONE;
+   }
+   } else {
+   U16_FWD_1(object-text, object-u.cp.offset, object-text_len);
+   if (object-u.cp.offset = object-text_len) {
+   object-u.cp.cp_offset++;
+   } else {
+   object-u.cp.offset = object-u.cp.cp_offset = 
UBRK_DONE;
}
-   object-u.cp.index++;
}
+   object-u.cp.index++;
 }
 
 static void text_iter_cp_rewind(text_iter_obj *object, long flags TSRMLS_DC)
@@ -361,7 +371,15 @@
 
 static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   return (object-u.cs.start != UBRK_DONE);
+   if (object-u.cs.start == UBRK_DONE) {
+   return 0;
+   }
+
+   if (flags  ITER_REVERSE) {
+   return (object-u.cs.start != 0);
+   } else {
+   return (object-u.cs.start != object-text_len);
+   }
 }
 
 static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC)
@@ -373,7 +391,7 @@
if (flags  ITER_REVERSE) {
if (object-u.cs.end == object-u.cs.start) {
text_iter_helper_move(0, object-text, 
object-text_len,
- 
object-u.cs.start, object-u.cs.start_cp_offset);
+ 
object-u.cs.end, object-u.cs.end_cp_offset);
}
start = object-text + object-u.cs.end;
} else {
@@ -567,7 +585,15 @@
 
 static int text_iter_brk_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   return (object-u.brk.bound != UBRK_DONE);
+   if (object-u.brk.bound == UBRK_DONE) {
+   return 0;
+   }
+
+   if (flags  ITER_REVERSE) {
+   return (object-u.brk.bound != 0);
+   } else {
+   return (object-u.brk.bound != object-text_len);
+   }
 }
 
 static void text_iter_brk_current(text_iter_obj* object, long flags TSRMLS_DC)

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-10 Thread Andrei Zmievski
andrei  Mon Jul 10 23:19:05 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Implement getAvailableLocales().
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.38&r2=1.39&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.38 
php-src/ext/unicode/unicode_iterators.c:1.39
--- php-src/ext/unicode/unicode_iterators.c:1.38 Mon Jul 10 22:12:47 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 23:19:05 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.38 2006/07/10 22:12:47 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.39 2006/07/10 23:19:05 andrei Exp $ */
 
 /*
  * TODO
@@ -1146,6 +1146,25 @@
RETURN_BOOL(iter_ops[intern-type]-isBoundary(intern, offset, flags 
TSRMLS_CC));
 }
 
+PHP_METHOD(TextIterator, getAvailableLocales)
+{
+   int32_t count, i;
+
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) {
+   return;
+   }
+
+   if (!return_value_used) {
+   return;
+   }
+
+   array_init(return_value);
+   count = ubrk_countAvailable();
+   for (i = 0; i  count; i++) {
+   add_next_index_ascii_string(return_value, 
(char*)ubrk_getAvailable(i), ZSTR_DUPLICATE);
+   }
+}
+
 static zend_function_entry text_iterator_funcs[] = {
 
PHP_ME(TextIterator, __construct, NULL, ZEND_ACC_PUBLIC)
@@ -1164,6 +1183,8 @@
PHP_ME(TextIterator, preceding,   NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, isBoundary,  NULL, ZEND_ACC_PUBLIC)
 
+   PHP_ME(TextIterator, getAvailableLocales, NULL, ZEND_ACC_PUBLIC | 
ZEND_ACC_STATIC)
+
PHP_MALIAS(TextIterator, first, rewind, NULL, ZEND_ACC_PUBLIC)
{NULL, NULL, NULL}
 };

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-08 Thread Andrei Zmievski
andrei  Sat Jul  8 18:46:24 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Implement following() and preceding() for codepoint iterators.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.33&r2=1.34&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.33 
php-src/ext/unicode/unicode_iterators.c:1.34
--- php-src/ext/unicode/unicode_iterators.c:1.33 Fri Jul  7 22:52:26 2006
+++ php-src/ext/unicode/unicode_iterators.c Sat Jul  8 18:46:24 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.33 2006/07/07 22:52:26 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.34 2006/07/08 18:46:24 andrei Exp $ */
 
 /*
  * TODO
@@ -178,6 +178,78 @@
 
 static void text_iter_cp_following(text_iter_obj *object, int32_t offset, long 
flags TSRMLS_DC)
 {
+   int32_t k;
+
+   if (offset  0) {
+   offset = 0;
+   }
+
+   /*
+* On invalid iterator we always want to start looking for the code unit
+* offset from the beginning of the string.
+*/
+   if (object-u.cp.cp_offset == UBRK_DONE) {
+   object-u.cp.cp_offset  = 0;
+   object-u.cp.offset = 0;
+   }
+
+   /*
+* Try to locate the code unit position relative to the last known 
codepoint
+* offset.
+*/
+   k = object-u.cp.offset;
+   if (offset  object-u.cp.cp_offset) {
+   U16_FWD_N(object-text, k, object-text_len, offset - 
object-u.cp.cp_offset);
+   } else {
+   U16_BACK_N(object-text, 0, k, object-u.cp.cp_offset - offset);
+   }
+
+   /*
+* Locate the actual boundary.
+*/
+   if (flags  ITER_REVERSE) {
+   if (k == 0) {
+   object-u.cp.cp_offset = UBRK_DONE;
+   object-u.cp.offset = UBRK_DONE;
+   return;
+   } else {
+   U16_BACK_1(object-text, 0, k);
+   }
+   } else {
+   if (k == object-text_len) {
+   object-u.cp.cp_offset = UBRK_DONE;
+   object-u.cp.offset = UBRK_DONE;
+   return;
+   } else {
+   U16_FWD_1(object-text, k, object-text_len);
+   }
+   }
+
+   /*
+* If boundary is the same one as where we were at before, simply 
return.
+*/
+   if (k == object-u.cp.offset) {
+   return;
+   }
+
+   /*
+* Adjust the internal codepoint offset based on how far we've moved.
+*/
+   if (k  object-u.cp.offset) {
+   if (k - object-u.cp.offset  1) {
+   object-u.cp.cp_offset += u_countChar32(object-text + 
object-u.cp.offset, k - object-u.cp.offset);
+   } else {
+   object-u.cp.cp_offset++;
+   }
+   } else {
+   if (object-u.cp.offset - k  1) {
+   object-u.cp.cp_offset -= u_countChar32(object-text + 
k, object-u.cp.offset - k);
+   } else {
+   object-u.cp.cp_offset--;
+   }
+   }
+
+   object-u.cp.offset = k;
 }
 
 static zend_bool text_iter_cp_isBoundary(text_iter_obj *object, int32_t 
offset, long flags TSRMLS_DC)
@@ -863,7 +935,7 @@
 
 PHP_METHOD(TextIterator, following)
 {
-   long flags, offset;
+   long offset;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
@@ -871,8 +943,8 @@
return;
}
 
-   iter_ops[intern-type]-following(intern, offset, flags TSRMLS_CC);
-   RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC));
+   iter_ops[intern-type]-following(intern, offset, intern-flags 
TSRMLS_CC);
+   RETURN_LONG(iter_ops[intern-type]-offset(intern, intern-flags 
TSRMLS_CC));
 }
 
 PHP_METHOD(TextIterator, preceding)

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-07 Thread Andrei Zmievski
andrei  Fri Jul  7 21:41:18 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Implement TextIterator methods following() and preceding().
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.30&r2=1.31&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.30 
php-src/ext/unicode/unicode_iterators.c:1.31
--- php-src/ext/unicode/unicode_iterators.c:1.30 Thu Jun 29 12:32:00 2006
+++ php-src/ext/unicode/unicode_iterators.c Fri Jul  7 21:41:18 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.30 2006/06/29 12:32:00 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.31 2006/07/07 21:41:18 andrei Exp $ */
 
 /*
  * TODO
@@ -82,12 +82,13 @@
 }
 
 typedef struct {
-   int  (*valid)  (text_iter_obj* object, long flags TSRMLS_DC);
-   void (*current)(text_iter_obj* object, long flags TSRMLS_DC);
-   int  (*key)(text_iter_obj* object, long flags TSRMLS_DC);
-   int  (*offset) (text_iter_obj* object, long flags TSRMLS_DC);
-   void (*next)   (text_iter_obj* object, long flags TSRMLS_DC);
-   void (*rewind) (text_iter_obj* object, long flags TSRMLS_DC);
+   int  (*valid) (text_iter_obj* object, long flags TSRMLS_DC);
+   void (*current)   (text_iter_obj* object, long flags TSRMLS_DC);
+   int  (*key)   (text_iter_obj* object, long flags TSRMLS_DC);
+   int  (*offset)(text_iter_obj* object, long flags TSRMLS_DC);
+   void (*next)  (text_iter_obj* object, long flags TSRMLS_DC);
+   void (*rewind)(text_iter_obj* object, long flags TSRMLS_DC);
+   void (*following) (text_iter_obj* object, int32_t offset, long flags 
TSRMLS_DC);
 } text_iter_ops;
 
 enum UBreakIteratorType brk_type_map[] = {
@@ -410,6 +411,73 @@
object-u.brk.index = 0;
 }
 
+static void text_iter_brk_following(text_iter_obj *object, int32_t offset, 
long flags TSRMLS_DC)
+{
+   int32_t k, tmp;
+
+   if (offset  0) {
+   offset = 0;
+   }
+
+   /*
+* On invalid iterator we always want to start looking for the code unit
+* offset from the beginning of the string.
+*/
+   if (object-u.brk.cp_offset == UBRK_DONE) {
+   object-u.brk.cp_offset = 0;
+   object-u.brk.bound = 0;
+   }
+
+   /*
+* Try to locate the code unit position relative to the last known 
codepoint
+* offset.
+*/
+   k = tmp = object-u.brk.bound;
+   if (offset  object-u.brk.cp_offset) {
+   U16_FWD_N(object-text, k, object-text_len, offset - 
object-u.brk.cp_offset);
+   } else {
+   U16_BACK_N(object-text, 0, k, object-u.brk.cp_offset - 
offset);
+   }
+
+   /*
+* Locate the actual boundary.
+*/
+   if (flags  ITER_REVERSE) {
+   object-u.brk.bound = ubrk_preceding(object-u.brk.iter, k);
+   } else {
+   object-u.brk.bound = ubrk_following(object-u.brk.iter, k);
+   }
+   object-u.brk.next  = object-u.brk.bound;
+
+   /*
+* If boundary is the same one as where we were at before, simply 
return.
+*/
+   if (object-u.brk.bound == tmp) {
+   return;
+   }
+
+   /*
+* Adjust the internal codepoint offset based on how far we've moved.
+*/
+   if (object-u.brk.bound != UBRK_DONE) {
+   if (object-u.brk.bound  tmp) {
+   if (object-u.brk.bound - tmp  1) {
+   object-u.brk.cp_offset += 
u_countChar32(object-text + tmp, object-u.brk.bound - tmp);
+   } else {
+   object-u.brk.cp_offset++;
+   }
+   } else {
+   if (tmp - object-u.brk.bound  1) {
+   object-u.brk.cp_offset -= 
u_countChar32(object-text + object-u.brk.bound, tmp - object-u.brk.bound);
+   } else {
+   object-u.brk.cp_offset--;
+   }
+   }
+   } else {
+   object-u.brk.cp_offset = UBRK_DONE;
+   }
+}
+
 static text_iter_ops text_iter_brk_ops = {
text_iter_brk_valid,
text_iter_brk_current,
@@ -417,6 +485,7 @@
text_iter_brk_offset,
text_iter_brk_next,
text_iter_brk_rewind,
+   text_iter_brk_following,
 };
 
 
@@ -678,7 +747,39 @@
RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC));
 }
 
+PHP_METHOD(TextIterator, following)
+{
+   long flags, offset;
+   zval *object = getThis();
+   text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
+
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, l, offset) == 
FAILURE) {
+   return;
+  

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-07 Thread Andrei Zmievski
andrei  Fri Jul  7 22:34:46 2006 UTC

  Modified files:  
/php-src/ext/unicodeunicode_iterators.c 
  Log:
  Implement TextIterator::isBoundary() for break iterators.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.31&r2=1.32&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.31 
php-src/ext/unicode/unicode_iterators.c:1.32
--- php-src/ext/unicode/unicode_iterators.c:1.31Fri Jul  7 21:41:18 2006
+++ php-src/ext/unicode/unicode_iterators.c Fri Jul  7 22:34:46 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.31 2006/07/07 21:41:18 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.32 2006/07/07 22:34:46 andrei Exp $ */
 
 /*
  * TODO
@@ -89,6 +89,7 @@
void (*next)  (text_iter_obj* object, long flags TSRMLS_DC);
void (*rewind)(text_iter_obj* object, long flags TSRMLS_DC);
void (*following) (text_iter_obj* object, int32_t offset, long flags 
TSRMLS_DC);
+   zend_bool (*isBoundary)(text_iter_obj* object, int32_t offset, long 
flags TSRMLS_DC);
 } text_iter_ops;
 
 enum UBreakIteratorType brk_type_map[] = {
@@ -478,6 +479,71 @@
}
 }
 
+static zend_bool text_iter_brk_isBoundary(text_iter_obj *object, int32_t 
offset, long flags TSRMLS_DC)
+{
+   int32_t k, tmp;
+   UBool result;
+
+   if (offset  0) {
+   offset = 0;
+   }
+
+   /*
+* On invalid iterator we always want to start looking for the code unit
+* offset from the beginning of the string.
+*/
+   if (object-u.brk.cp_offset == UBRK_DONE) {
+   object-u.brk.cp_offset = 0;
+   object-u.brk.bound = 0;
+   }
+
+   /*
+* Try to locate the code unit position relative to the last known 
codepoint
+* offset.
+*/
+   k = tmp = object-u.brk.bound;
+   if (offset  object-u.brk.cp_offset) {
+   U16_FWD_N(object-text, k, object-text_len, offset - 
object-u.brk.cp_offset);
+   } else {
+   U16_BACK_N(object-text, 0, k, object-u.brk.cp_offset - 
offset);
+   }
+
+   result = ubrk_isBoundary(object-u.brk.iter, k);
+
+   object-u.brk.bound = ubrk_current(object-u.brk.iter);
+   object-u.brk.next  = object-u.brk.bound;
+
+   /*
+* If boundary is the same one as where we were at before, simply 
return.
+*/
+   if (object-u.brk.bound == tmp) {
+   return result;
+   }
+
+   /*
+* Adjust the internal codepoint offset based on how far we've moved.
+*/
+   if (object-u.brk.bound != UBRK_DONE) {
+   if (object-u.brk.bound  tmp) {
+   if (object-u.brk.bound - tmp  1) {
+   object-u.brk.cp_offset += 
u_countChar32(object-text + tmp, object-u.brk.bound - tmp);
+   } else {
+   object-u.brk.cp_offset++;
+   }
+   } else {
+   if (tmp - object-u.brk.bound  1) {
+   object-u.brk.cp_offset -= 
u_countChar32(object-text + object-u.brk.bound, tmp - object-u.brk.bound);
+   } else {
+   object-u.brk.cp_offset--;
+   }
+   }
+   } else {
+   object-u.brk.cp_offset = UBRK_DONE;
+   }
+
+   return result;
+}
+
 static text_iter_ops text_iter_brk_ops = {
text_iter_brk_valid,
text_iter_brk_current,
@@ -486,6 +552,7 @@
text_iter_brk_next,
text_iter_brk_rewind,
text_iter_brk_following,
+   text_iter_brk_isBoundary,
 };
 
 
@@ -778,6 +845,23 @@
iter_ops[intern-type]-following(intern, offset, flags TSRMLS_CC);
RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC));
 }
+
+PHP_METHOD(TextIterator, isBoundary)
+{
+   long flags, offset;
+   zval *object = getThis();
+   text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
+
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, l, offset) == 
FAILURE) {
+   return;
+   }
+
+   /*
+* ReverseTextIterator will behave the same as the normal one.
+*/
+   RETURN_BOOL(iter_ops[intern-type]-isBoundary(intern, offset, flags 
TSRMLS_CC));
+}
+
 static zend_function_entry text_iterator_funcs[] = {
 
PHP_ME(TextIterator, __construct, NULL, ZEND_ACC_PUBLIC)
@@ -794,6 +878,7 @@
PHP_ME(TextIterator, last,NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, following,   NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, preceding,   NULL, ZEND_ACC_PUBLIC)
+   PHP_ME(TextIterator, isBoundary,  NULL, ZEND_ACC_PUBLIC)
 
PHP_MALIAS(TextIterator, first, rewind, NULL, ZEND_ACC_PUBLIC)
  

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-07-07 Thread Andrei Zmievski
andrei  Fri Jul  7 22:52:26 2006 UTC

  Modified files:  
/php-src/ext/unicodeunicode_iterators.c 
  Log:
  Implement isBoundary() for code point iterator.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.32&r2=1.33&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.32 
php-src/ext/unicode/unicode_iterators.c:1.33
--- php-src/ext/unicode/unicode_iterators.c:1.32Fri Jul  7 22:34:46 2006
+++ php-src/ext/unicode/unicode_iterators.c Fri Jul  7 22:52:26 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.32 2006/07/07 22:34:46 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.33 2006/07/07 22:52:26 andrei Exp $ */
 
 /*
  * TODO
@@ -176,6 +176,51 @@
object-u.cp.index  = 0;
 }
 
+static void text_iter_cp_following(text_iter_obj *object, int32_t offset, long 
flags TSRMLS_DC)
+{
+}
+
+static zend_bool text_iter_cp_isBoundary(text_iter_obj *object, int32_t 
offset, long flags TSRMLS_DC)
+{
+   int32_t k;
+
+   if (offset  0) {
+   offset = 0;
+   }
+
+   /*
+* On invalid iterator we always want to start looking for the code unit
+* offset from the beginning of the string.
+*/
+   if (object-u.cp.cp_offset == UBRK_DONE) {
+   object-u.cp.cp_offset  = 0;
+   object-u.cp.offset = 0;
+   }
+
+   /*
+* Try to locate the code unit position relative to the last known 
codepoint
+* offset.
+*/
+   k = object-u.cp.offset;
+   if (offset  object-u.cp.cp_offset) {
+   U16_FWD_N(object-text, k, object-text_len, offset - 
object-u.cp.cp_offset);
+   } else {
+   U16_BACK_N(object-text, 0, k, object-u.cp.cp_offset - offset);
+   }
+
+   if (k == object-text_len) {
+   object-u.cp.cp_offset += u_countChar32(object-text + 
object-u.cp.offset, k - object-u.cp.offset);
+   } else {
+   object-u.cp.cp_offset = offset;
+   }
+   object-u.cp.offset = k;
+
+   /*
+* Every codepoint is a boundary.
+*/
+   return TRUE;
+}
+
 static text_iter_ops text_iter_cp_ops = {
text_iter_cp_valid,
text_iter_cp_current,
@@ -183,6 +228,8 @@
text_iter_cp_offset,
text_iter_cp_next,
text_iter_cp_rewind,
+   text_iter_cp_following,
+   text_iter_cp_isBoundary,
 };
 
 /* Combining sequence ops */

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-06-29 Thread Andrei Zmievski
andrei  Thu Jun 29 12:32:01 2006 UTC

  Modified files:  
/php-src/ext/unicodeunicode_iterators.c 
  Log:
  Try to make combining sequences work. Not entirely successful.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.29&r2=1.30&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.29 
php-src/ext/unicode/unicode_iterators.c:1.30
--- php-src/ext/unicode/unicode_iterators.c:1.29Wed Jun 28 15:28:55 2006
+++ php-src/ext/unicode/unicode_iterators.c Thu Jun 29 12:32:00 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.29 2006/06/28 15:28:55 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.30 2006/06/29 12:32:00 andrei Exp $ */
 
 /*
  * TODO
@@ -187,21 +187,30 @@
 
 static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   if (flags  ITER_REVERSE) {
-   return (object-u.cs.end  0);
-   } else {
-   return (object-u.cs.end = object-text_len);
-   }
+   return (object-u.cs.start != UBRK_DONE);
 }
 
 static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   uint32_t length = object-u.cs.end - object-u.cs.start;
-   if (length+1  object-current_alloc) {
-   object-current_alloc = length+1;
-   Z_USTRVAL_P(object-current) = 
eurealloc(Z_USTRVAL_P(object-current), object-current_alloc);
+   uint32_t length;
+   UChar *start;
+
+   if (object-u.cs.start == UBRK_DONE || object-u.cs.end == UBRK_DONE) {
+   length = 0;
+   } else {
+   if (flags  ITER_REVERSE) {
+   start = object-text + object-u.cs.end;
+   } else {
+   start = object-text + object-u.cs.start;
+   }
+   length = abs(object-u.cs.end - object-u.cs.start);
+   if (length+1  object-current_alloc) {
+   object-current_alloc = length+1;
+   Z_USTRVAL_P(object-current) = 
eurealloc(Z_USTRVAL_P(object-current), object-current_alloc);
+   }
+   u_memcpy(Z_USTRVAL_P(object-current), start, length);
}
-   u_memcpy(Z_USTRVAL_P(object-current), object-text + 
object-u.cs.start, length);
+   
Z_USTRVAL_P(object-current)[length] = 0;
Z_USTRLEN_P(object-current) = length;
 }
@@ -213,7 +222,11 @@
 
 static int text_iter_cs_offset(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   return object-u.cs.start_cp_offset;
+   if (flags  ITER_REVERSE) {
+   return object-u.cs.end_cp_offset;
+   } else {
+   return object-u.cs.start_cp_offset;
+   }
 }
 
 static void text_iter_cs_next(text_iter_obj* object, long flags TSRMLS_DC)
@@ -221,21 +234,31 @@
UChar32 cp;
int32_t tmp, tmp2;
 
-   if (text_iter_cs_valid(object, flags TSRMLS_CC)) {
-   if (flags  ITER_REVERSE) {
-   object-u.cs.end = object-u.cs.start;
-   object-u.cs.end_cp_offset = 
object-u.cs.start_cp_offset;
-   U16_PREV(object-text, 0, object-u.cs.start, cp);
-   object-u.cs.start_cp_offset--;
+   if (object-u.cs.start == UBRK_DONE) {
+   return;
+   }
+
+   object-u.cs.start = object-u.cs.end;
+   object-u.cs.start_cp_offset = object-u.cs.end_cp_offset;
+   if (flags  ITER_REVERSE) {
+   if (object-u.cs.end == 0) {
+   object-u.cs.end = UBRK_DONE;
+   object-u.cs.end_cp_offset = UBRK_DONE;
+   } else {
+   U16_PREV(object-text, 0, object-u.cs.end, cp);
+   object-u.cs.end_cp_offset--;
if (u_getCombiningClass(cp) != 0) {
do {
-   U16_PREV(object-text, 0, 
object-u.cs.start, cp);
-   object-u.cs.start_cp_offset--;
-   } while (object-u.cs.start  0  
u_getCombiningClass(cp) != 0);
+   U16_PREV(object-text, 0, 
object-u.cs.end, cp);
+   object-u.cs.end_cp_offset--;
+   } while (object-u.cs.end  0  
u_getCombiningClass(cp) != 0);
}
+   }
+   } else {
+   if (object-u.cs.end == object-text_len) {
+   object-u.cs.end = UBRK_DONE;
+   object-u.cs.end_cp_offset = UBRK_DONE;
} else {
-   object-u.cs.start = object-u.cs.end;
-   object-u.cs.start_cp_offset = 
object-u.cs.end_cp_offset;
U16_NEXT(object-text, object-u.cs.end, 
object-text_len, cp);

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-06-28 Thread Andrei Zmievski
andrei  Wed Jun 28 14:44:36 2006 UTC

  Modified files:  
/php-src/ext/unicodeunicode_iterators.c 
  Log:
  Use object's copied text (fixes some bug somehow).
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.27&r2=1.28&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.27 
php-src/ext/unicode/unicode_iterators.c:1.28
--- php-src/ext/unicode/unicode_iterators.c:1.27Wed Jun 28 14:12:14 2006
+++ php-src/ext/unicode/unicode_iterators.c Wed Jun 28 14:44:36 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.27 2006/06/28 14:12:14 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.28 2006/06/28 14:44:36 andrei Exp $ */
 
 /*
  * TODO
@@ -563,8 +563,8 @@
UErrorCode status = U_ZERO_ERROR;
UErrorCode status2 = U_ZERO_ERROR;
locale = locale ? locale : UG(default_locale);
-   intern-u.brk.iter = ubrk_open(brk_type_map[intern-type - 
ITER_CHARACTER], locale, text, text_len, status);
-   intern-u.brk.n_iter = ubrk_open(brk_type_map[intern-type - 
ITER_CHARACTER], locale, text, text_len, status);
+   intern-u.brk.iter = ubrk_open(brk_type_map[intern-type - 
ITER_CHARACTER], locale, intern-text, intern-text_len, status);
+   intern-u.brk.n_iter = ubrk_open(brk_type_map[intern-type - 
ITER_CHARACTER], locale, intern-text, intern-text_len, status);
if (!U_SUCCESS(status) || !U_SUCCESS(status2)) {
php_error(E_RECOVERABLE_ERROR, Could not create 
UBreakIterator for '%s' locale: %s, locale, u_errorName(status));
return;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-06-28 Thread Andrei Zmievski
andrei  Wed Jun 28 15:28:55 2006 UTC

  Modified files:  
/php-src/ext/unicodeunicode_iterators.c 
  Log:
  Fix codepoint iterators
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.28&r2=1.29&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.28 
php-src/ext/unicode/unicode_iterators.c:1.29
--- php-src/ext/unicode/unicode_iterators.c:1.28Wed Jun 28 14:44:36 2006
+++ php-src/ext/unicode/unicode_iterators.c Wed Jun 28 15:28:55 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.28 2006/06/28 14:44:36 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.29 2006/06/28 15:28:55 andrei Exp $ */
 
 /*
  * TODO
@@ -105,11 +105,7 @@
 
 static int text_iter_cp_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   if (flags  ITER_REVERSE) {
-   return (object-u.cp.offset  0);
-   } else {
-   return (object-u.cp.offset  object-text_len);
-   }
+   return (object-u.cp.offset != UBRK_DONE);
 }
 
 static void text_iter_cp_current(text_iter_obj* object, long flags TSRMLS_DC)
@@ -117,14 +113,18 @@
UChar32 cp = 0;
int32_t tmp, buf_len = 0;
 
-   if (text_iter_cp_valid(object, flags TSRMLS_CC)) {
-   tmp = object-u.cp.offset;
-   if (flags  ITER_REVERSE) {
+   tmp = object-u.cp.offset;
+
+   if (flags  ITER_REVERSE) {
+   if (object-u.cp.offset != UBRK_DONE  object-u.cp.offset  
0) {
U16_PREV(object-text, 0, tmp, cp);
-   } else {
+   buf_len = zend_codepoint_to_uchar(cp, 
Z_USTRVAL_P(object-current));
+   }
+   } else {
+   if (object-u.cp.offset != UBRK_DONE  object-u.cp.offset  
object-text_len) {
U16_NEXT(object-text, tmp, object-text_len, cp);
+   buf_len = zend_codepoint_to_uchar(cp, 
Z_USTRVAL_P(object-current));
}
-   buf_len = zend_codepoint_to_uchar(cp, 
Z_USTRVAL_P(object-current));
}
Z_USTRVAL_P(object-current)[buf_len] = 0;
Z_USTRLEN_P(object-current) = buf_len;
@@ -145,10 +145,18 @@
if (text_iter_cp_valid(object, flags TSRMLS_CC)) {
if (flags  ITER_REVERSE) {
U16_BACK_1(object-text, 0, object-u.cp.offset);
-   object-u.cp.cp_offset--;
+   if (object-u.cp.offset = object-text_len) {
+   object-u.cp.cp_offset--;
+   } else {
+   object-u.cp.offset = object-u.cp.cp_offset = 
UBRK_DONE;
+   }
} else {
U16_FWD_1(object-text, object-u.cp.offset, 
object-text_len);
-   object-u.cp.cp_offset++;
+   if (object-u.cp.offset = object-text_len) {
+   object-u.cp.cp_offset++;
+   } else {
+   object-u.cp.offset = object-u.cp.cp_offset = 
UBRK_DONE;
+   }
}
object-u.cp.index++;
}
@@ -275,7 +283,7 @@
 
 /* UBreakIterator Character Ops */
 
-static int text_iter_brk_char_valid(text_iter_obj* object, long flags 
TSRMLS_DC)
+static int text_iter_brk_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
if (flags  ITER_REVERSE) {
return (object-u.brk.bound != UBRK_DONE);
@@ -284,7 +292,7 @@
}
 }
 
-static void text_iter_brk_char_current(text_iter_obj* object, long flags 
TSRMLS_DC)
+static void text_iter_brk_current(text_iter_obj* object, long flags TSRMLS_DC)
 {
UChar *start;
int32_t length = -1;
@@ -319,17 +327,17 @@
Z_USTRLEN_P(object-current) = length;
 }
 
-static int text_iter_brk_char_key(text_iter_obj* object, long flags TSRMLS_DC)
+static int text_iter_brk_key(text_iter_obj* object, long flags TSRMLS_DC)
 {
return object-u.brk.index;
 }
 
-static int text_iter_brk_char_offset(text_iter_obj* object, long flags 
TSRMLS_DC)
+static int text_iter_brk_offset(text_iter_obj* object, long flags TSRMLS_DC)
 {
return object-u.brk.cp_offset;
 }
 
-static void text_iter_brk_char_next(text_iter_obj* object, long flags 
TSRMLS_DC)
+static void text_iter_brk_next(text_iter_obj* object, long flags TSRMLS_DC)
 {
int32_t tmp = object-u.brk.bound;
 
@@ -365,7 +373,7 @@
object-u.brk.index++;
 }
 
-static void text_iter_brk_char_rewind(text_iter_obj *object, long flags 
TSRMLS_DC)
+static void text_iter_brk_rewind(text_iter_obj *object, long flags TSRMLS_DC)
 {
if (flags  ITER_REVERSE) {
object-u.brk.bound = ubrk_last(object-u.brk.iter);
@@ -380,12 +388,12 @@
 }
 
 static text_iter_ops text_iter_brk_ops = {
-   

Re: [PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-06-25 Thread Derick Rethans
On Sat, 24 Jun 2006, Andrei Zmievski wrote:

> andrei  Sat Jun 24 18:18:38 2006 UTC
> 
>   Modified files:
>     /php-src/ext/unicode  unicode_iterators.c
>   Log:
>   - Remove support for code units in TextIterator (people shouldn't be
>     examining individual code units anyway)

Heh, originally this was added because there is no other way to get to 
them in a nice way... this was the whole point of my comments about it 
earlier and then we sorta agreed to put it in here... What is the point 
of removing it?

regards,
Derick
-- 
Derick Rethans
http://derickrethans.nl | http://ez.no | http://xdebug.org

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



Re: [PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-06-25 Thread Andrei Zmievski
Why do you want to get to code units in a nice way? What do you plan  
to do with them? I think there's a very low chance someone would  
want to work on that level, and if they want to, they should convert  
the string to UTF-16 binary form and go from there.


-Andrei


On Jun 25, 2006, at 4:58 AM, Derick Rethans wrote:


On Sat, 24 Jun 2006, Andrei Zmievski wrote:


andrei  Sat Jun 24 18:18:38 2006 UTC

  Modified files:
/php-src/ext/unicodeunicode_iterators.c
  Log:
  - Remove support for code units in TextIterator (people  
shouldn't be

examining individual code units anyway)


Heh, originally this was added because there is no other way to get to
them in a nice way... this was the whole point of my comments about it
earlier and then we sorta agreed to put it in here... What is the  
point

of removing it?

regards,
Derick
--
Derick Rethans
http://derickrethans.nl | http://ez.no | http://xdebug.org


--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-06-24 Thread Andrei Zmievski
andrei  Sat Jun 24 18:18:38 2006 UTC

  Modified files:  
/php-src/ext/unicodeunicode_iterators.c 
  Log:
  - Remove support for code units in TextIterator (people shouldn't be
examining individual code units anyway)
  - Add offset() method.
  - Add optional locale parameter to the constructor.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.24&r2=1.25&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.24 
php-src/ext/unicode/unicode_iterators.c:1.25
--- php-src/ext/unicode/unicode_iterators.c:1.24Fri Mar 24 21:06:36 2006
+++ php-src/ext/unicode/unicode_iterators.c Sat Jun 24 18:18:38 2006
@@ -14,14 +14,13 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.24 2006/03/24 21:06:36 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.25 2006/06/24 18:18:38 andrei Exp $ */
 
 /*
  * TODO
  *
  * - optimize current() to pass return_value to the handler so that it fills it
  *   in directly instead of creating a new zval
- * - return code units as binary strings? integers? or leave as unicode 
strings?
  * - implement Countable (or count_elements handler) and Seekable interfaces
  */
 
@@ -31,7 +30,6 @@
 #include unicode/ubrk.h
 
 typedef enum {
-   ITER_CODE_UNIT,
ITER_CODE_POINT,
ITER_COMB_SEQUENCE,
ITER_CHARACTER,
@@ -53,23 +51,21 @@
size_t  current_alloc;
longflags;
union {
+   int32_t start;
struct {
+   int32_t start;
int32_t index;
-   int32_t offset;
} cp;
struct {
-   int32_t index;
-   } cu;
-   struct {
-   int32_t index;
int32_t start;
int32_t end;
+   int32_t index;
} cs;
struct {
-   UBreakIterator *iter;
-   int32_t index;
int32_t start;
int32_t end;
+   int32_t index;
+   UBreakIterator *iter;
} brk;
} u;
zend_object_iterator iter;
@@ -99,71 +95,14 @@
 PHPAPI zend_class_entry* text_iterator_ce;
 PHPAPI zend_class_entry* rev_text_iterator_ce;
 
-/* Code unit ops */
-
-static int text_iter_cu_valid(text_iter_obj* object TSRMLS_DC)
-{
-   if (object-flags  ITER_REVERSE) {
-   return (object-u.cu.index = 0);
-   } else {
-   return (object-u.cu.index  object-text_len);
-   }
-}
-
-static void text_iter_cu_current(text_iter_obj* object TSRMLS_DC)
-{
-   u_memcpy(Z_USTRVAL_P(object-current), object-text + 
object-u.cu.index, 1);
-   Z_USTRVAL_P(object-current)[1] = 0;
-   Z_USTRLEN_P(object-current) = 1;
-}
-
-static int text_iter_cu_key(text_iter_obj* object TSRMLS_DC)
-{
-   if (object-flags  ITER_REVERSE) {
-   return object-text_len - object-u.cu.index - 1;
-   } else {
-   return object-u.cu.index;
-   }
-}
-
-static void text_iter_cu_next(text_iter_obj* object TSRMLS_DC)
-{
-   if (object-flags  ITER_REVERSE) {
-   if (object-u.cu.index = 0) {
-   object-u.cu.index--;
-   }
-   } else {
-   if (object-u.cu.index  object-text_len) {
-   object-u.cu.index++;
-   }
-   }
-}
-
-static void text_iter_cu_rewind(text_iter_obj *object TSRMLS_DC)
-{
-   if (object-flags  ITER_REVERSE) {
-   object-u.cu.index = object-text_len-1;
-   } else {
-   object-u.cu.index = 0;
-   }
-}
-
-static text_iter_ops text_iter_cu_ops = {
-   text_iter_cu_valid,
-   text_iter_cu_current,
-   text_iter_cu_key,
-   text_iter_cu_next,
-   text_iter_cu_rewind,
-};
-
 /* Code point ops */
 
 static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC)
 {
if (object-flags  ITER_REVERSE) {
-   return (object-u.cp.offset  0);
+   return (object-u.cp.start  0);
} else {
-   return (object-u.cp.offset  object-text_len);
+   return (object-u.cp.start  object-text_len);
}
 }
 
@@ -172,7 +111,7 @@
UChar32 cp;
int32_t tmp, buf_len;
 
-   tmp = object-u.cp.offset;
+   tmp = object-u.cp.start;
if (object-flags  ITER_REVERSE) {
U16_PREV(object-text, 0, tmp, cp);
} else {
@@ -191,9 +130,9 @@
 static void text_iter_cp_next(text_iter_obj* object TSRMLS_DC)
 {
if (object-flags  ITER_REVERSE) {
-   U16_BACK_1(object-text, 0, object-u.cp.offset);
+   U16_BACK_1(object-text, 0, object-u.cp.start);
} else {
-

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-06-24 Thread Andrei Zmievski
andrei  Sat Jun 24 21:57:14 2006 UTC

  Modified files:  
/php-src/ext/unicodeunicode_iterators.c 
  Log:
  A lot of work on making TextIterator support proper codepoint-level
  offsets and making it more robust in general.
  
  http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.25&r2=1.26&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.25 
php-src/ext/unicode/unicode_iterators.c:1.26
--- php-src/ext/unicode/unicode_iterators.c:1.25Sat Jun 24 18:18:38 2006
+++ php-src/ext/unicode/unicode_iterators.c Sat Jun 24 21:57:14 2006
@@ -14,11 +14,12 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.25 2006/06/24 18:18:38 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.26 2006/06/24 21:57:14 andrei Exp $ */
 
 /*
  * TODO
  *
+ * - test with empty and 1 character strings
  * - optimize current() to pass return_value to the handler so that it fills it
  *   in directly instead of creating a new zval
  * - implement Countable (or count_elements handler) and Seekable interfaces
@@ -51,20 +52,23 @@
size_t  current_alloc;
longflags;
union {
-   int32_t start;
struct {
-   int32_t start;
+   int32_t offset;
+   int32_t cp_offset;
int32_t index;
} cp;
struct {
int32_t start;
int32_t end;
int32_t index;
+   int32_t start_cp_offset;
+   int32_t end_cp_offset;
} cs;
struct {
int32_t start;
int32_t end;
int32_t index;
+   int32_t cp_offset;
UBreakIterator *iter;
} brk;
} u;
@@ -77,11 +81,12 @@
 }
 
 typedef struct {
-   int  (*valid)  (text_iter_obj* object TSRMLS_DC);
-   void (*current)(text_iter_obj* object TSRMLS_DC);
-   int  (*key)(text_iter_obj* object TSRMLS_DC);
-   void (*next)   (text_iter_obj* object TSRMLS_DC);
-   void (*rewind) (text_iter_obj* object TSRMLS_DC);
+   int  (*valid)  (text_iter_obj* object, long flags TSRMLS_DC);
+   void (*current)(text_iter_obj* object, long flags TSRMLS_DC);
+   int  (*key)(text_iter_obj* object, long flags TSRMLS_DC);
+   int  (*offset) (text_iter_obj* object, long flags TSRMLS_DC);
+   void (*next)   (text_iter_obj* object, long flags TSRMLS_DC);
+   void (*rewind) (text_iter_obj* object, long flags TSRMLS_DC);
 } text_iter_ops;
 
 enum UBreakIteratorType brk_type_map[] = {
@@ -97,52 +102,65 @@
 
 /* Code point ops */
 
-static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC)
+static int text_iter_cp_valid(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   if (object-flags  ITER_REVERSE) {
-   return (object-u.cp.start  0);
+   if (flags  ITER_REVERSE) {
+   return (object-u.cp.offset  0);
} else {
-   return (object-u.cp.start  object-text_len);
+   return (object-u.cp.offset  object-text_len);
}
 }
 
-static void text_iter_cp_current(text_iter_obj* object TSRMLS_DC)
+static void text_iter_cp_current(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   UChar32 cp;
-   int32_t tmp, buf_len;
+   UChar32 cp = 0;
+   int32_t tmp, buf_len = 0;
 
-   tmp = object-u.cp.start;
-   if (object-flags  ITER_REVERSE) {
-   U16_PREV(object-text, 0, tmp, cp);
-   } else {
-   U16_NEXT(object-text, tmp, object-text_len, cp);
+   if (text_iter_cp_valid(object, flags TSRMLS_CC)) {
+   tmp = object-u.cp.offset;
+   if (flags  ITER_REVERSE) {
+   U16_PREV(object-text, 0, tmp, cp);
+   } else {
+   U16_NEXT(object-text, tmp, object-text_len, cp);
+   }
+   buf_len = zend_codepoint_to_uchar(cp, 
Z_USTRVAL_P(object-current));
}
-   buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current));
Z_USTRVAL_P(object-current)[buf_len] = 0;
Z_USTRLEN_P(object-current) = buf_len;
 }
 
-static int text_iter_cp_key(text_iter_obj* object TSRMLS_DC)
+static int text_iter_cp_key(text_iter_obj* object, long flags TSRMLS_DC)
 {
return object-u.cp.index;
 }
 
-static void text_iter_cp_next(text_iter_obj* object TSRMLS_DC)
+static int text_iter_cp_offset(text_iter_obj* object, long flags TSRMLS_DC)
 {
-   if (object-flags  ITER_REVERSE) {
-   U16_BACK_1(object-text, 0, object-u.cp.start);
-   } else {
-   U16_FWD_1(object-text, object-u.cp.start, object-text_len);
+   

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-03-24 Thread Andrei Zmievski
andrei  Fri Mar 24 21:06:36 2006 UTC

  Modified files:  
/php-src/ext/unicodeunicode_iterators.c 
  Log:
  Use intern-type for break iterator.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.23&r2=1.24&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.23 
php-src/ext/unicode/unicode_iterators.c:1.24
--- php-src/ext/unicode/unicode_iterators.c:1.23Sun Feb 26 11:57:14 2006
+++ php-src/ext/unicode/unicode_iterators.c Fri Mar 24 21:06:36 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.23 2006/02/26 11:57:14 dmitry Exp $ */
+/* $Id: unicode_iterators.c,v 1.24 2006/03/24 21:06:36 andrei Exp $ */
 
 /*
  * TODO
@@ -538,9 +538,9 @@
intern-flags |= ITER_REVERSE;
}
 
-   if (ti_type = ITER_CHARACTER  ti_type  ITER_TYPE_LAST) {
+   if (intern-type = ITER_CHARACTER  intern-type  ITER_TYPE_LAST) {
UErrorCode status = U_ZERO_ERROR;
-   intern-u.brk.iter = ubrk_open(brk_type_map[ti_type - 
ITER_CHARACTER], UG(default_locale), text, text_len, status);
+   intern-u.brk.iter = ubrk_open(brk_type_map[intern-type - 
ITER_CHARACTER], UG(default_locale), text, text_len, status);
if (!U_SUCCESS(status)) {
php_error(E_RECOVERABLE_ERROR, Could not create 
UBreakIterator: %s, u_errorName(status));
return;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-17 Thread Marcus Boerger
helly   Fri Feb 17 08:24:56 2006 UTC

  Modified files:  
/php-src/ext/unicodeunicode_iterators.c 
  Log:
  - Change to offsetof as suggested by Clayton
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.21&r2=1.22&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.21 
php-src/ext/unicode/unicode_iterators.c:1.22
--- php-src/ext/unicode/unicode_iterators.c:1.21Wed Feb 15 21:34:21 2006
+++ php-src/ext/unicode/unicode_iterators.c Fri Feb 17 08:24:56 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.21 2006/02/15 21:34:21 helly Exp $ */
+/* $Id: unicode_iterators.c,v 1.22 2006/02/17 08:24:56 helly Exp $ */
 
 /*
  * TODO
@@ -77,10 +77,7 @@
 
 static inline text_iter_obj* text_iter_to_obj(zend_object_iterator *iter)
 {
-   static text_iter_obj adr;
-   static int ofs = (char*)adr.iter - (char*)adr;
-
-   return (text_iter_obj *)((char*)iter - ofs);
+   return (text_iter_obj *)((char*)iter - offsetof(text_iter_obj, iter));
 }
 
 typedef struct {

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c /ext/unicode/tests .cvsignore iterator_001.phpt

2006-02-15 Thread Marcus Boerger
helly   Wed Feb 15 21:34:21 2006 UTC

  Added files: 
/php-src/ext/unicode/tests  .cvsignore iterator_001.phpt 

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  - Little speedup + first test
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.20&r2=1.21&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.20 
php-src/ext/unicode/unicode_iterators.c:1.21
--- php-src/ext/unicode/unicode_iterators.c:1.20Mon Feb 13 10:23:58 2006
+++ php-src/ext/unicode/unicode_iterators.c Wed Feb 15 21:34:21 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.20 2006/02/13 10:23:58 dmitry Exp $ */
+/* $Id: unicode_iterators.c,v 1.21 2006/02/15 21:34:21 helly Exp $ */
 
 /*
  * TODO
@@ -72,12 +72,16 @@
int32_t end;
} brk;
} u;
+   zend_object_iterator iter;
 } text_iter_obj;
 
-typedef struct {
-   zend_object_iterator intern;
-   text_iter_obj*   object;
-} text_iter_it;
+static inline text_iter_obj* text_iter_to_obj(zend_object_iterator *iter)
+{
+   static text_iter_obj adr;
+   static int ofs = (char*)adr.iter - (char*)adr;
+
+   return (text_iter_obj *)((char*)iter - ofs);
+}
 
 typedef struct {
int  (*valid)  (text_iter_obj* object TSRMLS_DC);
@@ -389,54 +393,51 @@
 
 static void text_iter_dtor(zend_object_iterator* iter TSRMLS_DC)
 {
-   text_iter_it* iterator = (text_iter_it *) iter;
-   zval_ptr_dtor((zval **)iterator-intern.data);
-   efree(iterator);
+   text_iter_obj* obj = text_iter_to_obj(iter);
+   zval *object = obj-iter.data;
+
+   zval_ptr_dtor(object);
 }
 
 static int text_iter_valid(zend_object_iterator* iter TSRMLS_DC)
 {
-   text_iter_it*  iterator = (text_iter_it *) iter;
-   text_iter_obj* object   = iterator-object;
+   text_iter_obj* obj = text_iter_to_obj(iter);
 
-   if (iter_ops[object-type]-valid(object TSRMLS_CC))
+   if (iter_ops[obj-type]-valid(obj TSRMLS_CC)) {
return SUCCESS;
-   else
+   } else {
return FAILURE;
+   }
 }
 
 static void text_iter_get_current_data(zend_object_iterator* iter, zval*** 
data TSRMLS_DC)
 {
-   text_iter_it*  iterator = (text_iter_it *) iter;
-   text_iter_obj* object   = iterator-object;
+   text_iter_obj* obj = text_iter_to_obj(iter);
 
-   iter_ops[object-type]-current(object TSRMLS_CC);
-   *data = object-current;
+   iter_ops[obj-type]-current(obj TSRMLS_CC);
+   *data = obj-current;
 }
 
 static int text_iter_get_current_key(zend_object_iterator* iter, char 
**str_key, uint *str_key_len, ulong *int_key TSRMLS_DC)
 {
-   text_iter_it*  iterator = (text_iter_it *) iter;
-   text_iter_obj* object   = iterator-object;
+   text_iter_obj* obj = text_iter_to_obj(iter);
 
-   *int_key = iter_ops[object-type]-key(object TSRMLS_CC);
+   *int_key = iter_ops[obj-type]-key(obj TSRMLS_CC);
return HASH_KEY_IS_LONG;
 }
 
 static void text_iter_move_forward(zend_object_iterator* iter TSRMLS_DC)
 {
-   text_iter_it*  iterator = (text_iter_it *) iter;
-   text_iter_obj* object   = iterator-object;
+   text_iter_obj* obj = text_iter_to_obj(iter);
 
-   iter_ops[object-type]-next(object TSRMLS_CC);
+   iter_ops[obj-type]-next(obj TSRMLS_CC);
 }
 
 static void text_iter_rewind(zend_object_iterator* iter TSRMLS_DC)
 {
-   text_iter_it*  iterator = (text_iter_it *) iter;
-   text_iter_obj* object   = iterator-object;
+   text_iter_obj* obj = text_iter_to_obj(iter);
 
-   iter_ops[object-type]-rewind(object TSRMLS_CC);
+   iter_ops[obj-type]-rewind(obj TSRMLS_CC);
 }
 
 zend_object_iterator_funcs text_iter_funcs = {
@@ -450,21 +451,18 @@
 
 static zend_object_iterator* text_iter_get_iterator(zend_class_entry *ce, zval 
*object, int by_ref TSRMLS_DC)
 {
-   text_iter_it*   iterator;
text_iter_obj*  iter_object;
 
if (by_ref) {
zend_error(E_ERROR, An iterator cannot be used with foreach by 
reference);
}
-   iterator= emalloc(sizeof(text_iter_it));
iter_object = (text_iter_obj *) zend_object_store_get_object(object 
TSRMLS_CC);
 
ZVAL_ADDREF(object);
-   iterator-intern.data  = (void *) object;
-   iterator-intern.funcs = text_iter_funcs;
-   iterator-object   = iter_object;
+   iter_object-iter.data  = (void *) object;
+   iter_object-iter.funcs = text_iter_funcs;
 
-   return (zend_object_iterator *) iterator;
+   return (zend_object_iterator *) iter_object-iter;
 }
 
 static void text_iterator_free_storage(void *object TSRMLS_DC)

http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/tests/.cvsignore?view=markup&rev=1.1
Index: 

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-10 Thread Andrei Zmievski
andrei  Sat Feb 11 00:16:43 2006 UTC

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  Implement character/word/line/sentence iterators and the reverse
  counterparts.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.18&r2=1.19&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.18 
php-src/ext/unicode/unicode_iterators.c:1.19
--- php-src/ext/unicode/unicode_iterators.c:1.18Fri Feb 10 00:23:29 2006
+++ php-src/ext/unicode/unicode_iterators.c Sat Feb 11 00:16:43 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.18 2006/02/10 00:23:29 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.19 2006/02/11 00:16:43 andrei Exp $ */
 
 /*
  * TODO
@@ -28,11 +28,16 @@
 #include php.h
 #include zend_interfaces.h
 #include zend_exceptions.h
+#include unicode/ubrk.h
 
 typedef enum {
ITER_CODE_UNIT,
ITER_CODE_POINT,
ITER_COMB_SEQUENCE,
+   ITER_CHARACTER,
+   ITER_WORD,
+   ITER_LINE,
+   ITER_SENTENCE,
ITER_TYPE_LAST,
 } text_iter_type;
 
@@ -60,6 +65,12 @@
int32_t start;
int32_t end;
} cs;
+   struct {
+   UBreakIterator *iter;
+   int32_t index;
+   int32_t start;
+   int32_t end;
+   } brk;
} u;
 } text_iter_obj;
 
@@ -76,6 +87,13 @@
void (*rewind) (text_iter_obj* object TSRMLS_DC);
 } text_iter_ops;
 
+enum UBreakIteratorType brk_type_map[] = {
+   UBRK_CHARACTER,
+   UBRK_WORD,
+   UBRK_LINE,
+   UBRK_SENTENCE,
+};
+
 PHPAPI zend_class_entry* text_iterator_aggregate_ce;
 PHPAPI zend_class_entry* text_iterator_ce;
 PHPAPI zend_class_entry* rev_text_iterator_ce;
@@ -276,12 +294,95 @@
 };
 
 
+/* UBreakIterator Character Ops */
+
+static int text_iter_brk_char_valid(text_iter_obj* object TSRMLS_DC)
+{
+   if (object-flags  ITER_REVERSE) {
+   return (object-u.brk.start != UBRK_DONE);
+   } else {
+   return (object-u.brk.end != UBRK_DONE);
+   }
+}
+
+static void text_iter_brk_char_current(text_iter_obj* object TSRMLS_DC)
+{
+   uint32_t length;
+   int32_t start = object-u.brk.start;
+   int32_t end = object-u.brk.end;
+
+   if (object-flags  ITER_REVERSE) {
+   if (end == UBRK_DONE) {
+   end = object-text_len;
+   }
+   } else {
+   if (start == UBRK_DONE) {
+   start = 0;
+   }
+   }
+   length = end - start;
+   if (length  object-current_alloc-1) {
+   object-current_alloc = length+1;
+   Z_USTRVAL_P(object-current) = 
eurealloc(Z_USTRVAL_P(object-current), object-current_alloc);
+   }
+   u_memcpy(Z_USTRVAL_P(object-current), object-text + start, length);
+   Z_USTRVAL_P(object-current)[length] = 0;
+   Z_USTRLEN_P(object-current) = length;
+}
+
+static int text_iter_brk_char_key(text_iter_obj* object TSRMLS_DC)
+{
+   return object-u.brk.index;
+}
+
+static void text_iter_brk_char_next(text_iter_obj* object TSRMLS_DC)
+{
+   if (object-flags  ITER_REVERSE) {
+   if (object-u.brk.start != UBRK_DONE) {
+   object-u.brk.end = object-u.brk.start;
+   object-u.brk.start = ubrk_previous(object-u.brk.iter);
+   object-u.brk.index++;
+   }
+   } else {
+   if (object-u.brk.end != UBRK_DONE) {
+   object-u.brk.start = object-u.brk.end;
+   object-u.brk.end = ubrk_next(object-u.brk.iter);
+   object-u.brk.index++;
+   }
+   }
+}
+
+static void text_iter_brk_char_rewind(text_iter_obj *object TSRMLS_DC)
+{
+   if (object-flags  ITER_REVERSE) {
+   object-u.brk.end   = ubrk_last(object-u.brk.iter);
+   object-u.brk.start = ubrk_previous(object-u.brk.iter);
+   } else {
+   object-u.brk.start = ubrk_first(object-u.brk.iter);
+   object-u.brk.end   = ubrk_next(object-u.brk.iter);
+   }
+   object-u.brk.index = 0;
+}
+
+static text_iter_ops text_iter_brk_ops = {
+   text_iter_brk_char_valid,
+   text_iter_brk_char_current,
+   text_iter_brk_char_key,
+   text_iter_brk_char_next,
+   text_iter_brk_char_rewind,
+};
+
+
 /* Ops array */
 
 static text_iter_ops* iter_ops[] = {
text_iter_cu_ops,
text_iter_cp_ops,
text_iter_cs_ops,
+   text_iter_brk_ops,
+   text_iter_brk_ops,
+   text_iter_brk_ops,
+   text_iter_brk_ops,
 };
 
 /* Iterator Funcs */
@@ -376,6 +477,9 @@
if (intern-text) {
efree(intern-text);
}
+   if 

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-09 Thread Andrei Zmievski
andrei  Fri Feb 10 00:23:29 2006 UTC

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  Make ReverseTextIterator a separate class.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.17&r2=1.18&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.17 
php-src/ext/unicode/unicode_iterators.c:1.18
--- php-src/ext/unicode/unicode_iterators.c:1.17Wed Feb  8 00:16:50 2006
+++ php-src/ext/unicode/unicode_iterators.c Fri Feb 10 00:23:29 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.17 2006/02/08 00:16:50 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.18 2006/02/10 00:23:29 andrei Exp $ */
 
 /*
  * TODO
@@ -78,6 +78,7 @@
 
 PHPAPI zend_class_entry* text_iterator_aggregate_ce;
 PHPAPI zend_class_entry* text_iterator_ce;
+PHPAPI zend_class_entry* rev_text_iterator_ce;
 
 /* Code unit ops */
 
@@ -433,6 +434,10 @@
intern-flags = flags;
}
 
+   if (Z_OBJCE_P(this_ptr) == U_CLASS_ENTRY(rev_text_iterator_ce)) {
+   intern-flags |= ITER_REVERSE;
+   }
+
iter_ops[intern-type]-rewind(intern TSRMLS_CC);
 }
 
@@ -495,13 +500,19 @@
text_iterator_ce = zend_register_internal_class(ce TSRMLS_CC);
text_iterator_ce-create_object = text_iterator_new;
text_iterator_ce-get_iterator  = text_iter_get_iterator;
+   text_iterator_ce-ce_flags |= ZEND_ACC_FINAL_CLASS;
zend_class_implements(text_iterator_ce TSRMLS_CC, 1, 
zend_ce_traversable);
 
+   INIT_CLASS_ENTRY(ce, ReverseTextIterator, text_iterator_funcs);
+   rev_text_iterator_ce = zend_register_internal_class(ce TSRMLS_CC);
+   rev_text_iterator_ce-create_object = text_iterator_new;
+   rev_text_iterator_ce-get_iterator  = text_iter_get_iterator;
+   rev_text_iterator_ce-ce_flags |= ZEND_ACC_FINAL_CLASS;
+   zend_class_implements(rev_text_iterator_ce TSRMLS_CC, 1, 
zend_ce_traversable);
+
zend_declare_class_constant_long(text_iterator_ce, CODE_UNIT, 
sizeof(CODE_UNIT)-1, ITER_CODE_UNIT TSRMLS_CC);
zend_declare_class_constant_long(text_iterator_ce, CODE_POINT, 
sizeof(CODE_POINT)-1, ITER_CODE_POINT TSRMLS_CC);
zend_declare_class_constant_long(text_iterator_ce, COMB_SEQUENCE, 
sizeof(COMB_SEQUENCE)-1, ITER_COMB_SEQUENCE TSRMLS_CC);
-
-   zend_declare_class_constant_long(text_iterator_ce, REVERSE, 
sizeof(REVERSE)-1, ITER_REVERSE TSRMLS_CC);
 }
 
 /*

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-07 Thread Andrei Zmievski
andrei  Tue Feb  7 20:01:29 2006 UTC

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  Implement reverse iteration for codeunits and codepoints. Combining
  sequences are next.
  
  # This is ugly, though.
  # foreach (new TextIterator($a, TextIterator::CODE_POINT|TextIterator::REVERSE) as $k => $c) {
  #    var_dump("$k: $c");
  # }
  # Any suggestions?
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.15&r2=1.16&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.15 
php-src/ext/unicode/unicode_iterators.c:1.16
--- php-src/ext/unicode/unicode_iterators.c:1.15Tue Feb  7 00:13:54 2006
+++ php-src/ext/unicode/unicode_iterators.c Tue Feb  7 20:01:28 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.15 2006/02/07 00:13:54 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.16 2006/02/07 20:01:28 andrei Exp $ */
 
 /*
  * TODO
@@ -36,8 +36,8 @@
ITER_TYPE_LAST,
 } text_iter_type;
 
-const uint32_t ITER_REVERSE = 0x100;
-const uint32_t ITER_TYPE_MASK = 0xFF;
+static const uint32_t ITER_REVERSE = 0x100;
+static const uint32_t ITER_TYPE_MASK = 0xFF;
 
 typedef struct {
zend_object std;
@@ -46,13 +46,14 @@
text_iter_type  type;
zval*   current;
size_t  current_alloc;
+   longflags;
union {
struct {
uint32_t index;
uint32_t offset;
} cp;
struct {
-   uint32_t index;
+   int32_t index;
} cu;
struct {
uint32_t index;
@@ -82,7 +83,11 @@
 
 static int text_iter_cu_valid(text_iter_obj* object TSRMLS_DC)
 {
-   return (object-u.cu.index  object-text_len);
+   if (object-flags  ITER_REVERSE) {
+   return (object-u.cu.index = 0);
+   } else {
+   return (object-u.cu.index  object-text_len);
+   }
 }
 
 static void text_iter_cu_current(text_iter_obj* object TSRMLS_DC)
@@ -94,17 +99,33 @@
 
 static int text_iter_cu_key(text_iter_obj* object TSRMLS_DC)
 {
-   return object-u.cu.index;
+   if (object-flags  ITER_REVERSE) {
+   return object-text_len - object-u.cu.index - 1;
+   } else {
+   return object-u.cu.index;
+   }
 }
 
 static void text_iter_cu_next(text_iter_obj* object TSRMLS_DC)
 {
-   object-u.cu.index++;
+   if (object-flags  ITER_REVERSE) {
+   if (object-u.cu.index = 0) {
+   object-u.cu.index--;
+   }
+   } else {
+   if (object-u.cu.index  object-text_len) {
+   object-u.cu.index++;
+   }
+   }
 }
 
 static void text_iter_cu_rewind(text_iter_obj *object TSRMLS_DC)
 {
-   object-u.cu.index  = 0;
+   if (object-flags  ITER_REVERSE) {
+   object-u.cu.index = object-text_len-1;
+   } else {
+   object-u.cu.index = 0;
+   }
 }
 
 static text_iter_ops text_iter_cu_ops = {
@@ -119,7 +140,11 @@
 
 static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC)
 {
-   return (object-u.cp.offset  object-text_len);
+   if (object-flags  ITER_REVERSE) {
+   return (object-u.cp.offset  0);
+   } else {
+   return (object-u.cp.offset  object-text_len);
+   }
 }
 
 static void text_iter_cp_current(text_iter_obj* object TSRMLS_DC)
@@ -128,7 +153,11 @@
int32_t tmp, buf_len;
 
tmp = object-u.cp.offset;
-   U16_NEXT(object-text, tmp, object-text_len, cp);
+   if (object-flags  ITER_REVERSE) {
+   U16_PREV(object-text, 0, tmp, cp);
+   } else {
+   U16_NEXT(object-text, tmp, object-text_len, cp);
+   }
buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current));
Z_USTRVAL_P(object-current)[buf_len] = 0;
Z_USTRLEN_P(object-current) = buf_len;
@@ -141,13 +170,21 @@
 
 static void text_iter_cp_next(text_iter_obj* object TSRMLS_DC)
 {
-   U16_FWD_1(object-text, object-u.cp.offset, object-text_len);
+   if (object-flags  ITER_REVERSE) {
+   U16_BACK_1(object-text, 0, object-u.cp.offset);
+   } else {
+   U16_FWD_1(object-text, object-u.cp.offset, object-text_len);
+   }
object-u.cp.index++;
 }
 
 static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC)
 {
-   object-u.cp.offset = 0;
+   if (object-flags  ITER_REVERSE) {
+   object-u.cp.offset = object-text_len;
+   } else {
+   object-u.cp.offset = 0;
+   }
object-u.cp.index  = 0;
 }
 
@@ -371,11 +408,12 @@
intern-text_len = text_len;
if (ZEND_NUM_ARGS()  1) {
ti_type = 

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-07 Thread Andrei Zmievski
andrei  Wed Feb  8 00:16:50 2006 UTC

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  Reverse iteration for combining sequences.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.16&r2=1.17&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.16 
php-src/ext/unicode/unicode_iterators.c:1.17
--- php-src/ext/unicode/unicode_iterators.c:1.16Tue Feb  7 20:01:28 2006
+++ php-src/ext/unicode/unicode_iterators.c Wed Feb  8 00:16:50 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.16 2006/02/07 20:01:28 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.17 2006/02/08 00:16:50 andrei Exp $ */
 
 /*
  * TODO
@@ -49,16 +49,16 @@
longflags;
union {
struct {
-   uint32_t index;
-   uint32_t offset;
+   int32_t index;
+   int32_t offset;
} cp;
struct {
int32_t index;
} cu;
struct {
-   uint32_t index;
-   uint32_t start;
-   uint32_t end;
+   int32_t index;
+   int32_t start;
+   int32_t end;
} cs;
} u;
 } text_iter_obj;
@@ -200,7 +200,11 @@
 
 static int text_iter_cs_valid(text_iter_obj* object TSRMLS_DC)
 {
-   return (object-u.cs.end = object-text_len);
+   if (object-flags  ITER_REVERSE) {
+   return (object-u.cs.end  0);
+   } else {
+   return (object-u.cs.end = object-text_len);
+   }
 }
 
 static void text_iter_cs_current(text_iter_obj* object TSRMLS_DC)
@@ -223,18 +227,28 @@
 static void text_iter_cs_next(text_iter_obj* object TSRMLS_DC)
 {
UChar32 cp;
-   uint32_t end;
+   uint32_t tmp;
 
-   object-u.cs.start = object-u.cs.end;
-   U16_NEXT(object-text, object-u.cs.end, object-text_len, cp);
-   if (u_getCombiningClass(cp) == 0) {
-   end = object-u.cs.end;
-   while (end  object-text_len) {
-   U16_NEXT(object-text, end, object-text_len, cp);
-   if (u_getCombiningClass(cp) == 0) {
-   break;
-   } else {
-   object-u.cs.end = end;
+   if (object-flags  ITER_REVERSE) {
+   object-u.cs.end = object-u.cs.start;
+   U16_PREV(object-text, 0, object-u.cs.start, cp);
+   if (u_getCombiningClass(cp) != 0) {
+   do {
+   U16_PREV(object-text, 0, object-u.cs.start, 
cp);
+   } while (object-u.cs.start  0  
u_getCombiningClass(cp) != 0);
+   }
+   } else {
+   object-u.cs.start = object-u.cs.end;
+   U16_NEXT(object-text, object-u.cs.end, object-text_len, cp);
+   if (u_getCombiningClass(cp) == 0) {
+   tmp = object-u.cs.end;
+   while (tmp  object-text_len) {
+   U16_NEXT(object-text, tmp, object-text_len, 
cp);
+   if (u_getCombiningClass(cp) == 0) {
+   break;
+   } else {
+   object-u.cs.end = tmp;
+   }
}
}
}
@@ -243,8 +257,11 @@
 
 static void text_iter_cs_rewind(text_iter_obj *object TSRMLS_DC)
 {
-   object-u.cs.start = 0;
-   object-u.cs.end   = 0;
+   if (object-flags  ITER_REVERSE) {
+   object-u.cs.start = object-u.cs.end = object-text_len;
+   } else {
+   object-u.cs.start = object-u.cs.end = 0;
+   }
text_iter_cs_next(object TSRMLS_CC); /* find first sequence */
object-u.cs.index = 0; /* because _next increments index */
 }

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-06 Thread Andrei Zmievski
andrei  Mon Feb  6 17:42:28 2006 UTC

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  Make TextIterator fast again, now that we don't have to worry about
  references.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.11&r2=1.12&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.11 
php-src/ext/unicode/unicode_iterators.c:1.12
--- php-src/ext/unicode/unicode_iterators.c:1.11Sun Feb  5 23:31:47 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Feb  6 17:42:28 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.11 2006/02/05 23:31:47 helly Exp $ */
+/* $Id: unicode_iterators.c,v 1.12 2006/02/06 17:42:28 andrei Exp $ */
 
 /*
  * TODO
@@ -126,16 +126,11 @@
UChar32 cp;
int32_t tmp, buf_len;
 
-   if (!object-current) {
-   MAKE_STD_ZVAL(object-current);
-   Z_USTRVAL_P(object-current) = eumalloc(3);
-   Z_TYPE_P(object-current) = IS_UNICODE;
-   tmp = object-u.cp.offset;
-   U16_NEXT(object-text, tmp, object-text_len, cp);
-   buf_len = zend_codepoint_to_uchar(cp, 
Z_USTRVAL_P(object-current));
-   Z_USTRVAL_P(object-current)[buf_len] = 0;
-   Z_USTRLEN_P(object-current) = buf_len;
-   }
+   tmp = object-u.cp.offset;
+   U16_NEXT(object-text, tmp, object-text_len, cp);
+   buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current));
+   Z_USTRVAL_P(object-current)[buf_len] = 0;
+   Z_USTRLEN_P(object-current) = buf_len;
 }
 
 static int text_iter_cp_key(text_iter_obj* object TSRMLS_DC)
@@ -147,20 +142,12 @@
 {
U16_FWD_1(object-text, object-u.cp.offset, object-text_len);
object-u.cp.index++;
-   if (object-current) {
-   zval_ptr_dtor(object-current);
-   object-current = NULL;
-   }
 }
 
 static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC)
 {
object-u.cp.offset = 0;
object-u.cp.index  = 0;
-   if (object-current) {
-   zval_ptr_dtor(object-current);
-   object-current = NULL;
-   }
 }
 
 static text_iter_ops text_iter_cp_ops = {
@@ -268,9 +255,8 @@
if (intern-text) {
efree(intern-text);
}
-   if (intern-current) {
-   zval_ptr_dtor(intern-current);
-   }
+   ZVAL_DELREF(intern-current);
+   zval_ptr_dtor(intern-current);
efree(object);
 }
 
@@ -289,6 +275,10 @@
zend_hash_copy(intern-std.properties, class_type-default_properties, 
(copy_ctor_func_t) zval_add_ref, (void *) tmp, sizeof(zval *));
 
intern-type = ITER_CODE_POINT;
+   MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */
+   Z_USTRVAL_P(intern-current) = eumalloc(3);
+   Z_TYPE_P(intern-current) = IS_UNICODE;
+   ZVAL_ADDREF(intern-current);
 
retval.handle = zend_objects_store_put(intern, 
(zend_objects_store_dtor_t)zend_objects_destroy_object, 
(zend_objects_free_object_storage_t) text_iterator_free_storage, NULL 
TSRMLS_CC);
retval.handlers = zend_get_std_object_handlers();

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-06 Thread Andrei Zmievski
andrei  Mon Feb  6 18:18:41 2006 UTC

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  Some TODO items.
  
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.12&r2=1.13&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.12 
php-src/ext/unicode/unicode_iterators.c:1.13
--- php-src/ext/unicode/unicode_iterators.c:1.12Mon Feb  6 17:42:28 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Feb  6 18:18:41 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.12 2006/02/06 17:42:28 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.13 2006/02/06 18:18:41 andrei Exp $ */
 
 /*
  * TODO
@@ -22,6 +22,7 @@
  * - optimize current() to pass return_value to the handler so that it fills it
  *   in directly instead of creating a new zval
  * - return code units as binary strings? integers? or leave as unicode 
strings?
+ * - implement Countable (or count_elements handler) and Seekable interfaces
  */
 
 #include php.h
@@ -43,8 +44,8 @@
zval*   current;
union {
struct {
-   int32_t offset;
int32_t index;
+   int32_t offset;
} cp;
struct {
int32_t index;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-06 Thread Andrei Zmievski
andrei  Mon Feb  6 22:58:10 2006 UTC

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  - Fix up a bunch of stuff.
  - Register TextIterator type constants.
  
  # Not sure if I like them as class constants. Cleaner, but also longer
  # to type.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.13&r2=1.14&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.13 
php-src/ext/unicode/unicode_iterators.c:1.14
--- php-src/ext/unicode/unicode_iterators.c:1.13Mon Feb  6 18:18:41 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Feb  6 22:58:10 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.13 2006/02/06 18:18:41 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.14 2006/02/06 22:58:10 andrei Exp $ */
 
 /*
  * TODO
@@ -28,14 +28,17 @@
 #include php.h
 #include zend_interfaces.h
 #include zend_exceptions.h
-#include ext/spl/spl_exceptions.h
 
 typedef enum {
ITER_CODE_UNIT,
ITER_CODE_POINT,
ITER_COMB_SEQUENCE,
+   ITER_TYPE_LAST,
 } text_iter_type;
 
+const uint32_t ITER_REVERSE = 0x100;
+const uint32_t ITER_TYPE_MASK = 0xFF;
+
 typedef struct {
zend_object std;
UChar*  text;
@@ -78,10 +81,9 @@
 
 static void text_iter_cu_current(text_iter_obj* object TSRMLS_DC)
 {
-   if (!object-current) {
-   MAKE_STD_ZVAL(object-current);
-   ZVAL_UNICODEL(object-current, object-text + 
object-u.cu.index, 1, 1);
-   }
+   u_memcpy(Z_USTRVAL_P(object-current), object-text + 
object-u.cu.index, 1);
+   Z_USTRVAL_P(object-current)[1] = 0;
+   Z_USTRLEN_P(object-current) = 1;
 }
 
 static int text_iter_cu_key(text_iter_obj* object TSRMLS_DC)
@@ -92,19 +94,11 @@
 static void text_iter_cu_next(text_iter_obj* object TSRMLS_DC)
 {
object-u.cu.index++;
-   if (object-current) {
-   zval_ptr_dtor(object-current);
-   object-current = NULL;
-   }
 }
 
 static void text_iter_cu_rewind(text_iter_obj *object TSRMLS_DC)
 {
object-u.cu.index  = 0;
-   if (object-current) {
-   zval_ptr_dtor(object-current);
-   object-current = NULL;
-   }
 }
 
 static text_iter_ops text_iter_cu_ops = {
@@ -218,7 +212,7 @@
iter_ops[object-type]-rewind(object TSRMLS_CC);
 }
 
-zend_object_iterator_funcs text_iter_cp_funcs = {
+zend_object_iterator_funcs text_iter_funcs = {
text_iter_dtor,
text_iter_valid,
text_iter_get_current_data,
@@ -240,7 +234,7 @@
 
ZVAL_ADDREF(object);
iterator-intern.data  = (void *) object;
-   iterator-intern.funcs = text_iter_cp_funcs;
+   iterator-intern.funcs = text_iter_funcs;
iterator-object   = iter_object;
 
return (zend_object_iterator *) iterator;
@@ -256,7 +250,6 @@
if (intern-text) {
efree(intern-text);
}
-   ZVAL_DELREF(intern-current);
zval_ptr_dtor(intern-current);
efree(object);
 }
@@ -278,8 +271,8 @@
intern-type = ITER_CODE_POINT;
MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */
Z_USTRVAL_P(intern-current) = eumalloc(3);
+   Z_USTRVAL_P(intern-current)[0] = 0;
Z_TYPE_P(intern-current) = IS_UNICODE;
-   ZVAL_ADDREF(intern-current);
 
retval.handle = zend_objects_store_put(intern, 
(zend_objects_store_dtor_t)zend_objects_destroy_object, 
(zend_objects_free_object_storage_t) text_iterator_free_storage, NULL 
TSRMLS_CC);
retval.handlers = zend_get_std_object_handlers();
@@ -293,8 +286,10 @@
int32_t text_len;
zval *object = getThis();
text_iter_obj *intern;
+   text_iter_type ti_type;
+   long flags = 0;
 
-   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, u, text, 
text_len) == FAILURE) {
+   if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, u|l, text, 
text_len, flags) == FAILURE) {
return;
}
 
@@ -302,8 +297,16 @@
 
intern-text = eustrndup(text, text_len);
intern-text_len = text_len;
+   if (ZEND_NUM_ARGS()  1) {
+   ti_type = flags  ITER_TYPE_MASK;
+   if (flags  ITER_TYPE_LAST) { 
+   intern-type = ti_type;
+   } else {
+   php_error(E_WARNING, Invalid iterator type in 
TextIterator constructor);
+   }
+   }
 
-   text_iter_cp_rewind(intern TSRMLS_CC);
+   iter_ops[intern-type]-rewind(intern TSRMLS_CC);
 }
 
 PHP_METHOD(TextIterator, current)
@@ -344,7 +347,7 @@
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
-   iter_ops[object-type]-rewind(intern TSRMLS_CC);
+   

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-03 Thread Andrei Zmievski
andrei  Fri Feb  3 21:53:05 2006 UTC

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  Guard against assign-by-ref.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.5&r2=1.6&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.5 
php-src/ext/unicode/unicode_iterators.c:1.6
--- php-src/ext/unicode/unicode_iterators.c:1.5 Fri Feb  3 00:09:19 2006
+++ php-src/ext/unicode/unicode_iterators.c Fri Feb  3 21:53:05 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.5 2006/02/03 00:09:19 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.6 2006/02/03 21:53:05 andrei Exp $ */
 
 
 #include php.h
@@ -169,6 +169,7 @@
if (intern-text) {
efree(intern-text);
}
+   ZVAL_DELREF(intern-current);
zval_ptr_dtor(intern-current);
efree(object);
 }
@@ -191,6 +192,7 @@
MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */
Z_USTRVAL_P(intern-current) = eumalloc(3);
Z_TYPE_P(intern-current) = IS_UNICODE;
+   ZVAL_ADDREF(intern-current);
 
retval.handle = zend_objects_store_put(intern, 
(zend_objects_store_dtor_t)zend_objects_destroy_object, 
(zend_objects_free_object_storage_t) text_iterator_free_storage, NULL 
TSRMLS_CC);
retval.handlers = zend_get_std_object_handlers();

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-03 Thread Andrei Zmievski
andrei  Fri Feb  3 23:50:42 2006 UTC

  Modified files:  
/php-src/ext/unicode    unicode_iterators.c 
  Log:
  Gah. In order to avoid memory corruption when using references in
  foreach() this code is necessary. But it makes iterator 6x slower. We
  should keep thinking about how to optimize it.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.6&r2=1.7&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.6 
php-src/ext/unicode/unicode_iterators.c:1.7
--- php-src/ext/unicode/unicode_iterators.c:1.6 Fri Feb  3 21:53:05 2006
+++ php-src/ext/unicode/unicode_iterators.c Fri Feb  3 23:50:42 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.6 2006/02/03 21:53:05 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.7 2006/02/03 23:50:42 andrei Exp $ */
 
 
 #include php.h
@@ -58,11 +58,16 @@
UChar32 cp;
int32_t tmp, buf_len;
 
-   tmp = object-offset;
-   U16_NEXT(object-text, tmp, object-text_len, cp);
-   buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current));
-   Z_USTRVAL_P(object-current)[buf_len] = 0;
-   Z_USTRLEN_P(object-current) = buf_len;
+   if (!object-current) {
+   MAKE_STD_ZVAL(object-current);
+   Z_USTRVAL_P(object-current) = eumalloc(3);
+   Z_TYPE_P(object-current) = IS_UNICODE;
+   tmp = object-offset;
+   U16_NEXT(object-text, tmp, object-text_len, cp);
+   buf_len = zend_codepoint_to_uchar(cp, 
Z_USTRVAL_P(object-current));
+   Z_USTRVAL_P(object-current)[buf_len] = 0;
+   Z_USTRLEN_P(object-current) = buf_len;
+   }
 }
 
 static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC)
@@ -74,12 +79,20 @@
 {
U16_FWD_1(object-text, object-offset, object-text_len);
object-index++;
+   if (object-current) {
+   zval_ptr_dtor(object-current);
+   object-current = NULL;
+   }
 }
 
 static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC)
 {
object-offset = 0;
object-index  = 0;
+   if (object-current) {
+   zval_ptr_dtor(object-current);
+   object-current = NULL;
+   }
 }
 
 
@@ -169,8 +182,9 @@
if (intern-text) {
efree(intern-text);
}
-   ZVAL_DELREF(intern-current);
-   zval_ptr_dtor(intern-current);
+   if (intern-current) {
+   zval_ptr_dtor(intern-current);
+   }
efree(object);
 }
 
@@ -189,10 +203,6 @@
zend_hash_copy(intern-std.properties, class_type-default_properties, 
(copy_ctor_func_t) zval_add_ref, (void *) tmp, sizeof(zval *));
 
intern-type = ITER_CODE_POINT;
-   MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */
-   Z_USTRVAL_P(intern-current) = eumalloc(3);
-   Z_TYPE_P(intern-current) = IS_UNICODE;
-   ZVAL_ADDREF(intern-current);
 
retval.handle = zend_objects_store_put(intern, 
(zend_objects_store_dtor_t)zend_objects_destroy_object, 
(zend_objects_free_object_storage_t) text_iterator_free_storage, NULL 
TSRMLS_CC);
retval.handlers = zend_get_std_object_handlers();

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



Re: [PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-03 Thread Marcus Boerger
Hello Andrei,

  maybe internal c-level iterators can have a flag that disallows
foreach by reference?

regards
marcus

Saturday, February 4, 2006, 12:50:42 AM, you wrote:

 andrei  Fri Feb  3 23:50:42 2006 UTC

   Modified files:  
 /php-src/ext/unicode unicode_iterators.c
   Log:
   Gah. In order to avoid memory corruption when using references in
   foreach() this code is necessary. But it makes iterator 6x slower. We
   should keep thinking about how to optimize it.
   
   
 http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.6&r2=1.7&diff_format=u
 Index: php-src/ext/unicode/unicode_iterators.c
 diff -u php-src/ext/unicode/unicode_iterators.c:1.6
 php-src/ext/unicode/unicode_iterators.c:1.7
 --- php-src/ext/unicode/unicode_iterators.c:1.6   Fri Feb  3 21:53:05 2006
 +++ php-src/ext/unicode/unicode_iterators.c Fri Feb  3 23:50:42 2006
 @@ -14,7 +14,7 @@
 +--+
  */
  
 -/* $Id: unicode_iterators.c,v 1.6 2006/02/03 21:53:05 andrei Exp $ */
 +/* $Id: unicode_iterators.c,v 1.7 2006/02/03 23:50:42 andrei Exp $ */
  
  
  #include php.h
 @@ -58,11 +58,16 @@
 UChar32 cp;
 int32_t tmp, buf_len;
  
 -   tmp = object-offset;
 -   U16_NEXT(object-text, tmp, object-text_len, cp);
 -   buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current));
 -   Z_USTRVAL_P(object-current)[buf_len] = 0;
 -   Z_USTRLEN_P(object-current) = buf_len;
 +   if (!object-current) {
 +   MAKE_STD_ZVAL(object-current);
 +   Z_USTRVAL_P(object-current) = eumalloc(3);
 +   Z_TYPE_P(object-current) = IS_UNICODE;
 +   tmp = object-offset;
 +   U16_NEXT(object-text, tmp, object-text_len, cp);
 +   buf_len = zend_codepoint_to_uchar(cp, 
 Z_USTRVAL_P(object-current));
 +   Z_USTRVAL_P(object-current)[buf_len] = 0;
 +   Z_USTRLEN_P(object-current) = buf_len;
 +   }
  }
  
  static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC)
 @@ -74,12 +79,20 @@
  {
 U16_FWD_1(object-text, object-offset, object-text_len);
 object-index++;
 +   if (object-current) {
 +   zval_ptr_dtor(object-current);
 +   object-current = NULL;
 +   }
  }
  
  static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC)
  {
 object-offset = 0;
 object-index  = 0;
 +   if (object-current) {
 +   zval_ptr_dtor(object-current);
 +   object-current = NULL;
 +   }
  }
  
  
 @@ -169,8 +182,9 @@
 if (intern-text) {
 efree(intern-text);
 }
 -   ZVAL_DELREF(intern-current);
 -   zval_ptr_dtor(intern-current);
 +   if (intern-current) {
 +   zval_ptr_dtor(intern-current);
 +   }
 efree(object);
  }
  
 @@ -189,10 +203,6 @@
 zend_hash_copy(intern-std.properties,
 class_type-default_properties, (copy_ctor_func_t) zval_add_ref, (void *) 
 tmp, sizeof(zval *));
  
 intern-type = ITER_CODE_POINT;
 -   MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint 
 */
 -   Z_USTRVAL_P(intern-current) = eumalloc(3);
 -   Z_TYPE_P(intern-current) = IS_UNICODE;
 -   ZVAL_ADDREF(intern-current);
  
 retval.handle = zend_objects_store_put(intern,
 (zend_objects_store_dtor_t)zend_objects_destroy_object,
 (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL 
 TSRMLS_CC);
 retval.handlers = zend_get_std_object_handlers();




Best regards,
 Marcus

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



Re: [PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-03 Thread Marcus Boerger
Hello Andrei,

  actually thinking twice we don't need a flag. If something implements
Iterator we shouldn't allow foreach by reference anyway because the
iterator signature is mixed current() and not mixed &current().
And since both direct and manual iteration should be compatible we should
just disallow it in the same manner we needed to disallow ArrayAccess
reference handling. Something that only implements Traversable might
however still do foreach by reference. So the change is probably quite
small.

best regards
marcus

Andi?


Saturday, February 4, 2006, 12:51:53 AM, you wrote:

 Hello Andrei,

   maybe internal c-level iterators can have a flag that disallows
 foreach by reference?

 regards
 marcus

 Saturday, February 4, 2006, 12:50:42 AM, you wrote:

 andrei  Fri Feb  3 23:50:42 2006 UTC

   Modified files:  
 /php-src/ext/unicode unicode_iterators.c
   Log:
   Gah. In order to avoid memory corruption when using references in
   foreach() this code is necessary. But it makes iterator 6x slower. We
   should keep thinking about how to optimize it.
   
   
 http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.6&r2=1.7&diff_format=u
 Index: php-src/ext/unicode/unicode_iterators.c
 diff -u php-src/ext/unicode/unicode_iterators.c:1.6
 php-src/ext/unicode/unicode_iterators.c:1.7
 --- php-src/ext/unicode/unicode_iterators.c:1.6   Fri Feb  3 21:53:05 
 2006
 +++ php-src/ext/unicode/unicode_iterators.c Fri Feb  3 23:50:42 2006
 @@ -14,7 +14,7 @@
 +--+
  */
  
 -/* $Id: unicode_iterators.c,v 1.6 2006/02/03 21:53:05 andrei Exp $ */
 +/* $Id: unicode_iterators.c,v 1.7 2006/02/03 23:50:42 andrei Exp $ */
  
  
  #include php.h
 @@ -58,11 +58,16 @@
 UChar32 cp;
 int32_t tmp, buf_len;
  
 -   tmp = object-offset;
 -   U16_NEXT(object-text, tmp, object-text_len, cp);
 -   buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current));
 -   Z_USTRVAL_P(object-current)[buf_len] = 0;
 -   Z_USTRLEN_P(object-current) = buf_len;
 +   if (!object-current) {
 +   MAKE_STD_ZVAL(object-current);
 +   Z_USTRVAL_P(object-current) = eumalloc(3);
 +   Z_TYPE_P(object-current) = IS_UNICODE;
 +   tmp = object-offset;
 +   U16_NEXT(object-text, tmp, object-text_len, cp);
 +   buf_len = zend_codepoint_to_uchar(cp, 
 Z_USTRVAL_P(object-current));
 +   Z_USTRVAL_P(object-current)[buf_len] = 0;
 +   Z_USTRLEN_P(object-current) = buf_len;
 +   }
  }
  
  static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC)
 @@ -74,12 +79,20 @@
  {
 U16_FWD_1(object-text, object-offset, object-text_len);
 object-index++;
 +   if (object-current) {
 +   zval_ptr_dtor(object-current);
 +   object-current = NULL;
 +   }
  }
  
  static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC)
  {
 object-offset = 0;
 object-index  = 0;
 +   if (object-current) {
 +   zval_ptr_dtor(object-current);
 +   object-current = NULL;
 +   }
  }
  
  
 @@ -169,8 +182,9 @@
 if (intern-text) {
 efree(intern-text);
 }
 -   ZVAL_DELREF(intern-current);
 -   zval_ptr_dtor(intern-current);
 +   if (intern-current) {
 +   zval_ptr_dtor(intern-current);
 +   }
 efree(object);
  }
  
 @@ -189,10 +203,6 @@
 zend_hash_copy(intern-std.properties,
 class_type-default_properties, (copy_ctor_func_t) zval_add_ref, (void *) 
 tmp, sizeof(zval *));
  
 intern-type = ITER_CODE_POINT;
 -   MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint 
 */
 -   Z_USTRVAL_P(intern-current) = eumalloc(3);
 -   Z_TYPE_P(intern-current) = IS_UNICODE;
 -   ZVAL_ADDREF(intern-current);
  
 retval.handle = zend_objects_store_put(intern,
 (zend_objects_store_dtor_t)zend_objects_destroy_object,
 (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL 
 TSRMLS_CC);
 retval.handlers = zend_get_std_object_handlers();




 Best regards,
  Marcus




Best regards,
 Marcus

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-03 Thread Andrei Zmievski
andrei  Sat Feb  4 00:23:52 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Abstract the iterator interface so that we can add new types.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.7&r2=1.8&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.7 
php-src/ext/unicode/unicode_iterators.c:1.8
--- php-src/ext/unicode/unicode_iterators.c:1.7 Fri Feb  3 23:50:42 2006
+++ php-src/ext/unicode/unicode_iterators.c Sat Feb  4 00:23:52 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.7 2006/02/03 23:50:42 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.8 2006/02/04 00:23:52 andrei Exp $ */
 
 
 #include php.h
@@ -34,8 +34,15 @@
uint32_ttext_len;
text_iter_type  type;
zval*   current;
-   int32_t offset;
-   int32_t index;
+   union {
+   struct {
+   int32_t offset;
+   int32_t index;
+   } cp;
+   struct {
+   int32_t index;
+   } cu;
+   } u;
 } text_iter_obj;
 
 typedef struct {
@@ -43,17 +50,30 @@
text_iter_obj*   object;
 } text_iter_it;
 
+typedef struct {
+   int  (*valid)  (text_iter_obj* object TSRMLS_DC);
+   void (*current)(text_iter_obj* object TSRMLS_DC);
+   int  (*key)(text_iter_obj* object TSRMLS_DC);
+   void (*next)   (text_iter_obj* object TSRMLS_DC);
+   void (*rewind) (text_iter_obj* object TSRMLS_DC);
+} text_iter_ops;
+
 PHPAPI zend_class_entry* text_iterator_aggregate_ce;
 PHPAPI zend_class_entry* text_iterator_ce;
 
+/* Code unit ops */
+
+static text_iter_ops text_iter_cu_ops = {
+};
+
 /* Code point ops */
 
 static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC)
 {
-   return (object-offset  object-text_len);
+   return (object-u.cp.offset  object-text_len);
 }
 
-static void text_iter_cp_get_current_data(text_iter_obj* object TSRMLS_DC)
+static void text_iter_cp_current(text_iter_obj* object TSRMLS_DC)
 {
UChar32 cp;
int32_t tmp, buf_len;
@@ -62,7 +82,7 @@
MAKE_STD_ZVAL(object-current);
Z_USTRVAL_P(object-current) = eumalloc(3);
Z_TYPE_P(object-current) = IS_UNICODE;
-   tmp = object-offset;
+   tmp = object-u.cp.offset;
U16_NEXT(object-text, tmp, object-text_len, cp);
buf_len = zend_codepoint_to_uchar(cp, 
Z_USTRVAL_P(object-current));
Z_USTRVAL_P(object-current)[buf_len] = 0;
@@ -70,15 +90,15 @@
}
 }
 
-static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC)
+static int text_iter_cp_key(text_iter_obj* object TSRMLS_DC)
 {
-   return object-index;
+   return object-u.cp.index;
 }
 
-static void text_iter_cp_move_forward(text_iter_obj* object TSRMLS_DC)
+static void text_iter_cp_next(text_iter_obj* object TSRMLS_DC)
 {
-   U16_FWD_1(object-text, object-offset, object-text_len);
-   object-index++;
+   U16_FWD_1(object-text, object-u.cp.offset, object-text_len);
+   object-u.cp.index++;
if (object-current) {
zval_ptr_dtor(object-current);
object-current = NULL;
@@ -87,14 +107,26 @@
 
 static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC)
 {
-   object-offset = 0;
-   object-index  = 0;
+   object-u.cp.offset = 0;
+   object-u.cp.index  = 0;
if (object-current) {
zval_ptr_dtor(object-current);
object-current = NULL;
}
 }
 
+static text_iter_ops text_iter_cp_ops = {
+   text_iter_cp_valid,
+   text_iter_cp_current,
+   text_iter_cp_key,
+   text_iter_cp_next,
+   text_iter_cp_rewind,
+};
+
+static text_iter_ops* iter_ops[2] = {
+   text_iter_cu_ops,
+   text_iter_cp_ops,
+};
 
 /* Iterator Funcs */
 
@@ -110,7 +142,7 @@
text_iter_it*  iterator = (text_iter_it *) iter;
text_iter_obj* object   = iterator-object;
 
-   if (text_iter_cp_valid(object TSRMLS_CC))
+   if (iter_ops[object-type]-valid(object TSRMLS_CC))
return SUCCESS;
else
return FAILURE;
@@ -121,7 +153,7 @@
text_iter_it*  iterator = (text_iter_it *) iter;
text_iter_obj* object   = iterator-object;
 
-   text_iter_cp_get_current_data(object TSRMLS_CC);
+   iter_ops[object-type]-current(object TSRMLS_CC);
*data = object-current;
 }
 
@@ -130,7 +162,7 @@
text_iter_it*  iterator = (text_iter_it *) iter;
text_iter_obj* object   = iterator-object;
 
-   *int_key = text_iter_cp_get_current_key(object TSRMLS_CC);
+   *int_key = iter_ops[object-type]-key(object TSRMLS_CC);
return 

[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-03 Thread Andrei Zmievski
andrei  Sat Feb  4 00:35:37 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Add code unit ops.
  
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.8&r2=1.9&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.8 
php-src/ext/unicode/unicode_iterators.c:1.9
--- php-src/ext/unicode/unicode_iterators.c:1.8 Sat Feb  4 00:23:52 2006
+++ php-src/ext/unicode/unicode_iterators.c Sat Feb  4 00:35:37 2006
@@ -14,8 +14,15 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.8 2006/02/04 00:23:52 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.9 2006/02/04 00:35:37 andrei Exp $ */
 
+/*
+ * TODO
+ *
+ * - optimize current() to pass return_value to the handler so that it fills it
+ *   in directly instead of creating a new zval
+ * - return code units as binary strings? integers? or leave as unicode 
strings?
+ */
 
 #include php.h
 #include zend_interfaces.h
@@ -63,7 +70,48 @@
 
 /* Code unit ops */
 
+static int text_iter_cu_valid(text_iter_obj* object TSRMLS_DC)
+{
+   return (object-u.cu.index  object-text_len);
+}
+
+static void text_iter_cu_current(text_iter_obj* object TSRMLS_DC)
+{
+   if (!object-current) {
+   MAKE_STD_ZVAL(object-current);
+   ZVAL_UNICODEL(object-current, object-text + 
object-u.cu.index, 1, 1);
+   }
+}
+
+static int text_iter_cu_key(text_iter_obj* object TSRMLS_DC)
+{
+   return object-u.cu.index;
+}
+
+static void text_iter_cu_next(text_iter_obj* object TSRMLS_DC)
+{
+   object-u.cu.index++;
+   if (object-current) {
+   zval_ptr_dtor(object-current);
+   object-current = NULL;
+   }
+}
+
+static void text_iter_cu_rewind(text_iter_obj *object TSRMLS_DC)
+{
+   object-u.cu.index  = 0;
+   if (object-current) {
+   zval_ptr_dtor(object-current);
+   object-current = NULL;
+   }
+}
+
 static text_iter_ops text_iter_cu_ops = {
+   text_iter_cu_valid,
+   text_iter_cu_current,
+   text_iter_cu_key,
+   text_iter_cu_next,
+   text_iter_cu_rewind,
 };
 
 /* Code point ops */

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-03 Thread Andrei Zmievski
andrei  Sat Feb  4 00:41:42 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Implement Traversable instead of Iterator.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.9&r2=1.10&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.9 
php-src/ext/unicode/unicode_iterators.c:1.10
--- php-src/ext/unicode/unicode_iterators.c:1.9 Sat Feb  4 00:35:37 2006
+++ php-src/ext/unicode/unicode_iterators.c Sat Feb  4 00:41:42 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.9 2006/02/04 00:35:37 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.10 2006/02/04 00:41:42 andrei Exp $ */
 
 /*
  * TODO
@@ -366,9 +366,9 @@

INIT_CLASS_ENTRY(ce, TextIterator, text_iterator_funcs);
text_iterator_ce = zend_register_internal_class(ce TSRMLS_CC);
-   zend_class_implements(text_iterator_ce TSRMLS_CC, 1, zend_ce_iterator);
text_iterator_ce-create_object = text_iterator_new;
text_iterator_ce-get_iterator  = text_iter_get_iterator;
+   zend_class_implements(text_iterator_ce TSRMLS_CC, 1, 
zend_ce_traversable);
 }
 
 /*

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-01 Thread Andrei Zmievski
andrei  Wed Feb  1 23:53:53 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Check for intern-text before destroying it.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.1&r2=1.2&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.1 
php-src/ext/unicode/unicode_iterators.c:1.2
--- php-src/ext/unicode/unicode_iterators.c:1.1 Wed Feb  1 23:50:50 2006
+++ php-src/ext/unicode/unicode_iterators.c Wed Feb  1 23:53:53 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.1 2006/02/01 23:50:50 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.2 2006/02/01 23:53:53 andrei Exp $ */
 
 
 #include php.h
@@ -46,7 +46,7 @@
zend_hash_destroy(intern-std.properties);
FREE_HASHTABLE(intern-std.properties);
 
-   zval_ptr_dtor(intern-text);
+   if (intern-text) zval_ptr_dtor(intern-text);
efree(object);
 }
 
@@ -90,6 +90,7 @@
}
 
if (Z_TYPE_P(text) != IS_UNICODE) {
+   printf(not unicode\n);

zend_throw_exception(U_CLASS_ENTRY(spl_ce_InvalidArgumentException), Text 
iterator expects argument to be a Unicode string, 0 TSRMLS_CC);
return;
}

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-01 Thread Andrei Zmievski
andrei  Thu Feb  2 00:05:21 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Remove debug message.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.2 
php-src/ext/unicode/unicode_iterators.c:1.3
--- php-src/ext/unicode/unicode_iterators.c:1.2 Wed Feb  1 23:53:53 2006
+++ php-src/ext/unicode/unicode_iterators.c Thu Feb  2 00:05:21 2006
@@ -14,7 +14,7 @@
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.2 2006/02/01 23:53:53 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.3 2006/02/02 00:05:21 andrei Exp $ */
 
 
 #include php.h
@@ -90,7 +90,6 @@
}
 
if (Z_TYPE_P(text) != IS_UNICODE) {
-   printf(not unicode\n);

zend_throw_exception(U_CLASS_ENTRY(spl_ce_InvalidArgumentException), Text 
iterator expects argument to be a Unicode string, 0 TSRMLS_CC);
return;
}

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c

2006-02-01 Thread Sebastian Bergmann
sebastian   Thu Feb  2 06:01:28 2006 UTC

  Modified files:  
/php-src/ext/unicode unicode_iterators.c
  Log:
  Fix Andrei.
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.3&r2=1.4&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.3 
php-src/ext/unicode/unicode_iterators.c:1.4
--- php-src/ext/unicode/unicode_iterators.c:1.3 Thu Feb  2 00:05:21 2006
+++ php-src/ext/unicode/unicode_iterators.c Thu Feb  2 06:01:27 2006
@@ -10,11 +10,11 @@
| obtain it through the world-wide-web, please send a note to  |
| [EMAIL PROTECTED] so we can mail you a copy immediately.   |
+--+
-   | Authors: Andre Zmievski([EMAIL PROTECTED])  |
+   | Authors: Andrei Zmievski [EMAIL PROTECTED]|
+--+
 */
 
-/* $Id: unicode_iterators.c,v 1.3 2006/02/02 00:05:21 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.4 2006/02/02 06:01:27 sebastian Exp $ */
 
 
 #include php.h

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php