[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Tue Jul 11 17:59:47 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Make next() and previous() take optional step parameter and optimize return value usage. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.41r2=1.42diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.41 php-src/ext/unicode/unicode_iterators.c:1.42 --- php-src/ext/unicode/unicode_iterators.c:1.41Tue Jul 11 17:48:14 2006 +++ php-src/ext/unicode/unicode_iterators.c Tue Jul 11 17:59:46 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.41 2006/07/11 17:48:14 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.42 2006/07/11 17:59:46 andrei Exp $ */ /* * TODO @@ -1035,11 +1035,25 @@ PHP_METHOD(TextIterator, next) { + long i, step = 1; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); - iter_ops[intern-type]-next(intern, intern-flags TSRMLS_CC); - RETURN_LONG(iter_ops[intern-type]-offset(intern, intern-flags TSRMLS_CC)); + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, |l, step) == FAILURE) { + return; + } + + if (step = 0) { + step = 1; + } + + for (i = 0; i step; i++) { + iter_ops[intern-type]-next(intern, intern-flags TSRMLS_CC); + } + + if (return_value_used) { + RETURN_LONG(iter_ops[intern-type]-offset(intern, intern-flags TSRMLS_CC)); + } } PHP_METHOD(TextIterator, key) @@ -1088,13 +1102,26 @@ PHP_METHOD(TextIterator, previous) { - long flags; + long flags, i, step = 1; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, |l, step) == FAILURE) { + return; + } + + if (step = 0) { + step = 1; + } flags = intern-flags ^ ITER_REVERSE; - iter_ops[intern-type]-next(intern, flags TSRMLS_CC); - RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC)); + + for (i = 0; i step; i++) { + 
iter_ops[intern->type]->next(intern, flags TSRMLS_CC); + } + + if (return_value_used) { + RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC)); + } } PHP_METHOD(TextIterator, following) -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Tue Jul 11 20:51:18 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Protos. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.43r2=1.44diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.43 php-src/ext/unicode/unicode_iterators.c:1.44 --- php-src/ext/unicode/unicode_iterators.c:1.43Tue Jul 11 19:43:08 2006 +++ php-src/ext/unicode/unicode_iterators.c Tue Jul 11 20:51:18 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.43 2006/07/11 19:43:08 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.44 2006/07/11 20:51:18 andrei Exp $ */ /* * TODO @@ -985,6 +985,8 @@ return retval; } +/* {{{ proto void TextIterator::__construct(unicode text [, int flags = TextIterator::CODEPOINT [, string locale ]]) U + TextIterator constructor */ PHP_METHOD(TextIterator, __construct) { UChar *text; @@ -1032,16 +1034,26 @@ iter_ops[intern-type]-rewind(intern, intern-flags TSRMLS_CC); } +/* }}} */ +/* {{{ proto unicode TextIterator::current() U + Returns the element at the current boundary */ PHP_METHOD(TextIterator, current) { zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) { + return; + } + iter_ops[intern-type]-current(intern, intern-flags TSRMLS_CC); RETURN_UNICODEL(Z_USTRVAL_P(intern-current), Z_USTRLEN_P(intern-current), 1); } +/* }}} */ +/* {{{ proto int TextIterator::next([int n]) U + Advances to the n'th text boundary following the current one and returns its offset */ PHP_METHOD(TextIterator, next) { long i, step = 1; @@ -1066,29 +1078,50 @@ RETURN_LONG(cp_offset); } } +/* }}} */ +/* {{{ proto int TextIterator::key() U + Returns the number boundaries iterated through */ PHP_METHOD(TextIterator, key) { zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + if 
(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) { + return; + } + RETURN_LONG(iter_ops[intern-type]-key(intern, intern-flags TSRMLS_CC)); } +/* }}} */ +/* {{{ proto bool TextIterator::valid() U + Determines validity of the iterator */ PHP_METHOD(TextIterator, valid) { zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) { + return; + } + RETURN_BOOL(iter_ops[intern-type]-valid(intern, intern-flags TSRMLS_CC)); } +/* }}} */ +/* {{{ proto int TextIterator::first() U + Positions iterator at the first character in the text and returns the offset */ PHP_METHOD(TextIterator, rewind) { int32_t cp_offset; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) { + return; + } + iter_ops[intern-type]-rewind(intern, intern-flags TSRMLS_CC); if (return_value_used) { @@ -1096,7 +1129,10 @@ RETURN_LONG(cp_offset); } } +/* }}} */ +/* {{{ proto int TextIterator::last() U + Positions iterator beyond the last character in the text and returns the offset */ PHP_METHOD(TextIterator, last) { long flags; @@ -1104,6 +1140,10 @@ zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) { + return; + } + flags = intern-flags ^ ITER_REVERSE; iter_ops[intern-type]-rewind(intern, flags TSRMLS_CC); @@ -1112,17 +1152,27 @@ RETURN_LONG(cp_offset); } } +/* }}} */ +/* {{{ proto int TextIterator::offset() U + Returns the offset of the current text boundary */ PHP_METHOD(TextIterator, offset) { int32_t cp_offset; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) { + return; 
+ } + iter_ops[intern->type]->offset(intern, intern->flags, &cp_offset TSRMLS_CC); RETURN_LONG(cp_offset); } +/* }}} */ +/* {{{ proto int TextIterator::previous([int n]) U + Advances to the n'th text boundary preceding the current one and returns its offset */ PHP_METHOD(TextIterator, previous) { long
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Tue Jul 11 16:20:21 2006 UTC Modified files: /php-src/ext/unicode unicode_iterators.c Log: Use object flags. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.39&r2=1.40&diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.39 php-src/ext/unicode/unicode_iterators.c:1.40 --- php-src/ext/unicode/unicode_iterators.c:1.39 Mon Jul 10 23:19:05 2006 +++ php-src/ext/unicode/unicode_iterators.c Tue Jul 11 16:20:21 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.39 2006/07/10 23:19:05 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.40 2006/07/11 16:20:21 andrei Exp $ */ /* * TODO @@ -1132,7 +1132,7 @@ PHP_METHOD(TextIterator, isBoundary) { - long flags, offset; + long offset; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); @@ -1143,7 +1143,7 @@ /* * ReverseTextIterator will behave the same as the normal one. */ - RETURN_BOOL(iter_ops[intern->type]->isBoundary(intern, offset, flags TSRMLS_CC)); + RETURN_BOOL(iter_ops[intern->type]->isBoundary(intern, offset, intern->flags TSRMLS_CC)); } PHP_METHOD(TextIterator, getAvailableLocales) -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Tue Jul 11 19:43:09 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement getAll() that can be used to get all the pieces defined by the boundaries. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.42r2=1.43diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.42 php-src/ext/unicode/unicode_iterators.c:1.43 --- php-src/ext/unicode/unicode_iterators.c:1.42Tue Jul 11 17:59:46 2006 +++ php-src/ext/unicode/unicode_iterators.c Tue Jul 11 19:43:08 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.42 2006/07/11 17:59:46 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.43 2006/07/11 19:43:08 andrei Exp $ */ /* * TODO @@ -85,7 +85,7 @@ int (*valid) (text_iter_obj* object, long flags TSRMLS_DC); void (*current) (text_iter_obj* object, long flags TSRMLS_DC); int (*key) (text_iter_obj* object, long flags TSRMLS_DC); - int (*offset)(text_iter_obj* object, long flags TSRMLS_DC); + int (*offset)(text_iter_obj* object, long flags, int32_t *cp_offset TSRMLS_DC); void (*next) (text_iter_obj* object, long flags TSRMLS_DC); void (*rewind)(text_iter_obj* object, long flags TSRMLS_DC); void (*following) (text_iter_obj* object, int32_t offset, long flags TSRMLS_DC); @@ -144,9 +144,12 @@ return object-u.cp.index; } -static int text_iter_cp_offset(text_iter_obj* object, long flags TSRMLS_DC) +static int text_iter_cp_offset(text_iter_obj* object, long flags, int32_t *cp_offset TSRMLS_DC) { - return object-u.cp.cp_offset; + if (cp_offset) { + *cp_offset = object-u.cp.cp_offset; + } + return object-u.cp.offset; } static void text_iter_cp_next(text_iter_obj* object, long flags TSRMLS_DC) @@ -427,9 +430,12 @@ return object-u.cs.index; } -static int text_iter_cs_offset(text_iter_obj* object, long flags TSRMLS_DC) +static int text_iter_cs_offset(text_iter_obj* object, long flags, int32_t *cp_offset TSRMLS_DC) { - return object-u.cs.start_cp_offset; + if (cp_offset) { + 
*cp_offset = object-u.cs.start_cp_offset; + } + return object-u.cs.start; } static void text_iter_cs_next(text_iter_obj* object, long flags TSRMLS_DC) @@ -639,9 +645,12 @@ return object-u.brk.index; } -static int text_iter_brk_offset(text_iter_obj* object, long flags TSRMLS_DC) +static int text_iter_brk_offset(text_iter_obj* object, long flags, int32_t *cp_offset TSRMLS_DC) { - return object-u.brk.cp_offset; + if (cp_offset) { + *cp_offset = object-u.brk.cp_offset; + } + return object-u.brk.bound; } static void text_iter_brk_next(text_iter_obj* object, long flags TSRMLS_DC) @@ -1036,6 +1045,7 @@ PHP_METHOD(TextIterator, next) { long i, step = 1; + int32_t cp_offset; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); @@ -1052,7 +1062,8 @@ } if (return_value_used) { - RETURN_LONG(iter_ops[intern-type]-offset(intern, intern-flags TSRMLS_CC)); + iter_ops[intern-type]-offset(intern, intern-flags, cp_offset TSRMLS_CC); + RETURN_LONG(cp_offset); } } @@ -1074,35 +1085,48 @@ PHP_METHOD(TextIterator, rewind) { + int32_t cp_offset; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); iter_ops[intern-type]-rewind(intern, intern-flags TSRMLS_CC); - RETURN_LONG(iter_ops[intern-type]-offset(intern, intern-flags TSRMLS_CC)); + + if (return_value_used) { + iter_ops[intern-type]-offset(intern, intern-flags, cp_offset TSRMLS_CC); + RETURN_LONG(cp_offset); + } } PHP_METHOD(TextIterator, last) { long flags; + int32_t cp_offset; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); flags = intern-flags ^ ITER_REVERSE; iter_ops[intern-type]-rewind(intern, flags TSRMLS_CC); - RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC)); + + if (return_value_used) { + iter_ops[intern-type]-offset(intern, flags, cp_offset TSRMLS_CC); + RETURN_LONG(cp_offset); + } } PHP_METHOD(TextIterator, offset) { + 
int32_t cp_offset; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); - RETURN_LONG(iter_ops[intern->type]->offset(intern, intern->flags TSRMLS_CC)); + iter_ops[intern->type]->offset(intern,
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Mon Jul 10 20:14:12 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Fix combining sequence iterators for forward and backward movement. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.34r2=1.35diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.34 php-src/ext/unicode/unicode_iterators.c:1.35 --- php-src/ext/unicode/unicode_iterators.c:1.34Sat Jul 8 18:46:24 2006 +++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 20:14:12 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.34 2006/07/08 18:46:24 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.35 2006/07/10 20:14:12 andrei Exp $ */ /* * TODO @@ -306,6 +306,59 @@ /* Combining sequence ops */ +static void text_iter_helper_move(zend_bool forward, UChar *text, int32_t text_len, int32_t *offset, int32_t *cp_offset) +{ + UChar32 cp; + int32_t tmp, tmp2; + + if (*offset == UBRK_DONE) { + return; + } + + if (forward) { + if (*offset == text_len) { + *offset= UBRK_DONE; + *cp_offset = UBRK_DONE; + } else { + U16_NEXT(text, (*offset), text_len, cp); + (*cp_offset)++; + + if (u_getCombiningClass(cp) == 0) { + tmp = *offset; + tmp2 = *cp_offset; + /* +* At the end of the string cp will be 0 because of the NULL +* terminating NULL, so combining class will be 0 as well. 
+*/ + while (tmp text_len) { + U16_NEXT(text, tmp, text_len, cp); + tmp2++; + if (u_getCombiningClass(cp) == 0) { + break; + } else { + *offset= tmp; + *cp_offset = tmp2; + } + } + } + } + } else { + if (*offset == 0) { + *offset= UBRK_DONE; + *cp_offset = UBRK_DONE; + } else { + U16_PREV(text, 0, (*offset), cp); + (*cp_offset)--; + if (u_getCombiningClass(cp) != 0) { + do { + U16_PREV(text, 0, (*offset), cp); + (*cp_offset)--; + } while (*offset 0 u_getCombiningClass(cp) != 0); + } + } + } +} + static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC) { return (object-u.cs.start != UBRK_DONE); @@ -313,25 +366,41 @@ static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC) { - uint32_t length; UChar *start; + int32_t length = -1; - if (object-u.cs.start == UBRK_DONE || object-u.cs.end == UBRK_DONE) { - length = 0; - } else { + if (object-u.cs.start != UBRK_DONE) { if (flags ITER_REVERSE) { + if (object-u.cs.end == object-u.cs.start) { + text_iter_helper_move(0, object-text, object-text_len, + object-u.cs.start, object-u.cs.start_cp_offset); + } start = object-text + object-u.cs.end; } else { + if (object-u.cs.end == object-u.cs.start) { + text_iter_helper_move(1, object-text, object-text_len, + object-u.cs.end, object-u.cs.end_cp_offset); + } start = object-text + object-u.cs.start; } - length = abs(object-u.cs.end - object-u.cs.start); + + if (object-u.cs.end == UBRK_DONE) { + length = 0; + } else { + length = abs(object-u.cs.end - object-u.cs.start); + } + } else { + length = 0; + } + + if (length != 0) { if (length+1 object-current_alloc) { object-current_alloc = length+1; Z_USTRVAL_P(object-current) =
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Mon Jul 10 21:18:01 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement following() for combining sequences. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.35r2=1.36diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.35 php-src/ext/unicode/unicode_iterators.c:1.36 --- php-src/ext/unicode/unicode_iterators.c:1.35Mon Jul 10 20:14:12 2006 +++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 21:18:01 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.35 2006/07/10 20:14:12 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.36 2006/07/10 21:18:01 andrei Exp $ */ /* * TODO @@ -424,12 +424,12 @@ if (flags ITER_REVERSE) { text_iter_helper_move(0, object-text, object-text_len, object-u.cs.start, object-u.cs.start_cp_offset); - object-u.cs.end = object-u.cs.start; } else { text_iter_helper_move(1, object-text, object-text_len, object-u.cs.start, object-u.cs.start_cp_offset); - object-u.cs.end = object-u.cs.start; } + object-u.cs.end = object-u.cs.start; + object-u.cs.end_cp_offset = object-u.cs.start_cp_offset; object-u.cs.index++; } @@ -446,6 +446,64 @@ object-u.cs.index = 0; /* because _next increments index */ } +static void text_iter_cs_following(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) +{ + int32_t k, tmp; + + if (offset 0) { + offset = 0; + } + + /* +* On invalid iterator we always want to start looking for the code unit +* offset from the beginning of the string. +*/ + if (object-u.cs.start_cp_offset == UBRK_DONE) { + object-u.cs.start_cp_offset = 0; + object-u.cs.start = 0; + } + + /* +* Try to locate the code unit position relative to the last known codepoint +* offset. 
+*/ + k = object-u.cs.start; + if (offset object-u.cs.start_cp_offset) { + U16_FWD_N(object-text, k, object-text_len, offset - object-u.cs.start_cp_offset); + } else { + U16_BACK_N(object-text, 0, k, object-u.cs.start_cp_offset - offset); + } + + /* +* Locate the actual boundary. +*/ + if (flags ITER_REVERSE) { + /* +* If offset was at or beyond the length of text, we need to adjust it +* to the number of codepoints in the text. +*/ + if (k == object-text_len) { + offset = u_countChar32(object-text, object-text_len); + } + text_iter_helper_move(0, object-text, object-text_len, k, offset); + } else { + text_iter_helper_move(1, object-text, object-text_len, k, offset); + } + + if (k == object-u.cs.start) { + return; + } + + object-u.cs.start = k; + object-u.cs.start_cp_offset = offset; + object-u.cs.end = object-u.cs.start; +} + +static zend_bool text_iter_cs_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) +{ + return 1; +} + static text_iter_ops text_iter_cs_ops = { text_iter_cs_valid, text_iter_cs_current, @@ -453,6 +511,8 @@ text_iter_cs_offset, text_iter_cs_next, text_iter_cs_rewind, + text_iter_cs_following, + text_iter_cs_isBoundary, }; @@ -598,7 +658,6 @@ } else { object-u.brk.bound = ubrk_following(object-u.brk.iter, k); } - object-u.brk.next = object-u.brk.bound; /* * If boundary is the same one as where we were at before, simply return. @@ -607,6 +666,8 @@ return; } + object-u.brk.next = object-u.brk.bound; + /* * Adjust the internal codepoint offset based on how far we've moved. */ -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Mon Jul 10 21:42:25 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement isBoundary() for combining sequences. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.36r2=1.37diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.36 php-src/ext/unicode/unicode_iterators.c:1.37 --- php-src/ext/unicode/unicode_iterators.c:1.36Mon Jul 10 21:18:01 2006 +++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 21:42:25 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.36 2006/07/10 21:18:01 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.37 2006/07/10 21:42:25 andrei Exp $ */ /* * TODO @@ -448,7 +448,7 @@ static void text_iter_cs_following(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) { - int32_t k, tmp; + int32_t k; if (offset 0) { offset = 0; @@ -501,7 +501,54 @@ static zend_bool text_iter_cs_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) { - return 1; + UChar32 cp; + int32_t k, tmp; + zend_bool result; + + if (offset 0) { + offset = 0; + } + + /* +* On invalid iterator we always want to start looking for the code unit +* offset from the beginning of the string. +*/ + if (object-u.cs.start_cp_offset == UBRK_DONE) { + object-u.cs.start_cp_offset = 0; + object-u.cs.start = 0; + } + + /* +* Try to locate the code unit position relative to the last known codepoint +* offset. 
+*/ + k = object-u.cs.start; + if (offset object-u.cs.start_cp_offset) { + U16_FWD_N(object-text, k, object-text_len, offset - object-u.cs.start_cp_offset); + } else { + U16_BACK_N(object-text, 0, k, object-u.cs.start_cp_offset - offset); + } + + /* end of the text is always a boundary */ + if (k == object-text_len) { + offset = u_countChar32(object-text, object-text_len); + result = 1; + } else { + /* if the next codepoint is a base character, it's a boundary */ + tmp = k; + U16_NEXT(object-text, tmp, object-text_len, cp); + result = (u_getCombiningClass(cp) == 0); + } + + if (k == object-u.cs.start) { + return result; + } + + object-u.cs.start = k; + object-u.cs.start_cp_offset = offset; + object-u.cs.end = object-u.cs.start; + + return result; } static text_iter_ops text_iter_cs_ops = { -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Mon Jul 10 22:12:47 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Fix validity checks. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.37r2=1.38diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.37 php-src/ext/unicode/unicode_iterators.c:1.38 --- php-src/ext/unicode/unicode_iterators.c:1.37Mon Jul 10 21:42:25 2006 +++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 22:12:47 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.37 2006/07/10 21:42:25 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.38 2006/07/10 22:12:47 andrei Exp $ */ /* * TODO @@ -107,7 +107,15 @@ static int text_iter_cp_valid(text_iter_obj* object, long flags TSRMLS_DC) { - return (object-u.cp.offset != UBRK_DONE); + if (object-u.cp.offset == UBRK_DONE) { + return 0; + } + + if (flags ITER_REVERSE) { + return (object-u.cp.offset != 0); + } else { + return (object-u.cp.offset != object-text_len); + } } static void text_iter_cp_current(text_iter_obj* object, long flags TSRMLS_DC) @@ -144,24 +152,26 @@ static void text_iter_cp_next(text_iter_obj* object, long flags TSRMLS_DC) { - if (text_iter_cp_valid(object, flags TSRMLS_CC)) { - if (flags ITER_REVERSE) { - U16_BACK_1(object-text, 0, object-u.cp.offset); - if (object-u.cp.offset = object-text_len) { - object-u.cp.cp_offset--; - } else { - object-u.cp.offset = object-u.cp.cp_offset = UBRK_DONE; - } + if (object-u.cp.offset == UBRK_DONE) { + return; + } + + if (flags ITER_REVERSE) { + U16_BACK_1(object-text, 0, object-u.cp.offset); + if (object-u.cp.offset = object-text_len) { + object-u.cp.cp_offset--; } else { - U16_FWD_1(object-text, object-u.cp.offset, object-text_len); - if (object-u.cp.offset = object-text_len) { - object-u.cp.cp_offset++; - } else { - object-u.cp.offset = object-u.cp.cp_offset = UBRK_DONE; - } + object-u.cp.offset = object-u.cp.cp_offset = UBRK_DONE; + } + } else { + U16_FWD_1(object-text, 
object-u.cp.offset, object-text_len); + if (object-u.cp.offset = object-text_len) { + object-u.cp.cp_offset++; + } else { + object-u.cp.offset = object-u.cp.cp_offset = UBRK_DONE; } - object-u.cp.index++; } + object-u.cp.index++; } static void text_iter_cp_rewind(text_iter_obj *object, long flags TSRMLS_DC) @@ -361,7 +371,15 @@ static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC) { - return (object-u.cs.start != UBRK_DONE); + if (object-u.cs.start == UBRK_DONE) { + return 0; + } + + if (flags ITER_REVERSE) { + return (object-u.cs.start != 0); + } else { + return (object-u.cs.start != object-text_len); + } } static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC) @@ -373,7 +391,7 @@ if (flags ITER_REVERSE) { if (object-u.cs.end == object-u.cs.start) { text_iter_helper_move(0, object-text, object-text_len, - object-u.cs.start, object-u.cs.start_cp_offset); + object-u.cs.end, object-u.cs.end_cp_offset); } start = object-text + object-u.cs.end; } else { @@ -567,7 +585,15 @@ static int text_iter_brk_valid(text_iter_obj* object, long flags TSRMLS_DC) { - return (object-u.brk.bound != UBRK_DONE); + if (object-u.brk.bound == UBRK_DONE) { + return 0; + } + + if (flags ITER_REVERSE) { + return (object-u.brk.bound != 0); + } else { + return (object-u.brk.bound != object-text_len); + } } static void text_iter_brk_current(text_iter_obj* object, long flags TSRMLS_DC) -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Mon Jul 10 23:19:05 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement getAvailableLocales(). http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.38r2=1.39diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.38 php-src/ext/unicode/unicode_iterators.c:1.39 --- php-src/ext/unicode/unicode_iterators.c:1.38Mon Jul 10 22:12:47 2006 +++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 23:19:05 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.38 2006/07/10 22:12:47 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.39 2006/07/10 23:19:05 andrei Exp $ */ /* * TODO @@ -1146,6 +1146,25 @@ RETURN_BOOL(iter_ops[intern-type]-isBoundary(intern, offset, flags TSRMLS_CC)); } +PHP_METHOD(TextIterator, getAvailableLocales) +{ + int32_t count, i; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ) == FAILURE) { + return; + } + + if (!return_value_used) { + return; + } + + array_init(return_value); + count = ubrk_countAvailable(); + for (i = 0; i count; i++) { + add_next_index_ascii_string(return_value, (char*)ubrk_getAvailable(i), ZSTR_DUPLICATE); + } +} + static zend_function_entry text_iterator_funcs[] = { PHP_ME(TextIterator, __construct, NULL, ZEND_ACC_PUBLIC) @@ -1164,6 +1183,8 @@ PHP_ME(TextIterator, preceding, NULL, ZEND_ACC_PUBLIC) PHP_ME(TextIterator, isBoundary, NULL, ZEND_ACC_PUBLIC) + PHP_ME(TextIterator, getAvailableLocales, NULL, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC) + PHP_MALIAS(TextIterator, first, rewind, NULL, ZEND_ACC_PUBLIC) {NULL, NULL, NULL} }; -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Sat Jul 8 18:46:24 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement following() and preceding() for codepoint iterators. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.33r2=1.34diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.33 php-src/ext/unicode/unicode_iterators.c:1.34 --- php-src/ext/unicode/unicode_iterators.c:1.33Fri Jul 7 22:52:26 2006 +++ php-src/ext/unicode/unicode_iterators.c Sat Jul 8 18:46:24 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.33 2006/07/07 22:52:26 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.34 2006/07/08 18:46:24 andrei Exp $ */ /* * TODO @@ -178,6 +178,78 @@ static void text_iter_cp_following(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) { + int32_t k; + + if (offset 0) { + offset = 0; + } + + /* +* On invalid iterator we always want to start looking for the code unit +* offset from the beginning of the string. +*/ + if (object-u.cp.cp_offset == UBRK_DONE) { + object-u.cp.cp_offset = 0; + object-u.cp.offset = 0; + } + + /* +* Try to locate the code unit position relative to the last known codepoint +* offset. +*/ + k = object-u.cp.offset; + if (offset object-u.cp.cp_offset) { + U16_FWD_N(object-text, k, object-text_len, offset - object-u.cp.cp_offset); + } else { + U16_BACK_N(object-text, 0, k, object-u.cp.cp_offset - offset); + } + + /* +* Locate the actual boundary. +*/ + if (flags ITER_REVERSE) { + if (k == 0) { + object-u.cp.cp_offset = UBRK_DONE; + object-u.cp.offset = UBRK_DONE; + return; + } else { + U16_BACK_1(object-text, 0, k); + } + } else { + if (k == object-text_len) { + object-u.cp.cp_offset = UBRK_DONE; + object-u.cp.offset = UBRK_DONE; + return; + } else { + U16_FWD_1(object-text, k, object-text_len); + } + } + + /* +* If boundary is the same one as where we were at before, simply return. 
+*/ + if (k == object-u.cp.offset) { + return; + } + + /* +* Adjust the internal codepoint offset based on how far we've moved. +*/ + if (k object-u.cp.offset) { + if (k - object-u.cp.offset 1) { + object-u.cp.cp_offset += u_countChar32(object-text + object-u.cp.offset, k - object-u.cp.offset); + } else { + object-u.cp.cp_offset++; + } + } else { + if (object-u.cp.offset - k 1) { + object-u.cp.cp_offset -= u_countChar32(object-text + k, object-u.cp.offset - k); + } else { + object-u.cp.cp_offset--; + } + } + + object-u.cp.offset = k; } static zend_bool text_iter_cp_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) @@ -863,7 +935,7 @@ PHP_METHOD(TextIterator, following) { - long flags, offset; + long offset; zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); @@ -871,8 +943,8 @@ return; } - iter_ops[intern-type]-following(intern, offset, flags TSRMLS_CC); - RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC)); + iter_ops[intern-type]-following(intern, offset, intern-flags TSRMLS_CC); + RETURN_LONG(iter_ops[intern-type]-offset(intern, intern-flags TSRMLS_CC)); } PHP_METHOD(TextIterator, preceding) -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Fri Jul 7 21:41:18 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement TextIterator methods following() and preceding(). http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.30r2=1.31diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.30 php-src/ext/unicode/unicode_iterators.c:1.31 --- php-src/ext/unicode/unicode_iterators.c:1.30Thu Jun 29 12:32:00 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Jul 7 21:41:18 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.30 2006/06/29 12:32:00 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.31 2006/07/07 21:41:18 andrei Exp $ */ /* * TODO @@ -82,12 +82,13 @@ } typedef struct { - int (*valid) (text_iter_obj* object, long flags TSRMLS_DC); - void (*current)(text_iter_obj* object, long flags TSRMLS_DC); - int (*key)(text_iter_obj* object, long flags TSRMLS_DC); - int (*offset) (text_iter_obj* object, long flags TSRMLS_DC); - void (*next) (text_iter_obj* object, long flags TSRMLS_DC); - void (*rewind) (text_iter_obj* object, long flags TSRMLS_DC); + int (*valid) (text_iter_obj* object, long flags TSRMLS_DC); + void (*current) (text_iter_obj* object, long flags TSRMLS_DC); + int (*key) (text_iter_obj* object, long flags TSRMLS_DC); + int (*offset)(text_iter_obj* object, long flags TSRMLS_DC); + void (*next) (text_iter_obj* object, long flags TSRMLS_DC); + void (*rewind)(text_iter_obj* object, long flags TSRMLS_DC); + void (*following) (text_iter_obj* object, int32_t offset, long flags TSRMLS_DC); } text_iter_ops; enum UBreakIteratorType brk_type_map[] = { @@ -410,6 +411,73 @@ object-u.brk.index = 0; } +static void text_iter_brk_following(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) +{ + int32_t k, tmp; + + if (offset 0) { + offset = 0; + } + + /* +* On invalid iterator we always want to start looking for the code unit +* offset from the beginning of the string. 
+*/ + if (object-u.brk.cp_offset == UBRK_DONE) { + object-u.brk.cp_offset = 0; + object-u.brk.bound = 0; + } + + /* +* Try to locate the code unit position relative to the last known codepoint +* offset. +*/ + k = tmp = object-u.brk.bound; + if (offset object-u.brk.cp_offset) { + U16_FWD_N(object-text, k, object-text_len, offset - object-u.brk.cp_offset); + } else { + U16_BACK_N(object-text, 0, k, object-u.brk.cp_offset - offset); + } + + /* +* Locate the actual boundary. +*/ + if (flags ITER_REVERSE) { + object-u.brk.bound = ubrk_preceding(object-u.brk.iter, k); + } else { + object-u.brk.bound = ubrk_following(object-u.brk.iter, k); + } + object-u.brk.next = object-u.brk.bound; + + /* +* If boundary is the same one as where we were at before, simply return. +*/ + if (object-u.brk.bound == tmp) { + return; + } + + /* +* Adjust the internal codepoint offset based on how far we've moved. +*/ + if (object-u.brk.bound != UBRK_DONE) { + if (object-u.brk.bound tmp) { + if (object-u.brk.bound - tmp 1) { + object-u.brk.cp_offset += u_countChar32(object-text + tmp, object-u.brk.bound - tmp); + } else { + object-u.brk.cp_offset++; + } + } else { + if (tmp - object-u.brk.bound 1) { + object-u.brk.cp_offset -= u_countChar32(object-text + object-u.brk.bound, tmp - object-u.brk.bound); + } else { + object-u.brk.cp_offset--; + } + } + } else { + object-u.brk.cp_offset = UBRK_DONE; + } +} + static text_iter_ops text_iter_brk_ops = { text_iter_brk_valid, text_iter_brk_current, @@ -417,6 +485,7 @@ text_iter_brk_offset, text_iter_brk_next, text_iter_brk_rewind, + text_iter_brk_following, }; @@ -678,7 +747,39 @@ RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC)); } +PHP_METHOD(TextIterator, following) +{ + long flags, offset; + zval *object = getThis(); + text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, l, offset) == FAILURE) { + return; +
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Fri Jul 7 22:34:46 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement TextIterator::isBoundary() for break iterators. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.31r2=1.32diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.31 php-src/ext/unicode/unicode_iterators.c:1.32 --- php-src/ext/unicode/unicode_iterators.c:1.31Fri Jul 7 21:41:18 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Jul 7 22:34:46 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.31 2006/07/07 21:41:18 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.32 2006/07/07 22:34:46 andrei Exp $ */ /* * TODO @@ -89,6 +89,7 @@ void (*next) (text_iter_obj* object, long flags TSRMLS_DC); void (*rewind)(text_iter_obj* object, long flags TSRMLS_DC); void (*following) (text_iter_obj* object, int32_t offset, long flags TSRMLS_DC); + zend_bool (*isBoundary)(text_iter_obj* object, int32_t offset, long flags TSRMLS_DC); } text_iter_ops; enum UBreakIteratorType brk_type_map[] = { @@ -478,6 +479,71 @@ } } +static zend_bool text_iter_brk_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) +{ + int32_t k, tmp; + UBool result; + + if (offset 0) { + offset = 0; + } + + /* +* On invalid iterator we always want to start looking for the code unit +* offset from the beginning of the string. +*/ + if (object-u.brk.cp_offset == UBRK_DONE) { + object-u.brk.cp_offset = 0; + object-u.brk.bound = 0; + } + + /* +* Try to locate the code unit position relative to the last known codepoint +* offset. 
+*/ + k = tmp = object-u.brk.bound; + if (offset object-u.brk.cp_offset) { + U16_FWD_N(object-text, k, object-text_len, offset - object-u.brk.cp_offset); + } else { + U16_BACK_N(object-text, 0, k, object-u.brk.cp_offset - offset); + } + + result = ubrk_isBoundary(object-u.brk.iter, k); + + object-u.brk.bound = ubrk_current(object-u.brk.iter); + object-u.brk.next = object-u.brk.bound; + + /* +* If boundary is the same one as where we were at before, simply return. +*/ + if (object-u.brk.bound == tmp) { + return result; + } + + /* +* Adjust the internal codepoint offset based on how far we've moved. +*/ + if (object-u.brk.bound != UBRK_DONE) { + if (object-u.brk.bound tmp) { + if (object-u.brk.bound - tmp 1) { + object-u.brk.cp_offset += u_countChar32(object-text + tmp, object-u.brk.bound - tmp); + } else { + object-u.brk.cp_offset++; + } + } else { + if (tmp - object-u.brk.bound 1) { + object-u.brk.cp_offset -= u_countChar32(object-text + object-u.brk.bound, tmp - object-u.brk.bound); + } else { + object-u.brk.cp_offset--; + } + } + } else { + object-u.brk.cp_offset = UBRK_DONE; + } + + return result; +} + static text_iter_ops text_iter_brk_ops = { text_iter_brk_valid, text_iter_brk_current, @@ -486,6 +552,7 @@ text_iter_brk_next, text_iter_brk_rewind, text_iter_brk_following, + text_iter_brk_isBoundary, }; @@ -778,6 +845,23 @@ iter_ops[intern-type]-following(intern, offset, flags TSRMLS_CC); RETURN_LONG(iter_ops[intern-type]-offset(intern, flags TSRMLS_CC)); } + +PHP_METHOD(TextIterator, isBoundary) +{ + long flags, offset; + zval *object = getThis(); + text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, l, offset) == FAILURE) { + return; + } + + /* +* ReverseTextIterator will behave the same as the normal one. 
+*/ + RETURN_BOOL(iter_ops[intern-type]-isBoundary(intern, offset, flags TSRMLS_CC)); +} + static zend_function_entry text_iterator_funcs[] = { PHP_ME(TextIterator, __construct, NULL, ZEND_ACC_PUBLIC) @@ -794,6 +878,7 @@ PHP_ME(TextIterator, last,NULL, ZEND_ACC_PUBLIC) PHP_ME(TextIterator, following, NULL, ZEND_ACC_PUBLIC) PHP_ME(TextIterator, preceding, NULL, ZEND_ACC_PUBLIC) + PHP_ME(TextIterator, isBoundary, NULL, ZEND_ACC_PUBLIC) PHP_MALIAS(TextIterator, first, rewind, NULL, ZEND_ACC_PUBLIC)
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Fri Jul 7 22:52:26 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement isBoundary() for code point iterator. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.32r2=1.33diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.32 php-src/ext/unicode/unicode_iterators.c:1.33 --- php-src/ext/unicode/unicode_iterators.c:1.32Fri Jul 7 22:34:46 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Jul 7 22:52:26 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.32 2006/07/07 22:34:46 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.33 2006/07/07 22:52:26 andrei Exp $ */ /* * TODO @@ -176,6 +176,51 @@ object-u.cp.index = 0; } +static void text_iter_cp_following(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) +{ +} + +static zend_bool text_iter_cp_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC) +{ + int32_t k; + + if (offset 0) { + offset = 0; + } + + /* +* On invalid iterator we always want to start looking for the code unit +* offset from the beginning of the string. +*/ + if (object-u.cp.cp_offset == UBRK_DONE) { + object-u.cp.cp_offset = 0; + object-u.cp.offset = 0; + } + + /* +* Try to locate the code unit position relative to the last known codepoint +* offset. +*/ + k = object-u.cp.offset; + if (offset object-u.cp.cp_offset) { + U16_FWD_N(object-text, k, object-text_len, offset - object-u.cp.cp_offset); + } else { + U16_BACK_N(object-text, 0, k, object-u.cp.cp_offset - offset); + } + + if (k == object-text_len) { + object-u.cp.cp_offset += u_countChar32(object-text + object-u.cp.offset, k - object-u.cp.offset); + } else { + object-u.cp.cp_offset = offset; + } + object-u.cp.offset = k; + + /* +* Every codepoint is a boundary. 
+*/ + return TRUE; +} + static text_iter_ops text_iter_cp_ops = { text_iter_cp_valid, text_iter_cp_current, @@ -183,6 +228,8 @@ text_iter_cp_offset, text_iter_cp_next, text_iter_cp_rewind, + text_iter_cp_following, + text_iter_cp_isBoundary, }; /* Combining sequence ops */ -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Thu Jun 29 12:32:01 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Try to make combining sequences work. Not entirely succesful. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.29r2=1.30diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.29 php-src/ext/unicode/unicode_iterators.c:1.30 --- php-src/ext/unicode/unicode_iterators.c:1.29Wed Jun 28 15:28:55 2006 +++ php-src/ext/unicode/unicode_iterators.c Thu Jun 29 12:32:00 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.29 2006/06/28 15:28:55 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.30 2006/06/29 12:32:00 andrei Exp $ */ /* * TODO @@ -187,21 +187,30 @@ static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC) { - if (flags ITER_REVERSE) { - return (object-u.cs.end 0); - } else { - return (object-u.cs.end = object-text_len); - } + return (object-u.cs.start != UBRK_DONE); } static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC) { - uint32_t length = object-u.cs.end - object-u.cs.start; - if (length+1 object-current_alloc) { - object-current_alloc = length+1; - Z_USTRVAL_P(object-current) = eurealloc(Z_USTRVAL_P(object-current), object-current_alloc); + uint32_t length; + UChar *start; + + if (object-u.cs.start == UBRK_DONE || object-u.cs.end == UBRK_DONE) { + length = 0; + } else { + if (flags ITER_REVERSE) { + start = object-text + object-u.cs.end; + } else { + start = object-text + object-u.cs.start; + } + length = abs(object-u.cs.end - object-u.cs.start); + if (length+1 object-current_alloc) { + object-current_alloc = length+1; + Z_USTRVAL_P(object-current) = eurealloc(Z_USTRVAL_P(object-current), object-current_alloc); + } + u_memcpy(Z_USTRVAL_P(object-current), start, length); } - u_memcpy(Z_USTRVAL_P(object-current), object-text + object-u.cs.start, length); + Z_USTRVAL_P(object-current)[length] = 0; Z_USTRLEN_P(object-current) = length; 
} @@ -213,7 +222,11 @@ static int text_iter_cs_offset(text_iter_obj* object, long flags TSRMLS_DC) { - return object-u.cs.start_cp_offset; + if (flags ITER_REVERSE) { + return object-u.cs.end_cp_offset; + } else { + return object-u.cs.start_cp_offset; + } } static void text_iter_cs_next(text_iter_obj* object, long flags TSRMLS_DC) @@ -221,21 +234,31 @@ UChar32 cp; int32_t tmp, tmp2; - if (text_iter_cs_valid(object, flags TSRMLS_CC)) { - if (flags ITER_REVERSE) { - object-u.cs.end = object-u.cs.start; - object-u.cs.end_cp_offset = object-u.cs.start_cp_offset; - U16_PREV(object-text, 0, object-u.cs.start, cp); - object-u.cs.start_cp_offset--; + if (object-u.cs.start == UBRK_DONE) { + return; + } + + object-u.cs.start = object-u.cs.end; + object-u.cs.start_cp_offset = object-u.cs.end_cp_offset; + if (flags ITER_REVERSE) { + if (object-u.cs.end == 0) { + object-u.cs.end = UBRK_DONE; + object-u.cs.end_cp_offset = UBRK_DONE; + } else { + U16_PREV(object-text, 0, object-u.cs.end, cp); + object-u.cs.end_cp_offset--; if (u_getCombiningClass(cp) != 0) { do { - U16_PREV(object-text, 0, object-u.cs.start, cp); - object-u.cs.start_cp_offset--; - } while (object-u.cs.start 0 u_getCombiningClass(cp) != 0); + U16_PREV(object-text, 0, object-u.cs.end, cp); + object-u.cs.end_cp_offset--; + } while (object-u.cs.end 0 u_getCombiningClass(cp) != 0); } + } + } else { + if (object-u.cs.end == object-text_len) { + object-u.cs.end = UBRK_DONE; + object-u.cs.end_cp_offset = UBRK_DONE; } else { - object-u.cs.start = object-u.cs.end; - object-u.cs.start_cp_offset = object-u.cs.end_cp_offset; U16_NEXT(object-text, object-u.cs.end, object-text_len, cp);
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Wed Jun 28 14:44:36 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Use object's copied text (fixes some bug somehow). http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.27r2=1.28diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.27 php-src/ext/unicode/unicode_iterators.c:1.28 --- php-src/ext/unicode/unicode_iterators.c:1.27Wed Jun 28 14:12:14 2006 +++ php-src/ext/unicode/unicode_iterators.c Wed Jun 28 14:44:36 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.27 2006/06/28 14:12:14 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.28 2006/06/28 14:44:36 andrei Exp $ */ /* * TODO @@ -563,8 +563,8 @@ UErrorCode status = U_ZERO_ERROR; UErrorCode status2 = U_ZERO_ERROR; locale = locale ? locale : UG(default_locale); - intern-u.brk.iter = ubrk_open(brk_type_map[intern-type - ITER_CHARACTER], locale, text, text_len, status); - intern-u.brk.n_iter = ubrk_open(brk_type_map[intern-type - ITER_CHARACTER], locale, text, text_len, status); + intern-u.brk.iter = ubrk_open(brk_type_map[intern-type - ITER_CHARACTER], locale, intern-text, intern-text_len, status); + intern-u.brk.n_iter = ubrk_open(brk_type_map[intern-type - ITER_CHARACTER], locale, intern-text, intern-text_len, status); if (!U_SUCCESS(status) || !U_SUCCESS(status2)) { php_error(E_RECOVERABLE_ERROR, Could not create UBreakIterator for '%s' locale: %s, locale, u_errorName(status)); return; -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Wed Jun 28 15:28:55 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Fix codepoint iterators http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.28r2=1.29diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.28 php-src/ext/unicode/unicode_iterators.c:1.29 --- php-src/ext/unicode/unicode_iterators.c:1.28Wed Jun 28 14:44:36 2006 +++ php-src/ext/unicode/unicode_iterators.c Wed Jun 28 15:28:55 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.28 2006/06/28 14:44:36 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.29 2006/06/28 15:28:55 andrei Exp $ */ /* * TODO @@ -105,11 +105,7 @@ static int text_iter_cp_valid(text_iter_obj* object, long flags TSRMLS_DC) { - if (flags ITER_REVERSE) { - return (object-u.cp.offset 0); - } else { - return (object-u.cp.offset object-text_len); - } + return (object-u.cp.offset != UBRK_DONE); } static void text_iter_cp_current(text_iter_obj* object, long flags TSRMLS_DC) @@ -117,14 +113,18 @@ UChar32 cp = 0; int32_t tmp, buf_len = 0; - if (text_iter_cp_valid(object, flags TSRMLS_CC)) { - tmp = object-u.cp.offset; - if (flags ITER_REVERSE) { + tmp = object-u.cp.offset; + + if (flags ITER_REVERSE) { + if (object-u.cp.offset != UBRK_DONE object-u.cp.offset 0) { U16_PREV(object-text, 0, tmp, cp); - } else { + buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); + } + } else { + if (object-u.cp.offset != UBRK_DONE object-u.cp.offset object-text_len) { U16_NEXT(object-text, tmp, object-text_len, cp); + buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); } - buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); } Z_USTRVAL_P(object-current)[buf_len] = 0; Z_USTRLEN_P(object-current) = buf_len; @@ -145,10 +145,18 @@ if (text_iter_cp_valid(object, flags TSRMLS_CC)) { if (flags ITER_REVERSE) { U16_BACK_1(object-text, 0, object-u.cp.offset); - object-u.cp.cp_offset--; + if 
(object-u.cp.offset = object-text_len) { + object-u.cp.cp_offset--; + } else { + object-u.cp.offset = object-u.cp.cp_offset = UBRK_DONE; + } } else { U16_FWD_1(object-text, object-u.cp.offset, object-text_len); - object-u.cp.cp_offset++; + if (object-u.cp.offset = object-text_len) { + object-u.cp.cp_offset++; + } else { + object-u.cp.offset = object-u.cp.cp_offset = UBRK_DONE; + } } object-u.cp.index++; } @@ -275,7 +283,7 @@ /* UBreakIterator Character Ops */ -static int text_iter_brk_char_valid(text_iter_obj* object, long flags TSRMLS_DC) +static int text_iter_brk_valid(text_iter_obj* object, long flags TSRMLS_DC) { if (flags ITER_REVERSE) { return (object-u.brk.bound != UBRK_DONE); @@ -284,7 +292,7 @@ } } -static void text_iter_brk_char_current(text_iter_obj* object, long flags TSRMLS_DC) +static void text_iter_brk_current(text_iter_obj* object, long flags TSRMLS_DC) { UChar *start; int32_t length = -1; @@ -319,17 +327,17 @@ Z_USTRLEN_P(object-current) = length; } -static int text_iter_brk_char_key(text_iter_obj* object, long flags TSRMLS_DC) +static int text_iter_brk_key(text_iter_obj* object, long flags TSRMLS_DC) { return object-u.brk.index; } -static int text_iter_brk_char_offset(text_iter_obj* object, long flags TSRMLS_DC) +static int text_iter_brk_offset(text_iter_obj* object, long flags TSRMLS_DC) { return object-u.brk.cp_offset; } -static void text_iter_brk_char_next(text_iter_obj* object, long flags TSRMLS_DC) +static void text_iter_brk_next(text_iter_obj* object, long flags TSRMLS_DC) { int32_t tmp = object-u.brk.bound; @@ -365,7 +373,7 @@ object-u.brk.index++; } -static void text_iter_brk_char_rewind(text_iter_obj *object, long flags TSRMLS_DC) +static void text_iter_brk_rewind(text_iter_obj *object, long flags TSRMLS_DC) { if (flags ITER_REVERSE) { object-u.brk.bound = ubrk_last(object-u.brk.iter); @@ -380,12 +388,12 @@ } static text_iter_ops text_iter_brk_ops = { -
Re: [PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
On Sat, 24 Jun 2006, Andrei Zmievski wrote: andrei Sat Jun 24 18:18:38 2006 UTC Modified files: /php-src/ext/unicode unicode_iterators.c Log: - Remove support for code units in TextIterator (people shouldn't be examining individual code units anyway) Heh, originally this was added because there is no other way to get to them in a nice way... this was the whole point of my comments about it earlier and then we sorta agreed to put it in here... What is the point of removing it? regards, Derick -- Derick Rethans http://derickrethans.nl | http://ez.no | http://xdebug.org -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
Re: [PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
Why do you want to get to code units in a nice way? What do you plan to do with them? I think there's a very low chance someone would want to work at that level, and if they want to, they should convert the string to UTF-16 binary form and go from there. -Andrei On Jun 25, 2006, at 4:58 AM, Derick Rethans wrote: On Sat, 24 Jun 2006, Andrei Zmievski wrote: andrei Sat Jun 24 18:18:38 2006 UTC Modified files: /php-src/ext/unicode unicode_iterators.c Log: - Remove support for code units in TextIterator (people shouldn't be examining individual code units anyway) Heh, originally this was added because there is no other way to get to them in a nice way... this was the whole point of my comments about it earlier and then we sorta agreed to put it in here... What is the point of removing it? regards, Derick -- Derick Rethans http://derickrethans.nl | http://ez.no | http://xdebug.org -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Sat Jun 24 18:18:38 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: - Remove support for code units in TextIterator (people shouldn't be examining individual code units anyway) - Add offset() method. - Add optional locale parameter to the constructor. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.24r2=1.25diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.24 php-src/ext/unicode/unicode_iterators.c:1.25 --- php-src/ext/unicode/unicode_iterators.c:1.24Fri Mar 24 21:06:36 2006 +++ php-src/ext/unicode/unicode_iterators.c Sat Jun 24 18:18:38 2006 @@ -14,14 +14,13 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.24 2006/03/24 21:06:36 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.25 2006/06/24 18:18:38 andrei Exp $ */ /* * TODO * * - optimize current() to pass return_value to the handler so that it fills it * in directly instead of creating a new zval - * - return code units as binary strings? integers? or leave as unicode strings? 
* - implement Countable (or count_elements handler) and Seekable interfaces */ @@ -31,7 +30,6 @@ #include unicode/ubrk.h typedef enum { - ITER_CODE_UNIT, ITER_CODE_POINT, ITER_COMB_SEQUENCE, ITER_CHARACTER, @@ -53,23 +51,21 @@ size_t current_alloc; longflags; union { + int32_t start; struct { + int32_t start; int32_t index; - int32_t offset; } cp; struct { - int32_t index; - } cu; - struct { - int32_t index; int32_t start; int32_t end; + int32_t index; } cs; struct { - UBreakIterator *iter; - int32_t index; int32_t start; int32_t end; + int32_t index; + UBreakIterator *iter; } brk; } u; zend_object_iterator iter; @@ -99,71 +95,14 @@ PHPAPI zend_class_entry* text_iterator_ce; PHPAPI zend_class_entry* rev_text_iterator_ce; -/* Code unit ops */ - -static int text_iter_cu_valid(text_iter_obj* object TSRMLS_DC) -{ - if (object-flags ITER_REVERSE) { - return (object-u.cu.index = 0); - } else { - return (object-u.cu.index object-text_len); - } -} - -static void text_iter_cu_current(text_iter_obj* object TSRMLS_DC) -{ - u_memcpy(Z_USTRVAL_P(object-current), object-text + object-u.cu.index, 1); - Z_USTRVAL_P(object-current)[1] = 0; - Z_USTRLEN_P(object-current) = 1; -} - -static int text_iter_cu_key(text_iter_obj* object TSRMLS_DC) -{ - if (object-flags ITER_REVERSE) { - return object-text_len - object-u.cu.index - 1; - } else { - return object-u.cu.index; - } -} - -static void text_iter_cu_next(text_iter_obj* object TSRMLS_DC) -{ - if (object-flags ITER_REVERSE) { - if (object-u.cu.index = 0) { - object-u.cu.index--; - } - } else { - if (object-u.cu.index object-text_len) { - object-u.cu.index++; - } - } -} - -static void text_iter_cu_rewind(text_iter_obj *object TSRMLS_DC) -{ - if (object-flags ITER_REVERSE) { - object-u.cu.index = object-text_len-1; - } else { - object-u.cu.index = 0; - } -} - -static text_iter_ops text_iter_cu_ops = { - text_iter_cu_valid, - text_iter_cu_current, - text_iter_cu_key, - text_iter_cu_next, - text_iter_cu_rewind, -}; - /* Code point ops */ 
static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC) { if (object-flags ITER_REVERSE) { - return (object-u.cp.offset 0); + return (object-u.cp.start 0); } else { - return (object-u.cp.offset object-text_len); + return (object-u.cp.start object-text_len); } } @@ -172,7 +111,7 @@ UChar32 cp; int32_t tmp, buf_len; - tmp = object-u.cp.offset; + tmp = object-u.cp.start; if (object-flags ITER_REVERSE) { U16_PREV(object-text, 0, tmp, cp); } else { @@ -191,9 +130,9 @@ static void text_iter_cp_next(text_iter_obj* object TSRMLS_DC) { if (object-flags ITER_REVERSE) { - U16_BACK_1(object-text, 0, object-u.cp.offset); + U16_BACK_1(object-text, 0, object-u.cp.start); } else { -
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Sat Jun 24 21:57:14 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: A lot of work on making TextIterator support propert codepoint-level offsets and making it more robust in general. http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.25r2=1.26diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.25 php-src/ext/unicode/unicode_iterators.c:1.26 --- php-src/ext/unicode/unicode_iterators.c:1.25Sat Jun 24 18:18:38 2006 +++ php-src/ext/unicode/unicode_iterators.c Sat Jun 24 21:57:14 2006 @@ -14,11 +14,12 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.25 2006/06/24 18:18:38 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.26 2006/06/24 21:57:14 andrei Exp $ */ /* * TODO * + * - test with empty and 1 character strings * - optimize current() to pass return_value to the handler so that it fills it * in directly instead of creating a new zval * - implement Countable (or count_elements handler) and Seekable interfaces @@ -51,20 +52,23 @@ size_t current_alloc; longflags; union { - int32_t start; struct { - int32_t start; + int32_t offset; + int32_t cp_offset; int32_t index; } cp; struct { int32_t start; int32_t end; int32_t index; + int32_t start_cp_offset; + int32_t end_cp_offset; } cs; struct { int32_t start; int32_t end; int32_t index; + int32_t cp_offset; UBreakIterator *iter; } brk; } u; @@ -77,11 +81,12 @@ } typedef struct { - int (*valid) (text_iter_obj* object TSRMLS_DC); - void (*current)(text_iter_obj* object TSRMLS_DC); - int (*key)(text_iter_obj* object TSRMLS_DC); - void (*next) (text_iter_obj* object TSRMLS_DC); - void (*rewind) (text_iter_obj* object TSRMLS_DC); + int (*valid) (text_iter_obj* object, long flags TSRMLS_DC); + void (*current)(text_iter_obj* object, long flags TSRMLS_DC); + int (*key)(text_iter_obj* object, long flags TSRMLS_DC); + int (*offset) (text_iter_obj* object, long flags TSRMLS_DC); + void (*next) (text_iter_obj* object, long flags 
TSRMLS_DC); + void (*rewind) (text_iter_obj* object, long flags TSRMLS_DC); } text_iter_ops; enum UBreakIteratorType brk_type_map[] = { @@ -97,52 +102,65 @@ /* Code point ops */ -static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC) +static int text_iter_cp_valid(text_iter_obj* object, long flags TSRMLS_DC) { - if (object-flags ITER_REVERSE) { - return (object-u.cp.start 0); + if (flags ITER_REVERSE) { + return (object-u.cp.offset 0); } else { - return (object-u.cp.start object-text_len); + return (object-u.cp.offset object-text_len); } } -static void text_iter_cp_current(text_iter_obj* object TSRMLS_DC) +static void text_iter_cp_current(text_iter_obj* object, long flags TSRMLS_DC) { - UChar32 cp; - int32_t tmp, buf_len; + UChar32 cp = 0; + int32_t tmp, buf_len = 0; - tmp = object-u.cp.start; - if (object-flags ITER_REVERSE) { - U16_PREV(object-text, 0, tmp, cp); - } else { - U16_NEXT(object-text, tmp, object-text_len, cp); + if (text_iter_cp_valid(object, flags TSRMLS_CC)) { + tmp = object-u.cp.offset; + if (flags ITER_REVERSE) { + U16_PREV(object-text, 0, tmp, cp); + } else { + U16_NEXT(object-text, tmp, object-text_len, cp); + } + buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); } - buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); Z_USTRVAL_P(object-current)[buf_len] = 0; Z_USTRLEN_P(object-current) = buf_len; } -static int text_iter_cp_key(text_iter_obj* object TSRMLS_DC) +static int text_iter_cp_key(text_iter_obj* object, long flags TSRMLS_DC) { return object-u.cp.index; } -static void text_iter_cp_next(text_iter_obj* object TSRMLS_DC) +static int text_iter_cp_offset(text_iter_obj* object, long flags TSRMLS_DC) { - if (object-flags ITER_REVERSE) { - U16_BACK_1(object-text, 0, object-u.cp.start); - } else { - U16_FWD_1(object-text, object-u.cp.start, object-text_len); +
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Fri Mar 24 21:06:36 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Use intern-type for break iterator. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.23r2=1.24diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.23 php-src/ext/unicode/unicode_iterators.c:1.24 --- php-src/ext/unicode/unicode_iterators.c:1.23Sun Feb 26 11:57:14 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Mar 24 21:06:36 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.23 2006/02/26 11:57:14 dmitry Exp $ */ +/* $Id: unicode_iterators.c,v 1.24 2006/03/24 21:06:36 andrei Exp $ */ /* * TODO @@ -538,9 +538,9 @@ intern-flags |= ITER_REVERSE; } - if (ti_type = ITER_CHARACTER ti_type ITER_TYPE_LAST) { + if (intern-type = ITER_CHARACTER intern-type ITER_TYPE_LAST) { UErrorCode status = U_ZERO_ERROR; - intern-u.brk.iter = ubrk_open(brk_type_map[ti_type - ITER_CHARACTER], UG(default_locale), text, text_len, status); + intern-u.brk.iter = ubrk_open(brk_type_map[intern-type - ITER_CHARACTER], UG(default_locale), text, text_len, status); if (!U_SUCCESS(status)) { php_error(E_RECOVERABLE_ERROR, Could not create UBreakIterator: %s, u_errorName(status)); return; -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
helly Fri Feb 17 08:24:56 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: - Change to offsetof as suggested by Clayton http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.21r2=1.22diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.21 php-src/ext/unicode/unicode_iterators.c:1.22 --- php-src/ext/unicode/unicode_iterators.c:1.21Wed Feb 15 21:34:21 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Feb 17 08:24:56 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.21 2006/02/15 21:34:21 helly Exp $ */ +/* $Id: unicode_iterators.c,v 1.22 2006/02/17 08:24:56 helly Exp $ */ /* * TODO @@ -77,10 +77,7 @@ static inline text_iter_obj* text_iter_to_obj(zend_object_iterator *iter) { - static text_iter_obj adr; - static int ofs = (char*)adr.iter - (char*)adr; - - return (text_iter_obj *)((char*)iter - ofs); + return (text_iter_obj *)((char*)iter - offsetof(text_iter_obj, iter)); } typedef struct { -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c /ext/unicode/tests .cvsignore iterator_001.phpt
helly Wed Feb 15 21:34:21 2006 UTC Added files: /php-src/ext/unicode/tests .cvsignore iterator_001.phpt Modified files: /php-src/ext/unicodeunicode_iterators.c Log: - Little speedup + first test http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.20r2=1.21diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.20 php-src/ext/unicode/unicode_iterators.c:1.21 --- php-src/ext/unicode/unicode_iterators.c:1.20Mon Feb 13 10:23:58 2006 +++ php-src/ext/unicode/unicode_iterators.c Wed Feb 15 21:34:21 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.20 2006/02/13 10:23:58 dmitry Exp $ */ +/* $Id: unicode_iterators.c,v 1.21 2006/02/15 21:34:21 helly Exp $ */ /* * TODO @@ -72,12 +72,16 @@ int32_t end; } brk; } u; + zend_object_iterator iter; } text_iter_obj; -typedef struct { - zend_object_iterator intern; - text_iter_obj* object; -} text_iter_it; +static inline text_iter_obj* text_iter_to_obj(zend_object_iterator *iter) +{ + static text_iter_obj adr; + static int ofs = (char*)adr.iter - (char*)adr; + + return (text_iter_obj *)((char*)iter - ofs); +} typedef struct { int (*valid) (text_iter_obj* object TSRMLS_DC); @@ -389,54 +393,51 @@ static void text_iter_dtor(zend_object_iterator* iter TSRMLS_DC) { - text_iter_it* iterator = (text_iter_it *) iter; - zval_ptr_dtor((zval **)iterator-intern.data); - efree(iterator); + text_iter_obj* obj = text_iter_to_obj(iter); + zval *object = obj-iter.data; + + zval_ptr_dtor(object); } static int text_iter_valid(zend_object_iterator* iter TSRMLS_DC) { - text_iter_it* iterator = (text_iter_it *) iter; - text_iter_obj* object = iterator-object; + text_iter_obj* obj = text_iter_to_obj(iter); - if (iter_ops[object-type]-valid(object TSRMLS_CC)) + if (iter_ops[obj-type]-valid(obj TSRMLS_CC)) { return SUCCESS; - else + } else { return FAILURE; + } } static void text_iter_get_current_data(zend_object_iterator* iter, zval*** data TSRMLS_DC) { - 
text_iter_it* iterator = (text_iter_it *) iter; - text_iter_obj* object = iterator-object; + text_iter_obj* obj = text_iter_to_obj(iter); - iter_ops[object-type]-current(object TSRMLS_CC); - *data = object-current; + iter_ops[obj-type]-current(obj TSRMLS_CC); + *data = obj-current; } static int text_iter_get_current_key(zend_object_iterator* iter, char **str_key, uint *str_key_len, ulong *int_key TSRMLS_DC) { - text_iter_it* iterator = (text_iter_it *) iter; - text_iter_obj* object = iterator-object; + text_iter_obj* obj = text_iter_to_obj(iter); - *int_key = iter_ops[object-type]-key(object TSRMLS_CC); + *int_key = iter_ops[obj-type]-key(obj TSRMLS_CC); return HASH_KEY_IS_LONG; } static void text_iter_move_forward(zend_object_iterator* iter TSRMLS_DC) { - text_iter_it* iterator = (text_iter_it *) iter; - text_iter_obj* object = iterator-object; + text_iter_obj* obj = text_iter_to_obj(iter); - iter_ops[object-type]-next(object TSRMLS_CC); + iter_ops[obj-type]-next(obj TSRMLS_CC); } static void text_iter_rewind(zend_object_iterator* iter TSRMLS_DC) { - text_iter_it* iterator = (text_iter_it *) iter; - text_iter_obj* object = iterator-object; + text_iter_obj* obj = text_iter_to_obj(iter); - iter_ops[object-type]-rewind(object TSRMLS_CC); + iter_ops[obj-type]-rewind(obj TSRMLS_CC); } zend_object_iterator_funcs text_iter_funcs = { @@ -450,21 +451,18 @@ static zend_object_iterator* text_iter_get_iterator(zend_class_entry *ce, zval *object, int by_ref TSRMLS_DC) { - text_iter_it* iterator; text_iter_obj* iter_object; if (by_ref) { zend_error(E_ERROR, An iterator cannot be used with foreach by reference); } - iterator= emalloc(sizeof(text_iter_it)); iter_object = (text_iter_obj *) zend_object_store_get_object(object TSRMLS_CC); ZVAL_ADDREF(object); - iterator-intern.data = (void *) object; - iterator-intern.funcs = text_iter_funcs; - iterator-object = iter_object; + iter_object-iter.data = (void *) object; + iter_object-iter.funcs = text_iter_funcs; - return 
(zend_object_iterator *) iterator; + return (zend_object_iterator *) iter_object-iter; } static void text_iterator_free_storage(void *object TSRMLS_DC) http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/tests/.cvsignore?view=markuprev=1.1 Index:
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Sat Feb 11 00:16:43 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement character/word/line/sentence iterators and the reverse counterparts. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.18r2=1.19diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.18 php-src/ext/unicode/unicode_iterators.c:1.19 --- php-src/ext/unicode/unicode_iterators.c:1.18Fri Feb 10 00:23:29 2006 +++ php-src/ext/unicode/unicode_iterators.c Sat Feb 11 00:16:43 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.18 2006/02/10 00:23:29 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.19 2006/02/11 00:16:43 andrei Exp $ */ /* * TODO @@ -28,11 +28,16 @@ #include php.h #include zend_interfaces.h #include zend_exceptions.h +#include unicode/ubrk.h typedef enum { ITER_CODE_UNIT, ITER_CODE_POINT, ITER_COMB_SEQUENCE, + ITER_CHARACTER, + ITER_WORD, + ITER_LINE, + ITER_SENTENCE, ITER_TYPE_LAST, } text_iter_type; @@ -60,6 +65,12 @@ int32_t start; int32_t end; } cs; + struct { + UBreakIterator *iter; + int32_t index; + int32_t start; + int32_t end; + } brk; } u; } text_iter_obj; @@ -76,6 +87,13 @@ void (*rewind) (text_iter_obj* object TSRMLS_DC); } text_iter_ops; +enum UBreakIteratorType brk_type_map[] = { + UBRK_CHARACTER, + UBRK_WORD, + UBRK_LINE, + UBRK_SENTENCE, +}; + PHPAPI zend_class_entry* text_iterator_aggregate_ce; PHPAPI zend_class_entry* text_iterator_ce; PHPAPI zend_class_entry* rev_text_iterator_ce; @@ -276,12 +294,95 @@ }; +/* UBreakIterator Character Ops */ + +static int text_iter_brk_char_valid(text_iter_obj* object TSRMLS_DC) +{ + if (object-flags ITER_REVERSE) { + return (object-u.brk.start != UBRK_DONE); + } else { + return (object-u.brk.end != UBRK_DONE); + } +} + +static void text_iter_brk_char_current(text_iter_obj* object TSRMLS_DC) +{ + uint32_t length; + int32_t start = object-u.brk.start; + int32_t end = object-u.brk.end; + + if (object-flags 
ITER_REVERSE) { + if (end == UBRK_DONE) { + end = object-text_len; + } + } else { + if (start == UBRK_DONE) { + start = 0; + } + } + length = end - start; + if (length object-current_alloc-1) { + object-current_alloc = length+1; + Z_USTRVAL_P(object-current) = eurealloc(Z_USTRVAL_P(object-current), object-current_alloc); + } + u_memcpy(Z_USTRVAL_P(object-current), object-text + start, length); + Z_USTRVAL_P(object-current)[length] = 0; + Z_USTRLEN_P(object-current) = length; +} + +static int text_iter_brk_char_key(text_iter_obj* object TSRMLS_DC) +{ + return object-u.brk.index; +} + +static void text_iter_brk_char_next(text_iter_obj* object TSRMLS_DC) +{ + if (object-flags ITER_REVERSE) { + if (object-u.brk.start != UBRK_DONE) { + object-u.brk.end = object-u.brk.start; + object-u.brk.start = ubrk_previous(object-u.brk.iter); + object-u.brk.index++; + } + } else { + if (object-u.brk.end != UBRK_DONE) { + object-u.brk.start = object-u.brk.end; + object-u.brk.end = ubrk_next(object-u.brk.iter); + object-u.brk.index++; + } + } +} + +static void text_iter_brk_char_rewind(text_iter_obj *object TSRMLS_DC) +{ + if (object-flags ITER_REVERSE) { + object-u.brk.end = ubrk_last(object-u.brk.iter); + object-u.brk.start = ubrk_previous(object-u.brk.iter); + } else { + object-u.brk.start = ubrk_first(object-u.brk.iter); + object-u.brk.end = ubrk_next(object-u.brk.iter); + } + object-u.brk.index = 0; +} + +static text_iter_ops text_iter_brk_ops = { + text_iter_brk_char_valid, + text_iter_brk_char_current, + text_iter_brk_char_key, + text_iter_brk_char_next, + text_iter_brk_char_rewind, +}; + + /* Ops array */ static text_iter_ops* iter_ops[] = { text_iter_cu_ops, text_iter_cp_ops, text_iter_cs_ops, + text_iter_brk_ops, + text_iter_brk_ops, + text_iter_brk_ops, + text_iter_brk_ops, }; /* Iterator Funcs */ @@ -376,6 +477,9 @@ if (intern-text) { efree(intern-text); } + if
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Fri Feb 10 00:23:29 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Make ReverseTextIterator a separate class. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.17r2=1.18diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.17 php-src/ext/unicode/unicode_iterators.c:1.18 --- php-src/ext/unicode/unicode_iterators.c:1.17Wed Feb 8 00:16:50 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Feb 10 00:23:29 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.17 2006/02/08 00:16:50 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.18 2006/02/10 00:23:29 andrei Exp $ */ /* * TODO @@ -78,6 +78,7 @@ PHPAPI zend_class_entry* text_iterator_aggregate_ce; PHPAPI zend_class_entry* text_iterator_ce; +PHPAPI zend_class_entry* rev_text_iterator_ce; /* Code unit ops */ @@ -433,6 +434,10 @@ intern-flags = flags; } + if (Z_OBJCE_P(this_ptr) == U_CLASS_ENTRY(rev_text_iterator_ce)) { + intern-flags |= ITER_REVERSE; + } + iter_ops[intern-type]-rewind(intern TSRMLS_CC); } @@ -495,13 +500,19 @@ text_iterator_ce = zend_register_internal_class(ce TSRMLS_CC); text_iterator_ce-create_object = text_iterator_new; text_iterator_ce-get_iterator = text_iter_get_iterator; + text_iterator_ce-ce_flags |= ZEND_ACC_FINAL_CLASS; zend_class_implements(text_iterator_ce TSRMLS_CC, 1, zend_ce_traversable); + INIT_CLASS_ENTRY(ce, ReverseTextIterator, text_iterator_funcs); + rev_text_iterator_ce = zend_register_internal_class(ce TSRMLS_CC); + rev_text_iterator_ce-create_object = text_iterator_new; + rev_text_iterator_ce-get_iterator = text_iter_get_iterator; + rev_text_iterator_ce-ce_flags |= ZEND_ACC_FINAL_CLASS; + zend_class_implements(rev_text_iterator_ce TSRMLS_CC, 1, zend_ce_traversable); + zend_declare_class_constant_long(text_iterator_ce, CODE_UNIT, sizeof(CODE_UNIT)-1, ITER_CODE_UNIT TSRMLS_CC); zend_declare_class_constant_long(text_iterator_ce, CODE_POINT, sizeof(CODE_POINT)-1, 
ITER_CODE_POINT TSRMLS_CC); zend_declare_class_constant_long(text_iterator_ce, COMB_SEQUENCE, sizeof(COMB_SEQUENCE)-1, ITER_COMB_SEQUENCE TSRMLS_CC); - - zend_declare_class_constant_long(text_iterator_ce, REVERSE, sizeof(REVERSE)-1, ITER_REVERSE TSRMLS_CC); } /* -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Tue Feb 7 20:01:29 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Implement reverse iteration for codeunits and codepoints. Combining sequences are next. # This is ugly, though. # foreach (new TextIterator($a, # TextIterator::CODE_POINT|TextIterator::REVERSE) as $k => $c) { # var_dump("$k: $c"); # } # Any suggestions? http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.15r2=1.16diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.15 php-src/ext/unicode/unicode_iterators.c:1.16 --- php-src/ext/unicode/unicode_iterators.c:1.15Tue Feb 7 00:13:54 2006 +++ php-src/ext/unicode/unicode_iterators.c Tue Feb 7 20:01:28 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.15 2006/02/07 00:13:54 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.16 2006/02/07 20:01:28 andrei Exp $ */ /* * TODO @@ -36,8 +36,8 @@ ITER_TYPE_LAST, } text_iter_type; -const uint32_t ITER_REVERSE = 0x100; -const uint32_t ITER_TYPE_MASK = 0xFF; +static const uint32_t ITER_REVERSE = 0x100; +static const uint32_t ITER_TYPE_MASK = 0xFF; typedef struct { zend_object std; @@ -46,13 +46,14 @@ text_iter_type type; zval* current; size_t current_alloc; + longflags; union { struct { uint32_t index; uint32_t offset; } cp; struct { - uint32_t index; + int32_t index; } cu; struct { uint32_t index; @@ -82,7 +83,11 @@ static int text_iter_cu_valid(text_iter_obj* object TSRMLS_DC) { - return (object-u.cu.index object-text_len); + if (object-flags ITER_REVERSE) { + return (object-u.cu.index = 0); + } else { + return (object-u.cu.index object-text_len); + } } static void text_iter_cu_current(text_iter_obj* object TSRMLS_DC) @@ -94,17 +99,33 @@ static int text_iter_cu_key(text_iter_obj* object TSRMLS_DC) { - return object-u.cu.index; + if (object-flags ITER_REVERSE) { + return object-text_len - object-u.cu.index - 1; + } else { + return object-u.cu.index; + } } static void text_iter_cu_next(text_iter_obj* 
object TSRMLS_DC) { - object-u.cu.index++; + if (object-flags ITER_REVERSE) { + if (object-u.cu.index = 0) { + object-u.cu.index--; + } + } else { + if (object-u.cu.index object-text_len) { + object-u.cu.index++; + } + } } static void text_iter_cu_rewind(text_iter_obj *object TSRMLS_DC) { - object-u.cu.index = 0; + if (object-flags ITER_REVERSE) { + object-u.cu.index = object-text_len-1; + } else { + object-u.cu.index = 0; + } } static text_iter_ops text_iter_cu_ops = { @@ -119,7 +140,11 @@ static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC) { - return (object-u.cp.offset object-text_len); + if (object-flags ITER_REVERSE) { + return (object-u.cp.offset 0); + } else { + return (object-u.cp.offset object-text_len); + } } static void text_iter_cp_current(text_iter_obj* object TSRMLS_DC) @@ -128,7 +153,11 @@ int32_t tmp, buf_len; tmp = object-u.cp.offset; - U16_NEXT(object-text, tmp, object-text_len, cp); + if (object-flags ITER_REVERSE) { + U16_PREV(object-text, 0, tmp, cp); + } else { + U16_NEXT(object-text, tmp, object-text_len, cp); + } buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); Z_USTRVAL_P(object-current)[buf_len] = 0; Z_USTRLEN_P(object-current) = buf_len; @@ -141,13 +170,21 @@ static void text_iter_cp_next(text_iter_obj* object TSRMLS_DC) { - U16_FWD_1(object-text, object-u.cp.offset, object-text_len); + if (object-flags ITER_REVERSE) { + U16_BACK_1(object-text, 0, object-u.cp.offset); + } else { + U16_FWD_1(object-text, object-u.cp.offset, object-text_len); + } object-u.cp.index++; } static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC) { - object-u.cp.offset = 0; + if (object-flags ITER_REVERSE) { + object-u.cp.offset = object-text_len; + } else { + object-u.cp.offset = 0; + } object-u.cp.index = 0; } @@ -371,11 +408,12 @@ intern-text_len = text_len; if (ZEND_NUM_ARGS() 1) { ti_type =
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Wed Feb 8 00:16:50 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Reverse iteration for combining sequences. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.16r2=1.17diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.16 php-src/ext/unicode/unicode_iterators.c:1.17 --- php-src/ext/unicode/unicode_iterators.c:1.16Tue Feb 7 20:01:28 2006 +++ php-src/ext/unicode/unicode_iterators.c Wed Feb 8 00:16:50 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.16 2006/02/07 20:01:28 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.17 2006/02/08 00:16:50 andrei Exp $ */ /* * TODO @@ -49,16 +49,16 @@ longflags; union { struct { - uint32_t index; - uint32_t offset; + int32_t index; + int32_t offset; } cp; struct { int32_t index; } cu; struct { - uint32_t index; - uint32_t start; - uint32_t end; + int32_t index; + int32_t start; + int32_t end; } cs; } u; } text_iter_obj; @@ -200,7 +200,11 @@ static int text_iter_cs_valid(text_iter_obj* object TSRMLS_DC) { - return (object-u.cs.end = object-text_len); + if (object-flags ITER_REVERSE) { + return (object-u.cs.end 0); + } else { + return (object-u.cs.end = object-text_len); + } } static void text_iter_cs_current(text_iter_obj* object TSRMLS_DC) @@ -223,18 +227,28 @@ static void text_iter_cs_next(text_iter_obj* object TSRMLS_DC) { UChar32 cp; - uint32_t end; + uint32_t tmp; - object-u.cs.start = object-u.cs.end; - U16_NEXT(object-text, object-u.cs.end, object-text_len, cp); - if (u_getCombiningClass(cp) == 0) { - end = object-u.cs.end; - while (end object-text_len) { - U16_NEXT(object-text, end, object-text_len, cp); - if (u_getCombiningClass(cp) == 0) { - break; - } else { - object-u.cs.end = end; + if (object-flags ITER_REVERSE) { + object-u.cs.end = object-u.cs.start; + U16_PREV(object-text, 0, object-u.cs.start, cp); + if (u_getCombiningClass(cp) != 0) { + do { + U16_PREV(object-text, 0, 
object-u.cs.start, cp); + } while (object-u.cs.start 0 u_getCombiningClass(cp) != 0); + } + } else { + object-u.cs.start = object-u.cs.end; + U16_NEXT(object-text, object-u.cs.end, object-text_len, cp); + if (u_getCombiningClass(cp) == 0) { + tmp = object-u.cs.end; + while (tmp object-text_len) { + U16_NEXT(object-text, tmp, object-text_len, cp); + if (u_getCombiningClass(cp) == 0) { + break; + } else { + object-u.cs.end = tmp; + } } } } @@ -243,8 +257,11 @@ static void text_iter_cs_rewind(text_iter_obj *object TSRMLS_DC) { - object-u.cs.start = 0; - object-u.cs.end = 0; + if (object-flags ITER_REVERSE) { + object-u.cs.start = object-u.cs.end = object-text_len; + } else { + object-u.cs.start = object-u.cs.end = 0; + } text_iter_cs_next(object TSRMLS_CC); /* find first sequence */ object-u.cs.index = 0; /* because _next increments index */ } -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Mon Feb 6 17:42:28 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Make TextIterator fast again, now that we don't have to worry about references. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.11r2=1.12diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.11 php-src/ext/unicode/unicode_iterators.c:1.12 --- php-src/ext/unicode/unicode_iterators.c:1.11Sun Feb 5 23:31:47 2006 +++ php-src/ext/unicode/unicode_iterators.c Mon Feb 6 17:42:28 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.11 2006/02/05 23:31:47 helly Exp $ */ +/* $Id: unicode_iterators.c,v 1.12 2006/02/06 17:42:28 andrei Exp $ */ /* * TODO @@ -126,16 +126,11 @@ UChar32 cp; int32_t tmp, buf_len; - if (!object-current) { - MAKE_STD_ZVAL(object-current); - Z_USTRVAL_P(object-current) = eumalloc(3); - Z_TYPE_P(object-current) = IS_UNICODE; - tmp = object-u.cp.offset; - U16_NEXT(object-text, tmp, object-text_len, cp); - buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); - Z_USTRVAL_P(object-current)[buf_len] = 0; - Z_USTRLEN_P(object-current) = buf_len; - } + tmp = object-u.cp.offset; + U16_NEXT(object-text, tmp, object-text_len, cp); + buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); + Z_USTRVAL_P(object-current)[buf_len] = 0; + Z_USTRLEN_P(object-current) = buf_len; } static int text_iter_cp_key(text_iter_obj* object TSRMLS_DC) @@ -147,20 +142,12 @@ { U16_FWD_1(object-text, object-u.cp.offset, object-text_len); object-u.cp.index++; - if (object-current) { - zval_ptr_dtor(object-current); - object-current = NULL; - } } static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC) { object-u.cp.offset = 0; object-u.cp.index = 0; - if (object-current) { - zval_ptr_dtor(object-current); - object-current = NULL; - } } static text_iter_ops text_iter_cp_ops = { @@ -268,9 +255,8 @@ if (intern-text) { efree(intern-text); } - if (intern-current) { - 
zval_ptr_dtor(intern-current); - } + ZVAL_DELREF(intern-current); + zval_ptr_dtor(intern-current); efree(object); } @@ -289,6 +275,10 @@ zend_hash_copy(intern-std.properties, class_type-default_properties, (copy_ctor_func_t) zval_add_ref, (void *) tmp, sizeof(zval *)); intern-type = ITER_CODE_POINT; + MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */ + Z_USTRVAL_P(intern-current) = eumalloc(3); + Z_TYPE_P(intern-current) = IS_UNICODE; + ZVAL_ADDREF(intern-current); retval.handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL TSRMLS_CC); retval.handlers = zend_get_std_object_handlers(); -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Mon Feb 6 18:18:41 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Some TODO items. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.12r2=1.13diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.12 php-src/ext/unicode/unicode_iterators.c:1.13 --- php-src/ext/unicode/unicode_iterators.c:1.12Mon Feb 6 17:42:28 2006 +++ php-src/ext/unicode/unicode_iterators.c Mon Feb 6 18:18:41 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.12 2006/02/06 17:42:28 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.13 2006/02/06 18:18:41 andrei Exp $ */ /* * TODO @@ -22,6 +22,7 @@ * - optimize current() to pass return_value to the handler so that it fills it * in directly instead of creating a new zval * - return code units as binary strings? integers? or leave as unicode strings? + * - implement Countable (or count_elements handler) and Seekable interfaces */ #include php.h @@ -43,8 +44,8 @@ zval* current; union { struct { - int32_t offset; int32_t index; + int32_t offset; } cp; struct { int32_t index; -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Mon Feb 6 22:58:10 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: - Fix up a bunch of stuff. - Register TextIterator type constants. # Not sure if I like them as class constants. Cleaner, but also longer # to type. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.13r2=1.14diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.13 php-src/ext/unicode/unicode_iterators.c:1.14 --- php-src/ext/unicode/unicode_iterators.c:1.13Mon Feb 6 18:18:41 2006 +++ php-src/ext/unicode/unicode_iterators.c Mon Feb 6 22:58:10 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.13 2006/02/06 18:18:41 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.14 2006/02/06 22:58:10 andrei Exp $ */ /* * TODO @@ -28,14 +28,17 @@ #include php.h #include zend_interfaces.h #include zend_exceptions.h -#include ext/spl/spl_exceptions.h typedef enum { ITER_CODE_UNIT, ITER_CODE_POINT, ITER_COMB_SEQUENCE, + ITER_TYPE_LAST, } text_iter_type; +const uint32_t ITER_REVERSE = 0x100; +const uint32_t ITER_TYPE_MASK = 0xFF; + typedef struct { zend_object std; UChar* text; @@ -78,10 +81,9 @@ static void text_iter_cu_current(text_iter_obj* object TSRMLS_DC) { - if (!object-current) { - MAKE_STD_ZVAL(object-current); - ZVAL_UNICODEL(object-current, object-text + object-u.cu.index, 1, 1); - } + u_memcpy(Z_USTRVAL_P(object-current), object-text + object-u.cu.index, 1); + Z_USTRVAL_P(object-current)[1] = 0; + Z_USTRLEN_P(object-current) = 1; } static int text_iter_cu_key(text_iter_obj* object TSRMLS_DC) @@ -92,19 +94,11 @@ static void text_iter_cu_next(text_iter_obj* object TSRMLS_DC) { object-u.cu.index++; - if (object-current) { - zval_ptr_dtor(object-current); - object-current = NULL; - } } static void text_iter_cu_rewind(text_iter_obj *object TSRMLS_DC) { object-u.cu.index = 0; - if (object-current) { - zval_ptr_dtor(object-current); - object-current = NULL; - } } static text_iter_ops 
text_iter_cu_ops = { @@ -218,7 +212,7 @@ iter_ops[object-type]-rewind(object TSRMLS_CC); } -zend_object_iterator_funcs text_iter_cp_funcs = { +zend_object_iterator_funcs text_iter_funcs = { text_iter_dtor, text_iter_valid, text_iter_get_current_data, @@ -240,7 +234,7 @@ ZVAL_ADDREF(object); iterator-intern.data = (void *) object; - iterator-intern.funcs = text_iter_cp_funcs; + iterator-intern.funcs = text_iter_funcs; iterator-object = iter_object; return (zend_object_iterator *) iterator; @@ -256,7 +250,6 @@ if (intern-text) { efree(intern-text); } - ZVAL_DELREF(intern-current); zval_ptr_dtor(intern-current); efree(object); } @@ -278,8 +271,8 @@ intern-type = ITER_CODE_POINT; MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */ Z_USTRVAL_P(intern-current) = eumalloc(3); + Z_USTRVAL_P(intern-current)[0] = 0; Z_TYPE_P(intern-current) = IS_UNICODE; - ZVAL_ADDREF(intern-current); retval.handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL TSRMLS_CC); retval.handlers = zend_get_std_object_handlers(); @@ -293,8 +286,10 @@ int32_t text_len; zval *object = getThis(); text_iter_obj *intern; + text_iter_type ti_type; + long flags = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, u, text, text_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, u|l, text, text_len, flags) == FAILURE) { return; } @@ -302,8 +297,16 @@ intern-text = eustrndup(text, text_len); intern-text_len = text_len; + if (ZEND_NUM_ARGS() 1) { + ti_type = flags ITER_TYPE_MASK; + if (flags ITER_TYPE_LAST) { + intern-type = ti_type; + } else { + php_error(E_WARNING, Invalid iterator type in TextIterator constructor); + } + } - text_iter_cp_rewind(intern TSRMLS_CC); + iter_ops[intern-type]-rewind(intern TSRMLS_CC); } PHP_METHOD(TextIterator, current) @@ -344,7 +347,7 @@ zval *object = getThis(); text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC); - iter_ops[object-type]-rewind(intern TSRMLS_CC); +
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Fri Feb 3 21:53:05 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Guard against assign-by-ref. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.5r2=1.6diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.5 php-src/ext/unicode/unicode_iterators.c:1.6 --- php-src/ext/unicode/unicode_iterators.c:1.5 Fri Feb 3 00:09:19 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Feb 3 21:53:05 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.5 2006/02/03 00:09:19 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.6 2006/02/03 21:53:05 andrei Exp $ */ #include php.h @@ -169,6 +169,7 @@ if (intern-text) { efree(intern-text); } + ZVAL_DELREF(intern-current); zval_ptr_dtor(intern-current); efree(object); } @@ -191,6 +192,7 @@ MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */ Z_USTRVAL_P(intern-current) = eumalloc(3); Z_TYPE_P(intern-current) = IS_UNICODE; + ZVAL_ADDREF(intern-current); retval.handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL TSRMLS_CC); retval.handlers = zend_get_std_object_handlers(); -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Fri Feb 3 23:50:42 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Gah. In order to avoid memory corruption when using references in foreach() this code is necessary. But it makes iterator 6x slower. We should keep thinking about how to optimize it. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.6r2=1.7diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.6 php-src/ext/unicode/unicode_iterators.c:1.7 --- php-src/ext/unicode/unicode_iterators.c:1.6 Fri Feb 3 21:53:05 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Feb 3 23:50:42 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.6 2006/02/03 21:53:05 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.7 2006/02/03 23:50:42 andrei Exp $ */ #include php.h @@ -58,11 +58,16 @@ UChar32 cp; int32_t tmp, buf_len; - tmp = object-offset; - U16_NEXT(object-text, tmp, object-text_len, cp); - buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); - Z_USTRVAL_P(object-current)[buf_len] = 0; - Z_USTRLEN_P(object-current) = buf_len; + if (!object-current) { + MAKE_STD_ZVAL(object-current); + Z_USTRVAL_P(object-current) = eumalloc(3); + Z_TYPE_P(object-current) = IS_UNICODE; + tmp = object-offset; + U16_NEXT(object-text, tmp, object-text_len, cp); + buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); + Z_USTRVAL_P(object-current)[buf_len] = 0; + Z_USTRLEN_P(object-current) = buf_len; + } } static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC) @@ -74,12 +79,20 @@ { U16_FWD_1(object-text, object-offset, object-text_len); object-index++; + if (object-current) { + zval_ptr_dtor(object-current); + object-current = NULL; + } } static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC) { object-offset = 0; object-index = 0; + if (object-current) { + zval_ptr_dtor(object-current); + object-current = NULL; + } } @@ -169,8 +182,9 @@ if (intern-text) { 
efree(intern-text); } - ZVAL_DELREF(intern-current); - zval_ptr_dtor(intern-current); + if (intern-current) { + zval_ptr_dtor(intern-current); + } efree(object); } @@ -189,10 +203,6 @@ zend_hash_copy(intern-std.properties, class_type-default_properties, (copy_ctor_func_t) zval_add_ref, (void *) tmp, sizeof(zval *)); intern-type = ITER_CODE_POINT; - MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */ - Z_USTRVAL_P(intern-current) = eumalloc(3); - Z_TYPE_P(intern-current) = IS_UNICODE; - ZVAL_ADDREF(intern-current); retval.handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL TSRMLS_CC); retval.handlers = zend_get_std_object_handlers(); -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
Re: [PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
Hello Andrei, maybe internal c-level iterators can have a flag that disallows foreach by reference? regards marcus Saturday, February 4, 2006, 12:50:42 AM, you wrote: andrei Fri Feb 3 23:50:42 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Gah. In order to avoid memory corruption when using references in foreach() this code is necessary. But it makes iterator 6x slower. We should keep thinking about how to optimize it. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.6r2=1.7diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.6 php-src/ext/unicode/unicode_iterators.c:1.7 --- php-src/ext/unicode/unicode_iterators.c:1.6 Fri Feb 3 21:53:05 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Feb 3 23:50:42 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.6 2006/02/03 21:53:05 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.7 2006/02/03 23:50:42 andrei Exp $ */ #include php.h @@ -58,11 +58,16 @@ UChar32 cp; int32_t tmp, buf_len; - tmp = object-offset; - U16_NEXT(object-text, tmp, object-text_len, cp); - buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); - Z_USTRVAL_P(object-current)[buf_len] = 0; - Z_USTRLEN_P(object-current) = buf_len; + if (!object-current) { + MAKE_STD_ZVAL(object-current); + Z_USTRVAL_P(object-current) = eumalloc(3); + Z_TYPE_P(object-current) = IS_UNICODE; + tmp = object-offset; + U16_NEXT(object-text, tmp, object-text_len, cp); + buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); + Z_USTRVAL_P(object-current)[buf_len] = 0; + Z_USTRLEN_P(object-current) = buf_len; + } } static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC) @@ -74,12 +79,20 @@ { U16_FWD_1(object-text, object-offset, object-text_len); object-index++; + if (object-current) { + zval_ptr_dtor(object-current); + object-current = NULL; + } } static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC) { 
object-offset = 0; object-index = 0; + if (object-current) { + zval_ptr_dtor(object-current); + object-current = NULL; + } } @@ -169,8 +182,9 @@ if (intern-text) { efree(intern-text); } - ZVAL_DELREF(intern-current); - zval_ptr_dtor(intern-current); + if (intern-current) { + zval_ptr_dtor(intern-current); + } efree(object); } @@ -189,10 +203,6 @@ zend_hash_copy(intern-std.properties, class_type-default_properties, (copy_ctor_func_t) zval_add_ref, (void *) tmp, sizeof(zval *)); intern-type = ITER_CODE_POINT; - MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */ - Z_USTRVAL_P(intern-current) = eumalloc(3); - Z_TYPE_P(intern-current) = IS_UNICODE; - ZVAL_ADDREF(intern-current); retval.handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL TSRMLS_CC); retval.handlers = zend_get_std_object_handlers(); Best regards, Marcus -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
Re: [PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
Hello Andrei, actually, thinking twice, we don't need a flag. If something implements Iterator we shouldn't allow foreach by reference anyway because the iterator signature is mixed current() and not mixed &current(). And since both direct and manual iteration should be compatible we should just disallow it in the same manner we needed to disallow ArrayAccess reference handling. Something that only implements Traversable might however still do foreach by reference. So the change is probably quite small. best regards marcus Andi? Saturday, February 4, 2006, 12:51:53 AM, you wrote: Hello Andrei, maybe internal c-level iterators can have a flag that disallows foreach by reference? regards marcus Saturday, February 4, 2006, 12:50:42 AM, you wrote: andrei Fri Feb 3 23:50:42 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Gah. In order to avoid memory corruption when using references in foreach() this code is necessary. But it makes iterator 6x slower. We should keep thinking about how to optimize it. 
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.6r2=1.7diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.6 php-src/ext/unicode/unicode_iterators.c:1.7 --- php-src/ext/unicode/unicode_iterators.c:1.6 Fri Feb 3 21:53:05 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Feb 3 23:50:42 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.6 2006/02/03 21:53:05 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.7 2006/02/03 23:50:42 andrei Exp $ */ #include php.h @@ -58,11 +58,16 @@ UChar32 cp; int32_t tmp, buf_len; - tmp = object-offset; - U16_NEXT(object-text, tmp, object-text_len, cp); - buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); - Z_USTRVAL_P(object-current)[buf_len] = 0; - Z_USTRLEN_P(object-current) = buf_len; + if (!object-current) { + MAKE_STD_ZVAL(object-current); + Z_USTRVAL_P(object-current) = eumalloc(3); + Z_TYPE_P(object-current) = IS_UNICODE; + tmp = object-offset; + U16_NEXT(object-text, tmp, object-text_len, cp); + buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); + Z_USTRVAL_P(object-current)[buf_len] = 0; + Z_USTRLEN_P(object-current) = buf_len; + } } static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC) @@ -74,12 +79,20 @@ { U16_FWD_1(object-text, object-offset, object-text_len); object-index++; + if (object-current) { + zval_ptr_dtor(object-current); + object-current = NULL; + } } static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC) { object-offset = 0; object-index = 0; + if (object-current) { + zval_ptr_dtor(object-current); + object-current = NULL; + } } @@ -169,8 +182,9 @@ if (intern-text) { efree(intern-text); } - ZVAL_DELREF(intern-current); - zval_ptr_dtor(intern-current); + if (intern-current) { + zval_ptr_dtor(intern-current); + } efree(object); } @@ -189,10 +203,6 @@ zend_hash_copy(intern-std.properties, class_type-default_properties, (copy_ctor_func_t) zval_add_ref, 
(void *) tmp, sizeof(zval *)); intern-type = ITER_CODE_POINT; - MAKE_STD_ZVAL(intern-current); /* pre-allocate buffer for codepoint */ - Z_USTRVAL_P(intern-current) = eumalloc(3); - Z_TYPE_P(intern-current) = IS_UNICODE; - ZVAL_ADDREF(intern-current); retval.handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL TSRMLS_CC); retval.handlers = zend_get_std_object_handlers(); Best regards, Marcus Best regards, Marcus -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Sat Feb 4 00:23:52 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Abstract the iterator interface so that we can add new types. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.7r2=1.8diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.7 php-src/ext/unicode/unicode_iterators.c:1.8 --- php-src/ext/unicode/unicode_iterators.c:1.7 Fri Feb 3 23:50:42 2006 +++ php-src/ext/unicode/unicode_iterators.c Sat Feb 4 00:23:52 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.7 2006/02/03 23:50:42 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.8 2006/02/04 00:23:52 andrei Exp $ */ #include php.h @@ -34,8 +34,15 @@ uint32_ttext_len; text_iter_type type; zval* current; - int32_t offset; - int32_t index; + union { + struct { + int32_t offset; + int32_t index; + } cp; + struct { + int32_t index; + } cu; + } u; } text_iter_obj; typedef struct { @@ -43,17 +50,30 @@ text_iter_obj* object; } text_iter_it; +typedef struct { + int (*valid) (text_iter_obj* object TSRMLS_DC); + void (*current)(text_iter_obj* object TSRMLS_DC); + int (*key)(text_iter_obj* object TSRMLS_DC); + void (*next) (text_iter_obj* object TSRMLS_DC); + void (*rewind) (text_iter_obj* object TSRMLS_DC); +} text_iter_ops; + PHPAPI zend_class_entry* text_iterator_aggregate_ce; PHPAPI zend_class_entry* text_iterator_ce; +/* Code unit ops */ + +static text_iter_ops text_iter_cu_ops = { +}; + /* Code point ops */ static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC) { - return (object-offset object-text_len); + return (object-u.cp.offset object-text_len); } -static void text_iter_cp_get_current_data(text_iter_obj* object TSRMLS_DC) +static void text_iter_cp_current(text_iter_obj* object TSRMLS_DC) { UChar32 cp; int32_t tmp, buf_len; @@ -62,7 +82,7 @@ MAKE_STD_ZVAL(object-current); Z_USTRVAL_P(object-current) = eumalloc(3); Z_TYPE_P(object-current) = IS_UNICODE; - tmp = object-offset; + 
tmp = object-u.cp.offset; U16_NEXT(object-text, tmp, object-text_len, cp); buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object-current)); Z_USTRVAL_P(object-current)[buf_len] = 0; @@ -70,15 +90,15 @@ } } -static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC) +static int text_iter_cp_key(text_iter_obj* object TSRMLS_DC) { - return object-index; + return object-u.cp.index; } -static void text_iter_cp_move_forward(text_iter_obj* object TSRMLS_DC) +static void text_iter_cp_next(text_iter_obj* object TSRMLS_DC) { - U16_FWD_1(object-text, object-offset, object-text_len); - object-index++; + U16_FWD_1(object-text, object-u.cp.offset, object-text_len); + object-u.cp.index++; if (object-current) { zval_ptr_dtor(object-current); object-current = NULL; @@ -87,14 +107,26 @@ static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC) { - object-offset = 0; - object-index = 0; + object-u.cp.offset = 0; + object-u.cp.index = 0; if (object-current) { zval_ptr_dtor(object-current); object-current = NULL; } } +static text_iter_ops text_iter_cp_ops = { + text_iter_cp_valid, + text_iter_cp_current, + text_iter_cp_key, + text_iter_cp_next, + text_iter_cp_rewind, +}; + +static text_iter_ops* iter_ops[2] = { + text_iter_cu_ops, + text_iter_cp_ops, +}; /* Iterator Funcs */ @@ -110,7 +142,7 @@ text_iter_it* iterator = (text_iter_it *) iter; text_iter_obj* object = iterator-object; - if (text_iter_cp_valid(object TSRMLS_CC)) + if (iter_ops[object-type]-valid(object TSRMLS_CC)) return SUCCESS; else return FAILURE; @@ -121,7 +153,7 @@ text_iter_it* iterator = (text_iter_it *) iter; text_iter_obj* object = iterator-object; - text_iter_cp_get_current_data(object TSRMLS_CC); + iter_ops[object-type]-current(object TSRMLS_CC); *data = object-current; } @@ -130,7 +162,7 @@ text_iter_it* iterator = (text_iter_it *) iter; text_iter_obj* object = iterator-object; - *int_key = text_iter_cp_get_current_key(object TSRMLS_CC); + *int_key = iter_ops[object-type]-key(object 
TSRMLS_CC); return
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Sat Feb 4 00:35:37 2006 UTC Modified files: /php-src/ext/unicodeunicode_iterators.c Log: Add code unit ops. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.8r2=1.9diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.8 php-src/ext/unicode/unicode_iterators.c:1.9 --- php-src/ext/unicode/unicode_iterators.c:1.8 Sat Feb 4 00:23:52 2006 +++ php-src/ext/unicode/unicode_iterators.c Sat Feb 4 00:35:37 2006 @@ -14,8 +14,15 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.8 2006/02/04 00:23:52 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.9 2006/02/04 00:35:37 andrei Exp $ */ +/* + * TODO + * + * - optimize current() to pass return_value to the handler so that it fills it + * in directly instead of creating a new zval + * - return code units as binary strings? integers? or leave as unicode strings? + */ #include php.h #include zend_interfaces.h @@ -63,7 +70,48 @@ /* Code unit ops */ +static int text_iter_cu_valid(text_iter_obj* object TSRMLS_DC) +{ + return (object-u.cu.index object-text_len); +} + +static void text_iter_cu_current(text_iter_obj* object TSRMLS_DC) +{ + if (!object-current) { + MAKE_STD_ZVAL(object-current); + ZVAL_UNICODEL(object-current, object-text + object-u.cu.index, 1, 1); + } +} + +static int text_iter_cu_key(text_iter_obj* object TSRMLS_DC) +{ + return object-u.cu.index; +} + +static void text_iter_cu_next(text_iter_obj* object TSRMLS_DC) +{ + object-u.cu.index++; + if (object-current) { + zval_ptr_dtor(object-current); + object-current = NULL; + } +} + +static void text_iter_cu_rewind(text_iter_obj *object TSRMLS_DC) +{ + object-u.cu.index = 0; + if (object-current) { + zval_ptr_dtor(object-current); + object-current = NULL; + } +} + static text_iter_ops text_iter_cu_ops = { + text_iter_cu_valid, + text_iter_cu_current, + text_iter_cu_key, + text_iter_cu_next, + text_iter_cu_rewind, }; /* Code point ops */ -- PHP CVS Mailing List (http://www.php.net/) To 
unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Sat Feb 4 00:41:42 2006 UTC Modified files: /php-src/ext/unicode unicode_iterators.c Log: Implement Traversable instead of Iterator. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.9&r2=1.10&diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.9 php-src/ext/unicode/unicode_iterators.c:1.10 --- php-src/ext/unicode/unicode_iterators.c:1.9 Sat Feb 4 00:35:37 2006 +++ php-src/ext/unicode/unicode_iterators.c Sat Feb 4 00:41:42 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.9 2006/02/04 00:35:37 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.10 2006/02/04 00:41:42 andrei Exp $ */ /* * TODO @@ -366,9 +366,9 @@ INIT_CLASS_ENTRY(ce, TextIterator, text_iterator_funcs); text_iterator_ce = zend_register_internal_class(ce TSRMLS_CC); - zend_class_implements(text_iterator_ce TSRMLS_CC, 1, zend_ce_iterator); text_iterator_ce-create_object = text_iterator_new; text_iterator_ce-get_iterator = text_iter_get_iterator; + zend_class_implements(text_iterator_ce TSRMLS_CC, 1, zend_ce_traversable); } /* -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Wed Feb 1 23:53:53 2006 UTC Modified files: /php-src/ext/unicode unicode_iterators.c Log: Check for intern->text before destroying it. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.1&r2=1.2&diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.1 php-src/ext/unicode/unicode_iterators.c:1.2 --- php-src/ext/unicode/unicode_iterators.c:1.1 Wed Feb 1 23:50:50 2006 +++ php-src/ext/unicode/unicode_iterators.c Wed Feb 1 23:53:53 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.1 2006/02/01 23:50:50 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.2 2006/02/01 23:53:53 andrei Exp $ */ #include php.h @@ -46,7 +46,7 @@ zend_hash_destroy(intern-std.properties); FREE_HASHTABLE(intern-std.properties); - zval_ptr_dtor(intern-text); + if (intern-text) zval_ptr_dtor(intern-text); efree(object); } @@ -90,6 +90,7 @@ } if (Z_TYPE_P(text) != IS_UNICODE) { + printf(not unicode\n); zend_throw_exception(U_CLASS_ENTRY(spl_ce_InvalidArgumentException), Text iterator expects argument to be a Unicode string, 0 TSRMLS_CC); return; } -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
andrei Thu Feb 2 00:05:21 2006 UTC Modified files: /php-src/ext/unicode unicode_iterators.c Log: Remove debug message. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.2&r2=1.3&diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.2 php-src/ext/unicode/unicode_iterators.c:1.3 --- php-src/ext/unicode/unicode_iterators.c:1.2 Wed Feb 1 23:53:53 2006 +++ php-src/ext/unicode/unicode_iterators.c Thu Feb 2 00:05:21 2006 @@ -14,7 +14,7 @@ +--+ */ -/* $Id: unicode_iterators.c,v 1.2 2006/02/01 23:53:53 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.3 2006/02/02 00:05:21 andrei Exp $ */ #include php.h @@ -90,7 +90,6 @@ } if (Z_TYPE_P(text) != IS_UNICODE) { - printf(not unicode\n); zend_throw_exception(U_CLASS_ENTRY(spl_ce_InvalidArgumentException), Text iterator expects argument to be a Unicode string, 0 TSRMLS_CC); return; } -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP-CVS] cvs: php-src /ext/unicode unicode_iterators.c
sebastian Thu Feb 2 06:01:28 2006 UTC Modified files: /php-src/ext/unicode unicode_iterators.c Log: Fix Andrei. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.3&r2=1.4&diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.3 php-src/ext/unicode/unicode_iterators.c:1.4 --- php-src/ext/unicode/unicode_iterators.c:1.3 Thu Feb 2 00:05:21 2006 +++ php-src/ext/unicode/unicode_iterators.c Thu Feb 2 06:01:27 2006 @@ -10,11 +10,11 @@ | obtain it through the world-wide-web, please send a note to | | [EMAIL PROTECTED] so we can mail you a copy immediately. | +--+ - | Authors: Andre Zmievski([EMAIL PROTECTED]) | + | Authors: Andrei Zmievski [EMAIL PROTECTED]| +--+ */ -/* $Id: unicode_iterators.c,v 1.3 2006/02/02 00:05:21 andrei Exp $ */ +/* $Id: unicode_iterators.c,v 1.4 2006/02/02 06:01:27 sebastian Exp $ */ #include php.h -- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php