andrei Fri Feb 3 00:09:19 2006 UTC
Modified files:
/php-src/ext/unicode unicode_iterators.c
Log:
Rewrite to use C-level iterators for performance. Also, cache the string
in the iterator object for immutability.
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.4&r2=1.5&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.4
php-src/ext/unicode/unicode_iterators.c:1.5
--- php-src/ext/unicode/unicode_iterators.c:1.4 Thu Feb 2 06:01:27 2006
+++ php-src/ext/unicode/unicode_iterators.c Fri Feb 3 00:09:19 2006
@@ -14,7 +14,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: unicode_iterators.c,v 1.4 2006/02/02 06:01:27 sebastian Exp $ */
+/* $Id: unicode_iterators.c,v 1.5 2006/02/03 00:09:19 andrei Exp $ */
#include "php.h"
@@ -30,34 +30,157 @@
typedef struct {
zend_object std;
- zval* text;
+ UChar* text; /* private copy of the input string: made with eustrndup in __construct, efree'd in text_iterator_free_storage */
+ uint32_t text_len; /* length of text as received from zend_parse_parameters in __construct */
+ text_iter_type type; /* set to ITER_CODE_POINT in text_iterator_new */
+ zval* current; /* reusable IS_UNICODE zval for the current code point; its buffer is allocated once in text_iterator_new */
int32_t offset;
int32_t index;
- text_iter_type type;
-} text_iter_t;
+} text_iter_obj; /* per-object state stored behind each TextIterator instance */
+/* C-level iterator returned by text_iter_get_iterator: the engine's iterator header plus the object state it walks. */
+typedef struct {
+ zend_object_iterator intern; /* first member, so a text_iter_it pointer can be cast to and from zend_object_iterator pointer */
+ text_iter_obj* object; /* state of the TextIterator being iterated; not owned by this struct */
+} text_iter_it;
PHPAPI zend_class_entry* text_iterator_aggregate_ce;
PHPAPI zend_class_entry* text_iterator_ce;
+/* Code point ops */
+/* Returns nonzero while offset is still before text_len, i.e. there is something left to read. */
+static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC)
+{
+ return (object->offset < object->text_len); /* NOTE(review): int32_t compared with uint32_t, so offset is converted to unsigned */
+}
+/* Reads the code point at offset and writes it, zero-terminated, into the buffer of object->current. */
+static void text_iter_cp_get_current_data(text_iter_obj* object TSRMLS_DC)
+{
+ UChar32 cp;
+ int32_t tmp, buf_len;
+ /* Work on a copy of offset so that reading does not move the iterator. */
+ tmp = object->offset;
+ U16_NEXT(object->text, tmp, object->text_len, cp); /* NOTE(review): no offset < text_len check here; presumably callers test validity first */
+ buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object->current));
+ Z_USTRVAL_P(object->current)[buf_len] = 0; /* terminate right after the code units just written */
+ Z_USTRLEN_P(object->current) = buf_len;
+}
+/* Returns the iteration key: index is zeroed by rewind and incremented once per move_forward. */
+static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC)
+{
+ return object->index;
+}
+/* Steps to the next code point: advances offset via U16_FWD_1 and increments the key. */
+static void text_iter_cp_move_forward(text_iter_obj* object TSRMLS_DC)
+{
+ U16_FWD_1(object->text, object->offset, object->text_len); /* ICU macro; updates object->offset in place */
+ object->index++;
+}
+/* Resets the iterator to the start of the text: offset 0, key 0. */
+static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC)
+{
+ object->offset = 0;
+ object->index = 0;
+}
+
+
+/* Iterator Funcs */
+/* Iterator destructor: releases the object zval stored in intern.data, then frees the iterator itself. */
+static void text_iter_dtor(zend_object_iterator* iter TSRMLS_DC)
+{
+ text_iter_it* iterator = (text_iter_it *) iter;
+ zval_ptr_dtor((zval **)&iterator->intern.data); /* balances the ZVAL_ADDREF done in text_iter_get_iterator */
+ efree(iterator);
+}
+/* Iterator "valid" handler: maps the result of text_iter_cp_valid onto SUCCESS / FAILURE. */
+static int text_iter_valid(zend_object_iterator* iter TSRMLS_DC)
+{
+ text_iter_it* iterator = (text_iter_it *) iter;
+ text_iter_obj* object = iterator->object;
+
+ if (text_iter_cp_valid(object TSRMLS_CC))
+ return SUCCESS;
+ else
+ return FAILURE;
+}
+/* Iterator "current data" handler: refreshes object->current and returns its address through data. */
+static void text_iter_get_current_data(zend_object_iterator* iter, zval***
data TSRMLS_DC)
+{
+ text_iter_it* iterator = (text_iter_it *) iter;
+ text_iter_obj* object = iterator->object;
+
+ text_iter_cp_get_current_data(object TSRMLS_CC);
+ *data = &object->current; /* NOTE(review): the same zval is handed out on every call and its buffer is overwritten by the next one */
+}
+/* Iterator "current key" handler: always reports an integer key; str_key and str_key_len are left untouched. */
+static int text_iter_get_current_key(zend_object_iterator* iter, char
**str_key, uint *str_key_len, ulong *int_key TSRMLS_DC)
+{
+ text_iter_it* iterator = (text_iter_it *) iter;
+ text_iter_obj* object = iterator->object;
+
+ *int_key = text_iter_cp_get_current_key(object TSRMLS_CC);
+ return HASH_KEY_IS_LONG;
+}
+/* Iterator "move forward" handler: delegates to text_iter_cp_move_forward on the bound object state. */
+static void text_iter_move_forward(zend_object_iterator* iter TSRMLS_DC)
+{
+ text_iter_it* iterator = (text_iter_it *) iter;
+ text_iter_obj* object = iterator->object;
+
+ text_iter_cp_move_forward(object TSRMLS_CC);
+}
+/* Iterator "rewind" handler: delegates to text_iter_cp_rewind on the bound object state. */
+static void text_iter_rewind(zend_object_iterator* iter TSRMLS_DC)
+{
+ text_iter_it* iterator = (text_iter_it *) iter;
+ text_iter_obj* object = iterator->object;
+
+ text_iter_cp_rewind(object TSRMLS_CC);
+}
+/* Handler table assigned to intern.funcs of every iterator created by text_iter_get_iterator. */
+zend_object_iterator_funcs text_iter_cp_funcs = {
+ text_iter_dtor,
+ text_iter_valid,
+ text_iter_get_current_data,
+ text_iter_get_current_key,
+ text_iter_move_forward,
+ text_iter_rewind,
+};
+/* get_iterator handler: allocates a text_iter_it bound to the stored state of object. The ce argument is not used. */
+static zend_object_iterator* text_iter_get_iterator(zend_class_entry *ce, zval
*object TSRMLS_DC)
+{
+ text_iter_it* iterator = emalloc(sizeof(text_iter_it));
+ text_iter_obj* iter_object = (text_iter_obj *)
zend_object_store_get_object(object TSRMLS_CC);
+ /* Take a reference on the object zval; text_iter_dtor releases it through intern.data. */
+ ZVAL_ADDREF(object);
+ iterator->intern.data = (void *) object;
+ iterator->intern.funcs = &text_iter_cp_funcs;
+ iterator->object = iter_object;
+
+ return (zend_object_iterator *) iterator;
+}
+
static void text_iterator_free_storage(void *object TSRMLS_DC)
{
- text_iter_t *intern = (text_iter_t *) object;
+ text_iter_obj *intern = (text_iter_obj *) object;
zend_hash_destroy(intern->std.properties);
FREE_HASHTABLE(intern->std.properties);
- if (intern->text) zval_ptr_dtor(&intern->text);
+ if (intern->text) { /* text is still NULL when __construct never stored a string (the object is zeroed on creation) */
+ efree(intern->text); /* frees the copy made with eustrndup in __construct */
+ }
+ zval_ptr_dtor(&intern->current); /* releases the code point zval created in text_iterator_new */
efree(object);
}
static zend_object_value text_iterator_new(zend_class_entry *class_type
TSRMLS_DC)
{
zend_object_value retval;
- text_iter_t *intern;
+ text_iter_obj *intern;
zval *tmp;
- intern = emalloc(sizeof(text_iter_t));
- memset(intern, 0, sizeof(text_iter_t));
+ intern = emalloc(sizeof(text_iter_obj));
+ memset(intern, 0, sizeof(text_iter_obj));
intern->std.ce = class_type;
ALLOC_HASHTABLE(intern->std.properties);
@@ -65,6 +188,9 @@
zend_hash_copy(intern->std.properties, &class_type->default_properties,
(copy_ctor_func_t) zval_add_ref, (void *) &tmp, sizeof(zval *));
intern->type = ITER_CODE_POINT;
+ MAKE_STD_ZVAL(intern->current); /* pre-allocate buffer for codepoint */
+ Z_USTRVAL_P(intern->current) = eumalloc(3);
+ Z_TYPE_P(intern->current) = IS_UNICODE;
retval.handle = zend_objects_store_put(intern,
(zend_objects_store_dtor_t)zend_objects_destroy_object,
(zend_objects_free_object_storage_t) text_iterator_free_storage, NULL
TSRMLS_CC);
retval.handlers = zend_get_std_object_handlers();
@@ -72,81 +198,64 @@
return retval;
}
-static void text_iter_rewind(text_iter_t *intern TSRMLS_DC)
-{
- intern->offset = 0;
- intern->index = 0;
-}
-
-
PHP_METHOD(TextIterator, __construct)
{
- zval *text;
+ UChar *text;
+ int32_t text_len;
zval *object = getThis();
- text_iter_t *intern;
+ text_iter_obj *intern;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &text) ==
FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "u", &text,
&text_len) == FAILURE) {
return;
}
- if (Z_TYPE_P(text) != IS_UNICODE) {
-
zend_throw_exception(U_CLASS_ENTRY(spl_ce_InvalidArgumentException), "Text
iterator expects argument to be a Unicode string", 0 TSRMLS_CC);
- return;
- }
-
- intern = (text_iter_t*) zend_object_store_get_object(object TSRMLS_CC);
+ intern = (text_iter_obj*) zend_object_store_get_object(object
TSRMLS_CC);
- ZVAL_ADDREF(text);
- intern->text = text;
+ intern->text = eustrndup(text, text_len); /* keep a private copy; NOTE(review): an earlier intern->text is not freed if __construct runs again */
+ intern->text_len = text_len;
- text_iter_rewind(intern TSRMLS_CC);
+ text_iter_cp_rewind(intern TSRMLS_CC); /* start at offset 0, key 0 */
}
PHP_METHOD(TextIterator, current)
{
- UChar32 cp;
- UChar buf[3];
- uint32_t tmp, buf_len;
zval *object = getThis();
- text_iter_t *intern = (text_iter_t*)
zend_object_store_get_object(object TSRMLS_CC);
+ text_iter_obj *intern = (text_iter_obj*)
zend_object_store_get_object(object TSRMLS_CC);
- tmp = intern->offset;
- U16_NEXT(Z_USTRVAL_P(intern->text), tmp, Z_USTRLEN_P(intern->text), cp);
- buf_len = zend_codepoint_to_uchar(cp, buf);
- RETURN_UNICODEL(buf, buf_len, 1);
+ text_iter_cp_get_current_data(intern TSRMLS_CC); /* refresh intern->current from the code point at the current offset */
+ RETURN_UNICODEL(Z_USTRVAL_P(intern->current),
Z_USTRLEN_P(intern->current), 1); /* presumably the final 1 asks for a duplicate, so the shared buffer is not returned itself; TODO confirm */
}
PHP_METHOD(TextIterator, next)
{
zval *object = getThis();
- text_iter_t *intern = (text_iter_t*)
zend_object_store_get_object(object TSRMLS_CC);
+ text_iter_obj *intern = (text_iter_obj*)
zend_object_store_get_object(object TSRMLS_CC);
- U16_FWD_1(Z_USTRVAL_P(intern->text), intern->offset,
Z_USTRLEN_P(intern->text));
- intern->index++;
+ text_iter_cp_move_forward(intern TSRMLS_CC); /* same helper the C-level move_forward handler calls */
}
PHP_METHOD(TextIterator, key)
{
zval *object = getThis();
- text_iter_t *intern = (text_iter_t*)
zend_object_store_get_object(object TSRMLS_CC);
+ text_iter_obj *intern = (text_iter_obj*)
zend_object_store_get_object(object TSRMLS_CC);
- RETURN_LONG(intern->index);
+ RETURN_LONG(text_iter_cp_get_current_key(intern TSRMLS_CC)); /* helper returns intern->index, as the removed line did directly */
}
PHP_METHOD(TextIterator, valid)
{
zval *object = getThis();
- text_iter_t *intern = (text_iter_t*)
zend_object_store_get_object(object TSRMLS_CC);
+ text_iter_obj *intern = (text_iter_obj*)
zend_object_store_get_object(object TSRMLS_CC);
- RETURN_BOOL(intern->offset < Z_USTRLEN_P(intern->text));
+ RETURN_BOOL(text_iter_cp_valid(intern TSRMLS_CC)); /* helper compares offset against the cached text_len */
}
PHP_METHOD(TextIterator, rewind)
{
zval *object = getThis();
- text_iter_t *intern = (text_iter_t*)
zend_object_store_get_object(object TSRMLS_CC);
+ text_iter_obj *intern = (text_iter_obj*)
zend_object_store_get_object(object TSRMLS_CC);
- text_iter_rewind(intern TSRMLS_CC);
+ text_iter_cp_rewind(intern TSRMLS_CC); /* the old helper's name is now used by the C-level rewind handler */
}
static zend_function_entry text_iterator_funcs[] = {
@@ -167,6 +276,7 @@
text_iterator_ce = zend_register_internal_class(&ce TSRMLS_CC);
zend_class_implements(text_iterator_ce TSRMLS_CC, 1, zend_ce_iterator);
text_iterator_ce->create_object = text_iterator_new;
+ text_iterator_ce->get_iterator = text_iter_get_iterator;
}
/*
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php