rolland Thu Sep 29 07:05:32 2005 EDT
Modified files:
/php-src/ext/standard basic_functions.h string.c
Log:
- Unicode impl of strtok()
http://cvs.php.net/diff.php/php-src/ext/standard/basic_functions.h?r1=1.140&r2=1.141&ty=u
Index: php-src/ext/standard/basic_functions.h
diff -u php-src/ext/standard/basic_functions.h:1.140 php-src/ext/standard/basic_functions.h:1.141
--- php-src/ext/standard/basic_functions.h:1.140 Fri Aug 12 22:23:29 2005
+++ php-src/ext/standard/basic_functions.h Thu Sep 29 07:05:29 2005
@@ -17,7 +17,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: basic_functions.h,v 1.140 2005/08/13 02:23:29 wez Exp $ */
+/* $Id: basic_functions.h,v 1.141 2005/09/29 11:05:29 rolland Exp $ */
#ifndef BASIC_FUNCTIONS_H
#define BASIC_FUNCTIONS_H
@@ -153,9 +153,9 @@
HashTable *user_shutdown_function_names;
HashTable putenv_ht;
zval *strtok_zval;
- char *strtok_string;
+ void *strtok_string;
char *locale_string;
- char *strtok_last;
+ void *strtok_last;
char strtok_table[256];
ulong strtok_len;
char str_ebuf[40];
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.487&r2=1.488&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.487 php-src/ext/standard/string.c:1.488
--- php-src/ext/standard/string.c:1.487 Thu Sep 29 05:33:38 2005
+++ php-src/ext/standard/string.c Thu Sep 29 07:05:30 2005
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: string.c,v 1.487 2005/09/29 09:33:38 rolland Exp $ */
+/* $Id: string.c,v 1.488 2005/09/29 11:05:30 rolland Exp $ */
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
@@ -1315,88 +1315,168 @@
Tokenize a string */
PHP_FUNCTION(strtok)
{
- zval **args[2];
- zval **tok, **str;
- char *token;
- char *token_end;
- char *p;
- char *pe;
+ void *tok, *str;
+ int32_t tok_len, str_len;
+ zend_uchar tok_type, str_type;
+ zval *zv;
+ char *token, *token_end, *p, *pe;
+ UChar *u_token, *u_p, *u_pe;
+
+ UChar32 ch, th;
+ int32_t start, end, i, j, rem_len;
+ int delim_found, token_present;
int skipped = 0;
-
- if (ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 2 || zend_get_parameters_array_ex(ZEND_NUM_ARGS(), args) == FAILURE) {
+
+ if (ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 2) {
WRONG_PARAM_COUNT;
}
-
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "T|T",
+ &str, &str_len, &str_type,
+ &tok, &tok_len, &tok_type) == FAILURE) {
+ return;
+ }
+
switch (ZEND_NUM_ARGS()) {
case 1:
- tok = args[0];
+ tok = str;
+ tok_len = str_len;
+ tok_type = str_type;
break;
default:
case 2:
- str = args[0];
- tok = args[1];
- convert_to_string_ex(str);
-
- zval_add_ref(str);
if (BG(strtok_zval)) {
zval_ptr_dtor(&BG(strtok_zval));
}
- BG(strtok_zval) = *str;
- BG(strtok_last) = BG(strtok_string) = Z_STRVAL_PP(str);
- BG(strtok_len) = Z_STRLEN_PP(str);
+ MAKE_STD_ZVAL(zv);
+ if (str_type == IS_UNICODE) {
+ ZVAL_UNICODEL(zv, (UChar *)str, str_len, 1);
+ } else if (str_type == IS_BINARY) {
+ ZVAL_BINARYL(zv, (char *)str, str_len, 1);
+ } else {
+ ZVAL_STRINGL(zv, (char *)str, str_len, 1);
+ }
+ BG(strtok_zval) = zv;
+ if (str_type == IS_UNICODE) {
+ BG(strtok_last) = BG(strtok_string) = Z_USTRVAL_P(zv);
+ } else {
+ BG(strtok_last) = BG(strtok_string) = Z_STRVAL_P(zv);
+ }
+ BG(strtok_len) = str_len;
break;
}
-
- p = BG(strtok_last); /* Where we start to search */
- pe = BG(strtok_string) + BG(strtok_len);
- if (!p || p >= pe) {
+ if (BG(strtok_zval) && tok_type != Z_TYPE_P(BG(strtok_zval))) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Delimiter type must match string type.");
RETURN_FALSE;
}
- convert_to_string_ex(tok);
-
- token = Z_STRVAL_PP(tok);
- token_end = token + Z_STRLEN_PP(tok);
+ if (tok_type == IS_UNICODE) {
+ u_p = (UChar *)BG(strtok_last); /* Where we start to search */
+ u_pe = (UChar *)BG(strtok_string) + BG(strtok_len);
+ u_token = (UChar *)tok;
+ if (!u_p || u_p >= u_pe) {
+ RETURN_FALSE;
+ }
+ rem_len = u_pe - u_p;
- while (token < token_end) {
- STRTOK_TABLE(token++) = 1;
- }
-
- /* Skip leading delimiters */
- while (STRTOK_TABLE(p)) {
- if (++p >= pe) {
- /* no other chars left */
+ /* Skip leading delimiters */
+ token_present = 0;
+ for (i = 0 ; (u_p + i) < u_pe ; ) {
+ delim_found = 0;
+ U16_NEXT(u_p, i, rem_len, ch);
+ for (j = 0 ; j < tok_len ; ) {
+ U16_NEXT(u_token, j, tok_len, th);
+ if ( ch == th ) {
+ delim_found = 1;
+ break;
+ }
+ }
+ if (delim_found == 0) {
+ U16_BACK_1(u_p, 0, i); /* U16_NEXT() post-incrs 'i' */
+ start = i;
+ token_present = 1;
+ break;
+ }
+ }
+ if (token_present == 0) {
BG(strtok_last) = NULL;
- RETVAL_FALSE;
- goto restore;
+ RETURN_FALSE;
}
- skipped++;
- }
-
- /* We know at this place that *p is no delimiter, so skip it */
- while (++p < pe) {
- if (STRTOK_TABLE(p)) {
- goto return_token;
+
+ /* Seek to next delimiter */
+ delim_found = 0;
+ for (i = start ; (u_p + i) < u_pe ; ) {
+ U16_NEXT(u_p, i, rem_len, ch);
+ for (j = 0 ; j < tok_len ; ) {
+ U16_NEXT(u_token, j, tok_len, th);
+ if ( ch == th ) {
+ delim_found = 1;
+ break;
+ }
+ }
+ if (delim_found) {
+ U16_BACK_1(u_p, 0, i); /* 'i' was beyond delimiter */
+ break;
+ }
+ }
+ end = i;
+
+ if (end - start) {
+ BG(strtok_last) = u_p + end;
+ RETURN_UNICODEL(u_p + start, end - start, 1);
+ } else {
+ BG(strtok_last) = NULL;
+ RETURN_FALSE;
}
- }
-
- if (p - BG(strtok_last)) {
-return_token:
- RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped, 1);
- BG(strtok_last) = p + 1;
} else {
- RETVAL_FALSE;
- BG(strtok_last) = NULL;
- }
+ p = (char *)BG(strtok_last); /* Where we start to search */
+ pe = (char *)BG(strtok_string) + BG(strtok_len);
+ if (!p || p >= pe) {
+ RETURN_FALSE;
+ }
+ token = (char *)tok;
+ token_end = token + tok_len;
+ while (token < token_end) {
+ STRTOK_TABLE(token++) = 1;
+ }
- /* Restore table -- usually faster then memset'ing the table on every invocation */
+ /* Skip leading delimiters */
+ while (STRTOK_TABLE(p)) {
+ if (++p >= pe) {
+ /* no other chars left */
+ BG(strtok_last) = NULL;
+ RETVAL_FALSE;
+ goto restore;
+ }
+ skipped++;
+ }
+ /* We know at this place that *p is no delimiter, so skip it */
+ while (++p < pe) {
+ if (STRTOK_TABLE(p)) {
+ goto return_token;
+ }
+ }
+
+ if (p - (char *)BG(strtok_last)) {
+return_token:
+ if (tok_type == IS_BINARY) {
+ RETVAL_BINARYL((char *)BG(strtok_last) + skipped, (p - (char *)BG(strtok_last)) - skipped, 1);
+ } else {
+ RETVAL_STRINGL((char *)BG(strtok_last) + skipped, (p - (char *)BG(strtok_last)) - skipped, 1);
+ }
+ BG(strtok_last) = p + 1;
+ } else {
+ RETVAL_FALSE;
+ BG(strtok_last) = NULL;
+ }
+
+ /* Restore table -- usually faster then memset'ing the table on every invocation */
restore:
- token = Z_STRVAL_PP(tok);
-
- while (token < token_end) {
- STRTOK_TABLE(token++) = 0;
+ token = (char *)tok;
+ while (token < token_end) {
+ STRTOK_TABLE(token++) = 0;
+ }
}
}
/* }}} */
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php