After doing a number of tests on PHP's various string functions, I've come up with a patch that significantly improves the performance of those functions. The patch optimizes: php_addslashes() - internal PHP function used to add slashes to a string (15-20% speed increase); php_memnstr() - internal PHP function used to find a multibyte string inside another string (35-40% speed increase when the string is found, 90-100% speed increase when the string is not found); substr_count() - PHP function used to determine the number of times a string occurs within another string (20-25% speed increase, plus additional speed gains from the use of the optimized php_memnstr() function).
Please let me know if there are any objections, better suggestions, or bug reports (pertaining to this patch) that would need to be resolved before this patch goes into CVS. Ilia
Index: string.c =================================================================== RCS file: /repository/php4/ext/standard/string.c,v retrieving revision 1.308 diff -u -3 -p -r1.308 string.c --- string.c 3 Oct 2002 18:15:18 -0000 1.308 +++ string.c 4 Oct 2002 01:43:19 -0000 @@ -2423,10 +2423,13 @@ PHPAPI char *php_addslashes(char *str, i return str; } new_str = (char *) emalloc((length?length:(length=strlen(str)))*2+1); + source = str; + end = source + length; + target = new_str; + if (PG(magic_quotes_sybase)) { - for (source = str, end = source+length, target = new_str; source < end; source++) { - c = *source; - switch (c) { + while (source<end) { + switch (*source) { case '\0': *target++ = '\\'; *target++ = '0'; @@ -2436,14 +2439,15 @@ PHPAPI char *php_addslashes(char *str, i *target++ = '\''; break; default: - *target++ = c; - break; + *target++ = *source; + break; } } - } else { - for (source = str, end = source+length, target = new_str; source < end; source++) { - c = *source; - switch (c) { + } + else { + while (source<end) { + switch (*source) + { case '\0': *target++ = '\\'; *target++ = '0'; @@ -2454,11 +2458,14 @@ PHPAPI char *php_addslashes(char *str, i *target++ = '\\'; /* break is missing *intentionally* */ default: - *target++ = c; - break; + *target++ = *source; + break; } + + source++; } } + *target = 0; if (new_length) { *new_length = target - new_str; @@ -3794,7 +3801,7 @@ PHP_FUNCTION(strnatcasecmp) PHP_FUNCTION(substr_count) { zval **haystack, **needle; - int i, length, count = 0; + int count = 0; char *p, *endp, cmp; if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &haystack, &needle) == FAILURE) { @@ -3807,25 +3814,23 @@ PHP_FUNCTION(substr_count) if (Z_STRLEN_PP(needle) == 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring."); RETURN_FALSE; - } else if (Z_STRLEN_PP(needle) == 1) { - /* Special optimized case to avoid calls to php_memnstr(). 
*/ - for (i = 0, p = Z_STRVAL_PP(haystack), - length = Z_STRLEN_PP(haystack), cmp = Z_STRVAL_PP(needle)[0]; - i < length; i++) { - if (p[i] == cmp) { - count++; + } + + p = Z_STRVAL_PP(haystack); + endp = p + Z_STRLEN_PP(haystack); + + if (Z_STRLEN_PP(needle) == 1) { + cmp = Z_STRVAL_PP(needle)[0]; + + while (p < endp) { + if (*(p++) == cmp) { + count++; } } } else { - p = Z_STRVAL_PP(haystack); - endp = p + Z_STRLEN_PP(haystack); - while (p <= endp) { - if ( (p = php_memnstr(p, Z_STRVAL_PP(needle), Z_STRLEN_PP(needle), endp)) != NULL ) { - p += Z_STRLEN_PP(needle); - count++; - } else { - break; - } + while ((p = php_memnstr(p, Z_STRVAL_PP(needle), Z_STRLEN_PP(needle), endp))) { + p += Z_STRLEN_PP(needle); + count++; } } Index: php_string.h =================================================================== RCS file: /repository/php4/ext/standard/php_string.h,v retrieving revision 1.61 diff -u -3 -p -r1.61 php_string.h --- php_string.h 25 Sep 2002 18:06:05 -0000 1.61 +++ php_string.h 4 Oct 2002 01:43:19 -0000 @@ -133,19 +133,24 @@ static inline char * php_memnstr(char *haystack, char *needle, int needle_len, char *end) { char *p = haystack; - char first = *needle; + char ne = needle[needle_len-1]; - /* let end point to the last character where needle may start */ end -= needle_len; - + while (p <= end) { - while (*p != first) - if (++p > end) - return NULL; - if (memcmp(p, needle, needle_len) == 0) - return p; + if ((p = memchr(p, *needle, (end-p))) && ne == p[needle_len-1]) { + if (!memcmp(needle, p, needle_len-1)) { + return p; + } + } + + if (p == NULL) { + return NULL; + } + p++; } + return NULL; }
-- PHP Development Mailing List <http://www.php.net/> To unsubscribe, visit: http://www.php.net/unsub.php