After doing a number of tests on PHP's various string functions, I've come up 
with a patch that significantly improves the performance of those functions.
The patch optimizes:
        php_addslashes() - internal PHP function used to add slashes to a string 
                                        (15-20% speed increase)
        php_memnstr() - internal PHP function used to find a multibyte string inside
                                another string. 35-40% speed increase when the string is
                                found, 90-100% speed increase when the string is not found.
        substr_count() - PHP function used to determine the number of times a string
                                occurs within another string. 20-25% speed increase, plus
                                additional speed gains from use of the optimized
                                php_memnstr() function.

Please let me know if there are any objections, better suggestions, or bug 
reports (pertaining to this patch) that would need to be resolved before this 
patch goes into CVS.

Ilia
Index: string.c
===================================================================
RCS file: /repository/php4/ext/standard/string.c,v
retrieving revision 1.308
diff -u -3 -p -r1.308 string.c
--- string.c	3 Oct 2002 18:15:18 -0000	1.308
+++ string.c	4 Oct 2002 01:43:19 -0000
@@ -2423,10 +2423,13 @@ PHPAPI char *php_addslashes(char *str, i
 		return str;
 	}
 	new_str = (char *) emalloc((length?length:(length=strlen(str)))*2+1);
+	source = str;
+	end = source + length;
+	target = new_str;
+	
 	if (PG(magic_quotes_sybase)) {
-		for (source = str, end = source+length, target = new_str; source < end; source++) {
-			c = *source;
-			switch (c) {
+		while (source<end) {
+			switch (*source) {
 				case '\0':
 					*target++ = '\\';
 					*target++ = '0';
@@ -2436,14 +2439,15 @@ PHPAPI char *php_addslashes(char *str, i
 					*target++ = '\'';
 					break;
 				default:
-					*target++ = c;
-				break;
+					*target++ = *source;
+					break;
 			}
 		}
-	} else {
-		for (source = str, end = source+length, target = new_str; source < end; source++) {
-			c = *source;
-			switch (c) {
+	}
+	else {
+		while (source<end) {
+			switch (*source)
+			{
 				case '\0':
 					*target++ = '\\';
 					*target++ = '0';
@@ -2454,11 +2458,14 @@ PHPAPI char *php_addslashes(char *str, i
 					*target++ = '\\';
 					/* break is missing *intentionally* */
 				default:
-					*target++ = c;
-					break;
+					*target++ = *source;
+					break;	
 			}
+		
+			source++;
 		}
 	}
+	
 	*target = 0;
 	if (new_length) {
 		*new_length = target - new_str;
@@ -3794,7 +3801,7 @@ PHP_FUNCTION(strnatcasecmp)
 PHP_FUNCTION(substr_count)
 {
 	zval **haystack, **needle;	
-	int i, length, count = 0;
+	int count = 0;
 	char *p, *endp, cmp;
 
 	if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &haystack, &needle) == FAILURE) {
@@ -3807,25 +3814,23 @@ PHP_FUNCTION(substr_count)
 	if (Z_STRLEN_PP(needle) == 0) {
 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring.");
 		RETURN_FALSE;
-	} else if (Z_STRLEN_PP(needle) == 1) {
-		/* Special optimized case to avoid calls to php_memnstr(). */
-		for (i = 0, p = Z_STRVAL_PP(haystack), 
-		     length = Z_STRLEN_PP(haystack), cmp = Z_STRVAL_PP(needle)[0]; 
-		     i < length; i++) {
-			if (p[i] == cmp) {
-				count++;
+	}
+	
+	p = Z_STRVAL_PP(haystack);
+	endp = p + Z_STRLEN_PP(haystack);
+	
+	if (Z_STRLEN_PP(needle) == 1) {
+		cmp = Z_STRVAL_PP(needle)[0];
+	
+		while (p < endp) {
+			if (*(p++) == cmp) {
+				count++;	
 			}
 		}
 	} else {
- 		p = Z_STRVAL_PP(haystack);
-		endp = p + Z_STRLEN_PP(haystack);
-		while (p <= endp) {
-			if ( (p = php_memnstr(p, Z_STRVAL_PP(needle), Z_STRLEN_PP(needle), endp)) != NULL ) {
-				p += Z_STRLEN_PP(needle);
-				count++;
-			} else {
-				break;
-			}
+		while ((p = php_memnstr(p, Z_STRVAL_PP(needle), Z_STRLEN_PP(needle), endp))) {
+			p += Z_STRLEN_PP(needle);
+			count++;
 		}
 	}
 
Index: php_string.h
===================================================================
RCS file: /repository/php4/ext/standard/php_string.h,v
retrieving revision 1.61
diff -u -3 -p -r1.61 php_string.h
--- php_string.h	25 Sep 2002 18:06:05 -0000	1.61
+++ php_string.h	4 Oct 2002 01:43:19 -0000
@@ -133,19 +133,24 @@ static inline char *
 php_memnstr(char *haystack, char *needle, int needle_len, char *end)
 {
 	char *p = haystack;
-	char first = *needle;
+	char ne = needle[needle_len-1];
 
-	/* let end point to the last character where needle may start */
 	end -= needle_len;
-	
+
 	while (p <= end) {
-		while (*p != first)
-			if (++p > end)
-				return NULL;
-		if (memcmp(p, needle, needle_len) == 0)
-			return p;
+		if ((p = memchr(p, *needle, (end-p))) &&  ne == p[needle_len-1]) {
+			if (!memcmp(needle, p, needle_len-1)) {
+				return p;
+			}
+		}
+		
+		if (p == NULL) {
+			return NULL;
+		}
+		
 		p++;
 	}
+	
 	return NULL;
 }
 

-- 
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to