Small correction to the original patch: it fixes a segfault in the sybase-style
addslashes code and removes an unused variable.

Ilia

On October 3, 2002 10:21 pm, Ilia A. wrote:
> After doing a number of tests on PHP's various string functions, I've come
> up with a patch that significantly improves the performance of those
> functions. The patch optimizes:
>       php_addslashes() - internal PHP function used to add slashes to a string
>                                       (15-20% speed increase)
>       php_memnstr() - internal PHP function used to find a multibyte string
> inside another string. 35-40% speed increase when string is found, 90-100%
> speed increase when the string is not found.
>       substr_count() - PHP function used to determine the number of times a
> string occurs within another string. 20-25% speed increase + additional
> speed gains from usage of optimized php_memnstr() function.
>
> Please let me know if there are any objections, better suggestions, or bug
> reports (pertaining to this patch) that would need to be resolved before
> this patch goes into CVS.
>
> Ilia
Index: string.c
===================================================================
RCS file: /repository/php4/ext/standard/string.c,v
retrieving revision 1.308
diff -u -3 -p -r1.308 string.c
--- string.c	3 Oct 2002 18:15:18 -0000	1.308
+++ string.c	4 Oct 2002 02:57:47 -0000
@@ -2416,17 +2416,19 @@ PHPAPI char *php_addslashes(char *str, i
 	char *new_str;
 	char *source, *target;
 	char *end;
-	char c;
  	
 	if (!str) {
 		*new_length = 0;
 		return str;
 	}
 	new_str = (char *) emalloc((length?length:(length=strlen(str)))*2+1);
+	source = str;
+	end = source + length;
+	target = new_str;
+	
 	if (PG(magic_quotes_sybase)) {
-		for (source = str, end = source+length, target = new_str; source < end; source++) {
-			c = *source;
-			switch (c) {
+		while (source<end) {
+			switch (*source) {
 				case '\0':
 					*target++ = '\\';
 					*target++ = '0';
@@ -2436,14 +2438,16 @@ PHPAPI char *php_addslashes(char *str, i
 					*target++ = '\'';
 					break;
 				default:
-					*target++ = c;
-				break;
+					*target++ = *source;
+					break;
 			}
+			source++;
 		}
-	} else {
-		for (source = str, end = source+length, target = new_str; source < end; source++) {
-			c = *source;
-			switch (c) {
+	}
+	else {
+		while (source<end) {
+			switch (*source)
+			{
 				case '\0':
 					*target++ = '\\';
 					*target++ = '0';
@@ -2454,11 +2458,14 @@ PHPAPI char *php_addslashes(char *str, i
 					*target++ = '\\';
 					/* break is missing *intentionally* */
 				default:
-					*target++ = c;
-					break;
+					*target++ = *source;
+					break;	
 			}
+		
+			source++;
 		}
 	}
+	
 	*target = 0;
 	if (new_length) {
 		*new_length = target - new_str;
@@ -3794,7 +3801,7 @@ PHP_FUNCTION(strnatcasecmp)
 PHP_FUNCTION(substr_count)
 {
 	zval **haystack, **needle;	
-	int i, length, count = 0;
+	int count = 0;
 	char *p, *endp, cmp;
 
 	if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &haystack, &needle) == FAILURE) {
@@ -3807,25 +3814,23 @@ PHP_FUNCTION(substr_count)
 	if (Z_STRLEN_PP(needle) == 0) {
 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring.");
 		RETURN_FALSE;
-	} else if (Z_STRLEN_PP(needle) == 1) {
-		/* Special optimized case to avoid calls to php_memnstr(). */
-		for (i = 0, p = Z_STRVAL_PP(haystack), 
-		     length = Z_STRLEN_PP(haystack), cmp = Z_STRVAL_PP(needle)[0]; 
-		     i < length; i++) {
-			if (p[i] == cmp) {
-				count++;
+	}
+	
+	p = Z_STRVAL_PP(haystack);
+	endp = p + Z_STRLEN_PP(haystack);
+	
+	if (Z_STRLEN_PP(needle) == 1) {
+		cmp = Z_STRVAL_PP(needle)[0];
+	
+		while (p < endp) {
+			if (*(p++) == cmp) {
+				count++;	
 			}
 		}
 	} else {
- 		p = Z_STRVAL_PP(haystack);
-		endp = p + Z_STRLEN_PP(haystack);
-		while (p <= endp) {
-			if ( (p = php_memnstr(p, Z_STRVAL_PP(needle), Z_STRLEN_PP(needle), endp)) != NULL ) {
-				p += Z_STRLEN_PP(needle);
-				count++;
-			} else {
-				break;
-			}
+		while ((p = php_memnstr(p, Z_STRVAL_PP(needle), Z_STRLEN_PP(needle), endp))) {
+			p += Z_STRLEN_PP(needle);
+			count++;
 		}
 	}
 
Index: php_string.h
===================================================================
RCS file: /repository/php4/ext/standard/php_string.h,v
retrieving revision 1.61
diff -u -3 -p -r1.61 php_string.h
--- php_string.h	25 Sep 2002 18:06:05 -0000	1.61
+++ php_string.h	4 Oct 2002 02:57:47 -0000
@@ -133,19 +133,24 @@ static inline char *
 php_memnstr(char *haystack, char *needle, int needle_len, char *end)
 {
 	char *p = haystack;
-	char first = *needle;
+	char ne = needle[needle_len-1];
 
-	/* let end point to the last character where needle may start */
 	end -= needle_len;
-	
+
 	while (p <= end) {
-		while (*p != first)
-			if (++p > end)
-				return NULL;
-		if (memcmp(p, needle, needle_len) == 0)
-			return p;
+		if ((p = memchr(p, *needle, (end-p))) &&  ne == p[needle_len-1]) {
+			if (!memcmp(needle, p, needle_len-1)) {
+				return p;
+			}
+		}
+		
+		if (p == NULL) {
+			return NULL;
+		}
+		
 		p++;
 	}
+	
 	return NULL;
 }
 

-- 
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to