andrei Thu Dec 21 21:47:56 2006 UTC
Modified files:
/php-src unicode-progress.txt
/php-src/ext/standard array.c php_string.h string.c strnatcmp.c
Log:
Bite the bullet and port the natural comparison algorithm to support
UChar strings. Also, simplify the original code.
# Argggghh, post-incremented iteration sucks. That means you, U16_* stuff.
http://cvs.php.net/viewvc.cgi/php-src/unicode-progress.txt?r1=1.70&r2=1.71&diff_format=u
Index: php-src/unicode-progress.txt
diff -u php-src/unicode-progress.txt:1.70 php-src/unicode-progress.txt:1.71
--- php-src/unicode-progress.txt:1.70 Wed Dec 20 21:02:11 2006
+++ php-src/unicode-progress.txt Thu Dec 21 21:47:56 2006
@@ -11,15 +11,6 @@
Params API, what encoding to use for the message, handling email
option
- array.c
- -------
- natsort(), natcasesort()
- Params API
- Either port strnatcmp() to support Unicode or maybe use ICU's
- numeric collation. Update: can't seem to get the right collation
- parameters to duplicate strnatcmp() functionality. Conclusion: port
- to support Unicode.
-
string.c
--------
parse_str()
@@ -28,9 +19,6 @@
sscanf()
Params API. Rest - no idea yet.
- strnatcmp(), strnatcasecmp()
- Params API. The rest depends on porting of strnatcmp.c
-
wordwrap()
Upgrade, do wordwrapping on codepoint (or glyph ?) level, maybe use
additional whitespace chars instead of just space.
@@ -538,6 +526,7 @@
in_array()
min()
max()
+ natsort(), natcasesort()
range()
shuffle()
@@ -609,6 +598,7 @@
stripslashes()
stripos()
stristr()
+ strnatcmp(), strnatcasecmp()
strpbrk()
strpos()
strrchr()
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/array.c?r1=1.400&r2=1.401&diff_format=u
Index: php-src/ext/standard/array.c
diff -u php-src/ext/standard/array.c:1.400 php-src/ext/standard/array.c:1.401
--- php-src/ext/standard/array.c:1.400 Tue Dec 19 21:38:59 2006
+++ php-src/ext/standard/array.c Thu Dec 21 21:47:56 2006
@@ -21,7 +21,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: array.c,v 1.400 2006/12/19 21:38:59 andrei Exp $ */
+/* $Id: array.c,v 1.401 2006/12/21 21:47:56 andrei Exp $ */
#include "php.h"
#include "php_ini.h"
@@ -394,6 +394,7 @@
zval *fval, *sval;
zval first, second;
int result;
+ zend_uchar type;
f = *((Bucket **) a);
s = *((Bucket **) b);
@@ -402,20 +403,26 @@
sval = *((zval **) s->pData);
first = *fval;
second = *sval;
- if (Z_TYPE_P(fval) != IS_STRING) {
+
+ type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_P(fval),
Z_TYPE_P(sval));
+ if (Z_TYPE_P(fval) != type) {
zval_copy_ctor(&first);
- convert_to_string(&first);
+ convert_to_explicit_type(&first, type);
}
- if (Z_TYPE_P(sval) != IS_STRING) {
+ if (Z_TYPE_P(sval) != type) {
zval_copy_ctor(&second);
- convert_to_string(&second);
+ convert_to_explicit_type(&second, type);
}
- result = strnatcmp_ex(Z_STRVAL(first), Z_STRLEN(first),
Z_STRVAL(second), Z_STRLEN(second), fold_case);
+ if (type == IS_UNICODE) {
+ result = u_strnatcmp_ex(Z_USTRVAL(first), Z_USTRLEN(first),
Z_USTRVAL(second), Z_USTRLEN(second), fold_case);
+ } else {
+ result = strnatcmp_ex(Z_STRVAL(first), Z_STRLEN(first),
Z_STRVAL(second), Z_STRLEN(second), fold_case);
+ }
- if (Z_TYPE_P(fval) != IS_STRING)
+ if (Z_TYPE_P(fval) != type)
zval_dtor(&first);
- if (Z_TYPE_P(sval) != IS_STRING)
+ if (Z_TYPE_P(sval) != type)
zval_dtor(&second);
return result;
@@ -433,14 +440,14 @@
static void php_natsort(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
{
- zval **array;
+ zval *array;
HashTable *target_hash;
- if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &array) ==
FAILURE) {
- WRONG_PARAM_COUNT;
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &array) ==
FAILURE) {
+ return;
}
- target_hash = HASH_OF(*array);
+ target_hash = HASH_OF(array);
if (!target_hash) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "The argument
should be an array");
return;
@@ -460,7 +467,7 @@
}
-/* {{{ proto void natsort(array &array_arg)
+/* {{{ proto void natsort(array &array_arg) U
Sort an array using natural sort */
PHP_FUNCTION(natsort)
{
@@ -469,7 +476,7 @@
/* }}} */
-/* {{{ proto void natcasesort(array &array_arg)
+/* {{{ proto void natcasesort(array &array_arg) U
Sort an array using case-insensitive natural sort */
PHP_FUNCTION(natcasesort)
{
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/php_string.h?r1=1.105&r2=1.106&diff_format=u
Index: php-src/ext/standard/php_string.h
diff -u php-src/ext/standard/php_string.h:1.105
php-src/ext/standard/php_string.h:1.106
--- php-src/ext/standard/php_string.h:1.105 Mon Dec 18 15:04:36 2006
+++ php-src/ext/standard/php_string.h Thu Dec 21 21:47:56 2006
@@ -17,7 +17,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_string.h,v 1.105 2006/12/18 15:04:36 iliaa Exp $ */
+/* $Id: php_string.h,v 1.106 2006/12/21 21:47:56 andrei Exp $ */
/* Synced with php 3.0 revision 1.43 1999-06-16 [ssb] */
@@ -111,7 +111,12 @@
strnatcmp_ex(a, strlen(a), b, strlen(b), 0)
#define strnatcasecmp(a, b) \
strnatcmp_ex(a, strlen(a), b, strlen(b), 1)
+/* Natural-order comparison of two NUL-terminated UChar strings. Both lengths
+ are taken with u_strlen(): b is UChar data, so strlen() does not apply. */
+#define u_strnatcmp(a, b) \
+ u_strnatcmp_ex(a, u_strlen(a), b, u_strlen(b), 0)
+/* Same as u_strnatcmp(), but passes fold_case = 1. */
+#define u_strnatcasecmp(a, b) \
+ u_strnatcmp_ex(a, u_strlen(a), b, u_strlen(b), 1)
PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t
b_len, int fold_case);
+PHPAPI int u_strnatcmp_ex(UChar const *a, size_t a_len, UChar const *b, size_t
b_len, int fold_case);
#ifdef HAVE_LOCALECONV
PHPAPI struct lconv *localeconv_r(struct lconv *out);
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/string.c?r1=1.626&r2=1.627&diff_format=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.626 php-src/ext/standard/string.c:1.627
--- php-src/ext/standard/string.c:1.626 Wed Dec 20 23:36:43 2006
+++ php-src/ext/standard/string.c Thu Dec 21 21:47:56 2006
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: string.c,v 1.626 2006/12/20 23:36:43 tony2001 Exp $ */
+/* $Id: string.c,v 1.627 2006/12/21 21:47:56 andrei Exp $ */
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
@@ -6970,22 +6970,24 @@
*/
static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
{
- zval **s1, **s2;
+ zstr s1, s2; /* argument data; read below through .u or .s depending on type */
+ int s1_len, s2_len;
+ zend_uchar type; /* filled in by zend_parse_parameters(); the same variable is passed for both arguments */
- if (ZEND_NUM_ARGS()!=2 || zend_get_parameters_ex(2, &s1, &s2) ==
FAILURE) {
- WRONG_PARAM_COUNT;
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "TT", &s1, &s1_len,
+ &type, &s2, &s2_len,
&type) == FAILURE) {
+ return;
}
- convert_to_string_ex(s1);
- convert_to_string_ex(s2);
-
- RETURN_LONG(strnatcmp_ex(Z_STRVAL_PP(s1), Z_STRLEN_PP(s1),
- Z_STRVAL_PP(s2),
Z_STRLEN_PP(s2),
- fold_case));
+ if (type == IS_UNICODE) { /* Unicode arguments: compare the UChar data */
+ RETURN_LONG(u_strnatcmp_ex(s1.u, s1_len, s2.u, s2_len,
fold_case));
+ } else { /* any other type: compare the char data */
+ RETURN_LONG(strnatcmp_ex(s1.s, s1_len, s2.s, s2_len,
fold_case));
+ }
}
/* }}} */
-/* {{{ proto int strnatcmp(string s1, string s2)
+/* {{{ proto int strnatcmp(string s1, string s2) U
Returns the result of string comparison using 'natural' algorithm */
PHP_FUNCTION(strnatcmp)
{
@@ -7083,7 +7085,7 @@
}
/* }}} */
-/* {{{ proto int strnatcasecmp(string s1, string s2)
+/* {{{ proto int strnatcasecmp(string s1, string s2) U
Returns the result of case-insensitive string comparison using 'natural'
algorithm */
PHP_FUNCTION(strnatcasecmp)
{
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/strnatcmp.c?r1=1.10&r2=1.11&diff_format=u
Index: php-src/ext/standard/strnatcmp.c
diff -u php-src/ext/standard/strnatcmp.c:1.10
php-src/ext/standard/strnatcmp.c:1.11
--- php-src/ext/standard/strnatcmp.c:1.10 Thu Jul 15 01:26:03 2004
+++ php-src/ext/standard/strnatcmp.c Thu Dec 21 21:47:56 2006
@@ -1,9 +1,9 @@
/* -*- mode: c; c-file-style: "k&r" -*-
- Modified for PHP by Andrei Zmievski <[EMAIL PROTECTED]>
+ Modified for PHP by Andrei Zmievski <[EMAIL PROTECTED]>
strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
- Copyright (C) 2000 by Martin Pool <[EMAIL PROTECTED]>
+ Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@@ -38,12 +38,12 @@
#if 0
static char const *version UNUSED =
- "$Id: strnatcmp.c,v 1.10 2004/07/15 01:26:03 iliaa Exp $";
+ "$Id: strnatcmp.c,v 1.11 2006/12/21 21:47:56 andrei Exp $";
#endif
/* {{{ compare_right
*/
static int
-compare_right(char const **a, char const *aend, char const **b, char const
*bend)
+compare_right(char const *a, char const *b)
{
int bias = 0;
@@ -51,20 +51,22 @@
value wins, but we can't know that it will until we've scanned
both numbers to know that they have the same magnitude, so we
remember it in BIAS. */
- for(;; (*a)++, (*b)++) {
- if ((*a == aend || !isdigit((int)(unsigned char)**a)) &&
- (*b == bend || !isdigit((int)(unsigned char)**b)))
+ for(;; a++, b++) {
+ if (!isdigit((int)(unsigned char)*a) &&
+ !isdigit((int)(unsigned char)*b))
return bias;
- else if (*a == aend || !isdigit((int)(unsigned char)**a))
+ else if (!isdigit((int)(unsigned char)*a))
return -1;
- else if (*b == bend || !isdigit((int)(unsigned char)**b))
+ else if (!isdigit((int)(unsigned char)*b))
return +1;
- else if (**a < **b) {
+ else if (*a < *b) {
if (!bias)
bias = -1;
- } else if (**a > **b) {
+ } else if (*a > *b) {
if (!bias)
bias = +1;
+ } else if (!*a && !*b) {
+ return bias;
}
}
@@ -75,21 +77,21 @@
/* {{{ compare_left
*/
static int
-compare_left(char const **a, char const *aend, char const **b, char const
*bend)
+compare_left(char const *a, char const *b)
{
/* Compare two left-aligned numbers: the first to have a
different value wins. */
- for(;; (*a)++, (*b)++) {
- if ((*a == aend || !isdigit((int)(unsigned char)**a)) &&
- (*b == bend || !isdigit((int)(unsigned char)**b)))
+ for(;; a++, b++) {
+ if (!isdigit((int)(unsigned char)*a) &&
+ !isdigit((int)(unsigned char)*b))
return 0;
- else if (*a == aend || !isdigit((int)(unsigned char)**a))
+ else if (!isdigit((int)(unsigned char)*a))
return -1;
- else if (*b == bend || !isdigit((int)(unsigned char)**b))
+ else if (!isdigit((int)(unsigned char)*b))
return +1;
- else if (**a < **b)
+ else if (*a < *b)
return -1;
- else if (**a > **b)
+ else if (*a > *b)
return +1;
}
@@ -102,46 +104,40 @@
PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t
b_len, int fold_case)
{
char ca, cb;
- char const *ap, *bp;
- char const *aend = a + a_len,
- *bend = b + b_len;
+ int ai, bi;
int fractional, result;
- if (a_len == 0 || b_len == 0)
- return a_len - b_len;
-
- ap = a;
- bp = b;
+ ai = bi = 0;
while (1) {
- ca = *ap; cb = *bp;
+ ca = a[ai]; cb = b[bi];
/* skip over leading spaces or zeros */
while (isspace((int)(unsigned char)ca))
- ca = *++ap;
+ ca = a[++ai];
while (isspace((int)(unsigned char)cb))
- cb = *++bp;
+ cb = b[++bi];
/* process run of digits */
if (isdigit((int)(unsigned char)ca) && isdigit((int)(unsigned
char)cb)) {
fractional = (ca == '0' || cb == '0');
- if (fractional)
- result = compare_left(&ap, aend, &bp, bend);
- else
- result = compare_right(&ap, aend, &bp, bend);
-
- if (result != 0)
- return result;
- else if (ap == aend && bp == bend)
- /* End of the strings. Let caller sort them
out. */
- return 0;
- else {
- /* Keep on comparing from the current point. */
- ca = *ap; cb = *bp;
+ if (fractional) {
+ if ((result = compare_left(a+ai, b+bi)) != 0) {
+ return result;
+ }
+ } else {
+ if ((result = compare_right(a+ai, b+bi)) != 0)
+ return result;
}
}
+ if (!ca && !cb) {
+ /* The strings compare the same. Perhaps the caller
+ will want to call strcmp to break the tie. */
+ return 0;
+ }
+
if (fold_case) {
ca = toupper((int)(unsigned char)ca);
cb = toupper((int)(unsigned char)cb);
@@ -152,19 +148,159 @@
else if (ca > cb)
return +1;
- ++ap; ++bp;
- if (ap >= aend && bp >= bend)
+ ++ai; ++bi;
+ }
+}
+/* }}} */
+
+/* {{{ u_compare_right
+ * Compare two runs of digits in UTF-16 text as right-aligned numbers.
+ *
+ * a, b           - UTF-16 buffers; a_len and b_len are handed to U16_NEXT
+ *                  as the buffer lengths.
+ * a_curr, b_curr - in/out offsets into a and b. Scanning starts at these
+ *                  offsets; they are advanced past every position where
+ *                  both strings hold a digit, and are left untouched by
+ *                  the iteration that returns.
+ *
+ * Returns -1 if the digit run in a ends first, +1 if the run in b ends
+ * first, and otherwise the bias: the sign of the first pair of digits
+ * that differed, or 0 when every pair was equal.
+ */
+static int
+u_compare_right(UChar const *a, int a_len, int *a_curr, UChar const *b, int
b_len, int *b_curr)
+{
+ UChar32 ca, cb;
+ int a_off, b_off;
+ int bias = 0;
+
+ /* The longest run of digits wins. That aside, the greatest
+ value wins, but we can't know that it will until we've scanned
+ both numbers to know that they have the same magnitude, so we
+ remember it in BIAS. */
+
+ for (;;) {
+ a_off = *a_curr;
+ b_off = *b_curr;
+ U16_NEXT(a, a_off, a_len, ca); /* read into ca; only the local offset a_off moves */
+ U16_NEXT(b, b_off, b_len, cb); /* read into cb; only the local offset b_off moves */
+
+ if (!u_isdigit(ca) && !u_isdigit(cb)) {
+ return bias;
+ } else if (!u_isdigit(ca)) {
+ return -1;
+ } else if (!u_isdigit(cb)) {
+ return +1;
+ } else if (ca < cb) {
+ if (!bias)
+ bias = -1;
+ } else if (ca > cb) {
+ if (!bias)
+ bias = +1;
+ } else if (ca == 0 && cb == 0) { /* NOTE(review): looks unreachable unless u_isdigit(0) is true -- confirm */
+ return bias;
+ }
+ *a_curr = a_off; /* both code points were digits: commit the advanced offsets */
+ *b_curr = b_off;
+ }
+
+ return 0; /* not reached: the loop above has no break */
+}
+/* }}} */
+
+/* {{{ u_compare_left
+ * Compare two runs of digits in UTF-16 text as left-aligned numbers.
+ *
+ * a, b           - UTF-16 buffers; a_len and b_len are handed to U16_NEXT
+ *                  as the buffer lengths.
+ * a_curr, b_curr - in/out offsets into a and b. Scanning starts at these
+ *                  offsets; they are advanced past every position where
+ *                  both strings hold the same digit, and are left
+ *                  untouched by the iteration that returns.
+ *
+ * Returns 0 if both runs end at the same position without a difference,
+ * -1 if the run in a ends first or holds the smaller digit at the first
+ * difference, and +1 in the mirrored cases.
+ */
+static int
+u_compare_left(UChar const *a, int a_len, int *a_curr, UChar const *b, int
b_len, int *b_curr)
+{
+ int a_off, b_off;
+ UChar32 ca, cb;
+
+ /* Compare two left-aligned numbers: the first to have a
+ different value wins. */
+ for (;;) {
+ a_off = *a_curr;
+ b_off = *b_curr;
+ U16_NEXT(a, a_off, a_len, ca); /* read into ca; only the local offset a_off moves */
+ U16_NEXT(b, b_off, b_len, cb); /* read into cb; only the local offset b_off moves */
+
+ if (!u_isdigit(ca) && !u_isdigit(cb)) {
+ return 0;
+ } else if (!u_isdigit(ca)) {
+ return -1;
+ } else if (!u_isdigit(cb)) {
+ return +1;
+ } else if (ca < cb) {
+ return -1;
+ } else if (ca > cb) {
+ return +1;
+ }
+ *a_curr = a_off; /* equal digits: commit the advanced offsets and continue */
+ *b_curr = b_off;
+ }
+
+ return 0; /* not reached: the loop above has no break */
+}
+/* }}} */
+
+/* {{{ u_strnatcmp_ex
+ * Natural-order comparison of two UTF-16 strings.
+ *
+ * a, b         - strings to compare. Equality is only reported once a 0
+ *                code point has been read from both strings.
+ *                NOTE(review): this presumes both buffers are
+ *                NUL-terminated -- confirm against callers.
+ * a_len, b_len - lengths in UChar units. If either is 0 the result is
+ *                a_len - b_len.
+ * fold_case    - when non-zero, code points are passed through
+ *                u_toupper() before being compared.
+ *
+ * Whitespace (u_isspace) in front of each compared position is skipped.
+ * When both positions hold digits the runs are compared as numbers:
+ * left-aligned via u_compare_left() if either run starts with '0',
+ * right-aligned via u_compare_right() otherwise.
+ *
+ * Returns a negative value, 0, or a positive value when a sorts before,
+ * equal to, or after b.
+ */
+PHPAPI int u_strnatcmp_ex(UChar const *a, size_t a_len, UChar const *b, size_t
b_len, int fold_case)
+{
+ /* U16_NEXT yields whole code points, so these must be UChar32: a UChar
+    would truncate every character outside the BMP before it is
+    classified or compared. */
+ UChar32 ca, cb;
+ int fractional, result;
+ int a_off, b_off;
+ int a_curr, b_curr;
+
+ if (a_len == 0 || b_len == 0)
+ return a_len - b_len;
+
+ a_curr = b_curr = 0;
+
+ while (1) {
+ /* a_off/b_off remember where the code point about to be read starts,
+    so a digit run can be rescanned from its first digit. */
+ a_off = a_curr;
+ b_off = b_curr;
+ U16_NEXT(a, a_curr, a_len, ca);
+ U16_NEXT(b, b_curr, b_len, cb);
+
+ /* skip over leading spaces */
+ for ( ; u_isspace(ca) && a_curr < a_len; ) {
+ a_off = a_curr;
+ U16_NEXT(a, a_curr, a_len, ca);
+ }
+
+ for ( ; u_isspace(cb) && b_curr < b_len; ) {
+ b_off = b_curr;
+ U16_NEXT(b, b_curr, b_len, cb);
+ }
+
+ /* process run of digits */
+ if (u_isdigit(ca) && u_isdigit(cb)) {
+ fractional = (ca == 0x30 /*'0'*/ || cb == 0x30 /*'0'*/);
+
+ if (fractional) {
+ if ((result = u_compare_left(a, a_len, &a_off,
b, b_len, &b_off)) != 0) {
+ return result;
+ }
+ } else {
+ if ((result = u_compare_right(a, a_len, &a_off,
b, b_len, &b_off)) != 0) {
+ return result;
+ }
+ }
+
+ /* The runs compared equal: resume reading where the helper stopped. */
+ a_curr = a_off;
+ b_curr = b_off;
+ }
+
+ if (ca == 0 && cb == 0) {
/* The strings compare the same. Perhaps the caller
will want to call strcmp to break the tie. */
return 0;
- else if (ap >= aend)
+ }
+
+ if (fold_case) {
+ ca = u_toupper(ca);
+ cb = u_toupper(cb);
+ }
+
+ if (ca < cb)
return -1;
- else if (bp >= bend)
- return 1;
+ else if (ca > cb)
+ return +1;
}
}
/* }}} */
-
/*
* Local variables:
* tab-width: 4
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php