Rolland,
The implementation of this is problematic for two reasons:
1) You are assuming well-formed UTF-16 here. It would be better to use
the U16_PREV() macro to obtain a codepoint and then put it in the proper
place with zend_codepoint_to_uchar().
2) Combining sequences are not respected. We can't swap the base character
and the combining chars that follow it, because the string may be
concatenated with something else and the combining chars may end up
affecting something else. So we need to work at the grapheme level here,
using u_getCombiningClass() to check for combining chars and copying
the base character plus its combining chars as a unit.
Also, it'd be great to have the function upgrade proposals first,
before the implementation is committed.
-Andrei
On Aug 19, 2005, at 3:59 AM, Rolland Santimano wrote:
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.466&r2=1.467&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.466
php-src/ext/standard/string.c:1.467
--- php-src/ext/standard/string.c:1.466 Thu Aug 18 18:37:22 2005
+++ php-src/ext/standard/string.c Fri Aug 19 06:59:19 2005
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------
+
*/
-/* $Id: string.c,v 1.466 2005/08/18 22:37:22 andrei Exp $ */
+/* $Id: string.c,v 1.467 2005/08/19 10:59:19 rolland Exp $ */
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
@@ -3212,25 +3212,51 @@
{
zval **str;
char *s, *e, *n, *p;
+ UChar *u_s, *u_e, *u_n, *u_p;
if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, &str) ==
FAILURE) {
WRONG_PARAM_COUNT;
}
- convert_to_string_ex(str);
-
- n = emalloc(Z_STRLEN_PP(str)+1);
- p = n;
-
- s = Z_STRVAL_PP(str);
- e = s + Z_STRLEN_PP(str);
-
- while (--e>=s) {
- *p++ = *e;
+
+ if (Z_TYPE_PP(str) != IS_UNICODE && Z_TYPE_PP(str) != IS_BINARY &&
Z_TYPE_PP(str) != IS_STRING) {
+ convert_to_text_ex(str);
+ }
+
+ if (Z_TYPE_PP(str) == IS_UNICODE) {
+ u_n = eumalloc(Z_USTRLEN_PP(str)+1);
+ u_p = u_n;
+ u_s = Z_USTRVAL_PP(str);
+ u_e = u_s + Z_USTRLEN_PP(str) - 1;
+
+ while (u_e >= u_s) {
+ if (U16_IS_TRAIL(*u_e)) {
+ *u_p = *(u_e-1);
+ *(u_p+1) = *u_e;
+ u_e -= 2; u_p += 2;
+ } else {
+ *u_p++ = *u_e--;
+ }
+ }
+ *u_p = 0;
+ } else {
+ n = emalloc(Z_STRLEN_PP(str)+1);
+ p = n;
+ s = Z_STRVAL_PP(str);
+ e = s + Z_STRLEN_PP(str);
+
+ while (--e >= s) {
+ *(p++) = *e;
+ }
+ *p = '\0';
}
- *p = '\0';
-
- RETVAL_STRINGL(n, Z_STRLEN_PP(str), 0);
+ if (Z_TYPE_PP(str) == IS_UNICODE) {
+ RETVAL_UNICODEL(u_n, Z_USTRLEN_PP(str), 0);
+ } else if (Z_TYPE_PP(str) == IS_BINARY) {
+ RETVAL_BINARYL(n, Z_BINLEN_PP(str), 0);
+ } else {
+ RETVAL_STRINGL(n, Z_STRLEN_PP(str), 0);
+ }
}
/* }}} */
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php
--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php