ID: 2685
Updated by: [EMAIL PROTECTED]
-Summary: HtmlEntities does handle UTF-8
Reported By: [EMAIL PROTECTED]
-Status: Open
+Status: Closed
Bug Type: Feature/Change Request
Operating System: Linux RH 6.0
PHP Version: 4.2.3
New Comment:
IIRC, this is in the 4.2 branch already.
htmlentities($str, ENT_COMPAT, "utf-8");
If not, it's in 4.3 due soon.
Previous Comments:
------------------------------------------------------------------------
[2002-10-01 10:24:19] [EMAIL PROTECTED]
changed to feature request and bumped up the version as the request
still looks valid ...
------------------------------------------------------------------------
[1999-11-08 12:07:01] [EMAIL PROTECTED]
I needed a version of HtmlEntities that handled UTF-8 better than the
default PHP one: I needed to cater for σ in an XML document and
output it as at least Σ. I have created another function called
Utf8_HtmlEntities, which does exactly this, and have included the diff.
diff -c php-3.0.12.orig/functions/basic_functions.c
php-3.0.12/functions/basic_functions.c
*** php-3.0.12.orig/functions/basic_functions.c Mon Nov 8 16:51:15
1999
--- php-3.0.12/functions/basic_functions.c Mon Nov 8 16:59:43 1999
***************
*** 167,172 ****
--- 167,173 ----
{"getimagesize", php3_getimagesize, NULL},
{"htmlspecialchars", php3_htmlspecialchars, NULL},
{"htmlentities", php3_htmlentities, NULL},
+ {"utf8_htmlentities", php3_utf8_htmlentities, NULL},
{"md5", php3_md5, NULL},
{"iptcparse", php3_iptcparse, NULL},
diff -c php-3.0.12.orig/functions/html.c php-3.0.12/functions/html.c
*** php-3.0.12.orig/functions/html.c Mon Nov 8 16:51:15 1999
--- php-3.0.12/functions/html.c Mon Nov 8 16:58:53 1999
***************
*** 60,123 ****
"uuml","yacute","thorn","yuml"
};
! PHPAPI char * _php3_htmlentities(char *s, int i, int all)
{
! int len, maxlen;
! unsigned char *old;
char *new;
! old = (unsigned char *)s;
! maxlen = 2 * i;
if (maxlen < 128)
maxlen = 128;
new = emalloc (maxlen);
! len = 0;
! while (i--) {
! if (len + 9 > maxlen)
! new = erealloc (new, maxlen += 128);
! if (38 == *old) {
! memcpy (new + len, "&amp;", 5);
! len += 5;
! } else if (34 == *old) {
! memcpy (new + len, "&quot;", 6);
! len += 6;
! } else if (60 == *old) {
! memcpy (new + len, "&lt;", 4);
! len += 4;
! } else if (62 == *old) {
! memcpy (new + len, "&gt;", 4);
! len += 4;
! } else if (all && 160 <= *old) {
! new [len++] = '&';
! strcpy (new + len, EntTable [*old - 160]);
! len += strlen (EntTable [*old - 160]);
! new [len++] = ';';
} else {
! new [len++] = *old;
}
- old++;
}
! new [len] = '\0';
return new;
}
static void _htmlentities(INTERNAL_FUNCTION_PARAMETERS, int all)
{
! pval *arg;
char *new;
TLS_VARS;
! if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE)
{
WRONG_PARAM_COUNT;
! }
! convert_to_string(arg);
new = _php3_htmlentities(arg->value.str.val, arg->value.str.len,
all);
! RETVAL_STRINGL(new,strlen(new),0);
}
/* {{{ proto string htmlspecialchars(string string)
--- 60,210 ----
"uuml","yacute","thorn","yuml"
};
! PHPAPI char * _php3_htmlentities(char *s, int len, int all)
{
! int newlen, maxlen;
! int pos = len;
! unsigned char c;
char *new;
! maxlen = 2 * len;
! if (maxlen < 128)
! maxlen = 128;
! new = emalloc(maxlen);
! newlen = 0;
!
! while (pos > 0) {
! c = (unsigned char)(*s);
! if (newlen + 9 > maxlen)
! new = erealloc(new, maxlen += 128);
! if (c == 38) {
! memcpy(new + newlen, "&amp;", 5);
! newlen += 5;
! } else if (c == 34) {
! memcpy(new + newlen, "&quot;", 6);
! newlen += 6;
! } else if (c == 60) {
! memcpy(new + newlen, "&lt;", 4);
! newlen += 4;
! } else if (c == 62) {
! memcpy(new + newlen, "&gt;", 4);
! newlen += 4;
! } else if (all && c >= 160) {
! new[newlen++] = '&';
! strcpy(new + newlen, EntTable[c - 160]);
! newlen += strlen(new + newlen);
! new[newlen++] = ';';
! } else {
! new[newlen++] = c;
! }
! s++;
! }
! new[newlen] = '\0';
! return new;
! }
! PHPAPI char * _php3_utf8_htmlentities(char *s, int len, int all)
! {
! int newlen, maxlen;
! int pos = len;
! unsigned long c;
! char *new;
!
! maxlen = 2 * len;
if (maxlen < 128)
maxlen = 128;
new = emalloc (maxlen);
! newlen = 0;
!
! while (pos > 0) {
! c = (unsigned char)(*s);
! /* four bytes encoded, 21 bits */
! if (c >= 0xf0 && pos >= 4) {
! c = (*(s++) & 7) << 18;
! c += (*(s++) & 63) << 12;
! c += (*(s++) & 63) << 6;
! c += (*(s++) & 63);
! pos -= 4;
! /* three bytes encoded, 16 bits */
! } else if (c >= 0xe0 && c < 0xf0 && pos >= 3) {
! c = (*(s++) & 15) << 12;
! c += (*(s++) & 63) << 6;
! c += (*(s++) & 63);
! pos -= 3;
! /* two bytes encoded, 11 bits */
! } else if (c >= 0xc0 && c < 0xe0 && pos >= 2) {
! c = ((unsigned long)*(s++) & 31) << 6;
! c += ((unsigned long)*(s++) & 63);
! pos -= 2;
! } else {
! c = (*(s++) & 127);
! pos--;
! }
! if (newlen + 11 > maxlen)
! new = erealloc(new, maxlen += 128);
! if (c == 38) {
! memcpy(new + newlen, "&amp;", 5);
! newlen += 5;
! } else if (c == 34) {
! memcpy(new + newlen, "&quot;", 6);
! newlen += 6;
! } else if (c == 60) {
! memcpy (new + newlen, "&lt;", 4);
! newlen += 4;
! } else if (c == 62) {
! memcpy (new + newlen, "&gt;", 4);
! newlen += 4;
! } else if (all && c >= 160) {
! new[newlen++] = '&';
! if (c <= 255) {
! strcpy(new + newlen, EntTable[c - 160]);
! } else {
! new[newlen++] = '#';
! sprintf(new + newlen, "%lu", c);
! }
! newlen += strlen(new + newlen);
! new[newlen++] = ';';
} else {
! new[newlen++] = (unsigned char)c;
}
}
! new[newlen] = '\0';
return new;
}
static void _htmlentities(INTERNAL_FUNCTION_PARAMETERS, int all)
{
! pval *arg;
char *new;
TLS_VARS;
! if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE) {
WRONG_PARAM_COUNT;
! }
! convert_to_string(arg);
new = _php3_htmlentities(arg->value.str.val, arg->value.str.len,
all);
! RETVAL_STRINGL(new,strlen(new), 0);
! }
!
! static void _utf8_htmlentities(INTERNAL_FUNCTION_PARAMETERS, int
all)
! {
! pval *arg;
! char *new;
! TLS_VARS;
!
! if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &arg) == FAILURE) {
! WRONG_PARAM_COUNT;
! }
!
! convert_to_string(arg);
!
! new = _php3_utf8_htmlentities(arg->value.str.val,
arg->value.str.len, all);
!
! RETVAL_STRINGL(new, strlen(new), 0);
}
/* {{{ proto string htmlspecialchars(string string)
***************
*** 133,140 ****
Convert all applicable characters to HTML entities */
void php3_htmlentities(INTERNAL_FUNCTION_PARAMETERS)
{
! /* _php3_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU,1);*/
! _htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU,1);
}
/* }}} */
--- 220,236 ----
Convert all applicable characters to HTML entities */
void php3_htmlentities(INTERNAL_FUNCTION_PARAMETERS)
{
! /* _php3_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);*/
! _htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
! }
! /* }}} */
!
! /* {{{ proto string utf8_htmlentities(string string)
! Convert all applicable UTF-8 characters to HTML entities */
! void php3_utf8_htmlentities(INTERNAL_FUNCTION_PARAMETERS)
! {
! /* _php3_utf8_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU,
1);*/
! _utf8_htmlentities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
***************
*** 144,146 ****
--- 240,243 ----
* c-basic-offset: 4
* End:
*/
+
diff -c php-3.0.12.orig/functions/html.h php-3.0.12/functions/html.h
*** php-3.0.12.orig/functions/html.h Mon Nov 8 16:51:15 1999
--- php-3.0.12/functions/html.h Mon Nov 8 16:58:56 1999
***************
*** 34,39 ****
extern void php3_htmlspecialchars(INTERNAL_FUNCTION_PARAMETERS);
extern void php3_htmlentities(INTERNAL_FUNCTION_PARAMETERS);
! PHPAPI char * _php3_htmlentities(char *s, int i, int all);
#endif /* _HTML_H */
--- 34,41 ----
extern void php3_htmlspecialchars(INTERNAL_FUNCTION_PARAMETERS);
extern void php3_htmlentities(INTERNAL_FUNCTION_PARAMETERS);
! extern void php3_utf8_htmlentities(INTERNAL_FUNCTION_PARAMETERS);
! PHPAPI char * _php3_htmlentities(char *s, int len, int all);
! PHPAPI char * _php3_utf8_htmlentities(char *s, int len, int all);
#endif /* _HTML_H */
------------------------------------------------------------------------
--
Edit this bug report at http://bugs.php.net/?id=2685&edit=1