Just another function I have found useful.. PHP has a htmlentities
function, but no unhtmlentities function to go the other direction.. (At
least not that I am aware of). So, here you go. Don't think this one
would perform nearly as quickly if it were done using regexps in PHP...
This function also has the added benefit of supporting numeric entities
like &#160;, which I don't believe would be supported by a naive regexp
approach.
So if someone would like to include it, be my guest.
-Brad
--------------------- Start of code ------------------------------
/*
 * One row of the entity lookup table: an entity name (the text between
 * '&' and ';') and the single-byte character it stands for.
 */
struct entity {
	const char *str;	/* entity name without '&' and ';', e.g. "amp" */
	unsigned char ch;	/* character value, 0..255; unsigned so that the
				 * values above 127 are representable even when
				 * plain char is signed */
};
/* Perhaps this could be modified to use the htmlentities translation
   table...? */
/*
 * Entity name -> character value lookup table, searched linearly.
 * It holds the four markup-significant entities followed by one entry for
 * every value from 160 to 255, in ascending order. The table is never
 * written to, so it is declared const.
 */
static const struct entity il_EntTable[] =
{
	/* markup-significant characters */
	{"quot",    34}, {"amp",     38}, {"lt",      60}, {"gt",      62},

	/* 160 - 191: symbols and punctuation */
	{"nbsp",   160}, {"iexcl",  161}, {"cent",   162}, {"pound",  163},
	{"curren", 164}, {"yen",    165}, {"brvbar", 166}, {"sect",   167},
	{"uml",    168}, {"copy",   169}, {"ordf",   170}, {"laquo",  171},
	{"not",    172}, {"shy",    173}, {"reg",    174}, {"macr",   175},
	{"deg",    176}, {"plusmn", 177}, {"sup2",   178}, {"sup3",   179},
	{"acute",  180}, {"micro",  181}, {"para",   182}, {"middot", 183},
	{"cedil",  184}, {"sup1",   185}, {"ordm",   186}, {"raquo",  187},
	{"frac14", 188}, {"frac12", 189}, {"frac34", 190}, {"iquest", 191},

	/* 192 - 223: upper-case letters (plus "times" and "szlig") */
	{"Agrave", 192}, {"Aacute", 193}, {"Acirc",  194}, {"Atilde", 195},
	{"Auml",   196}, {"Aring",  197}, {"AElig",  198}, {"Ccedil", 199},
	{"Egrave", 200}, {"Eacute", 201}, {"Ecirc",  202}, {"Euml",   203},
	{"Igrave", 204}, {"Iacute", 205}, {"Icirc",  206}, {"Iuml",   207},
	{"ETH",    208}, {"Ntilde", 209}, {"Ograve", 210}, {"Oacute", 211},
	{"Ocirc",  212}, {"Otilde", 213}, {"Ouml",   214}, {"times",  215},
	{"Oslash", 216}, {"Ugrave", 217}, {"Uacute", 218}, {"Ucirc",  219},
	{"Uuml",   220}, {"Yacute", 221}, {"THORN",  222}, {"szlig",  223},

	/* 224 - 255: lower-case letters (plus "divide") */
	{"agrave", 224}, {"aacute", 225}, {"acirc",  226}, {"atilde", 227},
	{"auml",   228}, {"aring",  229}, {"aelig",  230}, {"ccedil", 231},
	{"egrave", 232}, {"eacute", 233}, {"ecirc",  234}, {"euml",   235},
	{"igrave", 236}, {"iacute", 237}, {"icirc",  238}, {"iuml",   239},
	{"eth",    240}, {"ntilde", 241}, {"ograve", 242}, {"oacute", 243},
	{"ocirc",  244}, {"otilde", 245}, {"ouml",   246}, {"divide", 247},
	{"oslash", 248}, {"ugrave", 249}, {"uacute", 250}, {"ucirc",  251},
	{"uuml",   252}, {"yacute", 253}, {"thorn",  254}, {"yuml",   255}
};
/*
 * Decodes the body of a single entity (the characters between '&' and ';').
 *
 * name = first character after the '&'
 * len  = number of characters before the ';'
 * out  = receives the decoded character on success
 *
 * Returns 1 if the entity was recognized, 0 otherwise. Nothing is written
 * to *out on failure, and the input is never modified.
 */
static int php_unhtmlentities_decode(const char *name, size_t len, char *out)
{
	size_t i;

	if (len == 0) {
		return 0;
	}

	if (name[0] == '#') {
		unsigned int value = 0;

		/* '#' followed by one to three decimal digits, nothing else */
		if (len < 2 || len > 4) {
			return 0;
		}
		for (i = 1; i < len; ++i) {
			if (name[i] < '0' || name[i] > '9') {
				return 0;
			}
			value = value * 10 + (unsigned int) (name[i] - '0');
		}
		if (value > (unsigned int) UCHAR_MAX) {
			return 0;
		}
		*out = (char) value;
		return 1;
	}

	/* Named entity: exact, case-sensitive match against the table */
	for (i = 0; i < sizeof(il_EntTable) / sizeof(il_EntTable[0]); ++i) {
		const char *candidate = il_EntTable[i].str;

		if (strlen(candidate) == len && memcmp(candidate, name, len) == 0) {
			*out = (char) il_EntTable[i].ch;
			return 1;
		}
	}
	return 0;
}

/* BF 6/11/01 ([EMAIL PROTECTED]) */
/*
 * Translates HTML entities in the given string into the appropriate
 * characters. This function is the reverse of the standard PHP function
 * htmlentities, however it DOES NOT currently use the same translation
 * table. HTML entities have the form "&data;" where data is either the
 * name of an entity (ie. &gt;, &lt;, &quot;) or a # symbol followed by a
 * decimal value from 0 to 255 (ie. &#34;, &#38;).
 *
 * Anything that is not a recognized entity is copied through unchanged:
 * unknown names, "&;", malformed numbers such as "&#x;" or "&#999;", and
 * an '&' with no ';' within the next 15 characters.
 *
 * str       = the string to decode; it is only read, never written
 * resultlen = optional. If non-NULL and *resultlen is non-zero on entry,
 *             that value is used as the length of str; otherwise the
 *             length is taken with strlen(). On return *resultlen (if
 *             non-NULL) holds the length of the decoded string.
 *
 * Returns a NUL-terminated buffer obtained from emalloc()/erealloc().
 * NOTE(review): presumably the caller owns the buffer and must release it
 * (or hand it to the engine) - confirm against the callers.
 */
PHPAPI char* php_str_unhtmlentities(char *str, unsigned int *resultlen)
{
	enum { MAX_ENTITY_SCAN = 15 };	/* how far past '&' to look for ';' */
	const char *p, *end, *amp, *semi;
	char *buf;
	char decoded;
	size_t slen, outlen = 0;

	/* Always establish the input length, also when resultlen is NULL */
	if (resultlen && *resultlen != 0) {
		slen = *resultlen;
	} else {
		slen = strlen(str);
	}

	/*
	 * Every recognized entity shrinks to one character and everything else
	 * is copied verbatim, so the output can never exceed the input.
	 */
	buf = emalloc(slen + 1);
	p = str;
	end = str + slen;

	/* Scan through the string and find entities to decode */
	while (p < end && (amp = memchr(p, '&', (size_t) (end - p))) != NULL) {
		/*
		 * Look ahead for the terminating ';'. A further '&' restarts the
		 * candidate entity there, so "&x&amp;" still decodes the "&amp;".
		 */
		semi = amp + 1;
		while (semi < end && *semi != ';' && (semi - amp) < MAX_ENTITY_SCAN) {
			if (*semi == '&') {
				amp = semi;
			}
			++semi;
		}

		if (semi >= end) {
			break;	/* ran off the end: the tail copy below handles the rest */
		}

		if (*semi != ';') {
			/*
			 * Scan limit reached: not an entity. Copy what was scanned and
			 * resume AT the stopping character, so that an '&' sitting
			 * exactly on the limit can still start an entity of its own.
			 */
			memcpy(buf + outlen, p, (size_t) (semi - p));
			outlen += (size_t) (semi - p);
			p = semi;
			continue;
		}

		/* Copy the previous string data up to this point */
		memcpy(buf + outlen, p, (size_t) (amp - p));
		outlen += (size_t) (amp - p);

		/* Translate the entity, or copy it as-is if it is not recognized */
		if (php_unhtmlentities_decode(amp + 1, (size_t) (semi - amp - 1), &decoded)) {
			buf[outlen++] = decoded;
		} else {
			size_t rawlen = (size_t) (semi - amp) + 1;	/* '&' through ';' */

			memcpy(buf + outlen, amp, rawlen);
			outlen += rawlen;
		}

		/* Start checking for the next match */
		p = semi + 1;
	}

	/* Copy any remaining portion of the string (binary-safe) */
	memcpy(buf + outlen, p, (size_t) (end - p));
	outlen += (size_t) (end - p);

	/* Reallocate the buffer to match the size of the result */
	buf = erealloc(buf, outlen + 1);
	buf[outlen] = 0;

	if (resultlen) {
		*resultlen = (unsigned int) outlen;
	}
	return buf;
} /* php_str_unhtmlentities() */
/* }}} */
/* BF 6/11/01 ([EMAIL PROTECTED]) */
/* {{{ proto string unhtmlentities(string str)
   Translates HTML entities in the given string into the appropriate
   characters. This function is the reverse of the standard PHP function
   htmlentities, however it DOES NOT currently use the same translation
   table. HTML entities have the form "&data;" where data is either the
   name of an entity (ie. &gt;, &lt;, &quot;) or a # symbol followed by a
   decimal value from 0 to 255 (ie. &#34;, &#38;).

   str = the string to decode

   Script-level wrapper: takes exactly one argument, converts it to a
   string, passes it to php_str_unhtmlentities() and returns the decoded
   string.
*/
PHP_FUNCTION(unhtmlentities)
{
	zval **arg;		/* the single script argument */
	char *decoded;
	uint decoded_len;

	/* Exactly one parameter is accepted */
	if (ZEND_NUM_ARGS() != 1 ||
		zend_get_parameters_ex(1, &arg) == FAILURE) {
		ZEND_WRONG_PARAM_COUNT();
	}

	/* Work on the string form of the argument */
	convert_to_string_ex(arg);

	/* The length goes in as the input size and comes back as the output size */
	decoded_len = Z_STRLEN_PP(arg);
	decoded = php_str_unhtmlentities(Z_STRVAL_PP(arg), &decoded_len);

	/*
	 * NOTE(review): the trailing 0 presumably tells the engine not to
	 * duplicate the buffer, i.e. it takes over the emalloc'd result -
	 * confirm against the Zend API documentation.
	 */
	RETURN_STRINGL(decoded, decoded_len, 0);
} /* PHP_FUNCTION(unhtmlentities) */
/* }}} */
--
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php