Just another function I have found useful. PHP has an htmlentities function, but no unhtmlentities function to go in the other direction (at least not that I am aware of). So, here you go. I don't think this one would perform nearly as quickly if it were done using regexps in PHP. This function also has the added benefit of supporting numeric entities like &#160;, which I don't believe would be supported by a naive regexp approach.
So if someone would like to include it, be my guest. -Brad --------------------- Start of code ------------------------------ struct entity { char* str; char ch; }; // Perhaps this could be modified to use the htmlentities translation table...? static struct entity il_EntTable[] = { {"quot",34}, {"amp",38}, {"lt",60}, {"gt",62}, {"nbsp",160}, {"iexcl",161}, {"cent",162}, {"pound",163}, {"curren",164}, {"yen",165}, {"brvbar",166}, {"sect",167}, {"uml",168}, {"copy",169}, {"ordf",170}, {"laquo",171}, {"not",172}, {"shy",173}, {"reg",174}, {"macr",175}, {"deg",176}, {"plusmn",177}, {"sup2",178}, {"sup3",179}, {"acute",180}, {"micro",181}, {"para",182}, {"middot",183}, {"cedil",184}, {"sup1",185}, {"ordm",186}, {"raquo",187}, {"frac14",188}, {"frac12",189}, {"frac34",190}, {"iquest",191}, {"Agrave",192}, {"Aacute",193}, {"Acirc",194}, {"Atilde",195}, {"Auml",196}, {"Aring",197}, {"AElig",198}, {"Ccedil",199}, {"Egrave",200}, {"Eacute",201}, {"Ecirc",202}, {"Euml",203}, {"Igrave",204}, {"Iacute",205}, {"Icirc",206}, {"Iuml",207}, {"ETH",208}, {"Ntilde",209}, {"Ograve",210}, {"Oacute",211}, {"Ocirc",212}, {"Otilde",213}, {"Ouml",214}, {"times",215}, {"Oslash",216}, {"Ugrave",217}, {"Uacute",218}, {"Ucirc",219}, {"Uuml",220}, {"Yacute",221}, {"THORN",222}, {"szlig",223}, {"agrave",224}, {"aacute",225}, {"acirc",226}, {"atilde",227}, {"auml",228}, {"aring",229}, {"aelig",230}, {"ccedil",231}, {"egrave",232}, {"eacute",233}, {"ecirc",234}, {"euml",235}, {"igrave",236}, {"iacute",237}, {"icirc",238}, {"iuml",239}, {"eth",240}, {"ntilde",241}, {"ograve",242}, {"oacute",243}, {"ocirc",244}, {"otilde",245}, {"ouml",246}, {"divide",247}, {"oslash",248}, {"ugrave",249}, {"uacute",250}, {"ucirc",251}, {"uuml",252}, {"yacute",253}, {"thorn",254}, {"yuml",255} }; /* BF 6/11/01 ([EMAIL PROTECTED]) */ /* Translates HTML entities in the given string into the appropriate characters. 
This function is the reverse of the standard PHP function htmlentities, however it DOES NOT currently use the same translation table. HTML entities have the form "&data;" where data is either the name of an entity (ie. >, <, ") or a # symbol followed by a decimal value from 0 to 255 (ie. ", &) str = the string to decode */ PHPAPI char* php_str_unhtmlentities(char *str, unsigned int *resultlen) { char *p, *sp, *ep, *buf; int buflen = 0, len, slen; if (resultlen) { if (*resultlen != 0) slen = *resultlen; else slen = strlen(str); } //if // Scan through the string and find entities to decode buf = emalloc(slen * 2); p = str; while (sp = strchr(p, '&')) { ep = sp + sizeof(char); // Scan up to 15 characters ahead for a ';' while ((*ep) && (*ep != ';') && (ep < sp + sizeof(char)*15)) { if (*ep == '&') sp = ep; ep += sizeof(char); } //while if (!(*ep)) break; // End of string // Copy the previous string data up to this point len = (sp - p) / sizeof(char); strncpy(&buf[buflen], p, len); buflen += len; // Translate the entity len = (ep - sp) / sizeof(char) - 1; if (len > 0) { int i, found = 0; char ch = *(ep); sp += sizeof(char); *ep = 0; if (*sp == '#') { if ((len > 1) && (len <= 4)) { unsigned long ch = strtoul(sp + sizeof(char), (char**) NULL, 10); if (ch <= (unsigned long) UCHAR_MAX) { buf[buflen] = (char) ch; ++buflen; found = 1; } //if } //if } else { for (i = 0; i < (sizeof(il_EntTable) / sizeof(struct entity)); ++i) { if (strcmp(sp, il_EntTable[i].str) == 0) { buf[buflen] = il_EntTable[i].ch; ++buflen; found = 1; break; } //if } //for } //if *ep = ch; // Copy the entity as-is if it is not recognized if (!found) { len += 2; sp -= sizeof(char); strncpy(&buf[buflen], sp, len); buflen += len; } //if } else { // No data in entity? (ie. "&;" Just copy as-is... 
not an entity strncpy(&buf[buflen], sp, 2); buflen += 2; } //if // Start checking for the next match p = ep + sizeof(char); } //while // Copy any remaining portion of the string len = slen - ((p - str) / sizeof(char)); strncpy(&buf[buflen], p, len); buflen += len; // Reallocate the buffer to match the size of the result buf = erealloc(buf, buflen + 1); buf[buflen] = 0; if (resultlen) *resultlen = buflen; return buf; } // php_str_unhtmlentities() /* }}} */ /* BF 6/11/01 ([EMAIL PROTECTED]) */ /* {{{ proto string unhtmlentities(string str) Translates HTML entities in the given string into the appropriate characters. This function is the reverse of the standard PHP function htmlentities, however it DOES NOT currently use the same translation table. HTML entities have the form "&data;" where data is either the name of an entity (ie. >, <, ") or a # symbol followed by a decimal value from 0 to 255 (ie. ", &) str = the string to decode */ PHP_FUNCTION(unhtmlentities) { zval **_str; // The string uint resultlen; char *result; int myargc = ZEND_NUM_ARGS(); if (myargc != 1 || zend_get_parameters_ex(myargc, &_str) == FAILURE) { ZEND_WRONG_PARAM_COUNT(); } //if // Convert the parameters to the appropriate types convert_to_string_ex(_str); resultlen = Z_STRLEN_PP(_str); result = php_str_unhtmlentities(Z_STRVAL_PP(_str), &resultlen); // Return the result RETURN_STRINGL(result, resultlen, 0); } // PHP_FUNCTION(unhtmlentities) /* }}} */ -- PHP Development Mailing List <http://www.php.net/> To unsubscribe, visit: http://www.php.net/unsub.php