Here are the diffs for the unhtmlentities function. Also included is a test script for some rudimentary benchmarking.
-Brad
unhtmlentities_test.php
Description: application/unknown-content-type-microsoft
--- html.c Tue Feb 26 22:44:44 2002
+++ html.c Wed Feb 27 00:42:04 2002
@@ -123,6 +123,40 @@
     { NULL }
 };
 
+
+/* Entity names recognised by unhtmlentities, each paired with the single byte
+ * it decodes to.  TODO: reuse the tables htmlentities already has instead. */
+struct entity {
+    const char *str;    /* entity name, without the enclosing '&' and ';' */
+    unsigned char ch;   /* decoded byte; unsigned because values go up to 255 */
+};
+
+static const struct entity il_EntTable[] =
+{
+    {"quot",34}, {"amp",38}, {"lt",60}, {"gt",62}, {"nbsp",160},
+    {"iexcl",161}, {"cent",162}, {"pound",163}, {"curren",164}, {"yen",165},
+    {"brvbar",166}, {"sect",167}, {"uml",168}, {"copy",169}, {"ordf",170},
+    {"laquo",171}, {"not",172}, {"shy",173}, {"reg",174}, {"macr",175},
+    {"deg",176}, {"plusmn",177}, {"sup2",178}, {"sup3",179}, {"acute",180},
+    {"micro",181}, {"para",182}, {"middot",183}, {"cedil",184}, {"sup1",185},
+    {"ordm",186}, {"raquo",187}, {"frac14",188}, {"frac12",189}, {"frac34",190},
+    {"iquest",191}, {"Agrave",192}, {"Aacute",193}, {"Acirc",194}, {"Atilde",195},
+    {"Auml",196}, {"Aring",197}, {"AElig",198}, {"Ccedil",199}, {"Egrave",200},
+    {"Eacute",201}, {"Ecirc",202}, {"Euml",203}, {"Igrave",204}, {"Iacute",205},
+    {"Icirc",206}, {"Iuml",207}, {"ETH",208}, {"Ntilde",209}, {"Ograve",210},
+    {"Oacute",211}, {"Ocirc",212}, {"Otilde",213}, {"Ouml",214}, {"times",215},
+    {"Oslash",216}, {"Ugrave",217}, {"Uacute",218}, {"Ucirc",219}, {"Uuml",220},
+    {"Yacute",221}, {"THORN",222}, {"szlig",223}, {"agrave",224}, {"aacute",225},
+    {"acirc",226}, {"atilde",227}, {"auml",228}, {"aring",229}, {"aelig",230},
+    {"ccedil",231}, {"egrave",232}, {"eacute",233}, {"ecirc",234}, {"euml",235},
+    {"igrave",236}, {"iacute",237}, {"icirc",238}, {"iuml",239}, {"eth",240},
+    {"ntilde",241}, {"ograve",242}, {"oacute",243}, {"ocirc",244}, {"otilde",245},
+    {"ouml",246}, {"divide",247}, {"oslash",248}, {"ugrave",249}, {"uacute",250},
+    {"ucirc",251}, {"uuml",252}, {"yacute",253}, {"thorn",254}, {"yuml",255}
+};
+
+
+
 /* {{{ get_next_char
  */
 inline static unsigned short get_next_char(enum entity_charset charset,
@@ -575,6 +609,152 @@
     }
 }
 /* }}} */
+
+
+/* {{{ php_unhtmlentities_lookup
+ * Decodes the entity body name[0..len) -- the text between '&' and ';' -- into
+ * *out.  Accepted bodies are a name listed in il_EntTable, or '#' followed by
+ * one to three decimal digits whose value is at most 255.
+ * Returns 1 when decoded, 0 when the body is empty or not recognised. */
+static int php_unhtmlentities_lookup(const char *name, unsigned int len, unsigned char *out)
+{
+    unsigned int i;
+
+    if (len == 0) {
+        return 0;
+    }
+
+    if (name[0] == '#') {
+        unsigned int value = 0;
+
+        if (len < 2 || len > 4) {
+            return 0;
+        }
+        for (i = 1; i < len; ++i) {
+            /* Digits only: "&#x;" or "&#-1;" must not silently decode to a byte. */
+            if (name[i] < '0' || name[i] > '9') {
+                return 0;
+            }
+            value = value * 10 + (unsigned int) (name[i] - '0');
+        }
+        if (value > 255) {
+            return 0;
+        }
+        *out = (unsigned char) value;
+        return 1;
+    }
+
+    for (i = 0; i < sizeof(il_EntTable) / sizeof(il_EntTable[0]); ++i) {
+        /* Length-checked compare, so the input never has to be NUL-patched. */
+        if (strlen(il_EntTable[i].str) == len && memcmp(name, il_EntTable[i].str, len) == 0) {
+            *out = il_EntTable[i].ch;
+            return 1;
+        }
+    }
+
+    return 0;
+}
+/* }}} */
+
+
+/* {{{ php_str_unhtmlentities
+ * Returns a newly emalloc()ed, NUL-terminated copy of str in which every
+ * recognised "&name;" or "&#nnn;" entity is replaced by the single byte it
+ * stands for; all other text is copied through unchanged.
+ *
+ * str        input string; it is read but never written to
+ * resultlen  in:  length of str, or 0 (or a NULL pointer) to use strlen(str)
+ *            out: length of the returned string, if the pointer is non-NULL */
+PHPAPI char *php_str_unhtmlentities(char *str, unsigned int *resultlen TSRMLS_DC)
+{
+    char *p, *amp, *semi, *end, *buf;
+    unsigned int slen, buflen = 0;
+    unsigned char decoded;
+
+    slen = (resultlen && *resultlen != 0) ? *resultlen : (unsigned int) strlen(str);
+    end = str + slen;
+
+    /* Decoding never lengthens the text, so slen + 1 bytes always suffice. */
+    buf = emalloc(slen + 1);
+
+    p = str;
+    while (p < end && (amp = memchr(p, '&', end - p)) != NULL) {
+        /* Look for the closing ';' at most 15 bytes after the '&'.  A later '&'
+         * restarts the candidate entity there, so "&&amp;" decodes to "&&". */
+        semi = amp + 1;
+        while (semi < end && *semi != ';' && semi - amp < 15) {
+            if (*semi == '&') {
+                amp = semi;
+            }
+            ++semi;
+        }
+        if (semi >= end) {
+            break; /* no ';' before the end: the tail is copied verbatim below */
+        }
+
+        /* Text in front of the candidate entity is copied unchanged. */
+        memcpy(buf + buflen, p, amp - p);
+        buflen += (unsigned int) (amp - p);
+
+        if (*semi != ';') {
+            /* No ';' in range: emit what was scanned and resume at semi itself,
+             * so that a '&' sitting exactly there still starts an entity. */
+            memcpy(buf + buflen, amp, semi - amp);
+            buflen += (unsigned int) (semi - amp);
+            p = semi;
+        } else if (php_unhtmlentities_lookup(amp + 1, (unsigned int) (semi - amp - 1), &decoded)) {
+            buf[buflen++] = (char) decoded;
+            p = semi + 1;
+        } else {
+            /* Unknown or empty entity ("&;"): keep "&...;" exactly as written. */
+            memcpy(buf + buflen, amp, semi - amp + 1);
+            buflen += (unsigned int) (semi - amp + 1);
+            p = semi + 1;
+        }
+    }
+
+    /* Whatever follows the last entity is copied as-is. */
+    memcpy(buf + buflen, p, end - p);
+    buflen += (unsigned int) (end - p);
+    buf[buflen] = '\0';
+
+    /* Trim the allocation down to the decoded length. */
+    buf = erealloc(buf, buflen + 1);
+
+    if (resultlen) {
+        *resultlen = buflen;
+    }
+    return buf;
+}
+/* }}} */
+
+
+/* {{{ proto string unhtmlentities(string str)
+   Translates HTML entities in the given string into the appropriate characters */
+PHP_FUNCTION(unhtmlentities)
+{
+    /*
+     * Roughly the reverse of htmlentities(), except that it does NOT yet use the
+     * same translation table (or the locale).  An entity has the form "&data;",
+     * where data is either an entity name (e.g. &gt;, &lt;, &quot;) or a '#'
+     * followed by a decimal value from 0 to 255 (e.g. &#34;, &#38;).
+     */
+    char *str, *result;
+    int str_len;
+    unsigned int result_len;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &str_len) == FAILURE) {
+        return;
+    }
+
+    result_len = (unsigned int) str_len;
+    result = php_str_unhtmlentities(str, &result_len TSRMLS_CC);
+
+    /* result is already emalloc()ed: pass duplicate = 0 so it is not copied. */
+    RETURN_STRINGL(result, result_len, 0);
+}
+/* }}} */
+
 
 /*
  * Local variables:
--- html.h Tue Feb 26 22:44:44 2002 +++ html.h Wed Feb 27 00:14:54 2002 @@ -29,6 +29,7 @@ PHP_FUNCTION(htmlspecialchars); PHP_FUNCTION(htmlentities); +PHP_FUNCTION(unhtmlentities); PHP_FUNCTION(get_html_translation_table); PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char * hint_charset);
--- basic_functions.c Tue Feb 26 22:44:44 2002 +++ basic_functions.c Wed Feb 27 00:22:04 2002 @@ -274,6 +274,7 @@ PHP_FE(wordwrap, NULL) PHP_FE(htmlspecialchars, NULL) PHP_FE(htmlentities, NULL) + PHP_FE(unhtmlentities, + NULL) PHP_FE(get_html_translation_table, NULL) PHP_NAMED_FE(md5,php_if_md5, NULL) PHP_NAMED_FE(md5_file,php_if_md5_file, NULL)
-- PHP Development Mailing List <http://www.php.net/> To unsubscribe, visit: http://www.php.net/unsub.php