On Thu, 2005-10-13 at 09:25 -0400, John Nichel wrote:
> jonathan wrote:
> > Do you then have to do the reverse operation to get it back for
> > rendering? Since it was erroring on me during DOM creation, I feel like
> > I'm going around it to put it into a format it likes but then on
> > display via XSL transformation, I will have to convert it back. Or am I
> > missing something?
>
> I'm only on the sending end; I don't know if the people I send the
> documents to have to convert it back. Don't quote me on this, but if
> you're going to display the information in a web browser, *I think* it
> will display the decimal value properly.
Don't worry about being quoted on that -- it most definitely will. Using
decimal values in HTML entities is very common, especially for writing
characters that are neither ASCII nor covered by the usual named HTML
entities (such as CJK Kanji, mathematical operators or Cyrillic glyphs)
without worrying about the
transfer charset. Personally though, I usually convert into proper UTF-8
instead of using entities. It takes a *lot* less space, provided that
the web server is properly configured and does not declare the document
as ISO-8859-1 or something.
Personally, I use a seven-liner Perl script to convert the official HTML
entity DTDs into a PHP include file:
# Open the PHP include file and the array literal.
print "<?php\n\$htmlentities = array(\n";
# Read the DTD text line by line from the files named on the command line
# (or from standard input when none are given).
while (my $line = <>) {
    # Only entity declarations whose CDATA value is a decimal character
    # reference are of interest; everything else is skipped.
    next unless $line =~ /<!ENTITY\s+(\w+)\s+CDATA\s+\"&\#(\d+);\"/;
    my ($name, $codepoint) = ($1, $2);
    # One array entry per entity: "name" => decimal code point.
    print "\t\"$name\" => $codepoint,\n";
}
# Close the array literal and the PHP block.
print "\t);\n?>\n";
The result is as follows:
<?php
// Lookup table from HTML entity name (the identifier used between "&" and ";")
// to the decimal code point that the entity stands for, e.g. "nbsp" => 160.
//
// Keys are case-sensitive: "Prime" (8243) and "prime" (8242), or "Dagger"
// (8225) and "dagger" (8224), are distinct entries.
//
// Entries are listed in the order in which they were generated, not in
// numeric order: the group starting at "quot" (34) follows "diams" (9830).
$htmlentities = array(
// Code points 160-255.
"nbsp" => 160,
"iexcl" => 161,
"cent" => 162,
"pound" => 163,
"curren" => 164,
"yen" => 165,
"brvbar" => 166,
"sect" => 167,
"uml" => 168,
"copy" => 169,
"ordf" => 170,
"laquo" => 171,
"not" => 172,
"shy" => 173,
"reg" => 174,
"macr" => 175,
"deg" => 176,
"plusmn" => 177,
"sup2" => 178,
"sup3" => 179,
"acute" => 180,
"micro" => 181,
"para" => 182,
"middot" => 183,
"cedil" => 184,
"sup1" => 185,
"ordm" => 186,
"raquo" => 187,
"frac14" => 188,
"frac12" => 189,
"frac34" => 190,
"iquest" => 191,
"Agrave" => 192,
"Aacute" => 193,
"Acirc" => 194,
"Atilde" => 195,
"Auml" => 196,
"Aring" => 197,
"AElig" => 198,
"Ccedil" => 199,
"Egrave" => 200,
"Eacute" => 201,
"Ecirc" => 202,
"Euml" => 203,
"Igrave" => 204,
"Iacute" => 205,
"Icirc" => 206,
"Iuml" => 207,
"ETH" => 208,
"Ntilde" => 209,
"Ograve" => 210,
"Oacute" => 211,
"Ocirc" => 212,
"Otilde" => 213,
"Ouml" => 214,
"times" => 215,
"Oslash" => 216,
"Ugrave" => 217,
"Uacute" => 218,
"Ucirc" => 219,
"Uuml" => 220,
"Yacute" => 221,
"THORN" => 222,
"szlig" => 223,
"agrave" => 224,
"aacute" => 225,
"acirc" => 226,
"atilde" => 227,
"auml" => 228,
"aring" => 229,
"aelig" => 230,
"ccedil" => 231,
"egrave" => 232,
"eacute" => 233,
"ecirc" => 234,
"euml" => 235,
"igrave" => 236,
"iacute" => 237,
"icirc" => 238,
"iuml" => 239,
"eth" => 240,
"ntilde" => 241,
"ograve" => 242,
"oacute" => 243,
"ocirc" => 244,
"otilde" => 245,
"ouml" => 246,
"divide" => 247,
"oslash" => 248,
"ugrave" => 249,
"uacute" => 250,
"ucirc" => 251,
"uuml" => 252,
"yacute" => 253,
"thorn" => 254,
"yuml" => 255,
// Code points 402-9830: Greek letters, arrows, mathematical operators and
// other symbols.
"fnof" => 402,
"Alpha" => 913,
"Beta" => 914,
"Gamma" => 915,
"Delta" => 916,
"Epsilon" => 917,
"Zeta" => 918,
"Eta" => 919,
"Theta" => 920,
"Iota" => 921,
"Kappa" => 922,
"Lambda" => 923,
"Mu" => 924,
"Nu" => 925,
"Xi" => 926,
"Omicron" => 927,
"Pi" => 928,
"Rho" => 929,
"Sigma" => 931,
"Tau" => 932,
"Upsilon" => 933,
"Phi" => 934,
"Chi" => 935,
"Psi" => 936,
"Omega" => 937,
"alpha" => 945,
"beta" => 946,
"gamma" => 947,
"delta" => 948,
"epsilon" => 949,
"zeta" => 950,
"eta" => 951,
"theta" => 952,
"iota" => 953,
"kappa" => 954,
"lambda" => 955,
"mu" => 956,
"nu" => 957,
"xi" => 958,
"omicron" => 959,
"pi" => 960,
"rho" => 961,
"sigmaf" => 962,
"sigma" => 963,
"tau" => 964,
"upsilon" => 965,
"phi" => 966,
"chi" => 967,
"psi" => 968,
"omega" => 969,
"thetasym" => 977,
"upsih" => 978,
"piv" => 982,
"bull" => 8226,
"hellip" => 8230,
"prime" => 8242,
"Prime" => 8243,
"oline" => 8254,
"frasl" => 8260,
"weierp" => 8472,
"image" => 8465,
"real" => 8476,
"trade" => 8482,
"alefsym" => 8501,
"larr" => 8592,
"uarr" => 8593,
"rarr" => 8594,
"darr" => 8595,
"harr" => 8596,
"crarr" => 8629,
"lArr" => 8656,
"uArr" => 8657,
"rArr" => 8658,
"dArr" => 8659,
"hArr" => 8660,
"forall" => 8704,
"part" => 8706,
"exist" => 8707,
"empty" => 8709,
"nabla" => 8711,
"isin" => 8712,
"notin" => 8713,
"ni" => 8715,
"prod" => 8719,
"sum" => 8721,
"minus" => 8722,
"lowast" => 8727,
"radic" => 8730,
"prop" => 8733,
"infin" => 8734,
"ang" => 8736,
"and" => 8743,
"or" => 8744,
"cap" => 8745,
"cup" => 8746,
"int" => 8747,
"there4" => 8756,
"sim" => 8764,
"cong" => 8773,
"asymp" => 8776,
"ne" => 8800,
"equiv" => 8801,
"le" => 8804,
"ge" => 8805,
"sub" => 8834,
"sup" => 8835,
"nsub" => 8836,
"sube" => 8838,
"supe" => 8839,
"oplus" => 8853,
"otimes" => 8855,
"perp" => 8869,
"sdot" => 8901,
"lceil" => 8968,
"rceil" => 8969,
"lfloor" => 8970,
"rfloor" => 8971,
"lang" => 9001,
"rang" => 9002,
"loz" => 9674,
"spades" => 9824,
"clubs" => 9827,
"hearts" => 9829,
"diams" => 9830,
// Code points 34-8364: the markup-significant characters (quot, amp, lt, gt)
// followed by further letters, spacing, directional and punctuation entities.
"quot" => 34,
"amp" => 38,
"lt" => 60,
"gt" => 62,
"OElig" => 338,
"oelig" => 339,
"Scaron" => 352,
"scaron" => 353,
"Yuml" => 376,
"circ" => 710,
"tilde" => 732,
"ensp" => 8194,
"emsp" => 8195,
"thinsp" => 8201,
"zwnj" => 8204,
"zwj" => 8205,
"lrm" => 8206,
"rlm" => 8207,
"ndash" => 8211,
"mdash" => 8212,
"lsquo" => 8216,
"rsquo" => 8217,
"sbquo" => 8218,
"ldquo" => 8220,
"rdquo" => 8221,
"bdquo" => 8222,
"dagger" => 8224,
"Dagger" => 8225,
"permil" => 8240,
"lsaquo" => 8249,
"rsaquo" => 8250,
"euro" => 8364,
);
?>
Hope it helps.
Fredrik Tolf
--
PHP General Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php