Hello, all.

 The attached patch adds Cyrillic character set support to
htmlentities(), useful for those who want to see correct Cyrillic
letters after using this function (at the moment, htmlentities()
successfully breaks Cyrillic characters).
 KOI8-R, Windows-1251, ISO-8859-5 & CP866 encodings are supported.

 I've tested this patch with PHP-CVS & PHP 4.3.2RC1 - it seems to be
working correctly.
 Could someone with enough karma please take a look at it and apply it
if it's OK?


--
Wbr,
Antony Dovgal aka tony2001      mailto:[EMAIL PROTECTED]
http://phpclub.net
---
Stand for something or you will fall for nothing.
--- html.c      Thu Mar 20 09:51:08 2003
+++ /root/CVS/php5/ext/standard/html.c  Fri Mar 21 00:36:10 2003
@@ -46,7 +46,9 @@
 
 enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252,
                                          cs_8859_15, cs_utf_8, cs_big5, cs_gb2312, 
-                                         cs_big5hkscs, cs_sjis, cs_eucjp};
+                                         cs_big5hkscs, cs_sjis, cs_eucjp, cs_koi8r,
+                      cs_cp1251, cs_8859_5, cs_cp866
+                    };
 typedef const char *entity_table_t;
 
 /* codepage 1252 is a Windows extension to iso-8859-1. */
@@ -255,6 +257,64 @@
        "spades", NULL, NULL, "clubs", NULL, "hearts", "diams"
 };
 
+static entity_table_t ent_koi8r[] = {
+    "#1105", /* "jo "*/
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+    NULL, NULL, NULL, NULL, NULL, "#1025", /* "JO" */
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+    "#1102", "#1072", "#1073", "#1094", "#1076", "#1077", "#1092", 
+    "#1075", "#1093", "#1080", "#1081", "#1082", "#1083", "#1084", 
+    "#1085", "#1086", "#1087", "#1103", "#1088", "#1089", "#1090", 
+    "#1091", "#1078", "#1074", "#1100", "#1099", "#1079", "#1096", 
+    "#1101", "#1097", "#1095", "#1098", "#1070", "#1040", "#1041", 
+    "#1062", "#1044", "#1045", "#1060", "#1043", "#1061", "#1048", 
+    "#1049", "#1050", "#1051", "#1052", "#1053", "#1054", "#1055", 
+    "#1071", "#1056", "#1057", "#1058", "#1059", "#1046", "#1042",
+    "#1068", "#1067", "#1047", "#1064", "#1069", "#1065", "#1063", 
+    "#1066"
+};
+
+static entity_table_t ent_cp_1251[] = {
+    "#1040", "#1041", "#1042", "#1043", "#1044", "#1045", "#1046",
+    "#1047", "#1048", "#1049", "#1050", "#1051", "#1052", "#1053",
+    "#1054", "#1055", "#1056", "#1057", "#1058", "#1059", "#1060",
+    "#1061", "#1062", "#1063", "#1064", "#1065", "#1066", "#1067",
+    "#1068", "#1069", "#1070", "#1071", "#1072", "#1073", "#1074",
+    "#1075", "#1076", "#1077", "#1078", "#1079", "#1080", "#1081",
+    "#1082", "#1083", "#1084", "#1085", "#1086", "#1087", "#1088",
+    "#1089", "#1090", "#1091", "#1092", "#1093", "#1094", "#1095",
+    "#1096", "#1097", "#1098", "#1099", "#1100", "#1101", "#1102",
+    "#1103"
+};
+
+static entity_table_t ent_iso_8859_5[] = {
+    "#1056", "#1057", "#1058", "#1059", "#1060", "#1061", "#1062",
+    "#1063", "#1064", "#1065", "#1066", "#1067", "#1068", "#1069",
+    "#1070", "#1071", "#1072", "#1073", "#1074", "#1075", "#1076",
+    "#1077", "#1078", "#1079", "#1080", "#1081", "#1082", "#1083",
+    "#1084", "#1085", "#1086", "#1087", "#1088", "#1089", "#1090",
+    "#1091", "#1092", "#1093", "#1094", "#1095", "#1096", "#1097",
+    "#1098", "#1099", "#1100", "#1101", "#1102", "#1103", "#1104",
+    "#1105", "#1106", "#1107", "#1108", "#1109", "#1110", "#1111",
+    "#1112", "#1113", "#1114", "#1115", "#1116", "#1117", "#1118",
+    "#1119"
+};
+
+static entity_table_t ent_cp_866[] = {
+
+    "#9492", "#9524", "#9516", "#9500", "#9472", "#9532", "#9566", 
+    "#9567", "#9562", "#9556", "#9577", "#9574", "#9568", "#9552", 
+    "#9580", "#9575", "#9576", "#9572", "#9573", "#9561", "#9560", 
+    "#9554", "#9555", "#9579", "#9578", "#9496", "#9484", "#9608", 
+    "#9604", "#9612", "#9616", "#9600", "#1088", "#1089", "#1090", 
+    "#1091", "#1092", "#1093", "#1094", "#1095", "#1096", "#1097", 
+    "#1098", "#1099", "#1100", "#1101", "#1102", "#1103", "#1025", 
+    "#1105", "#1028", "#1108", "#1031", "#1111", "#1038", "#1118", 
+    "#176", "#8729", "#183", "#8730", "#8470", "#164",  "#9632", 
+    "#160"
+};
+
+
 struct html_entity_map {
        enum entity_charset charset;    /* charset identifier */
        unsigned short basechar;                        /* char code at start of table */
@@ -281,6 +341,10 @@
        { cs_big5hkscs,         0xa0, 0xff, ent_iso_8859_1 },
        { cs_sjis,                      0xa0, 0xff, ent_iso_8859_1 },
        { cs_eucjp,                     0xa0, 0xff, ent_iso_8859_1 },
+       { cs_koi8r,                 0xa3, 0xff, ent_koi8r },
+    { cs_cp1251,               0xc0, 0xff, ent_cp_1251 },
+    { cs_8859_5,               0xc0, 0xff, ent_iso_8859_5 },
+    { cs_cp866,                    0xc0, 0xff, ent_cp_866 },
        { cs_terminator }
 };
 
@@ -306,6 +370,17 @@
        { "932",            cs_sjis },
        { "EUCJP",              cs_eucjp },
        { "EUC-JP",             cs_eucjp },
+       { "KOI8-R",         cs_koi8r },
+       { "koi8-ru",        cs_koi8r },
+       { "koi8r",          cs_koi8r },
+    { "cp1251",         cs_cp1251 },
+    { "Windows-1251",   cs_cp1251 },
+    { "win-1251",       cs_cp1251 },
+    { "iso8859-5",      cs_8859_5 },
+    { "iso-8859-5",     cs_8859_5 },
+    { "cp866",          cs_cp866 },
+    { "866",            cs_cp866 },    
+    { "ibm866",         cs_cp866 },
        { NULL }
 };
 
@@ -643,7 +718,6 @@
 
        if (charset_hint) {
                int found = 0;
-               
                /* now walk the charset map and look for the codeset */
                for (i = 0; charset_map[i].codeset; i++) {
                        if (strncasecmp(charset_hint, charset_map[i].codeset, len) == 0) {

-- 
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to