Replying to Jonathan Houser:

Well, I did something. It survives only basic testing and is of course
not complete yet, but I'll post it here - maybe someone will say
something.

P.S. add octstr_destroy(charset) at the end of normalize manually :)

-- 
Paul P 'Stingray' Komkoff Jr // http://stingr.net/key <- my pgp key
 This message represents the official view of the voices in my head
--- gateway.orig/gw/wml_compiler.c      2005-02-11 23:58:53.000000000 +0300
+++ gateway.C9/gw/wml_compiler.c        2005-05-01 18:51:12.657859082 +0400
@@ -335,7 +335,6 @@
     xmlDocPtr pDoc = NULL;
     char *wml_c_text;
     wml_binary_t *wbxml = NULL;
-    Octstr *encoding = NULL;
 
     *wml_binary = octstr_create("");
     wbxml = wml_binary_create();
@@ -347,37 +346,6 @@
        -- tuo */
     parse_entities(wml_text);
 
-    /* transcode from charset to UTF-8 */
-    if (charset && octstr_len(charset) && 
-        octstr_case_compare(charset, octstr_imm("UTF-8")) == -1) {
-        debug("wml_compile", 0, "WML compiler: Transcoding from <%s> to UTF-8", 
-              octstr_get_cstr(charset));
-        set_charset(wml_text, charset);
-    }
-
-    /* 
-     * If we did not set the character set encoding yet, then obviously
-     * there was no charset argument in the Content-Type HTTP reply header.
-     * We have to scan the xml preamble line for an explicite encoding
-     * definition to allow transcoding from UTF-8 to that charset after 
-     * libxml2 did all it's parsing magic. (Keep in mind libxml2 uses UTF-8
-     * as internal encoding.) -- Stipe
-     */
-
-    /* 
-     * We will trust the xml preamble encoding more then the HTTP header 
-     * charset definition.
-     */
-    if ((encoding = find_charset_encoding(wml_text)) != NULL) {
-        /* ok, we rely on the xml preamble encoding */
-    } else if (charset && octstr_len(charset) > 0) {
-        /* we had a HTTP response charset, use this */
-        encoding = octstr_duplicate(charset);
-    } else {
-        /* we had none, so use UTF-8 as default */
-        encoding = octstr_create("UTF-8");
-    }
-
     size = octstr_len(wml_text);
     wml_c_text = octstr_get_cstr(wml_text);
 
@@ -393,17 +361,17 @@
          * into binary.
          */
 
-        pDoc = xmlParseMemory(wml_c_text, size);
+        pDoc = xmlReadMemory(wml_c_text, size, NULL, octstr_get_cstr(charset), XML_PARSE_RECOVER | XML_PARSE_NONET);
        
         if (pDoc != NULL) {
             /* 
              * If we have a set internal encoding, then apply this information 
              * to the XML parsing tree document for later transcoding ability.
              */
-            if (encoding)
-                pDoc->charset = xmlParseCharEncoding(octstr_get_cstr(encoding));
+            if (charset)
+                pDoc->charset = xmlParseCharEncoding(octstr_get_cstr(charset));
 
-            ret = parse_document(pDoc, encoding, &wbxml, version);
+            ret = parse_document(pDoc, charset, &wbxml, version);
             wml_binary_output(*wml_binary, wbxml);
         } else {    
             error(0, "WML compiler: Compiling error: "
@@ -413,7 +381,6 @@
     }
 
     wml_binary_destroy(wbxml);
-    octstr_destroy(encoding);
 
     if (pDoc) 
         xmlFreeDoc(pDoc);
Index: gateway.C9/gw/wap-appl.c
===================================================================
--- gateway.C9.orig/gw/wap-appl.c       2005-04-29 09:09:32.000000000 +0400
+++ gateway.C9/gw/wap-appl.c    2005-05-01 18:59:36.177853180 +0400
@@ -523,15 +523,8 @@
  * to handle those charsets for all content types, just WML/XHTML. */
 static void add_charset_headers(List *headers) 
 {
-    long i, len;
-    
-    gw_assert(charsets != NULL);
-    len = gwlist_len(charsets);
-    for (i = 0; i < len; i++) {
-        unsigned char *charset = octstr_get_cstr(gwlist_get(charsets, i));
-        if (!http_charset_accepted(headers, charset))
-            http_header_add(headers, "Accept-Charset", charset);
-    }
+    if (!http_charset_accepted(headers, "utf-8"))
+        http_header_add(headers, "Accept-Charset", "utf-8");
 }
 
 
@@ -720,6 +713,33 @@
 }
 
 
+/* Convert a WML/XHTML body to UTF-8 if the device rejects its own charset. */
+static void normalize_charset(struct content *content, List *device_headers)
+{
+    /* an encoding found in the body is preferred over content->charset */
+    Octstr *charset = find_charset_encoding(content->body);
+    if (charset == NULL) {
+        charset = octstr_len(content->charset) > 0 ?
+            octstr_duplicate(content->charset) : octstr_imm("UTF-8");
+    }
+    if (octstr_case_compare(charset, octstr_imm("UTF-8")) != 0 &&
+        !http_charset_accepted(device_headers, octstr_get_cstr(charset))) {
+        if (!http_charset_accepted(device_headers, "UTF-8")) {
+            warning(0, "WSP: Device doesn't support charset <%s> neither UTF-8",
+                    octstr_get_cstr(charset));
+        } else {
+            debug("wsp",0,"Converting wml/xhtml from charset <%s> to UTF-8",
+                  octstr_get_cstr(charset));
+            if (charset_convert(content->body,
+                                octstr_get_cstr(charset), "UTF-8") >= 0) {
+                octstr_destroy(content->charset);
+                content->charset = octstr_create("UTF-8");
+            }
+        }
+    }
+    octstr_destroy(charset); /* free our reference (it was leaked before) */
+}
+
 /*
  * Return an HTTP reply back to the phone.
  */
@@ -861,54 +881,8 @@
         if (octstr_search(content.type, octstr_imm("text/vnd.wap.wml"), 0) >= 0 || 
             octstr_search(content.type, octstr_imm("application/xhtml+xml"), 0) >= 0 ||
             octstr_search(content.type, octstr_imm("application/vnd.wap.xhtml+xml"), 0) >= 0) {
-            Octstr *charset;
-            
-            /* get charset used in content body, default to utf-8 if not present */
-            if ((charset = find_charset_encoding(content.body)) == NULL)
-                charset = octstr_imm("UTF-8"); 
-
-            /* convert to utf-8 if original charset is not utf-8 
-             * and device supports it */
-
-            if (octstr_case_compare(charset, octstr_imm("UTF-8")) < 0 &&
-                !http_charset_accepted(device_headers, octstr_get_cstr(charset))) {
-                if (!http_charset_accepted(device_headers, "UTF-8")) {
-                    warning(0, "WSP: Device doesn't support charset <%s> neither UTF-8", 
-                                octstr_get_cstr(charset));
-                } else {
-                    /* convert to utf-8 */
-                    debug("wsp",0,"Converting wml/xhtml from charset <%s> to UTF-8", 
-                          octstr_get_cstr(charset));
-                    if (charset_convert(content.body, 
-                                        octstr_get_cstr(charset), "UTF-8") >= 0) {
-                        octstr_destroy(content.charset);
-                        content.charset = octstr_create("UTF-8");
-                        /* XXX it might be good idea to change <?xml...encoding?> */
-                    }
-                 }
-            }
- 
-            /* convert to iso-8859-1 if original charset is not iso 
-             * and device supports it */
-            else if (octstr_case_compare(charset, octstr_imm("ISO-8859-1")) < 0 &&
-                    !http_charset_accepted(device_headers, octstr_get_cstr(charset))) {
-                if (!http_charset_accepted(device_headers, "ISO-8859-1")) {
-                    warning(0, "WSP: Device doesn't support charset <%s> neither ISO-8859-1", 
-                            octstr_get_cstr(charset));
-                } else {
-                    /* convert to iso-latin1 */
-                    debug("wsp",0,"Converting wml/xhtml from charset <%s> to ISO-8859-1", 
-                          octstr_get_cstr(charset));
-                    if (charset_convert(content.body, 
-                                        octstr_get_cstr(charset), "ISO-8859-1") >= 0) {
-                        octstr_destroy(content.charset);
-                        content.charset = octstr_create("ISO-8859-1");
-                        /* XXX it might be good idea to change <?xml...encoding?> */
-                    }
-                }
-            }
 
-            octstr_destroy(charset);
+            normalize_charset(&content, device_headers);
         }
 
         /* set WBXML Encoding-Version for wml->wmlc conversion */

Reply via email to