Replying to Jonathan Houser:
> >This is with a GSM handset so you may be able to test and get the same 
> >results.  I found the Character Set being sent via tcpdump and ethereal 
> >as it's not listed in the debug output from Kannel.
> 
>      Looks like it's the Character Set in the <?xml section (ie. 
> encoding=) that's causing the problem.  I checked the code and stumbled 
> across a TODO.  Guess I'll look into DOing it.  Thanks.

It looks like you're one of the few people who actually use Kannel as a
WAP gateway rather than as an SMS router.
Take a look at my patchset; it's worth it. In particular, almost all of
the charset problems are solved in it.

-- 
Paul P 'Stingray' Komkoff Jr // http://stingr.net/key <- my pgp key
 This message represents the official view of the voices in my head
Index: gateway.C8/configure.in
===================================================================
--- gateway.C8.orig/configure.in        2005-04-01 16:08:46.997722376 +0400
+++ gateway.C8/configure.in     2005-04-01 16:08:50.242239063 +0400
@@ -213,6 +213,18 @@
 ]
 )
 
+AC_MSG_CHECKING([whether to do all wapbox xml processing in utf-8])
+AC_ARG_ENABLE(scharset,
+[  --enable-scharset           do all wapbox xml processing in utf-8],
+[
+  if test "$enableval" != yes; then
+    AC_MSG_RESULT(no)
+  else
+    AC_MSG_RESULT(yes)
+    AC_DEFINE(NEW_CHARSETS, 1, [Simplify wapbox charset processing])
+  fi
+],[AC_MSG_RESULT(no)])
+
 dnl Extra feature checks
 
 dnl GW_HAVE_TYPE_FROM(HDRNAME, TYPE, HAVENAME, DESCRIPTION)
Index: gateway.C8/gw/wap-appl.c
===================================================================
--- gateway.C8.orig/gw/wap-appl.c       2005-04-01 16:08:47.000721930 +0400
+++ gateway.C8/gw/wap-appl.c    2005-04-01 16:08:50.244238765 +0400
@@ -523,6 +523,10 @@
  * to handle those charsets for all content types, just WML/XHTML. */
 static void add_charset_headers(List *headers) 
 {
+#ifdef NEW_CHARSETS
+    if (!http_charset_accepted(headers, "utf-8"))
+        http_header_add(headers, "Accept-Charset", "utf-8");
+#else
     long i, len;
     
     gw_assert(charsets != NULL);
@@ -532,6 +536,7 @@
         if (!http_charset_accepted(headers, charset))
             http_header_add(headers, "Accept-Charset", charset);
     }
+#endif
 }
 
 
@@ -720,6 +725,23 @@
 }
 
 
+/* Remove, in place, everything up to and including the first '>' of
+ * document (the <?xml ... encoding=...?> declaration), but only when the
+ * text " encoding" occurs before that '>'. Otherwise document is untouched. */
+static void strip_preamble(Octstr *document)
+{
+    long enc, gt;
+
+    enc = octstr_search(document, octstr_imm(" encoding"), 0);
+    gt = octstr_search_char(document, '>', 0);
+
+    /* enc > 0 rejects both "not found" (-1) and a match at offset 0;
+     * gt > enc means the attribute lies inside the first tag. */
+    if (enc > 0 && gt > enc) {
+        octstr_delete(document, 0, gt + 1);
+    }
+}
+
 /*
  * Return an HTTP reply back to the phone.
  */
@@ -865,12 +887,30 @@
             
             /* get charset used in content body, default to utf-8 if not 
present */
             if ((charset = find_charset_encoding(content.body)) == NULL)
+#ifdef NEW_CHARSETS
+                if (octstr_len(content.charset) > 0) {
+                    charset = octstr_duplicate(content.charset);
+                } else {
+                    charset = octstr_imm("UTF-8");
+                }
+#else
                 charset = octstr_imm("UTF-8"); 
+#endif
 
             /* convert to utf-8 if original charset is not utf-8 
              * and device supports it */
 
-            if (octstr_case_compare(charset, octstr_imm("UTF-8")) < 0 &&
+#if 0
+            if (octstr_case_compare(charset, octstr_imm("UTF-8")) != 0) {
+                debug("wsp",0,"Converting wml/xhtml from charset <%s> to 
UTF-8",
+                    octstr_get_cstr(charset));
+                if (charset_convert(content.body, octstr_get_cstr(charset), 
"UTF-8") >= 0) {
+                    octstr_destroy(content.charset);
+                    content.charset = octstr_create("UTF-8");
+                }
+            }
+#else
+            if (octstr_case_compare(charset, octstr_imm("UTF-8")) != 0 &&
                 !http_charset_accepted(device_headers, 
octstr_get_cstr(charset))) {
                 if (!http_charset_accepted(device_headers, "UTF-8")) {
                     warning(0, "WSP: Device doesn't support charset <%s> 
neither UTF-8", 
@@ -883,6 +923,7 @@
                                         octstr_get_cstr(charset), "UTF-8") >= 
0) {
                         octstr_destroy(content.charset);
                         content.charset = octstr_create("UTF-8");
+                        strip_preamble(content.body);
                         /* XXX it might be good idea to change 
<?xml...encoding?> */
                     }
                  }
@@ -890,7 +931,7 @@
  
             /* convert to iso-8859-1 if original charset is not iso 
              * and device supports it */
-            else if (octstr_case_compare(charset, octstr_imm("ISO-8859-1")) < 
0 &&
+            else if (octstr_case_compare(charset, octstr_imm("ISO-8859-1")) != 
0 &&
                     !http_charset_accepted(device_headers, 
octstr_get_cstr(charset))) {
                 if (!http_charset_accepted(device_headers, "ISO-8859-1")) {
                     warning(0, "WSP: Device doesn't support charset <%s> 
neither ISO-8859-1", 
@@ -903,10 +944,12 @@
                                         octstr_get_cstr(charset), 
"ISO-8859-1") >= 0) {
                         octstr_destroy(content.charset);
                         content.charset = octstr_create("ISO-8859-1");
+                        strip_preamble(content.body);
                         /* XXX it might be good idea to change 
<?xml...encoding?> */
                     }
                 }
             }
+#endif
 
             octstr_destroy(charset);
         }
Index: gateway.C8/gwlib/http.c
===================================================================
--- gateway.C8.orig/gwlib/http.c        2005-04-01 16:08:24.602058504 +0400
+++ gateway.C8/gwlib/http.c     2005-04-01 16:08:50.247238318 +0400
@@ -3029,9 +3029,11 @@
          * to return charset 'iso-8859-1' in case of no given encoding and
          * content-type is a 'text' subtype. 
          */
+        /* REMOVE THIS for now
         if (octstr_len(*charset) == 0 && 
             octstr_ncompare(*type, octstr_imm("text"), 4) == 0)
             octstr_append_cstr(*charset, "ISO-8859-1");
+         */
     }
 }
 

Reply via email to