Replying to Jonathan Houser:
> >This is with a GSM handset so you may be able to test and get the same
> >results. I found the Character Set being sent via tcpdump and ethereal
> >as it's not listed in the debug output from Kannel.
>
> Looks like it's the Character Set in the <?xml section (ie.
> encoding=) that's causing the problem. I checked the code and stumbled
> across a TODO. Guess I'll look into DOing it. Thanks.
Looks like you're one of the few people who are actually using Kannel
as a WAP gateway, not as an SMS router.
Take a look at my patchset. It's worth it. In particular, almost all of
the charset problems are solved here.
--
Paul P 'Stingray' Komkoff Jr // http://stingr.net/key <- my pgp key
This message represents the official view of the voices in my head
Index: gateway.C8/configure.in
===================================================================
--- gateway.C8.orig/configure.in 2005-04-01 16:08:46.997722376 +0400
+++ gateway.C8/configure.in 2005-04-01 16:08:50.242239063 +0400
@@ -213,6 +213,18 @@
]
)
+AC_MSG_CHECKING([whether to do all wapbox xml processing in utf-8])
+AC_ARG_ENABLE(scharset,
+[ --enable-scharset do all wapbox xml processing in utf-8],
+[
+ if test "$enableval" != yes; then
+ AC_MSG_RESULT(no)
+ else
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(NEW_CHARSETS, 1, [Simplify wapbox charset processing])
+ fi
+])
+
dnl Extra feature checks
dnl GW_HAVE_TYPE_FROM(HDRNAME, TYPE, HAVENAME, DESCRIPTION)
Index: gateway.C8/gw/wap-appl.c
===================================================================
--- gateway.C8.orig/gw/wap-appl.c 2005-04-01 16:08:47.000721930 +0400
+++ gateway.C8/gw/wap-appl.c 2005-04-01 16:08:50.244238765 +0400
@@ -523,6 +523,10 @@
* to handle those charsets for all content types, just WML/XHTML. */
static void add_charset_headers(List *headers)
{
+#ifdef NEW_CHARSETS
+ if (!http_charset_accepted(headers, "utf-8"))
+ http_header_add(headers, "Accept-Charset", "utf-8");
+#else
long i, len;
gw_assert(charsets != NULL);
@@ -532,6 +536,7 @@
if (!http_charset_accepted(headers, charset))
http_header_add(headers, "Accept-Charset", charset);
}
+#endif
}
@@ -720,6 +725,23 @@
}
+static void strip_preamble(Octstr *document) { /* Removes, in place, everything up to and including the first '>' of document, but only when " encoding" occurs before that '>'. */
+ long gt = 0, enc = 0; /* gt: offset of the first '>'; enc: offset of " encoding" */
+ Octstr *text = NULL, *encoding = NULL;
+
+ encoding = octstr_imm(" encoding"); /* the leading space is part of the search pattern */
+ enc = octstr_search(document, encoding, 0); /* search starts at offset 0 */
+ gt = octstr_search_char(document, '>', 0);
+
+ if (enc > 0 && gt > enc) { /* requires enc > 0 (a match at offset 0 does not qualify) and the first '>' after it; NOTE(review): presumably a failed search returns a negative value - confirm */
+ gt++; /* step past the '>' itself */
+ text = octstr_copy(document, gt, octstr_len(document) - gt); /* copy of everything after the '>' */
+ octstr_truncate(document, 0); /* empty document, then refill it with the copied remainder */
+ octstr_append_data(document, octstr_get_cstr(text), octstr_len(text));
+ octstr_destroy(text); /* the temporary copy is released here; encoding is not destroyed in this function */
+ }
+}
+
/*
* Return an HTTP reply back to the phone.
*/
@@ -865,12 +887,30 @@
/* get charset used in content body, default to utf-8 if not
present */
if ((charset = find_charset_encoding(content.body)) == NULL)
+#ifdef NEW_CHARSETS
+ if (octstr_len(content.charset) > 0) {
+ charset = octstr_duplicate(content.charset);
+ } else {
+ charset = octstr_imm("UTF-8");
+ }
+#else
charset = octstr_imm("UTF-8");
+#endif
/* convert to utf-8 if original charset is not utf-8
* and device supports it */
- if (octstr_case_compare(charset, octstr_imm("UTF-8")) < 0 &&
+#if 0
+ if (octstr_case_compare(charset, octstr_imm("UTF-8")) != 0) {
+ debug("wsp",0,"Converting wml/xhtml from charset <%s> to
UTF-8",
+ octstr_get_cstr(charset));
+ if (charset_convert(content.body, octstr_get_cstr(charset),
"UTF-8") >= 0) {
+ octstr_destroy(content.charset);
+ content.charset = octstr_create("UTF-8");
+ }
+ }
+#else
+ if (octstr_case_compare(charset, octstr_imm("UTF-8")) != 0 &&
!http_charset_accepted(device_headers,
octstr_get_cstr(charset))) {
if (!http_charset_accepted(device_headers, "UTF-8")) {
warning(0, "WSP: Device doesn't support charset <%s>
neither UTF-8",
@@ -883,6 +923,7 @@
octstr_get_cstr(charset), "UTF-8") >=
0) {
octstr_destroy(content.charset);
content.charset = octstr_create("UTF-8");
+ strip_preamble(content.body);
/* XXX it might be good idea to change
<?xml...encoding?> */
}
}
@@ -890,7 +931,7 @@
/* convert to iso-8859-1 if original charset is not iso
* and device supports it */
- else if (octstr_case_compare(charset, octstr_imm("ISO-8859-1")) <
0 &&
+ else if (octstr_case_compare(charset, octstr_imm("ISO-8859-1")) !=
0 &&
!http_charset_accepted(device_headers,
octstr_get_cstr(charset))) {
if (!http_charset_accepted(device_headers, "ISO-8859-1")) {
warning(0, "WSP: Device doesn't support charset <%s>
neither ISO-8859-1",
@@ -903,10 +944,12 @@
octstr_get_cstr(charset),
"ISO-8859-1") >= 0) {
octstr_destroy(content.charset);
content.charset = octstr_create("ISO-8859-1");
+ strip_preamble(content.body);
/* XXX it might be good idea to change
<?xml...encoding?> */
}
}
}
+#endif
octstr_destroy(charset);
}
Index: gateway.C8/gwlib/http.c
===================================================================
--- gateway.C8.orig/gwlib/http.c 2005-04-01 16:08:24.602058504 +0400
+++ gateway.C8/gwlib/http.c 2005-04-01 16:08:50.247238318 +0400
@@ -3029,9 +3029,11 @@
* to return charset 'iso-8859-1' in case of no given encoding and
* content-type is a 'text' subtype.
*/
+ /* REMOVE THIS for now
if (octstr_len(*charset) == 0 &&
octstr_ncompare(*type, octstr_imm("text"), 4) == 0)
octstr_append_cstr(*charset, "ISO-8859-1");
+ */
}
}