Replying to Alexander Malysh:
> Hi Paul,
>
> only one objection from my side: please fix indentation. Please use 4
> spaces instead of 2 as indent.
>
> When no objections from others and you fix indents I will happily commit
> this patch.
Done.
(Also take a look at
http://anna.sgu.ru/viewmtn/headofbranch.psp?branch=net.stingr.kannel.generic
)
--
Paul P 'Stingray' Komkoff Jr // http://stingr.net/key <- my pgp key
This message represents the official view of the voices in my head
#
# patch "gw/wap-appl.c"
# from [6698038f9ba8d8227a90f72928dcbe1cbf2c2668]
# to [20be34e1d890e0b7c974590165eb420ea308bf2b]
#
# patch "gw/wml_compiler.c"
# from [727a5ea75763c1cd18d8d8aaa1de3f2f4d55bc45]
# to [211c041170a009bf497c2ae626c0b0a7a107bde2]
#
--- gw/wap-appl.c
+++ gw/wap-appl.c
@@ -510,15 +510,8 @@
* to handle those charsets for all content types, just WML/XHTML. */
static void add_charset_headers(List *headers)
{
- long i, len;
-
- gw_assert(charsets != NULL);
- len = gwlist_len(charsets);
- for (i = 0; i < len; i++) {
- unsigned char *charset = octstr_get_cstr(gwlist_get(charsets, i));
- if (!http_charset_accepted(headers, charset))
- http_header_add(headers, "Accept-Charset", charset);
- }
+ if (!http_charset_accepted(headers, "utf-8"))
+ http_header_add(headers, "Accept-Charset", "utf-8");
}
@@ -707,6 +700,36 @@
}
+static void normalize_charset(struct content * content, List* device_headers) {
+ Octstr* charset;
+
+ if ((charset = find_charset_encoding(content->body)) == NULL)
+ if (octstr_len(content->charset) > 0) {
+ charset = octstr_duplicate(content->charset);
+ } else {
+ charset = octstr_imm("UTF-8");
+ }
+
+ debug("wap-appl",0,"Normalizing charset from %s",
octstr_get_cstr(charset));
+
+ if (octstr_case_compare(charset, octstr_imm("UTF-8")) != 0 &&
+ !http_charset_accepted(device_headers, octstr_get_cstr(charset))) {
+ if (!http_charset_accepted(device_headers, "UTF-8")) {
+ warning(0, "WSP: Device doesn't support charset <%s> neither
UTF-8",
+ octstr_get_cstr(charset));
+ } else {
+ debug("wsp",0,"Converting wml/xhtml from charset <%s> to UTF-8",
+ octstr_get_cstr(charset));
+ if (charset_convert(content->body,
+ octstr_get_cstr(charset), "UTF-8") >= 0) {
+ octstr_destroy(content->charset);
+ content->charset = octstr_create("UTF-8");
+ }
+ }
+ }
+ octstr_destroy(charset);
+}
+
/*
* Return an HTTP reply back to the phone.
*/
@@ -848,54 +871,8 @@
if (octstr_search(content.type, octstr_imm("text/vnd.wap.wml"), 0) >=
0 ||
octstr_search(content.type, octstr_imm("application/xhtml+xml"),
0) >= 0 ||
octstr_search(content.type,
octstr_imm("application/vnd.wap.xhtml+xml"), 0) >= 0) {
- Octstr *charset;
-
- /* get charset used in content body, default to utf-8 if not
present */
- if ((charset = find_charset_encoding(content.body)) == NULL)
- charset = octstr_imm("UTF-8");
- /* convert to utf-8 if original charset is not utf-8
- * and device supports it */
-
- if (octstr_case_compare(charset, octstr_imm("UTF-8")) < 0 &&
- !http_charset_accepted(device_headers,
octstr_get_cstr(charset))) {
- if (!http_charset_accepted(device_headers, "UTF-8")) {
- warning(0, "WSP: Device doesn't support charset <%s>
neither UTF-8",
- octstr_get_cstr(charset));
- } else {
- /* convert to utf-8 */
- debug("wsp",0,"Converting wml/xhtml from charset <%s> to
UTF-8",
- octstr_get_cstr(charset));
- if (charset_convert(content.body,
- octstr_get_cstr(charset), "UTF-8") >=
0) {
- octstr_destroy(content.charset);
- content.charset = octstr_create("UTF-8");
- /* XXX it might be good idea to change
<?xml...encoding?> */
- }
- }
- }
-
- /* convert to iso-8859-1 if original charset is not iso
- * and device supports it */
- else if (octstr_case_compare(charset, octstr_imm("ISO-8859-1")) <
0 &&
- !http_charset_accepted(device_headers,
octstr_get_cstr(charset))) {
- if (!http_charset_accepted(device_headers, "ISO-8859-1")) {
- warning(0, "WSP: Device doesn't support charset <%s>
neither ISO-8859-1",
- octstr_get_cstr(charset));
- } else {
- /* convert to iso-latin1 */
- debug("wsp",0,"Converting wml/xhtml from charset <%s> to
ISO-8859-1",
- octstr_get_cstr(charset));
- if (charset_convert(content.body,
- octstr_get_cstr(charset),
"ISO-8859-1") >= 0) {
- octstr_destroy(content.charset);
- content.charset = octstr_create("ISO-8859-1");
- /* XXX it might be good idea to change
<?xml...encoding?> */
- }
- }
- }
-
- octstr_destroy(charset);
+ normalize_charset(&content, device_headers);
}
/* set WBXML Encoding-Version for wml->wmlc conversion */
--- gw/wml_compiler.c
+++ gw/wml_compiler.c
@@ -335,7 +335,6 @@
xmlDocPtr pDoc = NULL;
char *wml_c_text;
wml_binary_t *wbxml = NULL;
- Octstr *encoding = NULL;
*wml_binary = octstr_create("");
wbxml = wml_binary_create();
@@ -347,63 +346,44 @@
-- tuo */
parse_entities(wml_text);
- /* transcode from charset to UTF-8 */
- if (charset && octstr_len(charset) &&
- octstr_case_compare(charset, octstr_imm("UTF-8")) == -1) {
- debug("wml_compile", 0, "WML compiler: Transcoding from <%s> to
UTF-8",
- octstr_get_cstr(charset));
- set_charset(wml_text, charset);
- }
-
- /*
- * If we did not set the character set encoding yet, then obviously
- * there was no charset argument in the Content-Type HTTP reply header.
- * We have to scan the xml preamble line for an explicite encoding
- * definition to allow transcoding from UTF-8 to that charset after
- * libxml2 did all it's parsing magic. (Keep in mind libxml2 uses UTF-8
- * as internal encoding.) -- Stipe
- */
-
- /*
- * We will trust the xml preamble encoding more then the HTTP header
- * charset definition.
- */
- if ((encoding = find_charset_encoding(wml_text)) != NULL) {
- /* ok, we rely on the xml preamble encoding */
- } else if (charset && octstr_len(charset) > 0) {
- /* we had a HTTP response charset, use this */
- encoding = octstr_duplicate(charset);
- } else {
- /* we had none, so use UTF-8 as default */
- encoding = octstr_create("UTF-8");
- }
-
size = octstr_len(wml_text);
wml_c_text = octstr_get_cstr(wml_text);
+ debug("ww",0, "given encoding: %s", octstr_get_cstr(charset));
if (octstr_search_char(wml_text, '\0', 0) != -1) {
error(0, "WML compiler: Compiling error: "
"\\0 character found in the middle of the WML source.");
ret = -1;
} else {
-
+#if 0
+ char *tag1, *tag2;
+ tag1 = strchr(wml_c_text, '<');
+ if ((tag1 != NULL) && (tag1 == strstr(wml_c_text, "<?"))) {
+ tag2 = strchr(tag1, '>');
+ if ((tag2 != NULL) && (tag2 - 1 == strstr(tag1, "?>"))) {
+ debug("wml_compile",0,"Stripping preamble");
+ size -= ++tag2 - wml_c_text;
+ wml_c_text = tag2;
+ }
+ }
+#endif
/*
* An empty octet string for the binary output is created, the wml
* source is parsed into a parsing tree and the tree is then compiled
* into binary.
*/
- pDoc = xmlParseMemory(wml_c_text, size);
+ pDoc = xmlReadMemory(wml_c_text, size, NULL, octstr_get_cstr(charset),
XML_PARSE_RECOVER | XML_PARSE_NONET);
if (pDoc != NULL) {
/*
* If we have a set internal encoding, then apply this information
* to the XML parsing tree document for later transcoding ability.
*/
- if (encoding)
- pDoc->charset =
xmlParseCharEncoding(octstr_get_cstr(encoding));
+ if (charset)
+ pDoc->charset = xmlParseCharEncoding(octstr_get_cstr(charset));
- ret = parse_document(pDoc, encoding, &wbxml, version);
+ ret = parse_document(pDoc, charset, &wbxml, version);
wml_binary_output(*wml_binary, wbxml);
} else {
error(0, "WML compiler: Compiling error: "
@@ -413,7 +393,6 @@
}
wml_binary_destroy(wbxml);
- octstr_destroy(encoding);
if (pDoc)
xmlFreeDoc(pDoc);