Hi list,

there have been recently people trying to us the sqlbox daemon with UCS-2
(Unicode) payload data, which didn't work out and the received messages have
been totally crippled.

I gave it a glance on the source code level, and found out a flaw in how sqlbox
"assumes" about character encoding of the msg->sms.msgdata payload.

The userguide tells us this: "Contents of the message, URL encoded as
necessary". To addopt the way smsbox works on the HTTP abstractive layer, sqlbox
is also expecting UTF-8 as input if no coding value is set, and UCS-2 ie. for
coding=2. Now, you can't put this payload in the table natively, if the table
has alternative character encoding scheme defined, ie. latin1. To overcome this
problem sqlbox enforces the use of URL-encoded data in the 'msgdata' table
field. Which ensures we have a representation in ASCII via the URL-encoded
values and there is no class with the underlying character scheme for the table.

In fact the existing sqlbox assumed that it is hex byte Octstr stream for UCS-2
(coding=2), so the conversions totally produced bugs data.

The attached patch solves the issue, by:

1. keeping the userguide assumption and requirement that ALL data is injected in
the 'msgdata' field of type TEXT as URL-encoded data. Optionall coding can be
set to indicate the character representation of the actual payload.

2. we use a fork of smsbox's charset_processing() routine to map from defined
other character sets and make sure we URL-decode in first instance to obtain the
raw byte data.

3. use a #define function for gw_sql_save_msg() to URL-encode again the
msg->sms.msgdata before calling the DB specific pointer function. This ensures
we have URL-encoded data again the the 'msgdata' table field.

Please review and comment.

Stipe

-- 
-------------------------------------------------------------------
Kölner Landstrasse 419
40589 Düsseldorf, NRW, Germany

tolj.org system architecture      Kannel Software Foundation (KSF)
http://www.tolj.org/              http://www.kannel.org/

mailto:st_{at}_tolj.org           mailto:stolj_{at}_kannel.org
-------------------------------------------------------------------
Index: gw/sqlbox_sdb.c
===================================================================
--- gw/sqlbox_sdb.c     (revision 42)
+++ gw/sqlbox_sdb.c     (working copy)
@@ -234,10 +234,6 @@
         return;
     }
 
-    // checking if message is unicode and converting the message back to hex 
values to be able to store in the database
-    if(msg->sms.coding == 2)
-        octstr_binary_to_hex(msg->sms.msgdata,1);
-
     sql = octstr_format(queries->insert_query, sqlbox_logtable, st_str(momt), 
st_str(msg->sms.sender),
         st_str(msg->sms.receiver), st_str(msg->sms.udhdata), 
st_str(msg->sms.msgdata), st_num(msg->sms.time),
         st_str(msg->sms.smsc_id), st_str(msg->sms.service), 
st_str(msg->sms.account), st_num(msg->sms.sms_type),
Index: gw/sqlbox.c
===================================================================
--- gw/sqlbox.c (revision 42)
+++ gw/sqlbox.c (working copy)
@@ -66,6 +66,7 @@
 #include "gwlib/gwlib.h"
 #include "gwlib/dbpool.h"
 #include "gw/msg.h"
+#include "gw/sms.h"
 #include "gw/shared.h"
 #include "gw/bb.h"
 #include "sqlbox_sql.h"
@@ -164,6 +165,45 @@
     return 0;
 }
 
+
+/****************************************************************************
+ * Character convertion.
+ * 
+ * The 'msgdata' is read from the DB table as URL-encoded byte stream, 
+ * which we need to URL-decode to get the orginal message. We use this
+ * approach to get rid of the table character dependancy of the DB systems.
+ * The URL-encoded chars as a subset of ASCII which is typicall no problem
+ * for any of the supported DB systems.
+ */
+
+static int charset_processing(Msg *msg) 
+{
+    gw_assert(msg->type == sms);
+
+    /* URL-decode first */
+    if (octstr_url_decode(msg->sms.msgdata) == -1)
+        return -1;
+        
+    /* If a specific character encoding has been indicated by the
+     * user, then make sure we convert to our internal representations. */
+    if (octstr_len(msg->sms.charset)) {
+    
+        if (msg->sms.coding == DC_7BIT) {
+            /* For 7 bit, convert to UTF-8 */
+            if (charset_convert(msg->sms.msgdata, 
octstr_get_cstr(msg->sms.charset), "UTF-8") < 0)
+                return -1;
+        } 
+        else if (msg->sms.coding == DC_UCS2) {
+            /* For UCS-2, convert to UTF-16BE */
+            if (charset_convert(msg->sms.msgdata, 
octstr_get_cstr(msg->sms.charset), "UTF-16BE") < 0) 
+                return -1;
+        }
+    }
+    
+    return 0;
+}
+
+
 /*
  *-------------------------------------------------
  *  receiver thingies
@@ -209,10 +249,6 @@
 {
     Octstr *pack;
 
-    // checking if the message is unicode and converting it to binary for 
submitting
-    if(pmsg->sms.coding == 2)
-    octstr_hex_to_binary(pmsg->sms.msgdata);
-
     pack = msg_pack(pmsg);
 
     if (pack == NULL)
@@ -247,12 +283,12 @@
 
         if (msg_type(msg) == sms) {
             debug("sqlbox", 0, "smsbox_to_bearerbox: sms received");
-           msg_escaped = msg_duplicate(msg);
-           gw_sql_save_msg(msg_escaped, octstr_imm("MT"));
-           msg_destroy(msg_escaped);
-       }
+            msg_escaped = msg_duplicate(msg);
+            gw_sql_save_msg(msg_escaped, octstr_imm("MT"));
+            msg_destroy(msg_escaped);
+        }
 
-       send_msg(conn->bearerbox_connection, conn, msg);
+        send_msg(conn->bearerbox_connection, conn, msg);
 
         /* if this is an identification message from an smsbox instance */
         if (msg_type(msg) == admin && msg->admin.command == cmd_identify) {
@@ -361,22 +397,22 @@
 
     while (sqlbox_status == SQL_RUNNING && conn->alive) {
 
-    msg = read_from_box(conn->bearerbox_connection, conn);
+        msg = read_from_box(conn->bearerbox_connection, conn);
 
-    if (msg == NULL) {
-        /* tell sqlbox to die */
-       conn->alive = 0;
-       debug("sqlbox", 0, "bearerbox_to_smsbox: connection to bearerbox 
died.");
-       break;
-    }
-    if (msg_type(msg) == admin) {
-       if (msg->admin.command == cmd_shutdown || msg->admin.command == 
cmd_restart) {
+        if (msg == NULL) {
             /* tell sqlbox to die */
-           conn->alive = 0;
-            debug("sqlbox", 0, "bearerbox_to_smsbox: Bearerbox told us to 
shutdown.");
-           break;
-       }
-    }
+            conn->alive = 0;
+            debug("sqlbox", 0, "bearerbox_to_smsbox: connection to bearerbox 
died.");
+            break;
+        }
+        if (msg_type(msg) == admin) {
+            if (msg->admin.command == cmd_shutdown || msg->admin.command == 
cmd_restart) {
+                /* tell sqlbox to die */
+                conn->alive = 0;
+                debug("sqlbox", 0, "bearerbox_to_smsbox: Bearerbox told us to 
shutdown.");
+                break;
+            }
+        }
 
         if (msg_type(msg) == heartbeat) {
         // todo
@@ -385,18 +421,18 @@
             continue;
         }
         if (!conn->alive) {
-           msg_destroy(msg);
-           break;
-       }
-    if (msg_type(msg) == sms) {
-       msg_escaped = msg_duplicate(msg);
-        if (msg->sms.sms_type != report_mo)
-            gw_sql_save_msg(msg_escaped, octstr_imm("MO"));
-        else
-            gw_sql_save_msg(msg_escaped, octstr_imm("DLR"));
-       msg_destroy(msg_escaped);
-    }
-    send_msg(conn->smsbox_connection, conn, msg);
+            msg_destroy(msg);
+            break;
+        }
+        if (msg_type(msg) == sms) {
+            msg_escaped = msg_duplicate(msg);
+            if (msg->sms.sms_type != report_mo)
+                gw_sql_save_msg(msg_escaped, octstr_imm("MO"));
+            else
+                gw_sql_save_msg(msg_escaped, octstr_imm("DLR"));
+            msg_destroy(msg_escaped);
+        }
+        send_msg(conn->smsbox_connection, conn, msg);
         msg_destroy(msg);
     }
     /* the client closes the connection, after that die in receiver */
@@ -547,6 +583,11 @@
 
     while (sqlbox_status == SQL_RUNNING && boxc->alive) {
         if ((msg = gw_sql_fetch_msg()) != NULL) {
+            if (charset_processing(msg) == -1) {
+                error(0, "Could not charset process message, dropping it!");
+                msg_destroy(msg);
+                continue;
+            }
             if (global_sender != NULL && (msg->sms.sender == NULL || 
octstr_len(msg->sms.sender) == 0)) {
                 msg->sms.sender = octstr_duplicate(global_sender);
             }
Index: gw/sqlbox_sqlite3.c
===================================================================
--- gw/sqlbox_sqlite3.c (revision 42)
+++ gw/sqlbox_sqlite3.c (working copy)
@@ -198,10 +198,6 @@
     int stuffcount = 0;
     DBPoolConn *pc;
 
-    // checking if message is unicode and converting the message back to hex 
values to be able to store in the database
-    if(msg->sms.coding == 2)
-             octstr_binary_to_hex(msg->sms.msgdata,1);
-
     pc = dbpool_conn_consume(pool);
     if (pc == NULL) {
         error(0, "SQLITE3: Database pool got no connection! DB update 
failed!");
Index: gw/sqlbox_pgsql.c
===================================================================
--- gw/sqlbox_pgsql.c   (revision 42)
+++ gw/sqlbox_pgsql.c   (working copy)
@@ -144,10 +144,6 @@
     Octstr *stuffer[30];
     int stuffcount = 0;
 
-    // checking if message is unicode and converting the message back to hex 
values to be able to store in the database
-        if(msg->sms.coding == 2)
-                octstr_binary_to_hex(msg->sms.msgdata,1);
-
     sql = octstr_format(SQLBOX_PGSQL_INSERT_QUERY, sqlbox_logtable, 
st_str(momt), st_str(msg->sms.sender),
         st_str(msg->sms.receiver), st_str(msg->sms.udhdata), 
st_str(msg->sms.msgdata), st_num(msg->sms.time),
         st_str(msg->sms.smsc_id), st_str(msg->sms.service), 
st_str(msg->sms.account), st_num(msg->sms.sms_type),
Index: gw/sqlbox_mssql.c
===================================================================
--- gw/sqlbox_mssql.c   (revision 42)
+++ gw/sqlbox_mssql.c   (working copy)
@@ -179,9 +179,6 @@
         error(0, "MSSql: DBPool Error!");
         return;
     }
-    // checking if message is unicode and converting the message back to hex 
values to be able to store in the database
-    if(msg->sms.coding == 2)
-             octstr_binary_to_hex(msg->sms.msgdata,1);
 
     sql = octstr_format(SQLBOX_MSSQL_INSERT_QUERY, sqlbox_logtable, 
st_str(momt), st_str(msg->sms.sender),
         st_str(msg->sms.receiver), st_str(msg->sms.udhdata), 
st_str(msg->sms.msgdata), st_num(msg->sms.time),
Index: gw/sqlbox_sqlite.c
===================================================================
--- gw/sqlbox_sqlite.c  (revision 42)
+++ gw/sqlbox_sqlite.c  (working copy)
@@ -216,10 +216,6 @@
         return;
     }
 
-    // checking if message is unicode and converting the message back to hex 
values to be able to store in the database
-    if(msg->sms.coding == 2)
-        octstr_binary_to_hex(msg->sms.msgdata,1);
-
     sql = octstr_format(SQLBOX_SQLITE_INSERT_QUERY, sqlbox_logtable, 
st_str(momt), st_str(msg->sms.sender),
         st_str(msg->sms.receiver), st_str(msg->sms.udhdata), 
st_str(msg->sms.msgdata), st_num(msg->sms.time),
         st_str(msg->sms.smsc_id), st_str(msg->sms.service), 
st_str(msg->sms.account), st_num(msg->sms.sms_type),
Index: gw/sqlbox_sql.h
===================================================================
--- gw/sqlbox_sql.h     (revision 42)
+++ gw/sqlbox_sql.h     (working copy)
@@ -38,7 +38,11 @@
 struct server_type *sql_type;
 
 #define gw_sql_fetch_msg sql_type->sql_fetch_msg
-#define gw_sql_save_msg sql_type->sql_save_msg
+#define gw_sql_save_msg(message, table) \
+    do { \
+        octstr_url_encode(message->sms.msgdata); \
+        sql_type->sql_save_msg(message, table); \
+    } while (0)
 #define gw_sql_enter sql_type->sql_enter
 #define gw_sql_leave sql_type->sql_leave
 
Index: gw/sqlbox_oracle.c
===================================================================
--- gw/sqlbox_oracle.c  (revision 42)
+++ gw/sqlbox_oracle.c  (working copy)
@@ -203,9 +203,6 @@
         error(0, "Oracle: DBPool Error!");
         return;
     }
-    // checking if message is unicode and converting the message back to hex 
values to be able to store in the database
-    if(msg->sms.coding == 2)
-             octstr_binary_to_hex(msg->sms.msgdata,1);
 
     sql = octstr_format(SQLBOX_ORACLE_INSERT_QUERY, sqlbox_logtable, 
st_str(momt), st_str(msg->sms.sender),
         st_str(msg->sms.receiver), st_str(msg->sms.udhdata), 
st_str(msg->sms.msgdata), st_num(msg->sms.time),
Index: gw/sqlbox_mysql.c
===================================================================
--- gw/sqlbox_mysql.c   (revision 42)
+++ gw/sqlbox_mysql.c   (working copy)
@@ -191,12 +191,6 @@
     Octstr *sql;
     Octstr *stuffer[30];
     int stuffcount = 0;
-    /*
-     * checking if message is unicode and converting the message back to
-     * hex values to be able to store in the database
-     */
-    if(msg->sms.coding == 2)
-        octstr_binary_to_hex(msg->sms.msgdata, 1);
 
     sql = octstr_format(SQLBOX_MYSQL_INSERT_QUERY, sqlbox_logtable, 
st_str(momt), st_str(msg->sms.sender),
         st_str(msg->sms.receiver), st_str(msg->sms.udhdata), 
st_str(msg->sms.msgdata), st_num(msg->sms.time),

Reply via email to