Hi,
it seems the sms_split function has always been buggy and never worked correctly.
I found two issues:
1) max_part_len calculation was wrong.
It was:
max_len * 8 / 7 - udh_len
But because the UDH is never packed, this is wrong and should be:
(max_len - udh_len) * 8 / 7
2) We dropped the last UTF-8 multibyte sequence (thanks to Stipe for the report!).
To reproduce, just send the following message via smsbox:
text=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa%c3%9f
To fix the bug we must first convert msgdata from UTF-8 to GSM, then
gsm_truncate it, and then convert it back to UTF-8. The length of the
resulting UTF-8 string is our allowed max_part_len.
Please find attached patch that should fix those issues.
Please test and vote!
--
Thanks,
Alex
=== gw/sms.c
==================================================================
--- gw/sms.c (revision 206)
+++ gw/sms.c (local)
@@ -267,8 +267,7 @@
static Octstr *extract_msgdata_part_by_coding(Msg *msg, Octstr *split_chars,
int max_part_len)
{
- Octstr *temp = NULL;
- int pos, esc_count;
+ Octstr *temp = NULL, *temp_utf;
if (msg->sms.coding == DC_8BIT || msg->sms.coding == DC_UCS2) {
/* nothing to do here, just call the original extract_msgdata_part */
@@ -283,17 +282,16 @@
charset_utf8_to_gsm(temp);
charset_gsm_truncate(temp, max_part_len);
- pos = esc_count = 0;
+ /* calculate utf-8 length */
+ temp_utf = octstr_duplicate(temp);
+ charset_gsm_to_utf8(temp_utf);
+ max_part_len = octstr_len(temp_utf);
- while ((pos = octstr_search_char(temp, 27, pos)) != -1) {
- ++pos;
- ++esc_count;
- }
-
octstr_destroy(temp);
+ octstr_destroy(temp_utf);
/* now just call the original extract_msgdata_part with the new length */
- return extract_msgdata_part(msg->sms.msgdata, split_chars, max_part_len - esc_count);
+ return extract_msgdata_part(msg->sms.msgdata, split_chars, max_part_len);
}
@@ -316,7 +314,7 @@
if (orig->sms.coding == DC_8BIT || orig->sms.coding == DC_UCS2)
max_part_len = max_octets - udh_len - hf_len;
else
- max_part_len = max_octets * 8 / 7 - udh_len - hf_len;
+ max_part_len = (max_octets - udh_len) * 8 / 7 - hf_len;
if (sms_msgdata_len(orig) > max_part_len && catenate) {
/* Change part length to take concatenation overhead into account */
@@ -326,7 +324,7 @@
if (orig->sms.coding == DC_8BIT || orig->sms.coding == DC_UCS2)
max_part_len = max_octets - udh_len - hf_len;
else
- max_part_len = max_octets * 8 / 7 - udh_len - hf_len;
+ max_part_len = (max_octets - udh_len) * 8 / 7 - hf_len;
}
/* ensure max_part_len is never negativ */
@@ -336,6 +334,7 @@
msgno = 0;
list = gwlist_create();
+ last = 0;
do {
msgno++;
part = msg_duplicate(orig);
@@ -350,19 +349,15 @@
part->sms.dlr_mask = 0;
}
octstr_destroy(part->sms.msgdata);
- if (sms_msgdata_len(temp) <= max_part_len || msgno == max_messages) {
- part->sms.msgdata = temp->sms.msgdata ?
- octstr_copy(temp->sms.msgdata, 0, max_part_len) : octstr_create("");
+ if (sms_msgdata_len(temp) <= max_part_len || msgno == max_messages)
last = 1;
- }
- else {
- part->sms.msgdata =
- extract_msgdata_part_by_coding(temp, split_chars,
- max_part_len - nlsuf_len);
- /* create new id for every part, except last */
- uuid_generate(part->sms.id);
- last = 0;
- }
+
+ part->sms.msgdata =
+ extract_msgdata_part_by_coding(temp, split_chars,
+ max_part_len - nlsuf_len);
+ /* create new id for every part, except last */
+ uuid_generate(part->sms.id);
+
if (header)
octstr_insert(part->sms.msgdata, header, 0);
if (footer)