Hi, * Holger Hans Peter Freyther <[email protected]> [2010-07-09 13:00]: > On 07/09/2010 04:58 AM, Sylvain Munaut wrote: > >> (even though what could be properly decoded before > >> also can be properly decoded now, no change for this). > > > > Well, I think that before > > > > encode(decode(x)) == x > > if the decoder is too much work right now, you can change the test to > only test encoding. So make it encode(x) == expected_result and add one > test case for the string that was failing, convert the old decoding to > decode(x) == expected_result as well. and you might feel like adding an > expected failure (because the code is missing). > > The benefit of changing the test is that you can more easily convince me > that the new code can do everything the old promised and is fixing > something that didn't work with the old one.
Alright, I cleaned up the previous code a bit, implemented the decoder, and added a test case that the previous code encoded incorrectly. Please note that just using the new test case string with the old code will also pass the test, even though the encoding is wrong. You need to compare the encoded values to, e.g., the output of pduspy[0]. Patch attached. Cheers Nico [0] http://www.nobbi.com/download/pduspy.zip
>From e3b3aa0c985fba0a3697a1efc191112aa01a391f Mon Sep 17 00:00:00 2001 From: Nico Golde <[email protected]> Date: Fri, 9 Jul 2010 17:19:12 +0200 Subject: [PATCH] * rewrite GSM 7bit default encoding/decoding based on a lookup table as the previous code produced wrong encodings for certain characters. --- include/osmocore/gsm_utils.h | 28 ++++++++++++ src/gsm_utils.c | 99 +++++++++++++++++++++++++++++++++-------- tests/sms/sms_test.c | 17 +++++++- 3 files changed, 123 insertions(+), 21 deletions(-) diff --git a/include/osmocore/gsm_utils.h b/include/osmocore/gsm_utils.h index 7dc2388..64f9edc 100644 --- a/include/osmocore/gsm_utils.h +++ b/include/osmocore/gsm_utils.h @@ -3,6 +3,7 @@ * (C) 2008 by Daniel Willmann <[email protected]> * (C) 2009 by Holger Hans Peter Freyther <[email protected]> * (C) 2009-2010 by Harald Welte <[email protected]> + * (C) 2010 by Nico Golde <[email protected]> * * All Rights Reserved * @@ -53,6 +54,33 @@ enum gsm_band { GSM_BAND_810 = 0x80, }; +/* ETSI GSM 03.38 6.2.1 and 6.2.1.1 default alphabet + * Greek symbols at hex positions 0x10 and 0x12-0x1a + * left out as they can't be handled with a char and + * since most phones don't display or write these + * characters this would only needlessly make the code + * more complex +*/ +unsigned char gsm_7bit_alphabet[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0x0d, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x20, 0x21, 0x22, 0x23, 0x02, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, + 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, + 0x3c, 0x3d, 0x3e, 0x3f, 0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, + 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x3c, 0x2f, 0x3e, 0x14, 0x11, 0xff, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 
0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x28, 0x40, 0x29, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x0c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x40, 0xff, 0x01, 0xff, + 0x03, 0xff, 0x7b, 0x7d, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5c, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5b, 0x7e, 0x5d, 0xff, 0x7c, 0xff, 0xff, 0xff, + 0xff, 0x5b, 0x0e, 0x1c, 0x09, 0xff, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5d, + 0xff, 0xff, 0xff, 0xff, 0x5c, 0xff, 0x0b, 0xff, 0xff, 0xff, 0x5e, 0xff, 0xff, 0x1e, 0x7f, + 0xff, 0xff, 0xff, 0x7b, 0x0f, 0x1d, 0xff, 0x04, 0x05, 0xff, 0xff, 0x07, 0xff, 0xff, 0xff, + 0xff, 0x7d, 0x08, 0xff, 0xff, 0xff, 0x7c, 0xff, 0x0c, 0x06, 0xff, 0xff, 0x7e, 0xff, 0xff +}; + const char *gsm_band_name(enum gsm_band band); enum gsm_band gsm_band_parse(const char *mhz); diff --git a/src/gsm_utils.c b/src/gsm_utils.c index dc97cef..fb69377 100644 --- a/src/gsm_utils.c +++ b/src/gsm_utils.c @@ -2,6 +2,7 @@ * (C) 2008 by Daniel Willmann <[email protected]> * (C) 2009 by Holger Hans Peter Freyther <[email protected]> * (C) 2009-2010 by Harald Welte <[email protected]> + * (C) 2010 by Nico Golde <[email protected]> * * All Rights Reserved * @@ -34,52 +35,110 @@ #include "../config.h" -/* GSM 03.38 6.2.1 Charachter packing */ +/* GSM 03.38 6.2.1 Character lookup for decoding */ +static int gsm_septet_lookup(uint8_t ch) +{ + int i = 0; + for(; i < sizeof(gsm_7bit_alphabet); i++){ + if(gsm_7bit_alphabet[i] == ch) + return i; + } + return -1; +} + +/* GSM 03.38 6.2.1 Character unpacking */ int gsm_7bit_decode(char *text, const uint8_t *user_data, uint8_t length) { int i = 0; int l = 0; + uint8_t *rtext = calloc(length, sizeof(uint8_t)); + uint8_t tmp; - /* FIXME: We need to account for user data headers here */ + /* FIXME: We need to account for user data headers here */ i += 
l; - for (; i < length; i ++) - *(text ++) = + for (; i < length; i ++){ + rtext[i] = ((user_data[(i * 7 + 7) >> 3] << (7 - ((i * 7 + 7) & 7))) | (user_data[(i * 7) >> 3] >> ((i * 7) & 7))) & 0x7f; + } + for(i = 0; i < length; i++){ + /* this is an extension character */ + if(rtext[i] == 0x1b){ + tmp = rtext[i+1]; + *(text++) = gsm_7bit_alphabet[0x7f + tmp]; + i++; + continue; + } + + *(text++) = gsm_septet_lookup(rtext[i]); + } + *text = '\0'; + free(rtext); - return i - l; + return i; } +/* GSM 03.38 6.2.1 Prepare character packing */ +static int gsm_septet_encode(uint8_t *result, const char *data) +{ + int i, y = 0; + uint8_t ch; + for(i = 0; i < strlen(data); i++){ + ch = data[i]; + switch(ch){ + /* fall-through for extension characters */ + case 0x0c: + case 0x5e: + case 0x7b: + case 0x7d: + case 0x5c: + case 0x5b: + case 0x7e: + case 0x5d: + case 0x7c: + result[y++] = 0x1b; + default: + result[y] = gsm_7bit_alphabet[ch]; + break; + } + y++; + } -/* GSM 03.38 6.2.1 Charachter packing */ + return y; +} + +/* GSM 03.38 6.2.1 Character packing */ int gsm_7bit_encode(uint8_t *result, const char *data) { - int i,j = 0; - unsigned char ch1, ch2; + int i,y,z = 0; + /* prepare for the worst case, every character expanding to two bytes */ + uint8_t *rdata = calloc(strlen(data) * 2, sizeof(uint8_t)); + uint8_t cb, nb; int shift = 0; - for ( i=0; i<strlen(data); i++ ) { + y = gsm_septet_encode(rdata, data); - ch1 = data[i] & 0x7F; - ch1 = ch1 >> shift; - ch2 = data[(i+1)] & 0x7F; - ch2 = ch2 << (7-shift); + for(i = 0; i < y; i++) { + if(shift == 7 && i + 1 < y){ + shift = 0; + continue; + } - ch1 = ch1 | ch2; + cb = (rdata[i] & 0x7f) >> shift; + if(i + 1 < y){ + nb = (rdata[i + 1] & 0x7f) << (7 - shift); + cb = cb | nb; + } - result[j++] = ch1; + result[z++] = cb; shift++; - - if ((shift == 7) && (i+1<strlen(data))) { - shift = 0; - i++; - } } + free(rdata); return i; } diff --git a/tests/sms/sms_test.c b/tests/sms/sms_test.c index f5183d5..4daf003 100644 --- 
a/tests/sms/sms_test.c +++ b/tests/sms/sms_test.c @@ -1,5 +1,6 @@ /* * (C) 2008 by Daniel Willmann <[email protected]> + * (C) 2010 by Nico Golde <[email protected]> * All Rights Reserved * * This program is free software; you can redistribute it and/or modify @@ -32,7 +33,7 @@ int main(int argc, char** argv) uint8_t *sms; uint8_t i; - /* test 7-bit coding/decoding */ + /* test 7-bit coding/decoding */ const char *input = "test text"; uint8_t length; uint8_t coded[256]; @@ -43,5 +44,19 @@ int main(int argc, char** argv) if (strcmp(result, input) != 0) { printf("7 Bit coding failed... life sucks\n"); printf("Wanted: '%s' got '%s'\n", input, result); + return -1; } + + memset(coded, 0, sizeof(coded)); + memset(result, 0, sizeof(coded)); + input = strdup("!$ a more#^- complicated test@@?_\%! case"); + length = gsm_7bit_encode(coded, input); + gsm_7bit_decode(result, coded, length); + if (strcmp(result, input) != 0) { + printf("7 Bit coding failed... life sucks\n"); + printf("Wanted: '%s' got '%s'\n", input, result); + return -2; + } + + return 0; } -- 1.7.1
