Re: [Devel] [PATCH] Support UTF-8 characters in Subject field

2010-11-30 Thread Paul Bagyenda
Patches applied to CVS
On Nov 26, 2010, at 19:52, Piotr Isajew wrote:

> updated
> ___
> Devel mailing list
> Devel@mbuni.org
> http://lists.mbuni.org/mailman/listinfo/devel

___
Devel mailing list
Devel@mbuni.org
http://lists.mbuni.org/mailman/listinfo/devel


Re: [Devel] [PATCH] Support UTF-8 characters in Subject field

2010-11-26 Thread Piotr Isajew
updated
(3, ANSI_X3.4-1968)
(4, ISO_8859-1:1987)
(5, ISO_8859-2:1987)
(6, ISO_8859-3:1988)
(7, ISO_8859-4:1988)
(8, ISO_8859-5:1988)
(9, ISO_8859-6:1987)
(10, ISO_8859-7:1987)
(11, ISO_8859-8:1988)
(12, ISO_8859-9:1989)
(13, ISO-8859-10)
(15, JIS_X0201)
(17, SHIFT_JIS)
(18, EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE)
(36, KS_C_5601-1987)
(37, ISO-2022-KR)
(38, EUC-KR)
(39, ISO-2022-JP)
(40, ISO-2022-JP-2)
(42, JIS_C6220-1969-RO)
(56, GB_1988-80)
(57, GB_2312-80)
(63, JIS_C6226-1983)
(98, JIS_X0212-1990)
(103, UNICODE-1-1-UTF-7)
(104, ISO-2022-CN)
(105, ISO-2022-CN-EXT)
(106, UTF-8)
(109, ISO-8859-13)
(110, ISO-8859-14)
(111, ISO-8859-15)
(112, ISO-8859-16)
(113, GBK)
(114, GB18030)
(1000, ISO-10646-UCS-2)
(1001, ISO-10646-UCS-4)
(1010, UNICODE-1-1)
(1012, UTF-7)
(1013, UTF-16BE)
(1014, UTF-16LE)
(1015, UTF-16)
(1017, UTF-32)
(1018, UTF-32BE)
(1019, UTF-32LE)
(2004, HP-ROMAN8)
(2009, IBM850)
(2013, IBM862)
(2025, GB2312)
(2026, BIG5)
(2027, MACINTOSH)
(2082, VISCII)
(2084, KOI8-R)
(2085, HZ-GB-2312)
(2086, IBM866)
(2088, KOI8-U)
(2101, BIG5-HKSCS)
(2103, PTCP154)
(2109, WINDOWS-874)
(2250, WINDOWS-1250)
(2251, WINDOWS-1251)
(2252, WINDOWS-1252)
(2253, WINDOWS-1253)
(2254, WINDOWS-1254)
(2255, WINDOWS-1255)
(2256, WINDOWS-1256)
(2257, WINDOWS-1257)
(2258, WINDOWS-1258)
(2259, TIS-620)


pgp1zwL0EIkKn.pgp
Description: PGP signature
___
Devel mailing list
Devel@mbuni.org
http://lists.mbuni.org/mailman/listinfo/devel


Re: [Devel] [PATCH] Support UTF-8 characters in Subject field

2010-11-26 Thread Piotr Isajew
attached

On Fri, Nov 26, 2010 at 05:29:34PM +0300, Paul Bagyenda wrote:
> The log level patch has been applied. This one requires some thought. Ideally 
> we should decode to RFC 2047 format, and encode to the MM1 format. And this 
> should apply to more than just the "subject" field. That is more work than 
> this patch does. I need to do it, but first (and it's Friday, which means 
> lazy days) I need a cleaned mapping from MIBEnum values to charset names (from 
> http://www.iana.org/assignments/character-sets ) in a format that libiconv 
> can understand. If you are feeling less lazy, you could put these into a text 
> file for me (col1 = MIB enum, col2 = name) and I'll do the code changes.
> 
> :)
> 
> P.
> 
> On Nov 26, 2010, at 15:21, Piotr Isajew wrote:
> 
> > I tested this for MM1 outgoing messages and it works. I don't know whether
> > it will work correctly (or whether it even matters) for other protocols.
> > 
> > --- mbuni-cvs/mmlib/mms_msg.c   2010-10-26 12:18:13.0 +0200
> > +++ mbuni/mmlib/mms_msg.c   2010-11-26 13:05:18.950730650 +0100
> > @@ -53,6 +53,34 @@
> >  wsp_pack_short_integer(s, c);
> > }
> > 
> > +static void pack_utf8_encoded_text(Octstr *s, Octstr *value)
> > +{
> > +  const unsigned short short_len = 30;
> > +  const unsigned char length_quote = 31;
> > +  const unsigned char utf8_enc = 0x6a;
> > +  int need_space =  octstr_get_char(value, 0) > 0x7f; /* see below */
> > +  unsigned long len = octstr_len(value) + 2 + need_space;
> > +  /* Pack value length */
> > +  if(len <= short_len) {
> > +octstr_append_char(s, len);
> > +  } else {
> > +octstr_append_char(s, length_quote);
> > +octstr_append_uintvar(s, len);
> > +  }
> > +  /* Pack encoding */
> > +  wsp_pack_short_integer(s, utf8_enc);
> > +
> > +  /* Pack the rest */
> > +  if(need_space) {
> > +/* it looks like if UTF-8 character goes first, it's not properly
> > +   decoded on handsets, so we insert a space before it. Ugly, but
> > +   appears to work. */
> > +octstr_append_char(s, 0x20);
> > +  }
> > +  octstr_append(s, value);
> > +  octstr_append_char(s, 0);
> > +}
> > +
> > #if 0
> > static void encode_uint(Octstr *os, unsigned int l)
> > {
> > @@ -683,11 +711,12 @@
> >  unsigned char c;
> > 
> >  switch (field_type) {
> > - 
> > + case MMS_HEADER_SUBJECT:
> > +   pack_utf8_encoded_text(os, value);
> > +   break;
> >  case MMS_HEADER_TO:
> >  case MMS_HEADER_CC:
> >  case MMS_HEADER_BCC:
> > - case MMS_HEADER_SUBJECT:
> >  case MMS_HEADER_TRANSACTION_ID:
> >  case MMS_HEADER_MESSAGE_ID:
> >  case MMS_HEADER_REPLY_CHARGING_ID:
> > @@ -695,8 +724,8 @@
> > 
> >  case MMS_HEADER_STORE_STATUS_TEXT:
> >   
> > - wsp_pack_text(os, value); /* XXX need to deal with charset issues. */
> > - break;   
> > + wsp_pack_text(os, value); /* XXX need to deal with charset 
> > issues. */
> > + break;   
> >   
> >  case MMS_HEADER_RESPONSE_TEXT: /* make sure response status does not 
> > begin with digit!! Has special meaning*/
> >  case MMS_HEADER_CONTENT_LOCATION:
> > ___
> > Devel mailing list
> > Devel@mbuni.org
> > http://lists.mbuni.org/mailman/listinfo/devel
> 
> ___
> Devel mailing list
> Devel@mbuni.org
> http://lists.mbuni.org/mailman/listinfo/devel
> 
> 
(3, ANSI_X3.4-1968)
(4, ISO_8859-1:1987)
(5, ISO_8859-2:1987)
(6, ISO_8859-3:1988)
(7, ISO_8859-4:1988)
(8, ISO_8859-5:1988)
(9, ISO_8859-6:1987)
(10, ISO_8859-7:1987)
(11, ISO_8859-8:1988)
(12, ISO_8859-9:1989)
(13, ISO-8859-10)
(15, JIS_X0201)
(36, KS_C_5601-1987)
(37, ISO-2022-KR)
(38, EUC-KR)
(39, ISO-2022-JP)
(40, ISO-2022-JP-2)
(56, GB_1988-80)
(57, GB_2312-80)
(63, JIS_C6226-1983)
(98, JIS_X0212-1990)
(103, UNICODE-1-1-UTF-7)
(104, ISO-2022-CN)
(105, ISO-2022-CN-EXT)
(106, UTF-8)
(109, ISO-8859-13)
(110, ISO-8859-14)
(111, ISO-8859-15)
(112, ISO-8859-16)
(113, GBK)
(114, GB18030)
(1000, ISO-10646-UCS-2)
(1001, ISO-10646-UCS-4)
(1010, UNICODE-1-1)
(1012, UTF-7)
(1013, UTF-16BE)
(1014, UTF-16LE)
(1015, UTF-16)
(1017, UTF-32)
(1018, UTF-32BE)
(1019, UTF-32LE)
(2009, IBM850)
(2013, IBM862)
(2025, GB2312)
(2082, VISCII)
(2084, KOI8-R)
(2085, HZ-GB-2312)
(2086, IBM866)
(2088, KOI8-U)
(2103, PTCP154)
(2259, TIS-620)


pgpSUMri1sELV.pgp
Description: PGP signature
___
Devel mailing list
Devel@mbuni.org
http://lists.mbuni.org/mailman/listinfo/devel


Re: [Devel] [PATCH] Support UTF-8 characters in Subject field

2010-11-26 Thread Piotr Isajew
On Fri, Nov 26, 2010 at 05:29:34PM +0300, Paul Bagyenda wrote:
> The log level patch has been applied. This one requires some thought. Ideally 
> we should decode to RFC 2047 format, and encode to the MM1 format. And this 
> should apply to more than just the "subject" field. That is more work than 
> this patch does. I need to do it, but first (and it's Friday, which means 
> lazy days) I need a cleaned mapping from MIBEnum values to charset names (from 
> http://www.iana.org/assignments/character-sets ) in a format that libiconv 
> can understand. If you are feeling less lazy, you could put these into a text 
> file for me (col1 = MIB enum, col2 = name) and I'll do the code changes.


Sure. Writing a little Perl parser is all I need to make myself
happy on a Friday evening ;-)


pgpHVJnKLswRa.pgp
Description: PGP signature
___
Devel mailing list
Devel@mbuni.org
http://lists.mbuni.org/mailman/listinfo/devel


Re: [Devel] [PATCH] Support UTF-8 characters in Subject field

2010-11-26 Thread Paul Bagyenda
The log level patch has been applied. This one requires some thought. Ideally 
we should decode to RFC 2047 format, and encode to the MM1 format. And this 
should apply to more than just the "subject" field. That is more work than this 
patch does. I need to do it, but first (and it's Friday, which means lazy days) 
I need a cleaned mapping from MIBEnum values to charset names (from 
http://www.iana.org/assignments/character-sets ) in a format that libiconv can 
understand. If you are feeling less lazy, you could put these into a text file 
for me (col1 = MIB enum, col2 = name) and I'll do the code changes.

:)

P.

On Nov 26, 2010, at 15:21, Piotr Isajew wrote:

> I tested this for MM1 outgoing messages and it works. I don't know whether
> it will work correctly (or whether it even matters) for other protocols.
> 
> --- mbuni-cvs/mmlib/mms_msg.c 2010-10-26 12:18:13.0 +0200
> +++ mbuni/mmlib/mms_msg.c 2010-11-26 13:05:18.950730650 +0100
> @@ -53,6 +53,34 @@
>  wsp_pack_short_integer(s, c);
> }
> 
> +static void pack_utf8_encoded_text(Octstr *s, Octstr *value)
> +{
> +  const unsigned short short_len = 30;
> +  const unsigned char length_quote = 31;
> +  const unsigned char utf8_enc = 0x6a;
> +  int need_space =  octstr_get_char(value, 0) > 0x7f; /* see below */
> +  unsigned long len = octstr_len(value) + 2 + need_space;
> +  /* Pack value length */
> +  if(len <= short_len) {
> +octstr_append_char(s, len);
> +  } else {
> +octstr_append_char(s, length_quote);
> +octstr_append_uintvar(s, len);
> +  }
> +  /* Pack encoding */
> +  wsp_pack_short_integer(s, utf8_enc);
> +
> +  /* Pack the rest */
> +  if(need_space) {
> +/* it looks like if UTF-8 character goes first, it's not properly
> +   decoded on handsets, so we insert a space before it. Ugly, but
> +   appears to work. */
> +octstr_append_char(s, 0x20);
> +  }
> +  octstr_append(s, value);
> +  octstr_append_char(s, 0);
> +}
> +
> #if 0
> static void encode_uint(Octstr *os, unsigned int l)
> {
> @@ -683,11 +711,12 @@
>  unsigned char c;
> 
>  switch (field_type) {
> -   
> + case MMS_HEADER_SUBJECT:
> +   pack_utf8_encoded_text(os, value);
> +   break;
>  case MMS_HEADER_TO:
>  case MMS_HEADER_CC:
>  case MMS_HEADER_BCC:
> - case MMS_HEADER_SUBJECT:
>  case MMS_HEADER_TRANSACTION_ID:
>  case MMS_HEADER_MESSAGE_ID:
>  case MMS_HEADER_REPLY_CHARGING_ID:
> @@ -695,8 +724,8 @@
> 
>  case MMS_HEADER_STORE_STATUS_TEXT:
> 
> -   wsp_pack_text(os, value); /* XXX need to deal with charset issues. */
> -   break;   
> +   wsp_pack_text(os, value); /* XXX need to deal with charset 
> issues. */
> + break; 
> 
>  case MMS_HEADER_RESPONSE_TEXT: /* make sure response status does not 
> begin with digit!! Has special meaning*/
>  case MMS_HEADER_CONTENT_LOCATION:
> ___
> Devel mailing list
> Devel@mbuni.org
> http://lists.mbuni.org/mailman/listinfo/devel

___
Devel mailing list
Devel@mbuni.org
http://lists.mbuni.org/mailman/listinfo/devel


[Devel] [PATCH] Support UTF-8 characters in Subject field

2010-11-26 Thread Piotr Isajew
I tested this for MM1 outgoing messages and it works. I don't know whether
it will work correctly (or whether it even matters) for other protocols.

--- mbuni-cvs/mmlib/mms_msg.c   2010-10-26 12:18:13.0 +0200
+++ mbuni/mmlib/mms_msg.c   2010-11-26 13:05:18.950730650 +0100
@@ -53,6 +53,34 @@
  wsp_pack_short_integer(s, c);
 }
 
+static void pack_utf8_encoded_text(Octstr *s, Octstr *value)
+{
+  const unsigned short short_len = 30;
+  const unsigned char length_quote = 31;
+  const unsigned char utf8_enc = 0x6a;
+  int need_space =  octstr_get_char(value, 0) > 0x7f; /* see below */
+  unsigned long len = octstr_len(value) + 2 + need_space;
+  /* Pack value length */
+  if(len <= short_len) {
+octstr_append_char(s, len);
+  } else {
+octstr_append_char(s, length_quote);
+octstr_append_uintvar(s, len);
+  }
+  /* Pack encoding */
+  wsp_pack_short_integer(s, utf8_enc);
+
+  /* Pack the rest */
+  if(need_space) {
+/* it looks like if UTF-8 character goes first, it's not properly
+   decoded on handsets, so we insert a space before it. Ugly, but
+   appears to work. */
+octstr_append_char(s, 0x20);
+  }
+  octstr_append(s, value);
+  octstr_append_char(s, 0);
+}
+
 #if 0
 static void encode_uint(Octstr *os, unsigned int l)
 {
@@ -683,11 +711,12 @@
  unsigned char c;
 
  switch (field_type) {
- 
+ case MMS_HEADER_SUBJECT:
+   pack_utf8_encoded_text(os, value);
+   break;
  case MMS_HEADER_TO:
  case MMS_HEADER_CC:
  case MMS_HEADER_BCC:
- case MMS_HEADER_SUBJECT:
  case MMS_HEADER_TRANSACTION_ID:
  case MMS_HEADER_MESSAGE_ID:
  case MMS_HEADER_REPLY_CHARGING_ID:
@@ -695,8 +724,8 @@
 
  case MMS_HEADER_STORE_STATUS_TEXT:
  
- wsp_pack_text(os, value); /* XXX need to deal with charset issues. */
- break;   
+ wsp_pack_text(os, value); /* XXX need to deal with charset issues. */
+ break;   
  
  case MMS_HEADER_RESPONSE_TEXT: /* make sure response status does not begin with digit!! Has special meaning*/
  case MMS_HEADER_CONTENT_LOCATION:


pgpLUFjS4GYeL.pgp
Description: PGP signature
___
Devel mailing list
Devel@mbuni.org
http://lists.mbuni.org/mailman/listinfo/devel