[Multisync-devel] charset problem

2004-07-09 Thread YuLei
Hi, all:

  I am using multisync with my Clie, T68 and Evolution in a Chinese
  environment (Debian sarge). It is just what I was looking for,
  but it seems that multisync does not handle Chinese characters well.
  After digging into the source code (thanks to the GPL), I made two fixes
  to resolve the problems:

1. If the properties of a key contain a charset specification, like
"N;CHARSET=UTF-7:+...", then remove this property and convert its value to UTF-8,
because Evolution can process UTF-8 properly even if no charset is specified.

2. Because UTF-8 uses all 8 bits of each byte, escaping (in the palm plugin) is not needed
after converting to UTF-8; otherwise it will corrupt the character encoding.

here is my patch:

diff -ur multisync-0.82.orig/src/sync_vtype.c multisync-0.82/src/sync_vtype.c
--- multisync-0.82.orig/src/sync_vtype.c2004-04-13 05:03:29.0 +0800
+++ multisync-0.82/src/sync_vtype.c 2004-07-07 22:44:28.0 +0800
@@ -61,6 +61,46 @@
   VTYPE_VCARD = 0xc
 } vtype_type;
 
+// convert charset of value from which specified in properties
+static void
+convert_charset (char* line, unsigned char** value, const char* charset)
+{ 
+  while (line = strchr (line, ';')) {
+const char* key = "CHARSET=";
+char org_charset[64];
+int len;
+iconv_t ic;
+
+line++; // skip the ';' character
+
+if ((strlen (line) <= strlen (key))
+   || g_strncasecmp (line, key, sizeof (key)))
+// key not found
+  continue;
+
+org_charset[0] = 0;
+len = strcspn (line + strlen (key), ";: \r\n");
+if (len >= sizeof (org_charset))
+  len = sizeof (org_charset) - 1;
+strncat (org_charset, line + strlen (key), len);
+
+ic = iconv_open (charset, org_charset);
+if (ic >= 0) {
+  char *utfvalue = g_malloc0(65536);
+  size_t inbytes = strlen(*value);
+  size_t outbytes = 65536;
+  char *inbuf = *value, *outbuf = utfvalue;
+
+  iconv(ic, &inbuf, &inbytes, &outbuf, &outbytes);
+  g_free(*value);
+  *value = utfvalue;
+  iconv_close(ic);
+}
+
+break;
+  } // while every ';' character
+}
+
 // Parse and correct a number of errors in the VCARD/VTODO data
 // The "opts" parameter decides which errors/features that should be corrected
 char* sync_vtype_convert(char *card, sync_voption opts, char* charset) {
@@ -74,6 +114,7 @@
   gboolean output = TRUE; // If feedthrough output is enabled
   vtype_type datatype = VTYPE_UNKNOWN;
 
   outcard = g_string_new("");
   while (incard) {
 char* endln;
@@ -136,6 +177,7 @@
value = tmp;
   }
   qp = FALSE; // Do a proper QP detection
+  convert_charset (line, &value, "UTF-8");
   if (endent)
endent[0] = 0;
   strncpy(name, line, 255);
@@ -312,7 +354,10 @@
  !g_strcasecmp(propdata, "END"))
triggerrelatedend = TRUE;
  if (!g_strcasecmp(propname, "CHARSET"))
+ {
strncpy(linecharset, propdata, 255);
+   outputprop = FALSE; // value has been converted to UTF-8
+ }
  if (adr && 
  (!g_strcasecmp(propname, "HOME") || 
  !g_strcasecmp(propname, "WORK"))) {
@@ -362,7 +407,7 @@
value = sync_vtype_decode_qp(value);
g_free(tmp);
   }
-  if ((opts & VOPTION_FIXCHARSET) && value && charset) {
+  if ((opts & VOPTION_FIXCHARSET) && value && charset && !linecharset[0]) {
iconv_t ic;
int t;
gboolean highchar = FALSE;
@@ -1132,6 +1177,7 @@
  res = tmp;
  start = newstart;
} while(start < card+strlen(card) && start[0] == ' ');
+   convert_charset (pos, &res, "UTF-8");
return(res);
   }
 }

diff -ur multisync-0.82.orig/plugins/palm_sync/src/vcard.c 
multisync-0.82/plugins/palm_sync/src/vcard.c
--- multisync-0.82.orig/plugins/palm_sync/src/vcard.c   2004-04-13 05:03:20.0 
+0800
+++ multisync-0.82/plugins/palm_sync/src/vcard.c2004-07-07 23:19:14.0 
+0800
@@ -116,11 +116,11 @@
 
/* note */
if(appointment.note && strlen(appointment.note))
-   prop = addPropValueO(vevent, VCDescriptionPropO, 
g_strescape(appointment.note, NULL));
+   prop = addPropValueO(vevent, VCDescriptionPropO, appointment.note);
 
/* description */
if(appointment.description) {
-   prop = addPropValueO(vevent, VCSummaryPropO, 
escape_chars(g_strescape(appointment.description, NULL)));
+   prop = addPropValueO(vevent, VCSummaryPropO, appointment.description);
}
 
/* begin and end*/
@@ -584,13 +584,13 @@
 
/* note */
if(todo.note && strlen(todo.note)) {
-   prop = addPropValueO(vtodo, VCDescriptionPropO, 
escape_chars(g_strescape(todo.note, NULL)));
+   prop = addPropValueO(vtodo, VCDescriptionPropO, todo.note);
//addPropValueO(prop, "ENCODING", "QUOTED-PRINTABLE");
}
 
/* description */
if(todo.description) {
-   addPropValueO(vtodo, VCSummaryPropO

Re: [Multisync-devel] charset problem

2004-07-09 Thread Tom Foottit
Thanks very much for your interest and for taking the time to prepare a
patch.

We'll have a look at it and let you know if we have to make changes and
when things go into CVS.

Thanks again,

Tom


On Fri, 2004-07-09 at 12:34, YuLei wrote:
> Hi, all:
> 
>   I am using multisync with my Clie, T68 and Evolution in chinese
>   environment (Debian-sarge), it's just what I am seeking for,
>   but seems that multisync did not deal chinese characters well,
>   after dig into the source code (thanks to GPL), I made two fixes
>   to resolve problems:
> 
> 1. If properties of key contains charset specification, like
> 'N;CHARSET=UTF-7:+...", then remove this property and convert its value to UTF-8, 
> because Evolution can process UTF-8 properly even if no charset specified
> 
> 2. Because UTF-8 use all 8 bits of byte, so escape (in palm plugin) is needn't
> after convert to UTF-8 or will corrupt the character encoding
> 
> here is my patch:
> 
> diff -ur multisync-0.82.orig/src/sync_vtype.c multisync-0.82/src/sync_vtype.c
> --- multisync-0.82.orig/src/sync_vtype.c  2004-04-13 05:03:29.0 +0800
> +++ multisync-0.82/src/sync_vtype.c   2004-07-07 22:44:28.0 +0800
> @@ -61,6 +61,46 @@
>VTYPE_VCARD = 0xc
>  } vtype_type;
>  
> +// convert charset of value from which specified in properties
> +static void
> +convert_charset (char* line, unsigned char** value, const char* charset)
> +{ 
> +  while (line = strchr (line, ';')) {
> +const char* key = "CHARSET=";
> +char org_charset[64];
> +int len;
> +iconv_t ic;
> +
> +line++; // skip the ';' character
> +
> +if ((strlen (line) <= strlen (key))
> + || g_strncasecmp (line, key, sizeof (key)))
> +// key not found
> +  continue;
> +
> +org_charset[0] = 0;
> +len = strcspn (line + strlen (key), ";: \r\n");
> +if (len >= sizeof (org_charset))
> +  len = sizeof (org_charset) - 1;
> +strncat (org_charset, line + strlen (key), len);
> +
> +ic = iconv_open (charset, org_charset);
> +if (ic >= 0) {
> +  char *utfvalue = g_malloc0(65536);
> +  size_t inbytes = strlen(*value);
> +  size_t outbytes = 65536;
> +  char *inbuf = *value, *outbuf = utfvalue;
> +
> +  iconv(ic, &inbuf, &inbytes, &outbuf, &outbytes);
> +  g_free(*value);
> +  *value = utfvalue;
> +  iconv_close(ic);
> +}
> +
> +break;
> +  } // while every ';' character
> +}
> +
>  // Parse and correct a number of errors in the VCARD/VTODO data
>  // The "opts" parameter decides which errors/features that should be corrected
>  char* sync_vtype_convert(char *card, sync_voption opts, char* charset) {
> @@ -74,6 +114,7 @@
>gboolean output = TRUE; // If feedthrough output is enabled
>vtype_type datatype = VTYPE_UNKNOWN;
>  
>outcard = g_string_new("");
>while (incard) {
>  char* endln;
> @@ -136,6 +177,7 @@
>   value = tmp;
>}
>qp = FALSE; // Do a proper QP detection
> +  convert_charset (line, &value, "UTF-8");
>if (endent)
>   endent[0] = 0;
>strncpy(name, line, 255);
> @@ -312,7 +354,10 @@
> !g_strcasecmp(propdata, "END"))
>   triggerrelatedend = TRUE;
> if (!g_strcasecmp(propname, "CHARSET"))
> +   {
>   strncpy(linecharset, propdata, 255);
> + outputprop = FALSE; // value has been converted to UTF-8
> +   }
> if (adr && 
> (!g_strcasecmp(propname, "HOME") || 
> !g_strcasecmp(propname, "WORK"))) {
> @@ -362,7 +407,7 @@
>   value = sync_vtype_decode_qp(value);
>   g_free(tmp);
>}
> -  if ((opts & VOPTION_FIXCHARSET) && value && charset) {
> +  if ((opts & VOPTION_FIXCHARSET) && value && charset && !linecharset[0]) {
>   iconv_t ic;
>   int t;
>   gboolean highchar = FALSE;
> @@ -1132,6 +1177,7 @@
> res = tmp;
> start = newstart;
>   } while(start < card+strlen(card) && start[0] == ' ');
> + convert_charset (pos, &res, "UTF-8");
>   return(res);
>}
>  }
> 
> diff -ur multisync-0.82.orig/plugins/palm_sync/src/vcard.c 
> multisync-0.82/plugins/palm_sync/src/vcard.c
> --- multisync-0.82.orig/plugins/palm_sync/src/vcard.c 2004-04-13 05:03:20.0 
> +0800
> +++ multisync-0.82/plugins/palm_sync/src/vcard.c  2004-07-07 23:19:14.0 
> +0800
> @@ -116,11 +116,11 @@
>  
>   /* note */
>   if(appointment.note && strlen(appointment.note))
> - prop = addPropValueO(vevent, VCDescriptionPropO, 
> g_strescape(appointment.note, NULL));
> + prop = addPropValueO(vevent, VCDescriptionPropO, appointment.note);
>  
>   /* description */
>   if(appointment.description) {
> - prop = addPropValueO(vevent, VCSummaryPropO, 
> escape_chars(g_strescape(appointment.description, NULL)));
> + prop = addPropValueO(vevent, VCSummaryPropO, appointment.description);
>   }
>  
>   /* begin and end*/
> @@ -584,13 +584,13 @@
>  
>   

Re: [Multisync-devel] charset problem

2004-07-09 Thread YuLei
Hi,

You are right, the escape problem was indeed already fixed in branch_08X; sorry
for the noise. I had checked the code in the main CVS tree, but not in
branch_08X :(

在2004年07月10日的07:00，Armin Bauer写道：
> Hi
> 
> thanks for your help!
> 
> can you please try the latest palm plugin from cvs? the bug with the
> escaped characters were already mentioned some time ago and i uploaded
> the fix to cvs a couple of days ago.
> 
> It would be great to hear if this fixes the problems with the palm
> plugin.
> 
> oh and dont forget to use -r branch_08X when you check out.
> 
> Armin
> 
> On Fri, 2004-07-09 at 18:34, YuLei wrote:
> > Hi, all:
> > 
> >   I am using multisync with my Clie, T68 and Evolution in chinese
> >   environment (Debian-sarge), it's just what I am seeking for,
> >   but seems that multisync did not deal chinese characters well,
> >   after dig into the source code (thanks to GPL), I made two fixes
> >   to resolve problems:
> > 
> > 1. If properties of key contains charset specification, like
> > 'N;CHARSET=UTF-7:+...", then remove this property and convert its value to UTF-8, 
> > because Evolution can process UTF-8 properly even if no charset specified
> > 
> > 2. Because UTF-8 use all 8 bits of byte, so escape (in palm plugin) is needn't
> > after convert to UTF-8 or will corrupt the character encoding
> > 
> > here is my patch:
> > 
> > diff -ur multisync-0.82.orig/src/sync_vtype.c multisync-0.82/src/sync_vtype.c
> > --- multisync-0.82.orig/src/sync_vtype.c2004-04-13 05:03:29.0 +0800
> > +++ multisync-0.82/src/sync_vtype.c 2004-07-07 22:44:28.0 +0800
> > @@ -61,6 +61,46 @@
> >VTYPE_VCARD = 0xc
> >  } vtype_type;
> >  
> > +// convert charset of value from which specified in properties
> > +static void
> > +convert_charset (char* line, unsigned char** value, const char* charset)
> > +{ 
> > +  while (line = strchr (line, ';')) {
> > +const char* key = "CHARSET=";
> > +char org_charset[64];
> > +int len;
> > +iconv_t ic;
> > +
> > +line++; // skip the ';' character
> > +
> > +if ((strlen (line) <= strlen (key))
> > +   || g_strncasecmp (line, key, sizeof (key)))
> > +// key not found
> > +  continue;
> > +
> > +org_charset[0] = 0;
> > +len = strcspn (line + strlen (key), ";: \r\n");
> > +if (len >= sizeof (org_charset))
> > +  len = sizeof (org_charset) - 1;
> > +strncat (org_charset, line + strlen (key), len);
> > +
> > +ic = iconv_open (charset, org_charset);
> > +if (ic >= 0) {
> > +  char *utfvalue = g_malloc0(65536);
> > +  size_t inbytes = strlen(*value);
> > +  size_t outbytes = 65536;
> > +  char *inbuf = *value, *outbuf = utfvalue;
> > +
> > +  iconv(ic, &inbuf, &inbytes, &outbuf, &outbytes);
> > +  g_free(*value);
> > +  *value = utfvalue;
> > +  iconv_close(ic);
> > +}
> > +
> > +break;
> > +  } // while every ';' character
> > +}
> > +
> >  // Parse and correct a number of errors in the VCARD/VTODO data
> >  // The "opts" parameter decides which errors/features that should be corrected
> >  char* sync_vtype_convert(char *card, sync_voption opts, char* charset) {
> > @@ -74,6 +114,7 @@
> >gboolean output = TRUE; // If feedthrough output is enabled
> >vtype_type datatype = VTYPE_UNKNOWN;
> >  
> >outcard = g_string_new("");
> >while (incard) {
> >  char* endln;
> > @@ -136,6 +177,7 @@
> > value = tmp;
> >}
> >qp = FALSE; // Do a proper QP detection
> > +  convert_charset (line, &value, "UTF-8");
> >if (endent)
> > endent[0] = 0;
> >strncpy(name, line, 255);
> > @@ -312,7 +354,10 @@
> >   !g_strcasecmp(propdata, "END"))
> > triggerrelatedend = TRUE;
> >   if (!g_strcasecmp(propname, "CHARSET"))
> > + {
> > strncpy(linecharset, propdata, 255);
> > +   outputprop = FALSE; // value has been converted to UTF-8
> > + }
> >   if (adr && 
> >   (!g_strcasecmp(propname, "HOME") || 
> >   !g_strcasecmp(propname, "WORK"))) {
> > @@ -362,7 +407,7 @@
> > value = sync_vtype_decode_qp(value);
> > g_free(tmp);
> >}
> > -  if ((opts & VOPTION_FIXCHARSET) && value && charset) {
> > +  if ((opts & VOPTION_FIXCHARSET) && value && charset && !linecharset[0]) {
> > iconv_t ic;
> > int t;
> > gboolean highchar = FALSE;
> > @@ -1132,6 +1177,7 @@
> >   res = tmp;
> >   start = newstart;
> > } while(start < card+strlen(card) && start[0] == ' ');
> > +   convert_charset (pos, &res, "UTF-8");
> > return(res);
> >}
> >  }
> > 
> > diff -ur multisync-0.82.orig/plugins/palm_sync/src/vcard.c 
> > multisync-0.82/plugins/palm_sync/src/vcard.c
> > --- multisync-0.82.orig/plugins/palm_sync/src/vcard.c   2004-04-13 
> > 05:03:20.0 +0800
> > +++ multisync-0.82/plugins/palm_sync/src/vcard.c2004-07-07 23:19:14.0 
> > +0800
> > @@ -116,11 +116,11 @@
> >  
> > /* note */
> > if(appointmen