[Multisync-devel] charset problem
Hi, all: I am using multisync with my Clie, T68 and Evolution in chinese environment (Debian-sarge), it's just what I am seeking for, but seems that multisync did not deal chinese characters well, after dig into the source code (thanks to GPL), I made two fixes to resolve problems: 1. If properties of key contains charset specification, like 'N;CHARSET=UTF-7:+...", then remove this property and convert its value to UTF-8, because Evolution can process UTF-8 properly even if no charset specified 2. Because UTF-8 use all 8 bits of byte, so escape (in palm plugin) is needn't after convert to UTF-8 or will corrupt the character encoding here is my patch: diff -ur multisync-0.82.orig/src/sync_vtype.c multisync-0.82/src/sync_vtype.c --- multisync-0.82.orig/src/sync_vtype.c2004-04-13 05:03:29.0 +0800 +++ multisync-0.82/src/sync_vtype.c 2004-07-07 22:44:28.0 +0800 @@ -61,6 +61,46 @@ VTYPE_VCARD = 0xc } vtype_type; +// convert charset of value from which specified in properties +static void +convert_charset (char* line, unsigned char** value, const char* charset) +{ + while (line = strchr (line, ';')) { +const char* key = "CHARSET="; +char org_charset[64]; +int len; +iconv_t ic; + +line++; // skip the ';' character + +if ((strlen (line) <= strlen (key)) + || g_strncasecmp (line, key, sizeof (key))) +// key not found + continue; + +org_charset[0] = 0; +len = strcspn (line + strlen (key), ";: \r\n"); +if (len >= sizeof (org_charset)) + len = sizeof (org_charset) - 1; +strncat (org_charset, line + strlen (key), len); + +ic = iconv_open (charset, org_charset); +if (ic >= 0) { + char *utfvalue = g_malloc0(65536); + size_t inbytes = strlen(*value); + size_t outbytes = 65536; + char *inbuf = *value, *outbuf = utfvalue; + + iconv(ic, &inbuf, &inbytes, &outbuf, &outbytes); + g_free(*value); + *value = utfvalue; + iconv_close(ic); +} + +break; + } // while every ';' character +} + // Parse and correct a number of errors in the VCARD/VTODO data // The "opts" parameter decides which 
errors/features that should be corrected char* sync_vtype_convert(char *card, sync_voption opts, char* charset) { @@ -74,6 +114,7 @@ gboolean output = TRUE; // If feedthrough output is enabled vtype_type datatype = VTYPE_UNKNOWN; outcard = g_string_new(""); while (incard) { char* endln; @@ -136,6 +177,7 @@ value = tmp; } qp = FALSE; // Do a proper QP detection + convert_charset (line, &value, "UTF-8"); if (endent) endent[0] = 0; strncpy(name, line, 255); @@ -312,7 +354,10 @@ !g_strcasecmp(propdata, "END")) triggerrelatedend = TRUE; if (!g_strcasecmp(propname, "CHARSET")) + { strncpy(linecharset, propdata, 255); + outputprop = FALSE; // value has been converted to UTF-8 + } if (adr && (!g_strcasecmp(propname, "HOME") || !g_strcasecmp(propname, "WORK"))) { @@ -362,7 +407,7 @@ value = sync_vtype_decode_qp(value); g_free(tmp); } - if ((opts & VOPTION_FIXCHARSET) && value && charset) { + if ((opts & VOPTION_FIXCHARSET) && value && charset && !linecharset[0]) { iconv_t ic; int t; gboolean highchar = FALSE; @@ -1132,6 +1177,7 @@ res = tmp; start = newstart; } while(start < card+strlen(card) && start[0] == ' '); + convert_charset (pos, &res, "UTF-8"); return(res); } } diff -ur multisync-0.82.orig/plugins/palm_sync/src/vcard.c multisync-0.82/plugins/palm_sync/src/vcard.c --- multisync-0.82.orig/plugins/palm_sync/src/vcard.c 2004-04-13 05:03:20.0 +0800 +++ multisync-0.82/plugins/palm_sync/src/vcard.c2004-07-07 23:19:14.0 +0800 @@ -116,11 +116,11 @@ /* note */ if(appointment.note && strlen(appointment.note)) - prop = addPropValueO(vevent, VCDescriptionPropO, g_strescape(appointment.note, NULL)); + prop = addPropValueO(vevent, VCDescriptionPropO, appointment.note); /* description */ if(appointment.description) { - prop = addPropValueO(vevent, VCSummaryPropO, escape_chars(g_strescape(appointment.description, NULL))); + prop = addPropValueO(vevent, VCSummaryPropO, appointment.description); } /* begin and end*/ @@ -584,13 +584,13 @@ /* note */ if(todo.note && strlen(todo.note)) { 
- prop = addPropValueO(vtodo, VCDescriptionPropO, escape_chars(g_strescape(todo.note, NULL))); + prop = addPropValueO(vtodo, VCDescriptionPropO, todo.note); //addPropValueO(prop, "ENCODING", "QUOTED-PRINTABLE"); } /* description */ if(todo.description) { - addPropValueO(vtodo, VCSummaryPropO
Re: [Multisync-devel] charset problem
Thanks very much for your interest and for taking the time to prepare a patch. We'll have a look at it and let you know if we have to make changes and when things go into CVS. Thanks again, Tom On Fri, 2004-07-09 at 12:34, YuLei wrote: > Hi, all: > > I am using multisync with my Clie, T68 and Evolution in chinese > environment (Debian-sarge), it's just what I am seeking for, > but seems that multisync did not deal chinese characters well, > after dig into the source code (thanks to GPL), I made two fixes > to resolve problems: > > 1. If properties of key contains charset specification, like > 'N;CHARSET=UTF-7:+...", then remove this property and convert its value to UTF-8, > because Evolution can process UTF-8 properly even if no charset specified > > 2. Because UTF-8 use all 8 bits of byte, so escape (in palm plugin) is needn't > after convert to UTF-8 or will corrupt the character encoding > > here is my patch: > > diff -ur multisync-0.82.orig/src/sync_vtype.c multisync-0.82/src/sync_vtype.c > --- multisync-0.82.orig/src/sync_vtype.c 2004-04-13 05:03:29.0 +0800 > +++ multisync-0.82/src/sync_vtype.c 2004-07-07 22:44:28.0 +0800 > @@ -61,6 +61,46 @@ >VTYPE_VCARD = 0xc > } vtype_type; > > +// convert charset of value from which specified in properties > +static void > +convert_charset (char* line, unsigned char** value, const char* charset) > +{ > + while (line = strchr (line, ';')) { > +const char* key = "CHARSET="; > +char org_charset[64]; > +int len; > +iconv_t ic; > + > +line++; // skip the ';' character > + > +if ((strlen (line) <= strlen (key)) > + || g_strncasecmp (line, key, sizeof (key))) > +// key not found > + continue; > + > +org_charset[0] = 0; > +len = strcspn (line + strlen (key), ";: \r\n"); > +if (len >= sizeof (org_charset)) > + len = sizeof (org_charset) - 1; > +strncat (org_charset, line + strlen (key), len); > + > +ic = iconv_open (charset, org_charset); > +if (ic >= 0) { > + char *utfvalue = g_malloc0(65536); > + size_t inbytes = strlen(*value); 
> + size_t outbytes = 65536; > + char *inbuf = *value, *outbuf = utfvalue; > + > + iconv(ic, &inbuf, &inbytes, &outbuf, &outbytes); > + g_free(*value); > + *value = utfvalue; > + iconv_close(ic); > +} > + > +break; > + } // while every ';' character > +} > + > // Parse and correct a number of errors in the VCARD/VTODO data > // The "opts" parameter decides which errors/features that should be corrected > char* sync_vtype_convert(char *card, sync_voption opts, char* charset) { > @@ -74,6 +114,7 @@ >gboolean output = TRUE; // If feedthrough output is enabled >vtype_type datatype = VTYPE_UNKNOWN; > >outcard = g_string_new(""); >while (incard) { > char* endln; > @@ -136,6 +177,7 @@ > value = tmp; >} >qp = FALSE; // Do a proper QP detection > + convert_charset (line, &value, "UTF-8"); >if (endent) > endent[0] = 0; >strncpy(name, line, 255); > @@ -312,7 +354,10 @@ > !g_strcasecmp(propdata, "END")) > triggerrelatedend = TRUE; > if (!g_strcasecmp(propname, "CHARSET")) > + { > strncpy(linecharset, propdata, 255); > + outputprop = FALSE; // value has been converted to UTF-8 > + } > if (adr && > (!g_strcasecmp(propname, "HOME") || > !g_strcasecmp(propname, "WORK"))) { > @@ -362,7 +407,7 @@ > value = sync_vtype_decode_qp(value); > g_free(tmp); >} > - if ((opts & VOPTION_FIXCHARSET) && value && charset) { > + if ((opts & VOPTION_FIXCHARSET) && value && charset && !linecharset[0]) { > iconv_t ic; > int t; > gboolean highchar = FALSE; > @@ -1132,6 +1177,7 @@ > res = tmp; > start = newstart; > } while(start < card+strlen(card) && start[0] == ' '); > + convert_charset (pos, &res, "UTF-8"); > return(res); >} > } > > diff -ur multisync-0.82.orig/plugins/palm_sync/src/vcard.c > multisync-0.82/plugins/palm_sync/src/vcard.c > --- multisync-0.82.orig/plugins/palm_sync/src/vcard.c 2004-04-13 05:03:20.0 > +0800 > +++ multisync-0.82/plugins/palm_sync/src/vcard.c 2004-07-07 23:19:14.0 > +0800 > @@ -116,11 +116,11 @@ > > /* note */ > if(appointment.note && strlen(appointment.note)) > - prop = 
addPropValueO(vevent, VCDescriptionPropO, > g_strescape(appointment.note, NULL)); > + prop = addPropValueO(vevent, VCDescriptionPropO, appointment.note); > > /* description */ > if(appointment.description) { > - prop = addPropValueO(vevent, VCSummaryPropO, > escape_chars(g_strescape(appointment.description, NULL))); > + prop = addPropValueO(vevent, VCSummaryPropO, appointment.description); > } > > /* begin and end*/ > @@ -584,13 +584,13 @@ > >
Re: [Multisync-devel] charset problem
Hi, you are right, the escape problem was indeed already fixed in branch_08X, sorry for the noise. I had checked the code in the main CVS tree, but not branch_08X :( 在2004年07月10日的07:00，Armin Bauer写道： > Hi > > thanks for your help! > > can you please try the latest palm plugin from cvs? the bug with the > escaped characters was already mentioned some time ago and i uploaded > the fix to cvs a couple of days ago. > > It would be great to hear if this fixes the problems with the palm > plugin. > > oh and don't forget to use -r branch_08X when you check out. > > Armin > > On Fri, 2004-07-09 at 18:34, YuLei wrote: > > Hi, all: > > > > I am using multisync with my Clie, T68 and Evolution in chinese > > environment (Debian-sarge), it's just what I am seeking for, > > but seems that multisync did not deal chinese characters well, > > after dig into the source code (thanks to GPL), I made two fixes > > to resolve problems: > > > > 1. If properties of key contains charset specification, like > > 'N;CHARSET=UTF-7:+...", then remove this property and convert its value to UTF-8, > > because Evolution can process UTF-8 properly even if no charset specified > > > > 2. 
Because UTF-8 use all 8 bits of byte, so escape (in palm plugin) is needn't > > after convert to UTF-8 or will corrupt the character encoding > > > > here is my patch: > > > > diff -ur multisync-0.82.orig/src/sync_vtype.c multisync-0.82/src/sync_vtype.c > > --- multisync-0.82.orig/src/sync_vtype.c2004-04-13 05:03:29.0 +0800 > > +++ multisync-0.82/src/sync_vtype.c 2004-07-07 22:44:28.0 +0800 > > @@ -61,6 +61,46 @@ > >VTYPE_VCARD = 0xc > > } vtype_type; > > > > +// convert charset of value from which specified in properties > > +static void > > +convert_charset (char* line, unsigned char** value, const char* charset) > > +{ > > + while (line = strchr (line, ';')) { > > +const char* key = "CHARSET="; > > +char org_charset[64]; > > +int len; > > +iconv_t ic; > > + > > +line++; // skip the ';' character > > + > > +if ((strlen (line) <= strlen (key)) > > + || g_strncasecmp (line, key, sizeof (key))) > > +// key not found > > + continue; > > + > > +org_charset[0] = 0; > > +len = strcspn (line + strlen (key), ";: \r\n"); > > +if (len >= sizeof (org_charset)) > > + len = sizeof (org_charset) - 1; > > +strncat (org_charset, line + strlen (key), len); > > + > > +ic = iconv_open (charset, org_charset); > > +if (ic >= 0) { > > + char *utfvalue = g_malloc0(65536); > > + size_t inbytes = strlen(*value); > > + size_t outbytes = 65536; > > + char *inbuf = *value, *outbuf = utfvalue; > > + > > + iconv(ic, &inbuf, &inbytes, &outbuf, &outbytes); > > + g_free(*value); > > + *value = utfvalue; > > + iconv_close(ic); > > +} > > + > > +break; > > + } // while every ';' character > > +} > > + > > // Parse and correct a number of errors in the VCARD/VTODO data > > // The "opts" parameter decides which errors/features that should be corrected > > char* sync_vtype_convert(char *card, sync_voption opts, char* charset) { > > @@ -74,6 +114,7 @@ > >gboolean output = TRUE; // If feedthrough output is enabled > >vtype_type datatype = VTYPE_UNKNOWN; > > > >outcard = g_string_new(""); > >while 
(incard) { > > char* endln; > > @@ -136,6 +177,7 @@ > > value = tmp; > >} > >qp = FALSE; // Do a proper QP detection > > + convert_charset (line, &value, "UTF-8"); > >if (endent) > > endent[0] = 0; > >strncpy(name, line, 255); > > @@ -312,7 +354,10 @@ > > !g_strcasecmp(propdata, "END")) > > triggerrelatedend = TRUE; > > if (!g_strcasecmp(propname, "CHARSET")) > > + { > > strncpy(linecharset, propdata, 255); > > + outputprop = FALSE; // value has been converted to UTF-8 > > + } > > if (adr && > > (!g_strcasecmp(propname, "HOME") || > > !g_strcasecmp(propname, "WORK"))) { > > @@ -362,7 +407,7 @@ > > value = sync_vtype_decode_qp(value); > > g_free(tmp); > >} > > - if ((opts & VOPTION_FIXCHARSET) && value && charset) { > > + if ((opts & VOPTION_FIXCHARSET) && value && charset && !linecharset[0]) { > > iconv_t ic; > > int t; > > gboolean highchar = FALSE; > > @@ -1132,6 +1177,7 @@ > > res = tmp; > > start = newstart; > > } while(start < card+strlen(card) && start[0] == ' '); > > + convert_charset (pos, &res, "UTF-8"); > > return(res); > >} > > } > > > > diff -ur multisync-0.82.orig/plugins/palm_sync/src/vcard.c > > multisync-0.82/plugins/palm_sync/src/vcard.c > > --- multisync-0.82.orig/plugins/palm_sync/src/vcard.c 2004-04-13 > > 05:03:20.0 +0800 > > +++ multisync-0.82/plugins/palm_sync/src/vcard.c2004-07-07 23:19:14.0 > > +0800 > > @@ -116,11 +116,11 @@ > > > > /* note */ > > if(appointmen