Hi, 

 Here is a patch to text.c from the current wv, written by my friend Walery
Studennikov <[EMAIL PROTECTED]> (he doesn't have internet access right now,
so he asked me to send it). The patch does the following:

* Caches the iconv descriptor (so a new iconv handle is not opened and closed
  for each character in the document) - this should greatly speed up .doc
  file import.
* Fixes the byte order problem in the wvWare utility (that problem was only
  partially fixed - the functions used by AW were fixed, but the functions
  used for conversion by the wvWare binary were not). He says that this
  patch makes wvWare on his RedHat 7.0 box produce reasonable characters
  in the output again (he says that without the patch, any word2k file would
  produce garbage in the output of, say, the wvHtml script).

 The patched wv also works fine on my RH6.0 box - so I suppose the problem is
 now fixed on all platforms.

 I repeat, this is a patch to text.c (the header containing the file name has
been stripped).

 Best regards,
  -Vlad
21c21
<   #define wv_iconv(a,b,c,d,e) iconv(a, (const char**)b,c,(char**)d,e)
---
>   #define wv_iconv(a,b,c,d,e) iconv(a, (char**)b,c,(char**)d,e)
104c104
<         wvOutputFromUnicode(eachchar,outputtype);
---
>         wvOutputFromUnicode( eachchar, outputtype );
296,305c296,302
<        if(eachchar > 0xff)
<                {
<                buffer[0]= (char)(eachchar >> 8);
<                buffer[1]= (char)eachchar & 0xff;
<                }
<        else
<                {
<                buffer[0] = eachchar & 0xff;
<                buffer[1] = 0;
<                }
---
>       if(eachchar > 0xff) {
>           buffer[0]= (char)(eachchar >> 8);
>           buffer[1]= (char)eachchar & 0xff;
>       } else {
>           buffer[0] = eachchar & 0xff;
>           buffer[1] = 0;
>       }
352,374c349,371
< void wvOutputFromUnicode(U16 eachchar,char *outputtype)
<         {
<     U16 i;
<     char f_code[33];  /* From CCSID                       */
<     char t_code[33];            /* To CCSID                             */
<     iconv_t iconv_handle;       /* Conversion Descriptor returned       */
<                                 /* from iconv_open() function           */
<     U8 *obuf;                 /* Buffer for converted characters      */
<     U8 *p;
<     size_t ibuflen;               /* Length of input buffer               */
<     size_t obuflen;               /* Length of output buffer              */
<     size_t len;
<     U8 *ibuf;
<     U8 buffer[2];
<     U8 buffer2[5];
< 
<     buffer[0]=(eachchar>>8)&0x00ff;
<     buffer[1]=eachchar&0x00ff;
<     ibuf = buffer;
<     obuf = buffer2;
< 
<         if ((wvConvertUnicodeToEntity != NULL) && 
wvConvertUnicodeToEntity(eachchar))
<                 return;
---
> void wvOutputFromUnicode( U16 eachchar, char *outputtype )
> {
>     static char cached_outptype[33];    /* Last outputtype                */
>     static iconv_t iconv_handle = NULL;       /* Cached iconv descriptor          */
>     static int need_swapping;
>     U8 *ibuf, *obuf;
>     size_t ibuflen, obuflen, len, count, i;
>     U8 buffer[2], buffer2[5];
> 
>     if ((wvConvertUnicodeToEntity != NULL) && wvConvertUnicodeToEntity(eachchar))
>       return;
> 
>     if ( !iconv_handle || strcmp(cached_outptype, outputtype) != 0 )
>     {
>       if (iconv_handle)
>           iconv_close(iconv_handle);
>       
>       iconv_handle = iconv_open( outputtype, "UCS-2" );
>       if (iconv_handle == (iconv_t)-1) {
>           wvError(("iconv_open fail: %d, cannot convert %s to 
>%s\n",errno,"UCS-2",outputtype));
>           printf("?");
>           return;
>       }
376,377c373
<          /* All reserved positions of from code (last 12 characters) and to code   
*/
<     /* (last 19 characters) must be set to hexadecimal zeros.                 */
---
>       strcpy( cached_outptype, outputtype );
379,380c375,376
<     memset(f_code,'\0',33);
<     memset(t_code,'\0',33);
---
>       /* Determining if unicode biteorder is swapped (glibc < 2.2) */
>       need_swapping = 1;
382,383c378,393
<     strcpy(f_code,"UCS-2");
<     strcpy(t_code,outputtype);
---
>       buffer[0] = 0x20; buffer[1] = 0;
>       ibuf = buffer;    obuf = buffer2;
>       ibuflen = 2;      obuflen = 5;
> 
>       count = wv_iconv(iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
>       if (count >= 0)
>           need_swapping = buffer2[0] != 0x20;
>     }
> 
>     if (need_swapping) {
>       buffer[0] = (eachchar>>8) & 0x00ff;
>       buffer[1] = eachchar & 0x00ff;
>     } else {
>       buffer[0] = eachchar & 0x00ff;
>       buffer[1] = (eachchar>>8) & 0x00ff;
>     }
385,391c395,396
<         iconv_handle = iconv_open(t_code,f_code);
<     if (iconv_handle == (iconv_t)-1)
<         {
<         wvError(("iconv_open fail: %d, cannot convert %s to 
%s\n",errno,"UCS-2",outputtype));
<         printf("?");
<         return;
<         }
---
>     ibuf = buffer;
>     obuf = buffer2;
393,396c398,399
<         ibuflen = 2;
<     obuflen = 5;
<         p = obuf;
<     len = obuflen;
---
>     ibuflen = 2;
>     len = obuflen = 5;
398,400c401,405
<     if(wv_iconv(iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen) == (size_t)-1)
<       {
<         wvError(("iconv failed errno: %d, to:%s from:%s\n",errno, t_code, f_code));
---
>     count = wv_iconv(iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
>     if(count == (size_t)-1)
>     {
>       wvError(("iconv failed errno: %d, char: 0x%X, %s -> %s\n",
>                errno, eachchar, "UCS-2", outputtype));
406,413c411,417
<       } else {
<         len = len-obuflen;
<         iconv_close(iconv_handle);
< 
<         for (i=0;i<len;i++)
<                 printf("%c",p[i]);
<       }
<       }
---
>     } else {
>       len = len-obuflen;
> 
>       for (i=0; i<len; i++)
>           printf("%c", buffer2[i]);
>     }
> }

Reply via email to