I made patches that may solve the problem:
  URL: http://rufus.w3.org/tools/Kaffe/messages/5935.html
  Subject: CharToByteUTF8.java loses some characters
  From: [EMAIL PROTECTED]
  Date: Fri Mar 03 2000 - 06:03:21 EST 

I am afraid that the code 'off += outlen; len -= outlen;' in
OutputStreamWriter.java is based on the assumption that
the input length (in chars) is equal to the output length (in bytes),
which is not necessarily true for non-ASCII characters.

So I added a method "convertALAP" (convert as long as possible)
to CharToByteConverter. It does not necessarily convert the
whole input specified by fpos and flen, but converts as much of
the input as it can, and returns not only the output length
but also the input length actually processed.

Attached patches:

Patch for kaffe/io/CharToByteConverter.java

*** kaffe/io/CharToByteConverter.java.orig      Thu Nov 11 01:56:18 1999
--- kaffe/io/CharToByteConverter.java   Sun May 14 22:31:29 2000
***************
*** 69,74 ****
--- 69,82 ----
  
  abstract public int convert(char[] from, int fpos, int flen, byte[] to, int tpos, int tlen);
  
+ public int[] convertALAP (char[] from, int fpos, int flen, byte[] to, int tpos, int tlen) {
+       // returns int[] of input length and output length
+       // by default assume input length = output length
+       int[] a = new int[2];
+       a[0] = a[1] = convert(from, fpos, flen, to, tpos, tlen);
+       return a;
+ }
+ 
  public int flush ( byte[] to, int tpos, int tlen ) {
        if ( blen == 0 ){
                return 0;

Patch for kaffe/io/CharToByteUTF8.java

*** kaffe/io/CharToByteUTF8.java.orig   Thu Mar  9 20:42:18 2000
--- kaffe/io/CharToByteUTF8.java        Sun May 14 22:32:47 2000
***************
*** 16,21 ****
--- 16,29 ----
  }
  
  public int convert ( char[] from, int fpos, int flen, byte[] to, int tpos, int tlen ) {
+       return (_convert(from, fpos, flen, to, tpos, tlen, true /* use carry */))[1];
+ }
+ 
+ public int[] convertALAP ( char[] from, int fpos, int flen, byte[] to, int tpos, int tlen ) {
+       return _convert(from, fpos, flen, to, tpos, tlen, false /* do not use carry */);
+ }
+ 
+ private int[] _convert ( char[] from, int fpos, int flen, byte[] to, int tpos, int tlen, boolean usecarry ) {
        int o = tpos;
        int oe = tpos + tlen;
        int i = fpos;
***************
*** 46,57 ****
                  }
          }
  
!       // Carry anything left.
!       if (ie > i) {
!               carry(from, i, ie - i);
        }
  
!       return (o - tpos);
  }
  
  public int getNumberOfBytes ( char[] from, int fpos, int flen ) {
--- 54,70 ----
                  }
          }
  
!       if(usecarry) {
!               // Carry anything left.
!               if (ie > i) {
!                       carry(from, i, ie - i);
!               }
        }
  
!       int[] a = new int[2];
!       a[0] = i - fpos;
!       a[1] = o - tpos;
!       return a;
  }
  
  public int getNumberOfBytes ( char[] from, int fpos, int flen ) {

Patch for java/io/OutputStreamWriter.java

*** java/io/OutputStreamWriter.java.orig        Fri Aug 13 10:56:01 1999
--- java/io/OutputStreamWriter.java     Sun May 14 22:33:26 2000
***************
*** 75,88 ****
  
        synchronized (lock) {
                while (len > 0) {
!                       int outlen = encoding.convert(cbuf, off, len,
                                outbuf, buflen, outbuf.length - buflen);
                        buflen += outlen;
                        if (outlen == 0 || outbuf.length - buflen < MINMARGIN) {
                                flush();
                        }
!                       off += outlen;
!                       len -= outlen;
                }
        }
  }
--- 75,90 ----
  
        synchronized (lock) {
                while (len > 0) {
!                       int[] a = encoding.convertALAP(cbuf, off, len,
                                outbuf, buflen, outbuf.length - buflen);
+                       int inlen = a[0];
+                       int outlen = a[1];
                        buflen += outlen;
                        if (outlen == 0 || outbuf.length - buflen < MINMARGIN) {
                                flush();
                        }
!                       off += inlen;
!                       len -= inlen;
                }
        }
  }

Reply via email to