Update of /cvsroot/monetdb/MonetDB/src/common
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv22201

Modified Files:
        stream.mx 
Log Message:
Do error checking when calling iconv().
Also deal with incomplete multibyte sequences by remembering data
until next time.
This code compiles on Linux but has not (yet) been tested...


Index: stream.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB/src/common/stream.mx,v
retrieving revision 1.154
retrieving revision 1.155
diff -u -d -r1.154 -r1.155
--- stream.mx   2 Jan 2008 14:58:33 -0000       1.154
+++ stream.mx   2 Jan 2008 16:30:48 -0000       1.155
@@ -2258,6 +2258,7 @@
        stream *s;
        char buffer[BUFSIZ];
        size_t buflen;
+       int eof;
 };
 
 static ssize_t
@@ -2266,11 +2267,51 @@
        struct icstream *ic = (struct icstream *) s->stream_data.p;
        ICONV_CONST char *inbuf = (char *) buf;
        size_t inbytesleft = elmsize * cnt;
-       char *outbuf = ic->buffer;
-       size_t outbytesleft = sizeof(ic->buffer);
 
+       /* if unconverted data from a previous call remains, add it to
+          the start of the new data, using temporary space */
+       if (ic->buflen > 0) {
+               char *s = alloca(ic->buflen + inbytesleft);
+
+               memcpy(s, ic->buffer, ic->buflen);
+               memcpy(s + ic->buflen, buf, inbytesleft);
+               buf = s;
+               inbytesleft += ic->buflen;
+               ic->buflen = 0;
+       }
        while (inbytesleft > 0) {
-               iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+               char *outbuf = ic->buffer;
+               size_t outbytesleft = sizeof(ic->buffer);
+
+               if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
+                       switch (errno) {
+                       case EILSEQ:
+                               /* invalid multibyte sequence encountered */
+                               s->errnr = WRITE_ERROR;
+                               return -1;
+                       case EINVAL:
+                               /* incomplete multibyte sequence encountered */
+                               /* flush what has been converted */
+                               if (outbytesleft < sizeof(ic->buffer))
+                                       stream_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft);
+                               /* remember what hasn't been converted */
+                               if (inbytesleft > sizeof(ic->buffer)) {
+                                       /* ridiculously long multibyte sequence, so return error */
+                                       s->errnr = WRITE_ERROR;
+                                       return -1;
+                               }
+                               memcpy(ic->buffer, inbuf, inbytesleft);
+                               ic->buflen = inbytesleft;
+                               return cnt;
+                       case E2BIG:
+                               /* not enough space in output buffer */
+                               break;
+                       default:
+                               /* cannot happen (according to manual) */
+                               s->errnr = WRITE_ERROR;
+                               return -1;
+                       }
+               }
                 stream_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft);
        }
        return cnt;
@@ -2285,18 +2326,93 @@
        char *outbuf = (char *) buf;
        size_t outbytesleft = elmsize * cnt;
 
-       errno = 0;
-       while (outbytesleft > 0 && errno != E2BIG) {
-               stream_read(ic->s, ic->buffer, 1, 1);
-               inbytesleft++;
-               iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
-               if (inbytesleft == 0)
+       while (outbytesleft > 0 && !ic->eof) {
+               if (ic->buflen == sizeof(ic->buffer)) {
+                       /* ridiculously long multibyte sequence, return error */
+                       s->errnr = READ_ERROR;
+                       return -1;
+               }
+
+               switch (stream_read(ic->s, ic->buffer + ic->buflen, 1, 1)) {
+               case 1:
+                       /* expected: read one byte */
+                       ic->buflen++;
+                       inbytesleft++;
+                       break;
+               case 0:
+                       /* end of file */
+                       ic->eof = 1;
+                       if (ic->buflen > 0) {
+                               /* incomplete input */
+                               s->errnr = READ_ERROR;
+                               return -1;
+                       }
+                       iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft);
+                       goto exit_func;
+               default:
+                       /* error */
+                       s->errnr = READ_ERROR;
+                       return -1;
+               }
+               if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
+                       switch (errno) {
+                       case EILSEQ:
+                               /* invalid multibyte sequence encountered */
+                               s->errnr = READ_ERROR;
+                               return -1;
+                       case EINVAL:
+                               /* incomplete multibyte sequence encountered */
+                               break;
+                       case E2BIG:
+                               /* not enough space in output buffer,
+                                  return what we have, saving what's in
+                                  the buffer */
+                               goto exit_func;
+                       default:
+                               /* cannot happen (according to manual) */
+                               s->errnr = READ_ERROR;
+                               return -1;
+                       }
+               }
+               if (inbytesleft == 0) {
+                       /* converted complete buffer */
                        inbuf = ic->buffer;
+                       ic->buflen = 0;
+               }
        }
+exit_func:
+       if (inbuf > ic->buffer)
+               memmove(ic->buffer, inbuf, inbytesleft);
        ic->buflen = inbytesleft;
+       if (outbytesleft == elmsize * cnt) {
+               /* if we're returning data, we must pass on EOF on the
+                  next call (i.e. keep ic->eof set), otherwise we
+                  must clear it so that the next call will cause the
+                  underlying stream to be read again */
+               ic->eof = 0;
+       }
        return (elmsize * cnt - outbytesleft) / elmsize;
 }
 
+static int
+ic_flush(stream *s)
+{
+       struct icstream *ic = (struct icstream *) s->stream_data.p;
+       char *outbuf = ic->buffer;
+       size_t outbytesleft = sizeof(ic->buffer);
+
+       /* if unconverted data from a previous call remains, it was an
+          incomplete multibyte sequence, so an error */
+       if (ic->buflen > 0) {
+               s->errnr = WRITE_ERROR;
+               return 1;
+       }
+       iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft);
+       if (outbytesleft < sizeof(ic->buffer))
+               stream_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft);
+       return stream_flush(ic->s);
+}
+
 static void
 ic_close(stream *s)
 {
@@ -2304,24 +2420,12 @@
 
        if (ic == NULL)
                return;
+       ic_flush(s);
        stream_close(ic->s);
        free(s->stream_data.p);
        s->stream_data.p = NULL;
 }
 
-static int
-ic_flush(stream *s)
-{
-       struct icstream *ic = (struct icstream *) s->stream_data.p;
-       char *outbuf = ic->buffer;
-       size_t outbytesleft = sizeof(ic->buffer);
-
-       iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft);
-       stream_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft);
-       stream_flush(ic->s);
-       return 1;
-}
-
 static stream *
 ic_open(iconv_t cd, stream *ss, const char *name)
 {
@@ -2339,6 +2443,7 @@
        ic->cd = cd;
        ic->s = ss;
        ic->buflen = 0;
+       ic->eof = 0;
        return s;
 }
        


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to