Module Name: src Committed By: christos Date: Fri Feb 15 00:29:45 UTC 2013
Modified Files: src/usr.bin/vis: vis.c Log Message: More fixes from J.R. Oldroyd: - I have added a call to memset() to clear the mbibuff on each loop. Since we're dealing with possibly broken multibyte sequences, clearing it will avoid problems with a new input sequence possibly being confused by extra bytes still there from the last iteration. wctomb(), which is used to fill that buffer, does not append a NUL. - I have added a (char) cast when copying single bytes into the input buffer after a multibyte conversion error. - In the call to strvisx() the count must be 1, not mbilen which can be 2 or 3 etc for a multibyte character. This value is a count of characters - not bytes - to process. It even says characters in the man page. In vis(3) I am interpreting this value to mean multibyte characters. To generate a diff of this commit: cvs rdiff -u -r1.20 -r1.21 src/usr.bin/vis/vis.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/vis/vis.c diff -u src/usr.bin/vis/vis.c:1.20 src/usr.bin/vis/vis.c:1.21 --- src/usr.bin/vis/vis.c:1.20 Thu Feb 14 09:00:01 2013 +++ src/usr.bin/vis/vis.c Thu Feb 14 19:29:44 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: vis.c,v 1.20 2013/02/14 14:00:01 christos Exp $ */ +/* $NetBSD: vis.c,v 1.21 2013/02/15 00:29:44 christos Exp $ */ /*- * Copyright (c) 1989, 1993 @@ -39,12 +39,13 @@ __COPYRIGHT("@(#) Copyright (c) 1989, 19 #if 0 static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 6/6/93"; #endif -__RCSID("$NetBSD: vis.c,v 1.20 2013/02/14 14:00:01 christos Exp $"); +__RCSID("$NetBSD: vis.c,v 1.21 2013/02/15 00:29:44 christos Exp $"); #endif /* not lint */ #include <stdio.h> #include <string.h> #include <stdlib.h> +#include <string.h> #include <errno.h> #include <wchar.h> #include <limits.h> @@ -160,29 +161,49 @@ process(FILE *fp) static int col = 0; static char nul[] = "\0"; char *cp = nul + 1; /* so *(cp-1) starts out != '\n' */ - wint_t c, c1, rachar; - char mbibuff[13]; /* ((sizeof(ibuff) - 1) * MB_LEN_MAX) + NUL */ + wint_t c, c1, rachar; + char mbibuff[13]; /* (2 wchars (i.e., c + c1)) * MB_LEN_MAX) */ char buff[5]; /* max vis-encoding length for one char + NUL */ int mbilen, cerr = 0, raerr = 0; + /* + * The input stream is considered to be multibyte characters. + * The input loop will read this data inputing one character, + * possibly multiple bytes, at a time and converting each to + * a wide character wchar_t. + * + * The vis(3) functions, however, require single either bytes + * or a multibyte string as their arguments. So we convert + * our input wchar_t and the following look-ahead wchar_t to + * a multibyte string for processing by vis(3). + */ + + /* Read one multibyte character, store as wchar_t */ c = getwc(fp); if (c == WEOF && errno == EILSEQ) { + /* Error in multibyte data. Read one byte. */ c = (wint_t)getc(fp); cerr = 1; } while (c != WEOF) { + /* Clear multibyte input buffer. */ + memset(mbibuff, 0, sizeof(mbibuff)); + /* Read-ahead next multibyte character. */ rachar = getwc(fp); if (rachar == WEOF && errno == EILSEQ) { + /* Error in multibyte data. Read one byte. */ rachar = (wint_t)getc(fp); raerr = 1; } if (none) { + /* Handle -n flag. */ cp = buff; *cp++ = c; if (c == '\\') *cp++ = '\\'; *cp = '\0'; } else if (markeol && c == '\n') { + /* Handle -l flag. */ cp = buff; if ((eflags & VIS_NOSLASH) == 0) *cp++ = '\\'; @@ -190,19 +211,41 @@ process(FILE *fp) *cp++ = '\n'; *cp = '\0'; } else { + /* + * Convert character using vis(3) library. + * At this point we will process one character. + * But we must pass the vis(3) library this + * character plus the next one because the next + * one is used as a look-ahead to decide how to + * encode this one under certain circumstances. + * + * Since our characters may be multibyte, e.g., + * in the UTF-8 locale, we cannot use vis() and + * svis() which require byte input, so we must + * create a multibyte string and use strvisx(). + */ + /* Treat EOF as a NUL char. */ c1 = rachar; if (c1 == WEOF) c1 = L'\0'; + /* + * If we hit a multibyte conversion error above, + * insert byte directly into string buff because + * wctomb() will fail. Else convert wchar_t to + * multibyte using wctomb(). + */ if (cerr) { - *mbibuff = c; + *mbibuff = (char)c; mbilen = 1; } else mbilen = wctomb(mbibuff, c); + /* Same for look-ahead character. */ if (raerr) - mbibuff[mbilen] = c1; + mbibuff[mbilen] = (char)c1; else wctomb(mbibuff + mbilen, c1); - (void)strsvisx(buff, mbibuff, mbilen, eflags, extra); + /* Perform encoding on just first character. */ + (void)strsvisx(buff, mbibuff, 1, eflags, extra); } cp = buff;