Module Name:    src
Committed By:   christos
Date:           Wed Feb 13 22:24:48 UTC 2013

Modified Files:
        src/usr.bin/vis: vis.1 vis.c

Log Message:
Multi-byte docs and fixes for > 2 mblen charsets from J.R. Oldroyd


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/usr.bin/vis/vis.1
cvs rdiff -u -r1.17 -r1.18 src/usr.bin/vis/vis.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/vis/vis.1
diff -u src/usr.bin/vis/vis.1:1.14 src/usr.bin/vis/vis.1:1.15
--- src/usr.bin/vis/vis.1:1.14	Tue Feb 10 18:06:31 2009
+++ src/usr.bin/vis/vis.1	Wed Feb 13 17:24:48 2013
@@ -1,4 +1,4 @@
-.\"	$NetBSD: vis.1,v 1.14 2009/02/10 23:06:31 christos Exp $
+.\"	$NetBSD: vis.1,v 1.15 2013/02/13 22:24:48 christos Exp $
 .\"
 .\" Copyright (c) 1989, 1991, 1993, 1994
 .\"	The Regents of the University of California.  All rights reserved.
@@ -29,7 +29,7 @@
 .\"
 .\"     @(#)vis.1	8.4 (Berkeley) 4/19/94
 .\"
-.Dd February 10, 2009
+.Dd February 13, 2013
 .Dt VIS 1
 .Os
 .Sh NAME
@@ -135,6 +135,28 @@ Tabs are also encoded.
 White space (space-tab-newline) is also encoded.
 .Pq Dv VIS_WHITE
 .El
+.Sh MULTIBYTE CHARACTER SUPPORT
+.Nm
+supports multibyte character input.
+The encoding conversion is influenced by the setting of the LC_CTYPE
+environment variable which defines the set of characters that can be
+copied without encoding.
+.Pp
+When 8-bit data is present in the input, LC_CTYPE must be set to
+the correct locale or to the C locale.
+If the locales of the data and the conversion are mismatched, multibyte
+character recognition may fail and encoding will be performed byte-by-byte
+instead.
+The result of encoding using
+.Nm
+followed by decoding using
+.Xr unvis 3
+is unlikely to return the same input data in this case.
+.Sh ENVIRONMENT
+.Bl -tag -width ".Ev LC_CTYPE"
+.It Ev LC_CTYPE
+Specify the locale of the input data.
+Set to C if the input data locale is unknown.
 .Sh SEE ALSO
 .Xr unvis 1 ,
 .Xr svis 3 ,
@@ -144,3 +166,7 @@ The
 .Nm
 command appears in
 .Bx 4.4 .
+Multibyte character support was added in
+.Nx 6.1
+and
+.Fx 9.2 .

Index: src/usr.bin/vis/vis.c
diff -u src/usr.bin/vis/vis.c:1.17 src/usr.bin/vis/vis.c:1.18
--- src/usr.bin/vis/vis.c:1.17	Wed Feb 13 08:58:44 2013
+++ src/usr.bin/vis/vis.c	Wed Feb 13 17:24:48 2013
@@ -1,4 +1,4 @@
-/*	$NetBSD: vis.c,v 1.17 2013/02/13 13:58:44 christos Exp $	*/
+/*	$NetBSD: vis.c,v 1.18 2013/02/13 22:24:48 christos Exp $	*/
 
 /*-
  * Copyright (c) 1989, 1993
@@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1989, 19
 #if 0
 static char sccsid[] = "@(#)vis.c	8.1 (Berkeley) 6/6/93";
 #endif
-__RCSID("$NetBSD: vis.c,v 1.17 2013/02/13 13:58:44 christos Exp $");
+__RCSID("$NetBSD: vis.c,v 1.18 2013/02/13 22:24:48 christos Exp $");
 #endif /* not lint */
 
 #include <stdio.h>
@@ -47,6 +47,7 @@ __RCSID("$NetBSD: vis.c,v 1.17 2013/02/1
 #include <stdlib.h>
 #include <errno.h>
 #include <wchar.h>
+#include <limits.h>
 #include <unistd.h>
 #include <err.h>
 #include <vis.h>
@@ -159,8 +160,10 @@ process(FILE *fp)
 	static int col = 0;
 	static char nul[] = "\0";
 	char *cp = nul + 1;	/* so *(cp-1) starts out != '\n' */
-	wint_t c, rachar; 
-	char buff[5];
+	wint_t c, c1, rachar; 
+	wchar_t ibuff[3]; /* room for c + rachar + NUL */
+	char mbibuff[13]; /* ((sizeof(ibuff) - 1) * MB_LEN_MAX) + NUL */
+	char buff[5]; /* max vis-encoding length for one char + NUL */
 	
 	c = getwc(fp);
 	if (c == WEOF && errno == EILSEQ)
@@ -182,10 +185,15 @@ process(FILE *fp)
 			*cp++ = '$';
 			*cp++ = '\n';
 			*cp = '\0';
-		} else if (extra)
-			(void)svis(buff, c, eflags, rachar, extra);
-		else
-			(void)vis(buff, c, eflags, rachar);
+		} else {
+			c1 = rachar;
+			if (c1 == WEOF)
+				c1 = L'\0';
+			swprintf(ibuff, 3, L"%lc%lc", c, c1);
+			wcstombs(mbibuff, ibuff,
+			    (wcslen(ibuff) * MB_LEN_MAX) + 1);
+			(void) strsvisx(buff, mbibuff, 1, eflags, extra);
+		}
 
 		cp = buff;
 		if (fold) {

Reply via email to