Module Name: src Committed By: christos Date: Wed Feb 13 22:24:48 UTC 2013
Modified Files: src/usr.bin/vis: vis.1 vis.c Log Message: Multi-byte docs and fixes for > 2 mblen charsets from J.R. Oldroyd To generate a diff of this commit: cvs rdiff -u -r1.14 -r1.15 src/usr.bin/vis/vis.1 cvs rdiff -u -r1.17 -r1.18 src/usr.bin/vis/vis.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/vis/vis.1 diff -u src/usr.bin/vis/vis.1:1.14 src/usr.bin/vis/vis.1:1.15 --- src/usr.bin/vis/vis.1:1.14 Tue Feb 10 18:06:31 2009 +++ src/usr.bin/vis/vis.1 Wed Feb 13 17:24:48 2013 @@ -1,4 +1,4 @@ -.\" $NetBSD: vis.1,v 1.14 2009/02/10 23:06:31 christos Exp $ +.\" $NetBSD: vis.1,v 1.15 2013/02/13 22:24:48 christos Exp $ .\" .\" Copyright (c) 1989, 1991, 1993, 1994 .\" The Regents of the University of California. All rights reserved. @@ -29,7 +29,7 @@ .\" .\" @(#)vis.1 8.4 (Berkeley) 4/19/94 .\" -.Dd February 10, 2009 +.Dd February 13, 2013 .Dt VIS 1 .Os .Sh NAME @@ -135,6 +135,28 @@ Tabs are also encoded. White space (space-tab-newline) is also encoded. .Pq Dv VIS_WHITE .El +.Sh MULTIBYTE CHARACTER SUPPORT +.Nm +supports multibyte character input. +The encoding conversion is influenced by the setting of the LC_CTYPE +environment variable which defines the set of characters that can be +copied without encoding. +.Pp +When 8-bit data is present in the input, LC_CTYPE must be set to +the correct locale or to the C locale. +If the locales of the data and the conversion are mismatched, multibyte +character recognition may fail and encoding will be performed byte-by-byte +instead. +The result of encoding using +.Nm +followed by decoding using +.Xr unvis 3 +is unlikely to return the same input data in this case. +.Sh ENVIRONMENT +.Bl -tag -width ".Ev LC_CTYPE" +.It Ev LC_CTYPE +Specify the locale of the input data. +Set to C if the input data locale is unknown. .Sh SEE ALSO .Xr unvis 1 , .Xr svis 3 , @@ -144,3 +166,7 @@ The .Nm command appears in .Bx 4.4 . +Multibyte character support was added in +.Nx 6.1 +and +.Fx 9.2 . 
Index: src/usr.bin/vis/vis.c diff -u src/usr.bin/vis/vis.c:1.17 src/usr.bin/vis/vis.c:1.18 --- src/usr.bin/vis/vis.c:1.17 Wed Feb 13 08:58:44 2013 +++ src/usr.bin/vis/vis.c Wed Feb 13 17:24:48 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: vis.c,v 1.17 2013/02/13 13:58:44 christos Exp $ */ +/* $NetBSD: vis.c,v 1.18 2013/02/13 22:24:48 christos Exp $ */ /*- * Copyright (c) 1989, 1993 @@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1989, 19 #if 0 static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 6/6/93"; #endif -__RCSID("$NetBSD: vis.c,v 1.17 2013/02/13 13:58:44 christos Exp $"); +__RCSID("$NetBSD: vis.c,v 1.18 2013/02/13 22:24:48 christos Exp $"); #endif /* not lint */ #include <stdio.h> @@ -47,6 +47,7 @@ __RCSID("$NetBSD: vis.c,v 1.17 2013/02/1 #include <stdlib.h> #include <errno.h> #include <wchar.h> +#include <limits.h> #include <unistd.h> #include <err.h> #include <vis.h> @@ -159,8 +160,10 @@ process(FILE *fp) static int col = 0; static char nul[] = "\0"; char *cp = nul + 1; /* so *(cp-1) starts out != '\n' */ - wint_t c, rachar; - char buff[5]; + wint_t c, c1, rachar; + wchar_t ibuff[3]; /* room for c + rachar + NUL */ + char mbibuff[13]; /* ((sizeof(ibuff) - 1) * MB_LEN_MAX) + NUL */ + char buff[5]; /* max vis-encoding length for one char + NUL */ c = getwc(fp); if (c == WEOF && errno == EILSEQ) @@ -182,10 +185,15 @@ process(FILE *fp) *cp++ = '$'; *cp++ = '\n'; *cp = '\0'; - } else if (extra) - (void)svis(buff, c, eflags, rachar, extra); - else - (void)vis(buff, c, eflags, rachar); + } else { + c1 = rachar; + if (c1 == WEOF) + c1 = L'\0'; + swprintf(ibuff, 3, L"%lc%lc", c, c1); + wcstombs(mbibuff, ibuff, + (wcslen(ibuff) * MB_LEN_MAX) + 1); + (void) strsvisx(buff, mbibuff, 1, eflags, extra); + } cp = buff; if (fold) {