Module Name:    src
Committed By:   martin
Date:           Sat Dec  9 13:09:04 UTC 2023

Modified Files:
        src/lib/libc/gen [netbsd-9]: vis.c
        src/tests/lib/libc/gen [netbsd-9]: t_vis.c

Log Message:
Pull up following revision(s) (requested by riastradh in ticket #1774):

        lib/libc/gen/vis.c: revision 1.75-1.86
        tests/lib/libc/gen/t_vis.c: revision 1.10-1.14

PR 56260: fix out-of-bounds stack read.

vis(3): Avoid nonportable MIN in portable code.

vis(3) tests: Add xfail test for encoding overflow.

>From Kyle Evans <kevans%FreeBSD.org@localhost>.
PR lib/57573

vis(3) tests: Expand tests and diagnostic outputs on failure.
PR lib/57573

vis(3) tests: Test another overflow edge case.
Related to PR lib/57573.

vis(3): Make maxolen unsigned size_t, not ssize_t.
It is initialized once either to *dlen, which is unsigned size_t, or
to wcslen(start) * MB_MAX_LEN + 1, and wcslen returns unsigned size_t
too.  So there appears to have never been any reason for this to be
signed.
Part of PR lib/57573.

vis(3): Make mbslength unsigned.
Sprinkle assertions and comments justifying the proposition that it
would never go negative if signed.
Obviates need to worry about mblength > SSIZE_MAX.
Prompted by PR lib/57573.

vis(3): Avoid arithmetic overflow before calloc(3).
Prompted by PR lib/57573.

vis(3): Call wcslen(start) only once.
It had better not change between these two times!
Prompted by PR lib/57573.

vis(3): Avoid potential arithmetic overflow in maxolen.
Can't easily prove that this overflow is impossible, so let's add a
check.
Prompted by PR lib/57573.

vis(3): Fix main part of PR lib/57573.
>From Kyle Evans <kevans%FreeBSD.org@localhost>.

vis(3): Fix one more buffer overrun in an edge case.
PR lib/57573

vis(3): Sort includes.  No functional change intended.
Prompted by PR lib/57573.

vis(3): Need <stdint.h> for SIZE_MAX, per C standard.
>From Kyle Evans <kevans%FreeBSD.org@localhost>.
Followup to PR lib/57573.

vis(3): Per KNF, sys/param.h comes before sys/types.h.
Which is nice because that's also lexicographic.


To generate a diff of this commit:
cvs rdiff -u -r1.74 -r1.74.6.1 src/lib/libc/gen/vis.c
cvs rdiff -u -r1.9 -r1.9.16.1 src/tests/lib/libc/gen/t_vis.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libc/gen/vis.c
diff -u src/lib/libc/gen/vis.c:1.74 src/lib/libc/gen/vis.c:1.74.6.1
--- src/lib/libc/gen/vis.c:1.74	Mon Nov 27 16:37:21 2017
+++ src/lib/libc/gen/vis.c	Sat Dec  9 13:09:03 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: vis.c,v 1.74 2017/11/27 16:37:21 christos Exp $	*/
+/*	$NetBSD: vis.c,v 1.74.6.1 2023/12/09 13:09:03 martin Exp $	*/
 
 /*-
  * Copyright (c) 1989, 1993
@@ -57,7 +57,7 @@
 
 #include <sys/cdefs.h>
 #if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: vis.c,v 1.74 2017/11/27 16:37:21 christos Exp $");
+__RCSID("$NetBSD: vis.c,v 1.74.6.1 2023/12/09 13:09:03 martin Exp $");
 #endif /* LIBC_SCCS and not lint */
 #ifdef __FBSDID
 __FBSDID("$FreeBSD$");
@@ -65,13 +65,15 @@ __FBSDID("$FreeBSD$");
 #endif
 
 #include "namespace.h"
-#include <sys/types.h>
+
 #include <sys/param.h>
+#include <sys/types.h>
 
 #include <assert.h>
-#include <vis.h>
 #include <errno.h>
+#include <stdint.h>
 #include <stdlib.h>
+#include <vis.h>
 #include <wchar.h>
 #include <wctype.h>
 
@@ -353,12 +355,15 @@ makeextralist(int flags, const char *src
 	wchar_t *dst, *d;
 	size_t len;
 	const wchar_t *s;
+	mbstate_t mbstate;
 
 	len = strlen(src);
 	if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
 		return NULL;
 
-	if ((flags & VIS_NOLOCALE) || mbstowcs(dst, src, len) == (size_t)-1) {
+	memset(&mbstate, 0, sizeof(mbstate));
+	if ((flags & VIS_NOLOCALE)
+	    || mbsrtowcs(dst, &src, len, &mbstate) == (size_t)-1) {
 		size_t i;
 		for (i = 0; i < len; i++)
 			dst[i] = (wchar_t)(u_char)src[i];
@@ -393,20 +398,23 @@ static int
 istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength,
     int flags, const char *mbextra, int *cerr_ptr)
 {
+	char mbbuf[MB_LEN_MAX];
 	wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
 	size_t len, olen;
 	uint64_t bmsk, wmsk;
 	wint_t c;
 	visfun_t f;
 	int clen = 0, cerr, error = -1, i, shft;
-	char *mbdst, *mdst;
-	ssize_t mbslength, maxolen;
+	char *mbdst, *mbwrite, *mdst;
+	size_t mbslength;
+	size_t maxolen;
+	mbstate_t mbstate;
 
 	_DIAGASSERT(mbdstp != NULL);
 	_DIAGASSERT(mbsrc != NULL || mblength == 0);
 	_DIAGASSERT(mbextra != NULL);
 
-	mbslength = (ssize_t)mblength;
+	mbslength = mblength;
 	/*
 	 * When inputing a single character, must also read in the
 	 * next character for nextc, the look-ahead character.
@@ -427,6 +435,14 @@ istrsenvisx(char **mbdstp, size_t *dlen,
 	 * return to the caller.
 	 */
 
+	/*
+	 * Guarantee the arithmetic on input to calloc won't overflow.
+	 */
+	if (mbslength > (SIZE_MAX - 1)/16) {
+		errno = ENOMEM;
+		return -1;
+	}
+
 	/* Allocate space for the wide char strings */
 	psrc = pdst = extra = NULL;
 	mdst = NULL;
@@ -458,10 +474,18 @@ istrsenvisx(char **mbdstp, size_t *dlen,
 	 * stop at NULs because we may be processing a block of data
 	 * that includes NULs.
 	 */
+	memset(&mbstate, 0, sizeof(mbstate));
 	while (mbslength > 0) {
 		/* Convert one multibyte character to wchar_t. */
-		if (!cerr)
-			clen = mbtowc(src, mbsrc, MB_LEN_MAX);
+		if (!cerr) {
+			clen = mbrtowc(src, mbsrc,
+			    (mbslength < MB_LEN_MAX
+				? mbslength
+				: MB_LEN_MAX),
+			    &mbstate);
+			assert(clen < 0 || (size_t)clen <= mbslength);
+			assert(clen <= MB_LEN_MAX);
+		}
 		if (cerr || clen < 0) {
 			/* Conversion error, process as a byte instead. */
 			*src = (wint_t)(u_char)*mbsrc;
@@ -475,6 +499,20 @@ istrsenvisx(char **mbdstp, size_t *dlen,
 			 */
 			clen = 1;
 		}
+		/*
+		 * Let n := MIN(mbslength, MB_LEN_MAX).  We have:
+		 *
+		 *	mbslength >= 1
+		 *	mbrtowc(..., n, &mbstate) <= n,
+		 *		by the contract of mbrtowc
+		 *
+		 *  clen is either
+		 *  (a) mbrtowc(..., n, &mbstate), in which case
+		 *      clen <= n <= mbslength; or
+		 *  (b) 1, in which case clen = 1 <= mbslength.
+		 */
+		assert(clen > 0);
+		assert((size_t)clen <= mbslength);
 		/* Advance buffer character pointer. */
 		src++;
 		/* Advance input pointer by number of bytes read. */
@@ -532,11 +570,49 @@ istrsenvisx(char **mbdstp, size_t *dlen,
 	 * output byte-by-byte here.  Else use wctomb().
 	 */
 	len = wcslen(start);
-	maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1);
+	if (dlen) {
+		maxolen = *dlen;
+		if (maxolen == 0) {
+			errno = ENOSPC;
+			goto out;
+		}
+	} else {
+		if (len > (SIZE_MAX - 1)/MB_LEN_MAX) {
+			errno = ENOSPC;
+			goto out;
+		}
+		maxolen = len*MB_LEN_MAX + 1;
+	}
 	olen = 0;
+	memset(&mbstate, 0, sizeof(mbstate));
 	for (dst = start; len > 0; len--) {
-		if (!cerr)
-			clen = wctomb(mbdst, *dst);
+		if (!cerr) {
+			/*
+			 * If we have at least MB_CUR_MAX bytes in the buffer,
+			 * we'll just do the conversion in-place into mbdst.  We
+			 * need to be a little more conservative when we get to
+			 * the end of the buffer, as we may not have MB_CUR_MAX
+			 * bytes but we may not need it.
+			 */
+			if (maxolen - olen > MB_CUR_MAX)
+				mbwrite = mbdst;
+			else
+				mbwrite = mbbuf;
+			clen = wcrtomb(mbwrite, *dst, &mbstate);
+			if (clen > 0 && mbwrite != mbdst) {
+				/*
+				 * Don't break past our output limit, noting
+				 * that maxolen includes the nul terminator so
+				 * we can't write past maxolen - 1 here.
+				 */
+				if (olen + clen >= maxolen) {
+					errno = ENOSPC;
+					goto out;
+				}
+
+				memcpy(mbdst, mbwrite, clen);
+			}
+		}
 		if (cerr || clen < 0) {
 			/*
 			 * Conversion error, process as a byte(s) instead.
@@ -551,16 +627,27 @@ istrsenvisx(char **mbdstp, size_t *dlen,
 				shft = i * NBBY;
 				bmsk = (uint64_t)0xffLL << shft;
 				wmsk |= bmsk;
-				if ((*dst & wmsk) || i == 0)
+				if ((*dst & wmsk) || i == 0) {
+					if (olen + clen + 1 >= maxolen) {
+						errno = ENOSPC;
+						goto out;
+					}
+
 					mbdst[clen++] = (char)(
 					    (uint64_t)(*dst & bmsk) >>
 					    shft);
+				}
 			}
 			cerr = 1;
 		}
-		/* If this character would exceed our output limit, stop. */
-		if (olen + clen > (size_t)maxolen)
-			break;
+
+		/*
+		 * We'll be dereferencing mbdst[clen] after this to write the
+		 * nul terminator; the above paths should have checked for a
+		 * possible overflow already.
+		 */
+		assert(olen + clen < maxolen);
+
 		/* Advance output pointer by number of bytes written. */
 		mbdst += clen;
 		/* Advance buffer character pointer. */
@@ -570,6 +657,7 @@ istrsenvisx(char **mbdstp, size_t *dlen,
 	}
 
 	/* Terminate the output string. */
+	assert(olen < maxolen);
 	*mbdst = '\0';
 
 	if (flags & VIS_NOLOCALE) {

Index: src/tests/lib/libc/gen/t_vis.c
diff -u src/tests/lib/libc/gen/t_vis.c:1.9 src/tests/lib/libc/gen/t_vis.c:1.9.16.1
--- src/tests/lib/libc/gen/t_vis.c:1.9	Tue Jan 10 15:16:57 2017
+++ src/tests/lib/libc/gen/t_vis.c	Sat Dec  9 13:09:04 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: t_vis.c,v 1.9 2017/01/10 15:16:57 christos Exp $	*/
+/*	$NetBSD: t_vis.c,v 1.9.16.1 2023/12/09 13:09:04 martin Exp $	*/
 
 /*-
  * Copyright (c) 2002 The NetBSD Foundation, Inc.
@@ -116,6 +116,23 @@ ATF_TC_BODY(strvis_empty, tc)
 	ATF_REQUIRE(dst[0] == '\0' && dst[1] == 'a');
 }
 
+ATF_TC(strnvis_empty_empty);
+ATF_TC_HEAD(strnvis_empty_empty, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "Test strnvis(3) with empty source and destination");
+}
+
+ATF_TC_BODY(strnvis_empty_empty, tc)
+{
+	char dst[] = "fail";
+	int n;
+
+	n = strnvis(dst, 0, "", VIS_SAFE);
+	ATF_CHECK(memcmp(dst, "fail", sizeof(dst)) == 0);
+	ATF_CHECK_EQ_MSG(n, -1, "n=%d", n);
+}
+
 ATF_TC(strunvis_hex);
 ATF_TC_HEAD(strunvis_hex, tc)
 {
@@ -175,16 +192,95 @@ ATF_TC_BODY(strvis_locale, tc)
 }
 #endif /* VIS_NOLOCALE */
 
+#define	STRVIS_OVERFLOW_MARKER	((char)0xff)	/* Arbitrary */
+
+#ifdef VIS_NOLOCALE
+ATF_TC(strvis_overflow_mb);
+ATF_TC_HEAD(strvis_overflow_mb, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test strvis(3) multi-byte overflow");
+}
+
+ATF_TC_BODY(strvis_overflow_mb, tc)
+{
+	const char src[] = "\xf0\x9f\xa5\x91";
+	/* Extra byte to detect overflow */
+	char dst[sizeof(src) + 1];
+	unsigned i;
+	int n;
+
+	setlocale(LC_CTYPE, "en_US.UTF-8");
+
+	for (i = 0; i < sizeof(dst) - 1; i++) {
+		memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+		n = strnvis(dst, i, src, VIS_SAFE);
+		ATF_CHECK_EQ_MSG(dst[i], STRVIS_OVERFLOW_MARKER,
+		    "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx]"
+		    " STRVIS_OVERFLOW_MARKER=%02hhx",
+		    i, dst[0], dst[1], dst[2], dst[3], dst[4],
+		    STRVIS_OVERFLOW_MARKER);
+		ATF_CHECK_EQ_MSG(n, -1, "[%u] n=%d", i, n);
+	}
+
+	memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+	n = strnvis(dst, sizeof(dst) - 1, src, VIS_SAFE);
+	ATF_CHECK_EQ_MSG(dst[sizeof(dst) - 1], STRVIS_OVERFLOW_MARKER,
+	    "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx]"
+	    " STRVIS_OVERFLOW_MARKER=%02hhx",
+	    i, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
+	    STRVIS_OVERFLOW_MARKER);
+	ATF_CHECK_EQ_MSG(n, (int)sizeof(dst) - 2, "n=%d", n);
+}
+#endif
+
+ATF_TC(strvis_overflow_c);
+ATF_TC_HEAD(strvis_overflow_c, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test strvis(3) C locale overflow");
+}
+
+ATF_TC_BODY(strvis_overflow_c, tc)
+{
+	const char src[] = "AAAA";
+	/* Extra byte to detect overflow */
+	char dst[sizeof(src) + 1];
+	unsigned i;
+	int n;
+
+	for (i = 0; i < sizeof(dst) - 1; i++) {
+		memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+		n = strnvis(dst, i, src, VIS_SAFE | VIS_NOLOCALE);
+		ATF_CHECK_EQ_MSG(dst[i], STRVIS_OVERFLOW_MARKER,
+		    "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx]"
+		    " STRVIS_OVERFLOW_MARKER=%02hhx",
+		    i, dst[0], dst[1], dst[2], dst[3], dst[4],
+		    STRVIS_OVERFLOW_MARKER);
+		ATF_CHECK_EQ_MSG(n, -1, "[%u] n=%d", i, n);
+	}
+
+	memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+	n = strnvis(dst, sizeof(dst) - 1, src, VIS_SAFE | VIS_NOLOCALE);
+	ATF_CHECK_EQ_MSG(dst[sizeof(dst) - 1], STRVIS_OVERFLOW_MARKER,
+	    "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx]"
+	    " STRVIS_OVERFLOW_MARKER=%02hhx",
+	    i, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
+	    STRVIS_OVERFLOW_MARKER);
+	ATF_CHECK_EQ_MSG(n, (int)sizeof(dst) - 2, "n=%d", n);
+}
+
 ATF_TP_ADD_TCS(tp)
 {
 
 	ATF_TP_ADD_TC(tp, strvis_basic);
 	ATF_TP_ADD_TC(tp, strvis_null);
 	ATF_TP_ADD_TC(tp, strvis_empty);
+	ATF_TP_ADD_TC(tp, strnvis_empty_empty);
 	ATF_TP_ADD_TC(tp, strunvis_hex);
 #ifdef VIS_NOLOCALE
 	ATF_TP_ADD_TC(tp, strvis_locale);
+	ATF_TP_ADD_TC(tp, strvis_overflow_mb);
 #endif /* VIS_NOLOCALE */
+	ATF_TP_ADD_TC(tp, strvis_overflow_c);
 
 	return atf_no_error();
 }

Reply via email to