Module Name:    src
Committed By:   riastradh
Date:           Sat Mar 29 19:40:42 UTC 2025

Modified Files:
        src/lib/libc/gen: ctype_.c tolower_.c toupper_.c
        src/tests/lib/libc/gen: t_ctype.c
Added Files:
        src/lib/libc/gen: ctype_guard.h

Log Message:
ctype(3): Put guard pages before the C ctype/tolower/toupper tables.

This also only affects machines where char is signed for now.  (But
maybe it would be worth doing unconditionally; users could still try
to pass in explicit `signed char' inputs.)

PR lib/58208: ctype(3) provides poor runtime feedback of abuse


To generate a diff of this commit:
cvs rdiff -u -r1.20 -r1.21 src/lib/libc/gen/ctype_.c
cvs rdiff -u -r0 -r1.1 src/lib/libc/gen/ctype_guard.h
cvs rdiff -u -r1.14 -r1.15 src/lib/libc/gen/tolower_.c \
    src/lib/libc/gen/toupper_.c
cvs rdiff -u -r1.9 -r1.10 src/tests/lib/libc/gen/t_ctype.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libc/gen/ctype_.c
diff -u src/lib/libc/gen/ctype_.c:1.20 src/lib/libc/gen/ctype_.c:1.21
--- src/lib/libc/gen/ctype_.c:1.20	Sat Apr 13 10:21:20 2013
+++ src/lib/libc/gen/ctype_.c	Sat Mar 29 19:40:42 2025
@@ -1,4 +1,4 @@
-/*	$NetBSD: ctype_.c,v 1.20 2013/04/13 10:21:20 joerg Exp $	*/
+/*	$NetBSD: ctype_.c,v 1.21 2025/03/29 19:40:42 riastradh Exp $	*/
 
 /*
  * Copyright (c) 1989 The Regents of the University of California.
@@ -39,12 +39,16 @@
 #if 0
 /*static char *sccsid = "from: @(#)ctype_.c	5.6 (Berkeley) 6/1/90";*/
 #else
-__RCSID("$NetBSD: ctype_.c,v 1.20 2013/04/13 10:21:20 joerg Exp $");
+__RCSID("$NetBSD: ctype_.c,v 1.21 2025/03/29 19:40:42 riastradh Exp $");
 #endif
 #endif /* LIBC_SCCS and not lint */
 
 #include <sys/ctype_bits.h>
+#include <sys/mman.h>
+
 #include <stdio.h>
+
+#include "ctype_guard.h"
 #include "ctype_local.h"
 
 #if EOF != -1
@@ -61,8 +65,12 @@ __RCSID("$NetBSD: ctype_.c,v 1.20 2013/0
 #define	_B	_COMPAT_B
 #define	_N	_COMPAT_N
 
-const unsigned char _C_compat_bsdctype[1 + _CTYPE_NUM_CHARS] = {
-	0,
+__ctype_table_guarded(_C_compat_bsdctype, _C_compat_bsdctype_guarded);
+__ctype_table
+static
+const unsigned char _C_compat_bsdctype_guarded[_C_COMPAT_BSDCTYPE_GUARD +
+    1 + _CTYPE_NUM_CHARS] = {
+	[_C_COMPAT_BSDCTYPE_GUARD] = 0,
 	_C,	_C,	_C,	_C,	_C,	_C,	_C,	_C,
 	_C,	_C|_S,	_C|_S,	_C|_S,	_C|_S,	_C|_S,	_C,	_C,
 	_C,	_C,	_C,	_C,	_C,	_C,	_C,	_C,
@@ -109,8 +117,11 @@ const unsigned char *_ctype_ = &_C_compa
 #define	_U	_CTYPE_U
 #define	_X	_CTYPE_X
 
-const unsigned short _C_ctype_tab_[1 + _CTYPE_NUM_CHARS] = {
-	0,
+__ctype_table_guarded(_C_ctype_tab_, _C_ctype_tab_guarded_);
+__ctype_table
+static const unsigned short _C_ctype_tab_guarded_[_C_CTYPE_TAB_GUARD +
+    1 + _CTYPE_NUM_CHARS] = {
+	[_C_CTYPE_TAB_GUARD] = 0,
 	_C,		_C,		_C,		_C,
 	_C,		_C,		_C,		_C,
 	_C,		_BL|_C|_S,	_C|_S,		_C|_S,
@@ -158,3 +169,18 @@ const unsigned short _C_ctype_tab_[1 + _
 #undef _X
 
 const unsigned short *_ctype_tab_ = &_C_ctype_tab_[0];
+
+#if _CTYPE_GUARD_PAGE
+__attribute__((constructor))
+static void
+_C_ctype_tab_guard_init(void)
+{
+
+#ifdef __BUILD_LEGACY
+	(void)mprotect(__UNCONST(_C_compat_bsdctype_guarded),
+	    _CTYPE_GUARD_SIZE, PROT_NONE);
+#endif	/* __BUILD_LEGACY */
+	(void)mprotect(__UNCONST(_C_ctype_tab_guarded_),
+	    _CTYPE_GUARD_SIZE, PROT_NONE);
+}
+#endif	/* _CTYPE_GUARD_PAGE */

Index: src/lib/libc/gen/tolower_.c
diff -u src/lib/libc/gen/tolower_.c:1.14 src/lib/libc/gen/tolower_.c:1.15
--- src/lib/libc/gen/tolower_.c:1.14	Sat Apr 13 10:16:27 2013
+++ src/lib/libc/gen/tolower_.c	Sat Mar 29 19:40:42 2025
@@ -1,4 +1,4 @@
-/*	$NetBSD: tolower_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $	*/
+/*	$NetBSD: tolower_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $	*/
 
 /*
  * Written by J.T. Conklin <j...@netbsd.org>.
@@ -7,19 +7,26 @@
 
 #include <sys/cdefs.h>
 #if defined(LIBC_RCS) && !defined(lint)
-__RCSID("$NetBSD: tolower_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $");
+__RCSID("$NetBSD: tolower_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $");
 #endif /* LIBC_RCS and not lint */
 
 #include <sys/ctype_bits.h>
+#include <sys/mman.h>
+
 #include <stdio.h>
+
+#include "ctype_guard.h"
 #include "ctype_local.h"
 
 #if EOF != -1
 #error "EOF != -1"
 #endif
 
-const short _C_tolower_tab_[1 + _CTYPE_NUM_CHARS] = {
-	EOF,
+__ctype_table_guarded(_C_tolower_tab_, _C_tolower_tab_guarded_);
+__ctype_table
+static const short _C_tolower_tab_guarded_[_C_TOLOWER_TAB_GUARD +
+    1 + _CTYPE_NUM_CHARS] = {
+	[_C_TOLOWER_TAB_GUARD] = EOF,
 	0x00,	0x01,	0x02,	0x03,	0x04,	0x05,	0x06,	0x07,
 	0x08,	0x09,	0x0a,	0x0b,	0x0c,	0x0d,	0x0e,	0x0f,
 	0x10,	0x11,	0x12,	0x13,	0x14,	0x15,	0x16,	0x17,
@@ -61,3 +68,14 @@ __weak_alias(_C_tolower_, _C_tolower_tab
 #endif
 
 const short *_tolower_tab_ = &_C_tolower_tab_[0];
+
+#if _CTYPE_GUARD_PAGE
+__attribute__((constructor))
+static void
+_C_tolower_tab_guard_init(void)
+{
+
+	(void)mprotect(__UNCONST(_C_tolower_tab_guarded_), _CTYPE_GUARD_SIZE,
+	    PROT_NONE);
+}
+#endif	/* _CTYPE_GUARD_PAGE */
Index: src/lib/libc/gen/toupper_.c
diff -u src/lib/libc/gen/toupper_.c:1.14 src/lib/libc/gen/toupper_.c:1.15
--- src/lib/libc/gen/toupper_.c:1.14	Sat Apr 13 10:16:27 2013
+++ src/lib/libc/gen/toupper_.c	Sat Mar 29 19:40:42 2025
@@ -1,4 +1,4 @@
-/*	$NetBSD: toupper_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $	*/
+/*	$NetBSD: toupper_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $	*/
 
 /*
  * Written by J.T. Conklin <j...@netbsd.org>.
@@ -7,19 +7,26 @@
 
 #include <sys/cdefs.h>
 #if defined(LIBC_RCS) && !defined(lint)
-__RCSID("$NetBSD: toupper_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $");
+__RCSID("$NetBSD: toupper_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $");
 #endif /* LIBC_RCS and not lint */
 
 #include <sys/ctype_bits.h>
+#include <sys/mman.h>
+
 #include <stdio.h>
+
+#include "ctype_guard.h"
 #include "ctype_local.h"
 
 #if EOF != -1
 #error "EOF != -1"
 #endif
 
-const short _C_toupper_tab_[1 + _CTYPE_NUM_CHARS] = {
-	EOF,
+__ctype_table_guarded(_C_toupper_tab_, _C_toupper_tab_guarded_);
+__ctype_table
+static const short _C_toupper_tab_guarded_[_C_TOUPPER_TAB_GUARD +
+    1 + _CTYPE_NUM_CHARS] = {
+	[_C_TOUPPER_TAB_GUARD] = EOF,
 	0x00,	0x01,	0x02,	0x03,	0x04,	0x05,	0x06,	0x07,
 	0x08,	0x09,	0x0a,	0x0b,	0x0c,	0x0d,	0x0e,	0x0f,
 	0x10,	0x11,	0x12,	0x13,	0x14,	0x15,	0x16,	0x17,
@@ -61,3 +68,14 @@ __weak_alias(_C_toupper_, _C_toupper_tab
 #endif
 
 const short *_toupper_tab_ = &_C_toupper_tab_[0];
+
+#if _CTYPE_GUARD_PAGE
+__attribute__((constructor))
+static void
+_C_toupper_tab_guard_init(void)
+{
+
+	(void)mprotect(__UNCONST(_C_toupper_tab_guarded_), _CTYPE_GUARD_SIZE,
+	    PROT_NONE);
+}
+#endif	/* _CTYPE_GUARD_PAGE */

Index: src/tests/lib/libc/gen/t_ctype.c
diff -u src/tests/lib/libc/gen/t_ctype.c:1.9 src/tests/lib/libc/gen/t_ctype.c:1.10
--- src/tests/lib/libc/gen/t_ctype.c:1.9	Sat Mar 29 01:06:36 2025
+++ src/tests/lib/libc/gen/t_ctype.c	Sat Mar 29 19:40:42 2025
@@ -1,4 +1,4 @@
-/*	$NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $	*/
+/*	$NetBSD: t_ctype.c,v 1.10 2025/03/29 19:40:42 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2025 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $");
+__RCSID("$NetBSD: t_ctype.c,v 1.10 2025/03/29 19:40:42 riastradh Exp $");
 
 #include <atf-c.h>
 #include <ctype.h>
@@ -112,13 +112,7 @@ test_abuse_in_locales(const char *name, 
 		ATF_REQUIRE_MSG(setlocale(LC_CTYPE, locales[i]) != NULL,
 		    "locales[i]=%s", locales[i]);
 		snprintf(buf, sizeof(buf), "[%s]%s", locales[i], name);
-		if (macro && strcmp(locales[i], "C") == 0) {
-			atf_tc_expect_fail("PR lib/58208: ctype(3)"
-			    " provides poor runtime feedback of abuse");
-		}
 		test_abuse(buf, ctypefn);
-		if (strcmp(locales[i], "C") == 0)
-			atf_tc_expect_pass();
 	}
 }
 
@@ -789,8 +783,6 @@ ATF_TC_BODY(abuse_##FN##_macro_c, tc)			
 		atf_tc_skip("runtime ctype(3) abuse is impossible with"	      \
 		    " unsigned char");					      \
 	}								      \
-	atf_tc_expect_fail("PR lib/58208:"				      \
-	    " ctype(3) provides poor runtime feedback of abuse");	      \
 	test_abuse(#FN, &FN##_wrapper);					      \
 }									      \
 ATF_TC(abuse_##FN##_function_c);					      \

Added files:

Index: src/lib/libc/gen/ctype_guard.h
diff -u /dev/null src/lib/libc/gen/ctype_guard.h:1.1
--- /dev/null	Sat Mar 29 19:40:42 2025
+++ src/lib/libc/gen/ctype_guard.h	Sat Mar 29 19:40:42 2025
@@ -0,0 +1,163 @@
+/*	$NetBSD: ctype_guard.h,v 1.1 2025/03/29 19:40:42 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2025 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_LIBC_CTYPE_GUARD_H_
+#define	_LIBC_CTYPE_GUARD_H_
+
+#include <sys/cdefs.h>
+
+#include "ctype_local.h"
+
+/*
+ * On platforms where char is signed, the ctype(3) functions have an
+ * insidious trap where logic like
+ *
+ *	char *s = ...;
+ *
+ *	if (isspace(*s)) ...
+ *
+ * is undefined behaviour whenever the string s has any bytes outside
+ * the 7-bit US-ASCII range.  The correct way to do this is to cast any
+ * char inputs to unsigned char first:
+ *
+ *	if (isspace((unsigned char)*s)) ...
+ *
+ * Unfortunately, the buggy idiom without an unsigned char cast is
+ * extremely prevalent in C code in the wild.  (See the ctype(3) man
+ * page for details on why this idiom is required and why isspace
+ * itself can't just be `fixed'.)
+ *
+ * The ctype(3) functions are implemented as macros that expand to
+ * simple table lookups -- partly for performance, and partly to
+ * deliberately trigger -Wchar-subscript compiler warnings for
+ * suspicious code like the above.
+ *
+ * To noisily detect the undefined behaviour without incurring overhead
+ * for the defined cases of the ctype(3) functions, we put a guard page
+ * before the tables in memory so that (at least for inputs other than
+ * 0xff, which coincides as signed char with EOF = -1) attempts to use
+ * the ctype(3) functions on invalid inputs will reliably -- and
+ * safely! -- crash with SIGSEGV instead of sometimes returning
+ * nondeterministic nonsense results.
+ *
+ * We do this by defining two symbols, one local and one global:
+ *
+ *	- The local (C `static') symbol _C_ctype_tab_guarded_ is
+ *	  page-aligned and has an extra page-sized buffer at the
+ *	  beginning for a guard page.  It is defined as an ordinary C
+ *	  array to keep the source code legible.
+ *
+ *	- The global symbol _C_ctype_tab_ starts one page past the
+ *	  start of _C_ctype_tab_guarded_.  It is defined, via some
+ *	  macros for convenience, by the assembler directives:
+ *
+ *		.type _C_ctype_tab_,@object
+ *		.global _C_ctype_tab_
+ *		_C_ctype_tab_ = _C_ctype_tab_guarded_ + PAGE_SIZE
+ *
+ *	  (with PAGE_SIZE being replaced by the numeric value from
+ *	  vmparam.h -- actually, we use MAX_PAGE_SIZE for the handful
+ *	  of architectures that support different page sizes on
+ *	  different machines).
+ *
+ * Then, at startup, we mprotect the guard page PROT_NONE.
+ *
+ * The global symbol _C_ctype_tab_ is an immutable part of the libc
+ * ABI, and is used by, e.g., libstdc++.so, so we have to define it
+ * compatibly -- this is why it must be defined as an ELF global symbol
+ * in its own right, and not simply handled inside libc.so by
+ * additional arithmetic relative to _C_ctype_tab_guarded_.
+ */
+
+#ifndef __CHAR_UNSIGNED__
+#  define	_CTYPE_GUARD_PAGE	1
+#else
+#  define	_CTYPE_GUARD_PAGE	0
+#endif
+
+#ifdef __arm__
+#  define	__ctype_table_object(name)				      \
+	__asm(".type " #name ",%object")
+#else
+#  define	__ctype_table_object(name)				      \
+	__asm(".type " #name ",@object")
+#endif
+
+#if _CTYPE_GUARD_PAGE
+
+#  include <machine/vmparam.h>
+
+/*
+ * _CTYPE_GUARD_SIZE must be a macro so it will work through ___STRING
+ * to produce a string for symbol arithmetic in __asm.
+ */
+#  ifdef MAX_PAGE_SIZE
+#    define	_CTYPE_GUARD_SIZE	MAX_PAGE_SIZE
+#  else
+#    define	_CTYPE_GUARD_SIZE	PAGE_SIZE
+#  endif
+
+enum {
+	_C_CTYPE_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_ctype_tab_[0]),
+#  ifdef __BUILD_LEGACY
+	_C_COMPAT_BSDCTYPE_GUARD =
+	    _CTYPE_GUARD_SIZE/sizeof(_C_compat_bsdctype[0]),
+#  endif
+	_C_TOLOWER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_tolower_tab_[0]),
+	_C_TOUPPER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_toupper_tab_[0]),
+};
+
+#  define	__ctype_table	__aligned(_CTYPE_GUARD_SIZE)
+#  define	__ctype_table_guarded(name, guard)			      \
+	__ctype_table_object(name);					      \
+	__asm(".global " _C_LABEL_STRING(#name));			      \
+	__asm(_C_LABEL_STRING(#name) " = " _C_LABEL_STRING(#guard) " + "      \
+	    ___STRING(_CTYPE_GUARD_SIZE))
+
+#else  /* !_CTYPE_GUARD_PAGE */
+
+#  define	_CTYPE_GUARD_SIZE	0
+
+enum {
+	_C_CTYPE_TAB_GUARD = 0,
+#  ifdef __BUILD_LEGACY
+	_C_COMPAT_BSDCTYPE_GUARD = 0,
+#  endif
+	_C_TOLOWER_TAB_GUARD = 0,
+	_C_TOUPPER_TAB_GUARD = 0,
+};
+
+/* Compiler can't see into __strong_alias, so mark it __used. */
+#  define	__ctype_table	__used
+#  define	__ctype_table_guarded(name, guard)			      \
+	__ctype_table_object(name);					      \
+	__strong_alias(name, guard)
+
+#endif	/* _CTYPE_GUARD_PAGE */
+
+#endif	/* _LIBC_CTYPE_GUARD_H_ */

Reply via email to