Module Name: src Committed By: riastradh Date: Sat Mar 29 19:40:42 UTC 2025
Modified Files: src/lib/libc/gen: ctype_.c tolower_.c toupper_.c src/tests/lib/libc/gen: t_ctype.c Added Files: src/lib/libc/gen: ctype_guard.h Log Message: ctype(3): Put guard pages before the C ctype/tolower/toupper tables. This also only affects machines where char is signed for now. (But maybe it would be worth doing unconditionally; users could still try to pass in explicit `signed char' inputs.) PR lib/58208: ctype(3) provides poor runtime feedback of abuse To generate a diff of this commit: cvs rdiff -u -r1.20 -r1.21 src/lib/libc/gen/ctype_.c cvs rdiff -u -r0 -r1.1 src/lib/libc/gen/ctype_guard.h cvs rdiff -u -r1.14 -r1.15 src/lib/libc/gen/tolower_.c \ src/lib/libc/gen/toupper_.c cvs rdiff -u -r1.9 -r1.10 src/tests/lib/libc/gen/t_ctype.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/lib/libc/gen/ctype_.c diff -u src/lib/libc/gen/ctype_.c:1.20 src/lib/libc/gen/ctype_.c:1.21 --- src/lib/libc/gen/ctype_.c:1.20 Sat Apr 13 10:21:20 2013 +++ src/lib/libc/gen/ctype_.c Sat Mar 29 19:40:42 2025 @@ -1,4 +1,4 @@ -/* $NetBSD: ctype_.c,v 1.20 2013/04/13 10:21:20 joerg Exp $ */ +/* $NetBSD: ctype_.c,v 1.21 2025/03/29 19:40:42 riastradh Exp $ */ /* * Copyright (c) 1989 The Regents of the University of California. @@ -39,12 +39,16 @@ #if 0 /*static char *sccsid = "from: @(#)ctype_.c 5.6 (Berkeley) 6/1/90";*/ #else -__RCSID("$NetBSD: ctype_.c,v 1.20 2013/04/13 10:21:20 joerg Exp $"); +__RCSID("$NetBSD: ctype_.c,v 1.21 2025/03/29 19:40:42 riastradh Exp $"); #endif #endif /* LIBC_SCCS and not lint */ #include <sys/ctype_bits.h> +#include <sys/mman.h> + #include <stdio.h> + +#include "ctype_guard.h" #include "ctype_local.h" #if EOF != -1 @@ -61,8 +65,12 @@ __RCSID("$NetBSD: ctype_.c,v 1.20 2013/0 #define _B _COMPAT_B #define _N _COMPAT_N -const unsigned char _C_compat_bsdctype[1 + _CTYPE_NUM_CHARS] = { - 0, +__ctype_table_guarded(_C_compat_bsdctype, _C_compat_bsdctype_guarded); +__ctype_table +static +const unsigned char _C_compat_bsdctype_guarded[_C_COMPAT_BSDCTYPE_GUARD + + 1 + _CTYPE_NUM_CHARS] = { + [_C_COMPAT_BSDCTYPE_GUARD] = 0, _C, _C, _C, _C, _C, _C, _C, _C, _C, _C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C, _C, _C, _C, _C, _C, _C, _C, _C, @@ -109,8 +117,11 @@ const unsigned char *_ctype_ = &_C_compa #define _U _CTYPE_U #define _X _CTYPE_X -const unsigned short _C_ctype_tab_[1 + _CTYPE_NUM_CHARS] = { - 0, +__ctype_table_guarded(_C_ctype_tab_, _C_ctype_tab_guarded_); +__ctype_table +static const unsigned short _C_ctype_tab_guarded_[_C_CTYPE_TAB_GUARD + + 1 + _CTYPE_NUM_CHARS] = { + [_C_CTYPE_TAB_GUARD] = 0, _C, _C, _C, _C, _C, _C, _C, _C, _C, _BL|_C|_S, _C|_S, _C|_S, @@ -158,3 +169,18 @@ const unsigned short _C_ctype_tab_[1 + _ #undef _X const unsigned short *_ctype_tab_ = &_C_ctype_tab_[0]; + +#if _CTYPE_GUARD_PAGE +__attribute__((constructor)) +static void +_C_ctype_tab_guard_init(void) +{ + +#ifdef __BUILD_LEGACY + (void)mprotect(__UNCONST(_C_compat_bsdctype_guarded), + _CTYPE_GUARD_SIZE, PROT_NONE); +#endif /* __BUILD_LEGACY */ + (void)mprotect(__UNCONST(_C_ctype_tab_guarded_), + _CTYPE_GUARD_SIZE, PROT_NONE); +} +#endif /* _CTYPE_GUARD_PAGE */ Index: src/lib/libc/gen/tolower_.c diff -u src/lib/libc/gen/tolower_.c:1.14 src/lib/libc/gen/tolower_.c:1.15 --- src/lib/libc/gen/tolower_.c:1.14 Sat Apr 13 10:16:27 2013 +++ src/lib/libc/gen/tolower_.c Sat Mar 29 19:40:42 2025 @@ -1,4 +1,4 @@ -/* $NetBSD: tolower_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $ */ +/* $NetBSD: tolower_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $ */ /* * Written by J.T. Conklin <j...@netbsd.org>. @@ -7,19 +7,26 @@ #include <sys/cdefs.h> #if defined(LIBC_RCS) && !defined(lint) -__RCSID("$NetBSD: tolower_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $"); +__RCSID("$NetBSD: tolower_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $"); #endif /* LIBC_RCS and not lint */ #include <sys/ctype_bits.h> +#include <sys/mman.h> + #include <stdio.h> + +#include "ctype_guard.h" #include "ctype_local.h" #if EOF != -1 #error "EOF != -1" #endif -const short _C_tolower_tab_[1 + _CTYPE_NUM_CHARS] = { - EOF, +__ctype_table_guarded(_C_tolower_tab_, _C_tolower_tab_guarded_); +__ctype_table +static const short _C_tolower_tab_guarded_[_C_TOLOWER_TAB_GUARD + + 1 + _CTYPE_NUM_CHARS] = { + [_C_TOLOWER_TAB_GUARD] = EOF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, @@ -61,3 +68,14 @@ __weak_alias(_C_tolower_, _C_tolower_tab #endif const short *_tolower_tab_ = &_C_tolower_tab_[0]; + +#if _CTYPE_GUARD_PAGE +__attribute__((constructor)) +static void +_C_tolower_tab_guard_init(void) +{ + + (void)mprotect(__UNCONST(_C_tolower_tab_guarded_), _CTYPE_GUARD_SIZE, + PROT_NONE); +} +#endif /* _CTYPE_GUARD_PAGE */ Index: src/lib/libc/gen/toupper_.c diff -u src/lib/libc/gen/toupper_.c:1.14 src/lib/libc/gen/toupper_.c:1.15 --- src/lib/libc/gen/toupper_.c:1.14 Sat Apr 13 10:16:27 2013 +++ src/lib/libc/gen/toupper_.c Sat Mar 29 19:40:42 2025 @@ -1,4 +1,4 @@ -/* $NetBSD: toupper_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $ */ +/* $NetBSD: toupper_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $ */ /* * Written by J.T. Conklin <j...@netbsd.org>. @@ -7,19 +7,26 @@ #include <sys/cdefs.h> #if defined(LIBC_RCS) && !defined(lint) -__RCSID("$NetBSD: toupper_.c,v 1.14 2013/04/13 10:16:27 joerg Exp $"); +__RCSID("$NetBSD: toupper_.c,v 1.15 2025/03/29 19:40:42 riastradh Exp $"); #endif /* LIBC_RCS and not lint */ #include <sys/ctype_bits.h> +#include <sys/mman.h> + #include <stdio.h> + +#include "ctype_guard.h" #include "ctype_local.h" #if EOF != -1 #error "EOF != -1" #endif -const short _C_toupper_tab_[1 + _CTYPE_NUM_CHARS] = { - EOF, +__ctype_table_guarded(_C_toupper_tab_, _C_toupper_tab_guarded_); +__ctype_table +static const short _C_toupper_tab_guarded_[_C_TOUPPER_TAB_GUARD + + 1 + _CTYPE_NUM_CHARS] = { + [_C_TOUPPER_TAB_GUARD] = EOF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, @@ -61,3 +68,14 @@ __weak_alias(_C_toupper_, _C_toupper_tab #endif const short *_toupper_tab_ = &_C_toupper_tab_[0]; + +#if _CTYPE_GUARD_PAGE +__attribute__((constructor)) +static void +_C_toupper_tab_guard_init(void) +{ + + (void)mprotect(__UNCONST(_C_toupper_tab_guarded_), _CTYPE_GUARD_SIZE, + PROT_NONE); +} +#endif /* _CTYPE_GUARD_PAGE */ Index: src/tests/lib/libc/gen/t_ctype.c diff -u src/tests/lib/libc/gen/t_ctype.c:1.9 src/tests/lib/libc/gen/t_ctype.c:1.10 --- src/tests/lib/libc/gen/t_ctype.c:1.9 Sat Mar 29 01:06:36 2025 +++ src/tests/lib/libc/gen/t_ctype.c Sat Mar 29 19:40:42 2025 @@ -1,4 +1,4 @@ -/* $NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $ */ +/* $NetBSD: t_ctype.c,v 1.10 2025/03/29 19:40:42 riastradh Exp $ */ /*- * Copyright (c) 2025 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: t_ctype.c,v 1.9 2025/03/29 01:06:36 riastradh Exp $"); +__RCSID("$NetBSD: t_ctype.c,v 1.10 2025/03/29 19:40:42 riastradh Exp $"); #include <atf-c.h> #include <ctype.h> @@ -112,13 +112,7 @@ test_abuse_in_locales(const char *name, ATF_REQUIRE_MSG(setlocale(LC_CTYPE, locales[i]) != NULL, "locales[i]=%s", locales[i]); snprintf(buf, sizeof(buf), "[%s]%s", locales[i], name); - if (macro && strcmp(locales[i], "C") == 0) { - atf_tc_expect_fail("PR lib/58208: ctype(3)" - " provides poor runtime feedback of abuse"); - } test_abuse(buf, ctypefn); - if (strcmp(locales[i], "C") == 0) - atf_tc_expect_pass(); } } @@ -789,8 +783,6 @@ ATF_TC_BODY(abuse_##FN##_macro_c, tc) atf_tc_skip("runtime ctype(3) abuse is impossible with" \ " unsigned char"); \ } \ - atf_tc_expect_fail("PR lib/58208:" \ - " ctype(3) provides poor runtime feedback of abuse"); \ test_abuse(#FN, &FN##_wrapper); \ } \ ATF_TC(abuse_##FN##_function_c); \ Added files: Index: src/lib/libc/gen/ctype_guard.h diff -u /dev/null src/lib/libc/gen/ctype_guard.h:1.1 --- /dev/null Sat Mar 29 19:40:42 2025 +++ src/lib/libc/gen/ctype_guard.h Sat Mar 29 19:40:42 2025 @@ -0,0 +1,163 @@ +/* $NetBSD: ctype_guard.h,v 1.1 2025/03/29 19:40:42 riastradh Exp $ */ + +/*- + * Copyright (c) 2025 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LIBC_CTYPE_GUARD_H_ +#define _LIBC_CTYPE_GUARD_H_ + +#include <sys/cdefs.h> + +#include "ctype_local.h" + +/* + * On platforms where char is signed, the ctype(3) functions have an + * insidious trap where logic like + * + * char *s = ...; + * + * if (isspace(*s)) ... + * + * is undefined behaviour whenever the string s has any bytes outside + * the 7-bit US-ASCII range. The correct way to do this is to cast any + * char inputs to unsigned char first: + * + * if (isspace((unsigned char)*s)) ... + * + * Unfortunately, the buggy idiom without an unsigned char cast is + * extremely prevalent in C code in the wild. (See the ctype(3) man + * page for details on why this idiom is required and why isspace + * itself can't just be `fixed'.) + * + * The ctype(3) functions are implemented as macros that expand to + * simple table lookups -- partly for performance, and partly to + * deliberately trigger -Wchar-subscript compiler warnings for + * suspicious code like the above. + * + * To noisily detect the undefined behaviour without incurring overhead + * for the defined cases of the ctype(3) functions, we put a guard page + * before the tables in memory so that (at least for inputs other than + * 0xff, which coincides as signed char with EOF = -1) attempts to use + * the ctype(3) functions on invalid inputs will reliably -- and + * safely! -- crash with SIGSEGV instead of sometimes returning + * nondeterministic nonsense results. + * + * We do this by defining two symbols, one local and one global: + * + * - The local (C `static') symbol _C_ctype_tab_guarded_ is + * page-aligned and has an extra page-sized buffer at the + * beginning for a guard page. It is defined as an ordinary C + * array to keep the source code legible. + * + * - The global symbol _C_ctype_tab_ starts one page past the + * start of _C_ctype_tab_guarded_. It is defined, via some + * macros for convenience, by the assembler directives: + * + * .type _C_ctype_tab_,@object + * .global _C_ctype_tab_ + * _C_ctype_tab_ = _C_ctype_tab_guarded_ + PAGE_SIZE + * + * (with PAGE_SIZE being replaced by the numeric value from + * vmparam.h -- actually, we use MAX_PAGE_SIZE for the handful + * of architectures that support different page sizes on + * different machines). + * + * Then, at startup, we mprotect the guard page PROT_NONE. + * + * The global symbol _C_ctype_tab_ is an immutable part of the libc + * ABI, and is used by, e.g., libstdc++.so, so we have to define it + * compatibly -- this is why it must be defined as an ELF global symbol + * in its own right, and not simply handled inside libc.so by + * additional arithmetic relative to _C_ctype_tab_guarded_. + */ + +#ifndef __CHAR_UNSIGNED__ +# define _CTYPE_GUARD_PAGE 1 +#else +# define _CTYPE_GUARD_PAGE 0 +#endif + +#ifdef __arm__ +# define __ctype_table_object(name) \ + __asm(".type " #name ",%object") +#else +# define __ctype_table_object(name) \ + __asm(".type " #name ",@object") +#endif + +#if _CTYPE_GUARD_PAGE + +# include <machine/vmparam.h> + +/* + * _CTYPE_GUARD_SIZE must be a macro so it will work through ___STRING + * to produce a string for symbol arithmetic in __asm. + */ +# ifdef MAX_PAGE_SIZE +# define _CTYPE_GUARD_SIZE MAX_PAGE_SIZE +# else +# define _CTYPE_GUARD_SIZE PAGE_SIZE +# endif + +enum { + _C_CTYPE_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_ctype_tab_[0]), +# ifdef __BUILD_LEGACY + _C_COMPAT_BSDCTYPE_GUARD = + _CTYPE_GUARD_SIZE/sizeof(_C_compat_bsdctype[0]), +# endif + _C_TOLOWER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_tolower_tab_[0]), + _C_TOUPPER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_toupper_tab_[0]), +}; + +# define __ctype_table __aligned(_CTYPE_GUARD_SIZE) +# define __ctype_table_guarded(name, guard) \ + __ctype_table_object(name); \ + __asm(".global " _C_LABEL_STRING(#name)); \ + __asm(_C_LABEL_STRING(#name) " = " _C_LABEL_STRING(#guard) " + " \ + ___STRING(_CTYPE_GUARD_SIZE)) + +#else /* !_CTYPE_GUARD_PAGE */ + +# define _CTYPE_GUARD_SIZE 0 + +enum { + _C_CTYPE_TAB_GUARD = 0, +# ifdef __BUILD_LEGACY + _C_COMPAT_BSDCTYPE_GUARD = 0, +# endif + _C_TOLOWER_TAB_GUARD = 0, + _C_TOUPPER_TAB_GUARD = 0, +}; + +/* Compiler can't see into __strong_alias, so mark it __used. */ +# define __ctype_table __used +# define __ctype_table_guarded(name, guard) \ + __ctype_table_object(name); \ + __strong_alias(name, guard) + +#endif /* _CTYPE_GUARD_PAGE */ + +#endif /* _LIBC_CTYPE_GUARD_H_ */