bug#47412: env: fragile argument parsing
On 3/26/21 3:21 PM, Paul Eggert wrote: The -S code could use some more fixes in this area too - it can probably still dump core on platforms like the Hurd that don't limit exec arg size - but one thing at a time. I fixed the (unlikely) bugs I found in this area by installing the attached. >From e3766c5db176ca7abbb8212d5b0b7862fb98a5be Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 29 Mar 2021 21:42:44 -0700 Subject: [PATCH] env: simplify --split-string memory management * bootstrap.conf (gnulib_modules): Add idx. * src/env.c: Include idx.h, minmax.h. Prefer idx_t to ptrdiff_t when values are nonnegative. (valid_escape_sequence, escape_char, validate_split_str) (CHECK_START_NEW_ARG): Remove; no longer needed now that we validate as we go. (struct splitbuf): New type. (splitbuf_grow, splitbuf_append_byte, check_start_new_arg) (splitbuf_finishup): New functions. (build_argv): New arg ARGC. Validate and process in one go, using the new functions; this is simpler and more reliable than the old approach (as witness the recent bug). Avoid integer overflow in the unlikely case where the string contains more than INT_MAX arguments. (parse_split_string): Simplify by exploiting the new build_argv. --- bootstrap.conf | 1 + src/env.c | 385 ++--- 2 files changed, 173 insertions(+), 213 deletions(-) diff --git a/bootstrap.conf b/bootstrap.conf index ab6b3ef0c..f55da99db 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -134,6 +134,7 @@ gnulib_modules=" host-os human idcache + idx ignore-value inttostr inttypes diff --git a/src/env.c b/src/env.c index 11db374d9..e2ab39fd5 100644 --- a/src/env.c +++ b/src/env.c @@ -26,6 +26,8 @@ #include "system.h" #include "die.h" #include "error.h" +#include "idx.h" +#include "minmax.h" #include "operand2sig.h" #include "quote.h" #include "sig2str.h" @@ -41,14 +43,14 @@ /* Array of envvars to unset. */ static const char **usvars; static size_t usvars_alloc; -static ptrdiff_t usvars_used; +static idx_t usvars_used; /* Annotate the output with extra info to aid the user. */ static bool dev_debug; /* Buffer and length of extracted envvars in -S strings. */ static char *varname; -static ptrdiff_t vnlen; +static idx_t vnlen; /* Possible actions on each signal. */ enum SIGNAL_MODE { @@ -175,7 +177,7 @@ append_unset_var (const char *var) static void unset_envvars (void) { - for (ptrdiff_t i = 0; i < usvars_used; ++i) + for (idx_t i = 0; i < usvars_used; ++i) { devmsg ("unset:%s\n", usvars[i]); @@ -190,29 +192,6 @@ unset_envvars (void) IF_LINT (usvars_alloc = 0); } -static bool _GL_ATTRIBUTE_PURE -valid_escape_sequence (const char c) -{ - return (c == 'c' || c == 'f' || c == 'n' || c == 'r' || c == 't' || c == 'v' \ - || c == '#' || c == '$' || c == '_' || c == '"' || c == '\'' \ - || c == '\\'); -} - -static char _GL_ATTRIBUTE_PURE -escape_char (const char c) -{ - switch (c) -{ -/* \a, \b not supported by FreeBSD's env. */ -case 'f': return '\f'; -case 'n': return '\n'; -case 'r': return '\r'; -case 't': return '\t'; -case 'v': return '\v'; -default: assume (false); -} -} - /* Return a pointer to the end of a valid ${VARNAME} string, or NULL. 'str' should point to the '$' character. First letter in VARNAME must be alpha or underscore, @@ -241,7 +220,7 @@ scan_varname (const char *str) static char * extract_varname (const char *str) { - ptrdiff_t i; + idx_t i; const char *p; p = scan_varname (str); @@ -263,150 +242,127 @@ extract_varname (const char *str) return varname; } -/* Validate the "-S" parameter, according to the syntax defined by FreeBSD's - env(1). Terminate with an error message if not valid. +/* Temporary buffer used by --split-string processing. */ +struct splitbuf +{ + /* Buffer address, arg count, and half the number of elements in the buffer. + ARGC and ARGV are as in 'main', and ARGC + 1 <= HALF_ALLOC so + that the upper half of ARGV can be used for string contents. + This may waste up to half the space but keeps the code simple, + which is better for this rarely-used but security-sensitive code. + + ARGV[0] is not initialized; that is the caller's responsibility + after finalization. + + During assembly, ARGV[I] (where 0 < I < ARGC) contains the offset + of the Ith string (relative to ARGV + HALF_ALLOC), so that + reallocating ARGV does not change the validity of its contents. + The integer offset is cast to char * during assembly, and is + converted to a true char * pointer on finalization. + + During assembly, ARGV[ARGC] contains the offset of the first + unused string byte (relative to ARGV + HALF_ALLOC). */ + char **argv; + int argc; + idx_t half_alloc; + + /* The number of extra argv slots to keep room for. */ + int extra_argc; + + /* Whether processing should act as if the most
bug#47412: env: fragile argument parsing
On 26/03/2021 20:52, Paul Eggert wrote: Thanks for the bug report. I installed the attached to fix it and am closing the report. The fix looks good, thanks. I added in a test case also at: https://git.sv.gnu.org/gitweb/?p=coreutils.git;a=commitdiff;h=9f1bda18f cheers, Pádraig
bug#47412: env: fragile argument parsing
I also installed the attached two followup patches to document this and issue a better warning in rare cases. The -S code could use some more fixes in this area too - it can probably still dump core on platforms like the Hurd that don't limit exec arg size - but one thing at a time. >From 6c4efdc0f51c8e253f16da2ec60cdf647bec3c06 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 26 Mar 2021 14:00:37 -0700 Subject: [PATCH] doc: document env fix * NEWS, doc/coreutils.texi (env invocation): Document recent change. --- NEWS | 3 +++ doc/coreutils.texi | 2 ++ 2 files changed, 5 insertions(+) diff --git a/NEWS b/NEWS index 97cb4bd64..802f4b427 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,9 @@ GNU coreutils NEWS-*- outline -*- heavily changed during the run. [bug introduced in coreutils-8.25] + env -S no longer crashes when given unusual whitespace characters + [bug introduced in coreutils-8.30] + expr no longer mishandles unmatched \(...\) in regular expressions. [bug introduced in coreutils-6.0] diff --git a/doc/coreutils.texi b/doc/coreutils.texi index ac0b4467d..06ecdd74c 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -17592,6 +17592,8 @@ hello Running @command{env -Sstring} splits the @var{string} into arguments based on unquoted spaces or tab characters. +(Newlines, carriage returns, vertical tabs and form feeds are treated +like spaces and tabs.) In the following contrived example the @command{awk} variable @samp{OFS} will be @code{xyz} as these spaces are inside -- 2.30.2 >From 5f99c7533df49f25819d7bb850be5c6cb49aa13d Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 26 Mar 2021 14:51:55 -0700 Subject: [PATCH] env: improve whitespace warning * src/env.c (main): Issue -S warning for any whitespace, not just space. --- src/env.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/env.c b/src/env.c index d07918fee..341777cb8 100644 --- a/src/env.c +++ b/src/env.c @@ -942,7 +942,7 @@ main (int argc, char **argv) int exit_status = errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE; error (0, errno, "%s", quote (argv[optind])); - if (exit_status == EXIT_ENOENT && strchr (argv[optind], ' ')) + if (exit_status == EXIT_ENOENT && strpbrk (argv[optind], C_ISSPACE_CHARS)) error (0, 0, _("use -[v]S to pass options in shebang lines")); return exit_status; -- 2.30.2
bug#47412: env: fragile argument parsing
Thanks for the bug report. I installed the attached to fix it and am closing the report. >From 6dd466eda6fa3f1f7d2a9474ec926ccd2ede98e9 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 26 Mar 2021 13:49:49 -0700 Subject: [PATCH] env: fix address violation with '\v' in -S Problem reported by Frank Busse (Bug#47412). * src/env.c (C_ISSPACE_CHARS): New macro. (shortopts, build_argv, main): Treate all C-locale space characters like space and tab, for compatibility with FreeBSD. (validate_split_str, build_argv, parse_split_string): Use the C locale, not the current locale, to determine whether a byte is a space character. --- src/env.c | 23 --- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/env.c b/src/env.c index ba9da1113..e13a312cd 100644 --- a/src/env.c +++ b/src/env.c @@ -73,7 +73,10 @@ static bool sig_mask_changed; /* Whether to list non default handling. */ static bool report_signal_handling; -static char const shortopts[] = "+C:iS:u:v0 \t"; +/* isspace characters in the C locale. */ +#define C_ISSPACE_CHARS " \t\n\v\f\r" + +static char const shortopts[] = "+C:iS:u:v0" C_ISSPACE_CHARS; /* For long options that have no equivalent short option, use a non-character as a pseudo short option, starting with CHAR_MAX + 1. */ @@ -277,7 +280,7 @@ validate_split_str (const char* str, size_t* /*out*/ bufsize, size_t buflen; int cnt = 1; - assert (str && str[0] && !isspace (str[0])); /* LCOV_EXCL_LINE */ + assert (str && str[0] && !c_isspace (str[0])); /* LCOV_EXCL_LINE */ dq = sq = sp = false; buflen = strlen (str)+1; @@ -286,7 +289,7 @@ validate_split_str (const char* str, size_t* /*out*/ bufsize, { const char next = *(str+1); - if (isspace (*str) && !dq && !sq) + if (c_isspace (*str) && !dq && !sq) { sp = true; } @@ -392,7 +395,7 @@ build_argv (const char* str, int extra_argc) } \ } while (0) - assert (str && str[0] && !isspace (str[0])); /* LCOV_EXCL_LINE */ + assert (str && str[0] && !c_isspace (str[0])); /* LCOV_EXCL_LINE */ validate_split_str (str, &buflen, &newargc); @@ -433,13 +436,12 @@ build_argv (const char* str, int extra_argc) ++str; continue; -case ' ': -case '\t': - /* space/tab outside quotes starts a new argument. */ +case ' ': case '\t': case '\n': case '\v': case '\f': case '\r': + /* Start a new argument if outside quotes. */ if (sq || dq) break; sep = true; - str += strspn (str, " \t"); /* skip whitespace. */ + str += strspn (str, C_ISSPACE_CHARS); continue; case '#': @@ -540,7 +542,7 @@ parse_split_string (const char* str, int /*out*/ *orig_optind, char **newargv, **nextargv; - while (isspace (*str)) + while (c_isspace (*str)) str++; if (*str == '\0') return; @@ -848,8 +850,7 @@ main (int argc, char **argv) case 'S': parse_split_string (optarg, &optind, &argc, &argv); break; -case ' ': -case '\t': +case ' ': case '\t': case '\n': case '\v': case '\f': case '\r': /* These are undocumented options. Attempt to detect incorrect shebang usage with extraneous space, e.g.: #!/usr/bin/env -i command -- 2.30.2
bug#47412: env: fragile argument parsing
On 3/26/21 1:12 PM, Pádraig Brady wrote: I'll fix it up. I've got a fix. My goodness, that part of the code is messy.
bug#47412: env: fragile argument parsing
On 26/03/2021 15:00, Frank Busse wrote: Hi, env crashes for some nonsensical command line arguments (reported by KLEE), e.g.: --- python3 -c "import os; os.execl('./src/env', 'env', b'--s=\"\"\t\x0b')" = ==140651==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60300028 at pc 0x562e1cc1078a bp 0x7ffd59964dd0 sp 0x7ffd59964dc0 WRITE of size 8 at 0x60300028 thread T0 #0 0x562e1cc10789 in build_argv src/env.c:511 #1 0x562e1cc10982 in parse_split_string src/env.c:548 #2 0x562e1cc127bc in main src/env.c:849 #3 0x7f1c167e3b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24) #4 0x562e1cc0e54d in _start (coreutils-8.32/src/env+0x654d) 0x60300028 is located 0 bytes to the right of 24-byte region [0x60300010,0x60300028) allocated by thread T0 here: #0 0x7f1c16a3b459 in __interceptor_malloc /build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x562e1cc19463 in xmalloc lib/xmalloc.c:41 #2 0x562e1cc0ff54 in build_argv src/env.c:404 #3 0x562e1cc10982 in parse_split_string src/env.c:548 #4 0x562e1cc127bc in main src/env.c:849 #5 0x7f1c167e3b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24) Confirmed on an ASAN build of the latest source. I'll fix it up. thanks! Pádraig
bug#47412: env: fragile argument parsing
Hi, env crashes for some nonsensical command line arguments (reported by KLEE), e.g.: --- > python3 -c "import os; os.execl('./src/env', 'env', b'--s=\"\"\t\x0b')" = ==140651==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60300028 at pc 0x562e1cc1078a bp 0x7ffd59964dd0 sp 0x7ffd59964dc0 WRITE of size 8 at 0x60300028 thread T0 #0 0x562e1cc10789 in build_argv src/env.c:511 #1 0x562e1cc10982 in parse_split_string src/env.c:548 #2 0x562e1cc127bc in main src/env.c:849 #3 0x7f1c167e3b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24) #4 0x562e1cc0e54d in _start (coreutils-8.32/src/env+0x654d) 0x60300028 is located 0 bytes to the right of 24-byte region [0x60300010,0x60300028) allocated by thread T0 here: #0 0x7f1c16a3b459 in __interceptor_malloc /build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x562e1cc19463 in xmalloc lib/xmalloc.c:41 #2 0x562e1cc0ff54 in build_argv src/env.c:404 #3 0x562e1cc10982 in parse_split_string src/env.c:548 #4 0x562e1cc127bc in main src/env.c:849 #5 0x7f1c167e3b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24) SUMMARY: AddressSanitizer: heap-buffer-overflow src/env.c:511 in build_argv --- or --- > python3 -c "import os; os.execl('./src/env', 'env', b'--s=\xff > \r\x0b\t\x0b-')" = ==140886==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60300030 at pc 0x55821372878a bp 0x7ffdd6e4bc40 sp 0x7ffdd6e4bc30 WRITE of size 8 at 0x60300030 thread T0 #0 0x558213728789 in build_argv src/env.c:511 #1 0x558213728982 in parse_split_string src/env.c:548 #2 0x55821372a7bc in main src/env.c:849 #3 0x7f5b05ec5b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24) #4 0x55821372654d in _start (coreutils-8.32/src/env+0x654d) 0x60300030 is located 0 bytes to the right of 32-byte region [0x60300010,0x60300030) allocated by thread T0 here: #0 0x7f5b0611d459 in __interceptor_malloc/build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x558213731463 in xmalloc lib/xmalloc.c:41 #2 0x558213727f54 in build_argv src/env.c:404 #3 0x558213728982 in parse_split_string src/env.c:548 #4 0x55821372a7bc in main src/env.c:849 #5 0x7f5b05ec5b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24) SUMMARY: AddressSanitizer: heap-buffer-overflow src/env.c:511 in build_argv --- Version: 8.32 Configure: CFLAGS="-ggdb -O0 -fsanitize=address" ./configure --without-selinux --without-gmp --disable-acl --disable-largefile --disable-libsmack --disable-xattr --disable-libcap --disable-nls Kind regards, Frank