bug#47412: env: fragile argument parsing

2021-03-29 Thread Paul Eggert

On 3/26/21 3:21 PM, Paul Eggert wrote:
The -S code could use some more fixes in this area too - it can 
probably still dump core on platforms like the Hurd that don't limit 
exec arg size - but one thing at a time. 


I fixed the (unlikely) bugs I found in this area by installing the attached.

>From e3766c5db176ca7abbb8212d5b0b7862fb98a5be Mon Sep 17 00:00:00 2001
From: Paul Eggert 
Date: Mon, 29 Mar 2021 21:42:44 -0700
Subject: [PATCH] env: simplify --split-string memory management

* bootstrap.conf (gnulib_modules): Add idx.
* src/env.c: Include idx.h, minmax.h.
Prefer idx_t to ptrdiff_t when values are nonnegative.
(valid_escape_sequence, escape_char, validate_split_str)
(CHECK_START_NEW_ARG):
Remove; no longer needed now that we validate as we go.
(struct splitbuf): New type.
(splitbuf_grow, splitbuf_append_byte, check_start_new_arg)
(splitbuf_finishup): New functions.
(build_argv): New arg ARGC.  Validate and process in one go, using
the new functions; this is simpler and more reliable than the old
approach (as witness the recent bug).  Avoid integer overflow in
the unlikely case where the string contains more than INT_MAX
arguments.
(parse_split_string): Simplify by exploiting the new build_argv.
---
 bootstrap.conf |   1 +
 src/env.c  | 385 ++---
 2 files changed, 173 insertions(+), 213 deletions(-)

diff --git a/bootstrap.conf b/bootstrap.conf
index ab6b3ef0c..f55da99db 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -134,6 +134,7 @@ gnulib_modules="
   host-os
   human
   idcache
+  idx
   ignore-value
   inttostr
   inttypes
diff --git a/src/env.c b/src/env.c
index 11db374d9..e2ab39fd5 100644
--- a/src/env.c
+++ b/src/env.c
@@ -26,6 +26,8 @@
 #include "system.h"
 #include "die.h"
 #include "error.h"
+#include "idx.h"
+#include "minmax.h"
 #include "operand2sig.h"
 #include "quote.h"
 #include "sig2str.h"
@@ -41,14 +43,14 @@
 /* Array of envvars to unset.  */
 static const char **usvars;
 static size_t usvars_alloc;
-static ptrdiff_t usvars_used;
+static idx_t usvars_used;
 
 /* Annotate the output with extra info to aid the user.  */
 static bool dev_debug;
 
 /* Buffer and length of extracted envvars in -S strings.  */
 static char *varname;
-static ptrdiff_t vnlen;
+static idx_t vnlen;
 
 /* Possible actions on each signal.  */
 enum SIGNAL_MODE {
@@ -175,7 +177,7 @@ append_unset_var (const char *var)
 static void
 unset_envvars (void)
 {
-  for (ptrdiff_t i = 0; i < usvars_used; ++i)
+  for (idx_t i = 0; i < usvars_used; ++i)
 {
   devmsg ("unset:%s\n", usvars[i]);
 
@@ -190,29 +192,6 @@ unset_envvars (void)
   IF_LINT (usvars_alloc = 0);
 }
 
-static bool _GL_ATTRIBUTE_PURE
-valid_escape_sequence (const char c)
-{
-  return (c == 'c' || c == 'f' || c == 'n' || c == 'r' || c == 't' || c == 'v' \
-  || c == '#' || c == '$' || c == '_' || c == '"' || c == '\'' \
-  || c == '\\');
-}
-
-static char _GL_ATTRIBUTE_PURE
-escape_char (const char c)
-{
-  switch (c)
-{
-/* \a, \b not supported by FreeBSD's env.  */
-case 'f': return '\f';
-case 'n': return '\n';
-case 'r': return '\r';
-case 't': return '\t';
-case 'v': return '\v';
-default: assume (false);
-}
-}
-
 /* Return a pointer to the end of a valid ${VARNAME} string, or NULL.
'str' should point to the '$' character.
First letter in VARNAME must be alpha or underscore,
@@ -241,7 +220,7 @@ scan_varname (const char *str)
 static char *
 extract_varname (const char *str)
 {
-  ptrdiff_t i;
+  idx_t i;
   const char *p;
 
   p = scan_varname (str);
@@ -263,150 +242,127 @@ extract_varname (const char *str)
   return varname;
 }
 
-/* Validate the "-S" parameter, according to the syntax defined by FreeBSD's
-   env(1).  Terminate with an error message if not valid.
+/* Temporary buffer used by --split-string processing.  */
+struct splitbuf
+{
+  /* Buffer address, arg count, and half the number of elements in the buffer.
+ ARGC and ARGV are as in 'main', and ARGC + 1 <= HALF_ALLOC so
+ that the upper half of ARGV can be used for string contents.
+ This may waste up to half the space but keeps the code simple,
+ which is better for this rarely-used but security-sensitive code.
+
+ ARGV[0] is not initialized; that is the caller's responsibility
+ after finalization.
+
+ During assembly, ARGV[I] (where 0 < I < ARGC) contains the offset
+ of the Ith string (relative to ARGV + HALF_ALLOC), so that
+ reallocating ARGV does not change the validity of its contents.
+ The integer offset is cast to char * during assembly, and is
+ converted to a true char * pointer on finalization.
+
+ During assembly, ARGV[ARGC] contains the offset of the first
+ unused string byte (relative to ARGV + HALF_ALLOC).  */
+  char **argv;
+  int argc;
+  idx_t half_alloc;
+
+  /* The number of extra argv slots to keep room for.  */
+  int extra_argc;
+
+  /* Whether processing should act as if the most 

bug#47412: env: fragile argument parsing

2021-03-29 Thread Pádraig Brady

On 26/03/2021 20:52, Paul Eggert wrote:

Thanks for the bug report. I installed the attached to fix it and am
closing the report.


The fix looks good, thanks.
I added in a test case also at:
https://git.sv.gnu.org/gitweb/?p=coreutils.git;a=commitdiff;h=9f1bda18f

cheers,
Pádraig





bug#47412: env: fragile argument parsing

2021-03-26 Thread Paul Eggert
I also installed the attached two followup patches to document this and 
issue a better warning in rare cases.


The -S code could use some more fixes in this area too - it can probably 
still dump core on platforms like the Hurd that don't limit exec arg 
size - but one thing at a time.
>From 6c4efdc0f51c8e253f16da2ec60cdf647bec3c06 Mon Sep 17 00:00:00 2001
From: Paul Eggert 
Date: Fri, 26 Mar 2021 14:00:37 -0700
Subject: [PATCH] doc: document env fix

* NEWS, doc/coreutils.texi (env invocation): Document recent change.
---
 NEWS   | 3 +++
 doc/coreutils.texi | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/NEWS b/NEWS
index 97cb4bd64..802f4b427 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,9 @@ GNU coreutils NEWS-*- outline -*-
   heavily changed during the run.
   [bug introduced in coreutils-8.25]
 
+  env -S no longer crashes when given unusual whitespace characters
+  [bug introduced in coreutils-8.30]
+
   expr no longer mishandles unmatched \(...\) in regular expressions.
   [bug introduced in coreutils-6.0]
 
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index ac0b4467d..06ecdd74c 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -17592,6 +17592,8 @@ hello
 
 Running @command{env -Sstring} splits the @var{string} into
 arguments based on unquoted spaces or tab characters.
+(Newlines, carriage returns, vertical tabs and form feeds are treated
+like spaces and tabs.)
 
 In the following contrived example the @command{awk} variable
 @samp{OFS} will be @code{xyz} as these spaces are inside
-- 
2.30.2

>From 5f99c7533df49f25819d7bb850be5c6cb49aa13d Mon Sep 17 00:00:00 2001
From: Paul Eggert 
Date: Fri, 26 Mar 2021 14:51:55 -0700
Subject: [PATCH] env: improve whitespace warning

* src/env.c (main): Issue -S warning for any whitespace,
not just space.
---
 src/env.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/env.c b/src/env.c
index d07918fee..341777cb8 100644
--- a/src/env.c
+++ b/src/env.c
@@ -942,7 +942,7 @@ main (int argc, char **argv)
   int exit_status = errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE;
   error (0, errno, "%s", quote (argv[optind]));
 
-  if (exit_status == EXIT_ENOENT && strchr (argv[optind], ' '))
+  if (exit_status == EXIT_ENOENT && strpbrk (argv[optind], C_ISSPACE_CHARS))
 error (0, 0, _("use -[v]S to pass options in shebang lines"));
 
   return exit_status;
-- 
2.30.2



bug#47412: env: fragile argument parsing

2021-03-26 Thread Paul Eggert
Thanks for the bug report. I installed the attached to fix it and am 
closing the report.
>From 6dd466eda6fa3f1f7d2a9474ec926ccd2ede98e9 Mon Sep 17 00:00:00 2001
From: Paul Eggert 
Date: Fri, 26 Mar 2021 13:49:49 -0700
Subject: [PATCH] env: fix address violation with '\v' in -S

Problem reported by Frank Busse (Bug#47412).
* src/env.c (C_ISSPACE_CHARS): New macro.
(shortopts, build_argv, main): Treate all C-locale space
characters like space and tab, for compatibility with FreeBSD.
(validate_split_str, build_argv, parse_split_string):
Use the C locale, not the current locale, to determine whether a
byte is a space character.
---
 src/env.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/env.c b/src/env.c
index ba9da1113..e13a312cd 100644
--- a/src/env.c
+++ b/src/env.c
@@ -73,7 +73,10 @@ static bool sig_mask_changed;
 /* Whether to list non default handling.  */
 static bool report_signal_handling;
 
-static char const shortopts[] = "+C:iS:u:v0 \t";
+/* isspace characters in the C locale.  */
+#define C_ISSPACE_CHARS " \t\n\v\f\r"
+
+static char const shortopts[] = "+C:iS:u:v0" C_ISSPACE_CHARS;
 
 /* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
@@ -277,7 +280,7 @@ validate_split_str (const char* str, size_t* /*out*/ bufsize,
   size_t buflen;
   int cnt = 1;
 
-  assert (str && str[0] && !isspace (str[0])); /* LCOV_EXCL_LINE */
+  assert (str && str[0] && !c_isspace (str[0])); /* LCOV_EXCL_LINE */
 
   dq = sq = sp = false;
   buflen = strlen (str)+1;
@@ -286,7 +289,7 @@ validate_split_str (const char* str, size_t* /*out*/ bufsize,
 {
   const char next = *(str+1);
 
-  if (isspace (*str) && !dq && !sq)
+  if (c_isspace (*str) && !dq && !sq)
 {
   sp = true;
 }
@@ -392,7 +395,7 @@ build_argv (const char* str, int extra_argc)
   } \
   } while (0)
 
-  assert (str && str[0] && !isspace (str[0])); /* LCOV_EXCL_LINE */
+  assert (str && str[0] && !c_isspace (str[0]));  /* LCOV_EXCL_LINE */
 
   validate_split_str (str, &buflen, &newargc);
 
@@ -433,13 +436,12 @@ build_argv (const char* str, int extra_argc)
   ++str;
   continue;
 
-case ' ':
-case '\t':
-  /* space/tab outside quotes starts a new argument. */
+case ' ': case '\t': case '\n': case '\v': case '\f': case '\r':
+  /* Start a new argument if outside quotes.  */
   if (sq || dq)
 break;
   sep = true;
-  str += strspn (str, " \t"); /* skip whitespace. */
+  str += strspn (str, C_ISSPACE_CHARS);
   continue;
 
 case '#':
@@ -540,7 +542,7 @@ parse_split_string (const char* str, int /*out*/ *orig_optind,
   char **newargv, **nextargv;
 
 
-  while (isspace (*str))
+  while (c_isspace (*str))
 str++;
   if (*str == '\0')
 return;
@@ -848,8 +850,7 @@ main (int argc, char **argv)
 case 'S':
   parse_split_string (optarg, &optind, &argc, &argv);
   break;
-case ' ':
-case '\t':
+case ' ': case '\t': case '\n': case '\v': case '\f': case '\r':
   /* These are undocumented options. Attempt to detect
  incorrect shebang usage with extraneous space, e.g.:
 #!/usr/bin/env -i command
-- 
2.30.2



bug#47412: env: fragile argument parsing

2021-03-26 Thread Paul Eggert

On 3/26/21 1:12 PM, Pádraig Brady wrote:


I'll fix it up.


I've got a fix. My goodness, that part of the code is messy.





bug#47412: env: fragile argument parsing

2021-03-26 Thread Pádraig Brady

On 26/03/2021 15:00, Frank Busse wrote:

Hi,


env crashes for some nonsensical command line arguments (reported by
KLEE), e.g.:

---

python3 -c "import os; os.execl('./src/env', 'env', b'--s=\"\"\t\x0b')"


=
==140651==ERROR: AddressSanitizer: heap-buffer-overflow on address
0x60300028 at pc 0x562e1cc1078a bp 0x7ffd59964dd0 sp 0x7ffd59964dc0
WRITE of size 8 at 0x60300028 thread T0
  #0 0x562e1cc10789 in build_argv src/env.c:511
  #1 0x562e1cc10982 in parse_split_string src/env.c:548
  #2 0x562e1cc127bc in main src/env.c:849
  #3 0x7f1c167e3b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24)
  #4 0x562e1cc0e54d in _start (coreutils-8.32/src/env+0x654d)

0x60300028 is located 0 bytes to the right of 24-byte region
[0x60300010,0x60300028)
allocated by thread T0 here:
  #0 0x7f1c16a3b459 in __interceptor_malloc 
/build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cpp:145
  #1 0x562e1cc19463 in xmalloc lib/xmalloc.c:41
  #2 0x562e1cc0ff54 in build_argv src/env.c:404
  #3 0x562e1cc10982 in parse_split_string src/env.c:548
  #4 0x562e1cc127bc in main src/env.c:849
  #5 0x7f1c167e3b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24)


Confirmed on an ASAN build of the latest source.
I'll fix it up.

thanks!
Pádraig





bug#47412: env: fragile argument parsing

2021-03-26 Thread Frank Busse
Hi,


env crashes for some nonsensical command line arguments (reported by
KLEE), e.g.:

---
> python3 -c "import os; os.execl('./src/env', 'env', b'--s=\"\"\t\x0b')"

=
==140651==ERROR: AddressSanitizer: heap-buffer-overflow on address
0x60300028 at pc 0x562e1cc1078a bp 0x7ffd59964dd0 sp 0x7ffd59964dc0
WRITE of size 8 at 0x60300028 thread T0
 #0 0x562e1cc10789 in build_argv src/env.c:511
 #1 0x562e1cc10982 in parse_split_string src/env.c:548
 #2 0x562e1cc127bc in main src/env.c:849
 #3 0x7f1c167e3b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24)
 #4 0x562e1cc0e54d in _start (coreutils-8.32/src/env+0x654d)

0x60300028 is located 0 bytes to the right of 24-byte region
[0x60300010,0x60300028)
allocated by thread T0 here:
 #0 0x7f1c16a3b459 in __interceptor_malloc 
/build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cpp:145
 #1 0x562e1cc19463 in xmalloc lib/xmalloc.c:41
 #2 0x562e1cc0ff54 in build_argv src/env.c:404
 #3 0x562e1cc10982 in parse_split_string src/env.c:548
 #4 0x562e1cc127bc in main src/env.c:849
 #5 0x7f1c167e3b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24)

SUMMARY: AddressSanitizer: heap-buffer-overflow src/env.c:511 in build_argv
---

or

---
> python3 -c "import os; os.execl('./src/env', 'env', b'--s=\xff 
> \r\x0b\t\x0b-')"

=
==140886==ERROR: AddressSanitizer: heap-buffer-overflow on address
0x60300030 at pc 0x55821372878a bp 0x7ffdd6e4bc40 sp 0x7ffdd6e4bc30
WRITE of size 8 at 0x60300030 thread T0
 #0 0x558213728789 in build_argv src/env.c:511
 #1 0x558213728982 in parse_split_string src/env.c:548
 #2 0x55821372a7bc in main src/env.c:849
 #3 0x7f5b05ec5b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24)
 #4 0x55821372654d in _start (coreutils-8.32/src/env+0x654d)

0x60300030 is located 0 bytes to the right of 32-byte region
[0x60300010,0x60300030) allocated by thread T0 here:
 #0 0x7f5b0611d459 in 
__interceptor_malloc/build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cpp:145
 #1 0x558213731463 in xmalloc lib/xmalloc.c:41
 #2 0x558213727f54 in build_argv src/env.c:404
 #3 0x558213728982 in parse_split_string src/env.c:548
 #4 0x55821372a7bc in main src/env.c:849
 #5 0x7f5b05ec5b24 in __libc_start_main (/usr/lib/libc.so.6+0x27b24)

SUMMARY: AddressSanitizer: heap-buffer-overflow src/env.c:511 in build_argv
---


Version: 8.32
Configure: CFLAGS="-ggdb -O0 -fsanitize=address" ./configure --without-selinux 
--without-gmp --disable-acl --disable-largefile --disable-libsmack 
--disable-xattr --disable-libcap --disable-nls


Kind regards,

Frank