When building m4-1.4.18b with CC="gcc -ftrapv" (in order to catch undefined behaviour through signed integer overflow at runtime), the test suite fails:
================================================================================ Checking ../../checks/189.eval @ ../doc/m4.texi:6402: Origin of test ../../checks/189.eval: status was 2, expected 0 @ ../doc/m4.texi:6402: Origin of test ../../checks/189.eval: stdout mismatch --- m4-tmp.6646/m4-xout 2021-05-09 00:52:24.655715842 +0200 +++ m4-tmp.6646/m4-out 2021-05-09 00:52:24.651715784 +0200 @@ -1,9 +1,2 @@ -1 -1 -overflow occurred --2147483648 -0 --2 --2 @ ../doc/m4.texi:6402: Origin of test ../../checks/189.eval: stderr mismatch --- m4-tmp.6646/m4-xerr 2021-05-09 00:52:24.659715900 +0200 +++ m4-tmp.6646/m4-err 2021-05-09 00:52:24.651715784 +0200 @@ -0,0 +1 @@ +m4: internal error detected; please report this bug to <bug-m4@gnu.org>: Aborted ================================================================================ In the debugger I see this: (gdb) run Starting program: /BUILD/m4-1.4.18b/build-64/src/m4 define(`max_int', eval(`0x7fffffff')) define(`min_int', incr(max_int)) eval(min_int` < 0') Program received signal SIGABRT, Aborted. 0x00007ffff7a42438 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54 54 ../sysdeps/unix/sysv/linux/raise.c: Datei oder Verzeichnis nicht gefunden. (gdb) where #0 0x00007ffff7a42438 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54 #1 0x00007ffff7a4403a in __GI_abort () at abort.c:89 #2 0x00000000004458e5 in __addvsi3 () #3 0x000000000040c0b7 in eval_lex (val=0x7fffffffd22c) at ../../src/eval.c:180 #4 0x000000000040d2c3 in unary_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:796 #5 0x000000000040d18e in exp_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:757 #6 0x000000000040cfe2 in mult_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:695 #7 0x000000000040cee0 in add_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:660 #8 0x000000000040cd78 in shift_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:607 #9 0x000000000040cbea in cmp_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:554 #10 0x000000000040cac2 in equality_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:517 #11 0x000000000040c9f7 in and_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:489 #12 0x000000000040c92c in xor_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:462 #13 0x000000000040c861 in or_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:435 #14 0x000000000040c766 in logical_and_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:404 #15 0x000000000040c65d in logical_or_term (et=MINUS, v1=0x7fffffffd22c) at ../../src/eval.c:373 #16 0x000000000040c473 in evaluate (expr=0x67a1b0 "-2147483648 < 0", val=0x7fffffffd22c) at ../../src/eval.c:297 #17 0x00000000004068d6 in m4_eval (obs=0x673f30, argc=2, argv=0x67b190) at ../../src/builtin.c:1115 #18 0x00000000004130e4 in call_macro (sym=0x679670, argc=2, argv=0x67b190, expansion=0x673f30) at ../../src/macro.c:278 #19 0x0000000000413415 in expand_macro (sym=0x679670) at ../../src/macro.c:369 #20 0x0000000000412810 in expand_token (obs=0x0, t=TOKEN_WORD, td=0x7fffffffd490, line=3) at ../../src/macro.c:118 #21 0x0000000000412636 in expand_input () at ../../src/macro.c:68 #22 0x0000000000403a0c in process_file (name=0x4469e0 "-") at ../../src/m4.c:378 #23 0x00000000004042d1 in main (argc=1, argv=0x7fffffffd728) at ../../src/m4.c:696 (gdb) print *val $1 = 214748364 (gdb) print base $2 = 10 (gdb) print digit $3 = 8 And there's actually a FIXME in the code at that location. The attached patch fixes it: It avoids the undefined behaviour. I did consider using strtol. It would not help much, because it does not support base == 1 and also may have portability problems. OK to push?
>From 981f4617c6362a22eee20247c170e760812e5b68 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Sun, 9 May 2021 01:37:36 +0200 Subject: [PATCH] Avoid undefined behaviour when parsing the number -2147483648. * src/eval.c (eval_lex): Use an unsigned variable for accumulating the value. --- src/eval.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/eval.c b/src/eval.c index 31d0f3c..4717c3c 100644 --- a/src/eval.c +++ b/src/eval.c @@ -115,7 +115,11 @@ eval_lex (int32_t *val) if (c_isdigit (*eval_text)) { - int base, digit; + unsigned int base, digit; + /* The documentation says that "overflow silently results in wraparound". + Therefore use an unsigned integer type to avoid undefined behaviour + when parsing '-2147483648'. */ + uint32_t value; if (*eval_text == '0') { @@ -152,8 +156,7 @@ eval_lex (int32_t *val) else base = 10; - /* FIXME - this calculation can overflow. Consider xstrtol. */ - *val = 0; + value = 0; for (; *eval_text; eval_text++) { if (c_isdigit (*eval_text)) @@ -168,8 +171,8 @@ eval_lex (int32_t *val) if (base == 1) { if (digit == 1) - (*val)++; - else if (digit == 0 && !*val) + value++; + else if (digit == 0 && value == 0) continue; else break; @@ -177,8 +180,9 @@ eval_lex (int32_t *val) else if (digit >= base) break; else - *val = *val * base + digit; + value = value * base + digit; } + *val = value; return NUMBER; } -- 2.7.4