Avoid undefined behaviour when parsing the number -2147483648

Bruno Haible Sat, 08 May 2021 16:46:41 -0700

When building m4-1.4.18b with CC="gcc -ftrapv" (in order to catch
undefined behaviour through signed integer overflow at runtime),
the test suite fails:


================================================================================
Checking ../../checks/189.eval
@ ../doc/m4.texi:6402: Origin of test
../../checks/189.eval: status was 2, expected 0
@ ../doc/m4.texi:6402: Origin of test
../../checks/189.eval: stdout mismatch
--- m4-tmp.6646/m4-xout 2021-05-09 00:52:24.655715842 +0200
+++ m4-tmp.6646/m4-out  2021-05-09 00:52:24.651715784 +0200
@@ -1,9 +1,2 @@
 
 
-1
-1
-overflow occurred
--2147483648
-0
--2
--2
@ ../doc/m4.texi:6402: Origin of test
../../checks/189.eval: stderr mismatch
--- m4-tmp.6646/m4-xerr 2021-05-09 00:52:24.659715900 +0200
+++ m4-tmp.6646/m4-err  2021-05-09 00:52:24.651715784 +0200
@@ -0,0 +1 @@
+m4: internal error detected; please report this bug to <bug-m4@gnu.org>: 
Aborted
================================================================================

In the debugger I see this:

(gdb) run
Starting program: /BUILD/m4-1.4.18b/build-64/src/m4 
define(`max_int', eval(`0x7fffffff'))

define(`min_int', incr(max_int))

eval(min_int` < 0')

Program received signal SIGABRT, Aborted.
0x00007ffff7a42438 in __GI_raise (sig=sig@entry=6) at 
../sysdeps/unix/sysv/linux/raise.c:54
54      ../sysdeps/unix/sysv/linux/raise.c: Datei oder Verzeichnis nicht 
gefunden.
(gdb) where
#0  0x00007ffff7a42438 in __GI_raise (sig=sig@entry=6) at 
../sysdeps/unix/sysv/linux/raise.c:54
#1  0x00007ffff7a4403a in __GI_abort () at abort.c:89
#2  0x00000000004458e5 in __addvsi3 ()
#3  0x000000000040c0b7 in eval_lex (val=0x7fffffffd22c) at ../../src/eval.c:180
#4  0x000000000040d2c3 in unary_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:796
#5  0x000000000040d18e in exp_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:757
#6  0x000000000040cfe2 in mult_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:695
#7  0x000000000040cee0 in add_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:660
#8  0x000000000040cd78 in shift_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:607
#9  0x000000000040cbea in cmp_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:554
#10 0x000000000040cac2 in equality_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:517
#11 0x000000000040c9f7 in and_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:489
#12 0x000000000040c92c in xor_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:462
#13 0x000000000040c861 in or_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:435
#14 0x000000000040c766 in logical_and_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:404
#15 0x000000000040c65d in logical_or_term (et=MINUS, v1=0x7fffffffd22c) at 
../../src/eval.c:373
#16 0x000000000040c473 in evaluate (expr=0x67a1b0 "-2147483648 < 0", 
val=0x7fffffffd22c) at ../../src/eval.c:297
#17 0x00000000004068d6 in m4_eval (obs=0x673f30, argc=2, argv=0x67b190) at 
../../src/builtin.c:1115
#18 0x00000000004130e4 in call_macro (sym=0x679670, argc=2, argv=0x67b190, 
expansion=0x673f30) at ../../src/macro.c:278
#19 0x0000000000413415 in expand_macro (sym=0x679670) at ../../src/macro.c:369
#20 0x0000000000412810 in expand_token (obs=0x0, t=TOKEN_WORD, 
td=0x7fffffffd490, line=3) at ../../src/macro.c:118
#21 0x0000000000412636 in expand_input () at ../../src/macro.c:68
#22 0x0000000000403a0c in process_file (name=0x4469e0 "-") at ../../src/m4.c:378
#23 0x00000000004042d1 in main (argc=1, argv=0x7fffffffd728) at 
../../src/m4.c:696
(gdb) print *val
$1 = 214748364
(gdb) print base
$2 = 10
(gdb) print digit
$3 = 8

And there's actually a FIXME in the code at that location.

The attached patch fixes it: It avoids the undefined behaviour.

I did consider using strtol. It would not help much, because it does not
support base == 1 and also may have portability problems.

OK to push?

>From 981f4617c6362a22eee20247c170e760812e5b68 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Sun, 9 May 2021 01:37:36 +0200
Subject: [PATCH] Avoid undefined behaviour when parsing the number
 -2147483648.

* src/eval.c (eval_lex): Use an unsigned variable for accumulating the value.
---
 src/eval.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/eval.c b/src/eval.c
index 31d0f3c..4717c3c 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -115,7 +115,11 @@ eval_lex (int32_t *val)
 
   if (c_isdigit (*eval_text))
     {
-      int base, digit;
+      unsigned int base, digit;
+      /* The documentation says that "overflow silently results in wraparound".
+         Therefore use an unsigned integer type to avoid undefined behaviour
+         when parsing '-2147483648'.  */
+      uint32_t value;
 
       if (*eval_text == '0')
         {
@@ -152,8 +156,7 @@ eval_lex (int32_t *val)
       else
         base = 10;
 
-      /* FIXME - this calculation can overflow.  Consider xstrtol.  */
-      *val = 0;
+      value = 0;
       for (; *eval_text; eval_text++)
         {
           if (c_isdigit (*eval_text))
@@ -168,8 +171,8 @@ eval_lex (int32_t *val)
           if (base == 1)
             {
               if (digit == 1)
-                (*val)++;
-              else if (digit == 0 && !*val)
+                value++;
+              else if (digit == 0 && value == 0)
                 continue;
               else
                 break;
@@ -177,8 +180,9 @@ eval_lex (int32_t *val)
           else if (digit >= base)
             break;
           else
-            *val = *val * base + digit;
+            value = value * base + digit;
         }
+      *val = value;
       return NUMBER;
     }
 
-- 
2.7.4

Avoid undefined behaviour when parsing the number -2147483648

Reply via email to