[PATCH branch-1.6] eval: Add eval64 builtin [work in progress]

Eric Blake via M4-patches Thu, 15 May 2025 19:38:04 -0700

TODO: Add more tests, fill in changelog
---

Given that POSIX says in one place that eval must operate on at least
32-bit numbers, and in another that it must use at least 'signed
long'; and given that most development machines these days are 64-bit,
I think it's high time M4 offers 64-bit math.  For back-compat
reasons, I'm thinking of keeping eval at 32-bit and adding a new
eval64 (the user can always use define(`eval', defn(`eval64')) to opt
in to the newer size).


But I'm not sure whether eval64 should be visible by default, or only
opt-in via a command line option, or even if I can get define(`eval',
builtin(`eval64')) working even when eval64 itself is not pre-defined.
So for now, I'm just posting what I got working in one evening, but it
is not yet the final patch.

Thoughts on the best way to add this feature?

 NEWS            |  3 ++-
 doc/m4.texi     |  9 +++++---
 src/Makefile.am |  2 +-
 src/builtin.c   | 37 ++++++++++++++++++++++++-------
 src/eval.c      | 58 +++++++++++++++++++++++++++++--------------------
 src/eval64.c    | 39 +++++++++++++++++++++++++++++++++
 src/m4.h        |  9 ++++++--
 7 files changed, 118 insertions(+), 39 deletions(-)
 create mode 100644 src/eval64.c

diff --git a/NEWS b/NEWS
index 97c4d9c0..f6204217 100644
--- a/NEWS
+++ b/NEWS
@@ -129,7 +129,8 @@ GNU M4 NEWS - User visible changes.
 ** Enhance the `eval' builtin to understand the `?:' operator, and
    downgrade a failed parse due to an unknown operator from an error to a
    warning.  Further, the builtin now refuses to recognize `=' as a
-   synonym for `==' (this had emitted a warning since 1.4.8b).
+   synonym for `==' (this had emitted a warning since 1.4.8b).  Add a new
+   `eval64' builtin that operates on 64-bit integers.

 ** A number of portability improvements inherited from gnulib.

diff --git a/doc/m4.texi b/doc/m4.texi
index 81476bc8..60992c5f 100644
--- a/doc/m4.texi
+++ b/doc/m4.texi
@@ -7070,13 +7070,16 @@ Eval
 Integer expressions are evaluated with @code{eval}:

 @deffn Builtin eval (@var{expression}, @dvar{radix, 10}, @ovar{width})
+@deffnx Builtin eval64 (@var{expression}, @dvar{radix, 10}, @ovar{width})
 Expands to the value of @var{expression}.  The expansion is empty
 if a problem is encountered while parsing the arguments.  If specified,
 @var{radix} and @var{width} control the format of the output.

-Calculations are done with 32-bit signed numbers.  Overflow silently
-results in wraparound.  A warning is issued if division by zero is
-attempted, or if @var{expression} could not be parsed.
+Calculations are done with 32-bit signed numbers for @code{eval}, and
+64-bit signed numbers for @code{eval64} (the latter was introduced in M4
+1.6).  Overflow silently results in wraparound.  A warning is issued if
+division by zero is attempted, or if @var{expression} could not be
+parsed.

 Expressions can contain the following operators, listed in order of
 decreasing precedence.
diff --git a/src/Makefile.am b/src/Makefile.am
index 0f567012..08f48afe 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -26,7 +26,7 @@ AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS)
 AM_LDFLAGS = $(OS2_LDFLAGS)
 bin_PROGRAMS = m4
 noinst_HEADERS = m4.h
-m4_SOURCES = m4.c builtin.c debug.c eval.c format.c freeze.c input.c \
+m4_SOURCES = m4.c builtin.c debug.c eval.c eval64.c format.c freeze.c input.c \
 macro.c output.c path.c symtab.c
 LDADD = ../lib/libm4.a $(LIBM4_LIBDEPS) \
   $(CLOCK_TIME_LIB) $(GETLOCALENAME_L_LIB) $(GETRANDOM_LIB) \
diff --git a/src/builtin.c b/src/builtin.c
index bf0d1798..bba80e33 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -57,6 +57,7 @@ DECLARE (m4_dumpdef);
 DECLARE (m4_errprint);
 DECLARE (m4_esyscmd);
 DECLARE (m4_eval);
+DECLARE (m4_eval64);
 DECLARE (m4_format);
 DECLARE (m4_ifdef);
 DECLARE (m4_ifelse);
@@ -109,6 +110,7 @@ static builtin const builtin_tab[] = {
   {"errprint", false, false, true, m4_errprint},
   {"esyscmd", true, false, true, m4_esyscmd},
   {"eval", false, false, true, m4_eval},
+  {"eval64", true, false, true, m4_eval64},
   {"format", true, false, true, m4_format},
   {"ifdef", false, true, true, m4_ifdef},
   {"ifelse", false, true, true, m4_ifelse},
@@ -571,10 +573,10 @@ static char const digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
 /* The function ntoa () converts VALUE to a signed ASCII
    representation in radix RADIX, with the ending \0 in *END.  */
 static const char *
-ntoa (int32_t value, int radix, const char **end)
+ntoa (int64_t value, int radix, const char **end)
 {
   bool negative;
-  uint32_t uvalue;
+  uint64_t uvalue;
   /* Sized for radix 2, plus sign and trailing NUL.  */
   static char str[sizeof (value) * CHAR_BIT + 2];
   char *s = &str[sizeof str];
@@ -585,12 +587,12 @@ ntoa (int32_t value, int radix, const char **end)
   if (value < 0)
     {
       negative = true;
-      uvalue = -(uint32_t) value;
+      uvalue = -(uint64_t) value;
     }
   else
     {
       negative = false;
-      uvalue = (uint32_t) value;
+      uvalue = (uint64_t) value;
     }

   do
@@ -1253,10 +1255,11 @@ m4_sysval (struct obstack *obs, int argc MAYBE_UNUSED,
    The actual work is done in the function evaluate (), which lives in
    eval.c.  */
 static void
-m4_eval (struct obstack *obs, int argc, macro_arguments *argv)
+eval (struct obstack *obs, int argc, macro_arguments *argv,
+      bool func (const call_info *, const char *, size_t, int64_t *))
 {
   const call_info *me = arg_info (argv);
-  int32_t value = 0;
+  int64_t value = 0;
   int radix = 10;
   int min = 1;
   const char *s;
@@ -1283,17 +1286,23 @@ m4_eval (struct obstack *obs, int argc, macro_arguments *argv)
       return;
     }

-  if (evaluate (me, ARG (1), ARG_LEN (1), &value))
+  if (func (me, ARG (1), ARG_LEN (1), &value))
     return;

   if (radix == 1)
     {
+      if (value < INT_MIN || value > INT_MAX)
+        {
+          m4_warn (0, me, _("magnitude too large for base 1: %" PRId64),
+                   value);
+          return;
+        }
       if (value < 0)
         {
           obstack_1grow (obs, '-');
           value = -value;
         }
-      if (value + 0U < min + 0U)
+      if (value + 0ULL < min + 0ULL)
         {
           obstack_blank (obs, min - value);
           memset ((char *) obstack_next_free (obs) - (min - value), '0',
@@ -1321,6 +1330,18 @@ m4_eval (struct obstack *obs, int argc, macro_arguments *argv)
   obstack_grow (obs, s, len);
 }

+static void
+m4_eval (struct obstack *obs, int argc, macro_arguments *argv)
+{
+  eval (obs, argc, argv, evaluate);
+}
+
+static void
+m4_eval64 (struct obstack *obs, int argc, macro_arguments *argv)
+{
+  eval (obs, argc, argv, evaluate64);
+}
+
 static void
 m4_incr (struct obstack *obs, int argc, macro_arguments *argv)
 {
diff --git a/src/eval.c b/src/eval.c
index 163e2634..ae98a03f 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -22,9 +22,16 @@
 /* This file contains the functions to evaluate integer expressions for
    the "eval" macro.  It is a little, fairly self-contained module, with
    its own scanner, and a recursive descent parser.  The only entry point
-   is evaluate ().  */
+   is evaluate ().  This file is also used to implement eval64.  */

-#include "m4.h"
+#ifndef EVAL64
+# include "m4.h"
+
+# define INT int32_t
+# define UINT uint32_t
+# define SHIFTMASK 0x1f
+# define EVALUATE evaluate
+#endif /* !EVAL64 */

 /* Evaluates token types.  */

@@ -87,8 +94,9 @@ typedef enum eval_error
 }
 eval_error;

-static eval_error primary (int32_t *);
-static eval_error parse_expr (int32_t *, eval_error, unsigned);
+
+static eval_error primary (INT *);
+static eval_error parse_expr (INT *, eval_error, unsigned);

 /* Lexical functions.  */

@@ -120,7 +128,7 @@ eval_undo (void)
 /* VAL is numerical value, if any.  */

 static eval_token
-eval_lex (int32_t *val)
+eval_lex (INT *val)
 {
   while (eval_text != end_text && c_isspace (*eval_text))
     eval_text++;
@@ -139,7 +147,7 @@ eval_lex (int32_t *val)
       /* The documentation says that "overflow silently results in wraparound".
          Therefore use an unsigned integer type to avoid undefined behaviour
          when parsing '-2147483648'.  */
-      uint32_t value;
+      UINT value;

       if (*eval_text == '0')
         {
@@ -314,10 +322,10 @@ eval_lex (int32_t *val)

 /* Parse `(expr)', unary operators, and numbers.  */
 static eval_error
-primary (int32_t *v1)
+primary (INT *v1)
 {
   eval_error er;
-  int32_t v2;
+  INT v2;

   switch (eval_lex (v1))
     {
@@ -352,7 +360,7 @@ primary (int32_t *v1)
       return primary (v1);
     case MINUS:
       er = primary (v1);
-      *v1 = (int32_t) -(uint32_t) *v1;
+      *v1 = (INT) -(UINT) *v1;
       return er;
     case NOT:
       er = primary (v1);
@@ -380,15 +388,15 @@ primary (int32_t *v1)

 /* Parse binary operators with at least MIN_PREC precedence.  */
 static eval_error
-parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
+parse_expr (INT *v1, eval_error er, unsigned min_prec)
 {
   eval_token et;
   eval_token et2;
   eval_error er2;
   eval_error er3;
-  int32_t v2;
-  int32_t v3;
-  uint32_t u1;
+  INT v2;
+  INT v3;
+  UINT u1;

   if (er >= SYNTAX_ERROR)
     return er;
@@ -437,20 +445,20 @@ parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
           else
             {
               while (v2-- > 0)
-                u1 *= (uint32_t) *v1;
+                u1 *= (UINT) *v1;
             }
           *v1 = u1;
           break;

         case TIMES:
-          *v1 = (int32_t) ((uint32_t) *v1 * (uint32_t) v2);
+          *v1 = (INT) ((UINT) *v1 * (UINT) v2);
           break;
         case DIVIDE:
           if (v2 == 0)
             er = DIVIDE_ZERO;
           else if (v2 == -1)
             /* Avoid overflow, and the x86 SIGFPE on INT_MIN / -1.  */
-            *v1 = (int32_t) -(uint32_t) *v1;
+            *v1 = (INT) -(UINT) *v1;
           else
             *v1 /= v2;
           break;
@@ -465,20 +473,20 @@ parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
           break;

         case PLUS:
-          *v1 = (int32_t) ((uint32_t) *v1 + (uint32_t) v2);
+          *v1 = (INT) ((UINT) *v1 + (UINT) v2);
           break;
         case MINUS:
-          *v1 = (int32_t) ((uint32_t) *v1 - (uint32_t) v2);
+          *v1 = (INT) ((UINT) *v1 - (UINT) v2);
           break;

         case LSHIFT:
           u1 = *v1;
-          u1 <<= (uint32_t) (v2 & 0x1f);
+          u1 <<= (UINT) (v2 & SHIFTMASK);
           *v1 = u1;
           break;
         case RSHIFT:
           u1 = *v1 < 0 ? ~*v1 : *v1;
-          u1 >>= (uint32_t) (v2 & 0x1f);
+          u1 >>= (UINT) (v2 & SHIFTMASK);
           *v1 = *v1 < 0 ? ~u1 : u1;
           break;

@@ -563,17 +571,18 @@ parse_expr (int32_t *v1, eval_error er, unsigned min_prec)

 /* Main entry point, called from "eval".  */
 bool
-evaluate (const call_info *me, const char *expr, size_t len, int32_t *val)
+EVALUATE (const call_info *me, const char *expr, size_t len, int64_t *pval)
 {
   eval_error err;
+  INT val;

   eval_init_lex (expr, len);
-  err = primary (val);
-  err = parse_expr (val, err, 1);
+  err = primary (&val);
+  err = parse_expr (&val, err, 1);

   if (err == NO_ERROR && eval_text != end_text)
     {
-      if (eval_lex (val) == BADOP)
+      if (eval_lex (&val) == BADOP)
         err = INVALID_OPERATOR;
       else
         err = EXCESS_INPUT;
@@ -585,6 +594,7 @@ evaluate (const call_info *me, const char *expr, size_t len, int32_t *val)
     {
       /* Cases where result is printed.  */
     case NO_ERROR:
+      *pval = val;
       return false;

     case EMPTY_ARGUMENT:
diff --git a/src/eval64.c b/src/eval64.c
new file mode 100644
index 00000000..6d5ae1b7
--- /dev/null
+++ b/src/eval64.c
@@ -0,0 +1,39 @@
+/* GNU m4 -- A simple macro processor
+
+   Copyright (C) 1989-1994, 2006-2014, 2016-2017, 2020-2025 Free
+   Software Foundation, Inc.
+
+   This file is part of GNU M4.
+
+   GNU M4 is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   GNU M4 is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+/* This file contains the functions to evaluate 64-bit integer
+   expressions for the "eval64" macro, by reusing code from eval.  The
+   only entry point is evaluate64 ().  */
+
+#include "m4.h"
+
+#define EVAL64
+#define INT int64_t
+#define UINT uint64_t
+#define SHIFTMASK 0x3f
+#define EVALUATE evaluate64
+
+#include "eval.c"
+/*
+  Defined by eval.c:
+bool
+evaluate64 (const call_info *me, const char *expr, size_t len, int64_t *pval)
+ */
diff --git a/src/m4.h b/src/m4.h
index 79dca438..c1735318 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -29,6 +29,7 @@
 #include <c-ctype.h>
 #include <errno.h>
 #include <error.h>
+#include <inttypes.h>
 #include <limits.h>
 #include <locale.h>
 #include <stdbool.h>
@@ -546,9 +547,13 @@ extern void include_env_init (void);
 extern void add_include_directory (const char *);
 extern FILE *m4_path_search (const char *, bool, char **);
 
-/* File: eval.c  --- expression evaluation.  */
+/* File: eval.c  --- 32-bit expression evaluation.  */

-extern bool evaluate (const call_info *, const char *, size_t, int32_t *);
+extern bool evaluate (const call_info *, const char *, size_t, int64_t *);
+
+/* File: eval64.c  --- 64-bit expression evaluation.  */
+
+extern bool evaluate64 (const call_info *, const char *, size_t, int64_t *);
 
 /* File: format.c  --- printf like formatting.  */


base-commit: de2ad6ddc904ea07fe9a6f61fa9418b15ffbfc0b
prerequisite-patch-id: 609f7f2d1044633fdedb9052c0367b1d7b7b8cce
prerequisite-patch-id: bcbc80d445cc1737da3b453d50426c9f944186c9
-- 
2.49.0


_______________________________________________
M4-patches mailing list
M4-patches@gnu.org
https://lists.gnu.org/mailman/listinfo/m4-patches

[PATCH branch-1.6] eval: Add eval64 builtin [work in progress]

Reply via email to