Hi, On Sat, Mar 31, 2018 at 02:57:45AM +0200, Ingo Schwarze wrote: > Even though - as discussed previously for test(1) - behaviour is > undefined by POSIX outside the range 0 to 2147483647, we are allowed > to handle a larger range, and i agree that handling the range > -9223372036854775808 to 9223372036854775807 (INT64_MIN to INT64_MAX) > seems desirable.
I actually ended up in expr(1) after seeing that the test(1) and ksh(1) debate could be continued here. While expr(1) is int64_t, expressions in ksh(1) are of type long, i.e. 32/64 bit depending on architecture. This patch therefore is a preparation to get test(1) and expr(1) changes into ksh. As both utilities are standalone, it is much easier to discuss and make modifications in them, first. > > + if (c < 0 || INT64_MAX / 10 < i || INT64_MAX - c < i * 10) > > + errx(3, "out of range"); > > I'm not saying this is an absolute show-stopper, but i consider > it somewhat ugly. What do you think about using strtonum(3) > instead in is_integer? Totally agree. Patch is adjusted. I tried to keep it as strict as it was before, but you are correct. We don't violate POSIX with strtonum, we just accept a few more inputs -- being in sync with ksh and test. > $ /obin/expr.tobias -36854775808 - \( -9223372036854775807 - 1 \) > expr.tobias: overflow > > if ((l->u.i >= 0 && r->u.i < 0 && res <= 0) || I have adjusted the code. Awesome review skills and thanks a lot! Your example also triggers an overflow in FreeBSD's expr, so it is a good idea to inform them when we are happy with our fixed version. > Do you want to check and re-send your patch? Most certainly. Adjusted patch and extended regress tests in this mail. Thanks a lot again! Tobias Index: bin/expr/expr.c =================================================================== RCS file: /cvs/src/bin/expr/expr.c,v retrieving revision 1.26 diff -u -p -u -p -r1.26 expr.c --- bin/expr/expr.c 19 Oct 2016 18:20:25 -0000 1.26 +++ bin/expr/expr.c 31 Mar 2018 11:22:05 -0000 @@ -19,8 +19,8 @@ struct val *make_int(int64_t); struct val *make_str(char *); void free_value(struct val *); -int is_integer(struct val *, int64_t *); -int to_integer(struct val *); +int is_integer(struct val *, int64_t *, const char **); +int to_integer(struct val *, const char **); void to_string(struct val *); int is_zero_or_null(struct val *); void nexttoken(int); @@ -94,11 +94,9 @@ free_value(struct val *vp) /* determine if vp is an integer; if so, return it's value in *r */ int -is_integer(struct val *vp, int64_t *r) +is_integer(struct val *vp, int64_t *r, const char **errstr) { - char *s; - int neg; - int64_t i; + const char *errstrp; if (vp->type == integer) { *r = vp->u.i; @@ -107,43 +105,29 @@ is_integer(struct val *vp, int64_t *r) /* * POSIX.2 defines an "integer" as an optional unary minus - * followed by digits. + * followed by digits. Other representations are unspecified, + * which means that strtonum(3) is a viable option here. */ - s = vp->u.s; - i = 0; + if (errstr == NULL) + errstr = &errstrp; + *r = strtonum(vp->u.s, INT64_MIN, INT64_MAX, errstr); + if (*errstr != NULL) + return 0; - neg = (*s == '-'); - if (neg) - s++; - - while (*s) { - if (!isdigit((unsigned char)*s)) - return 0; - - i *= 10; - i += *s - '0'; - - s++; - } - - if (neg) - i *= -1; - - *r = i; return 1; } /* coerce to vp to an integer */ int -to_integer(struct val *vp) +to_integer(struct val *vp, const char **errstr) { int64_t r; if (vp->type == integer) return 1; - if (is_integer(vp, &r)) { + if (is_integer(vp, &r, errstr)) { free(vp->u.s); vp->u.i = r; vp->type = integer; @@ -176,7 +160,7 @@ is_zero_or_null(struct val *vp) if (vp->type == integer) return vp->u.i == 0; else - return *vp->u.s == 0 || (to_integer(vp) && vp->u.i == 0); + return *vp->u.s == 0 || (to_integer(vp, NULL) && vp->u.i == 0); } void @@ -303,20 +287,25 @@ eval5(void) struct val * eval4(void) { - struct val *l, *r; - enum token op; + const char *errstr; + struct val *l, *r; + enum token op; + volatile int64_t res; l = eval5(); while ((op = token) == MUL || op == DIV || op == MOD) { nexttoken(0); r = eval5(); - if (!to_integer(l) || !to_integer(r)) { - errx(2, "non-numeric argument"); + if (!to_integer(l, &errstr) || !to_integer(r, &errstr)) { + errx(2, "%s", errstr); } if (op == MUL) { - l->u.i *= r->u.i; + res = l->u.i * r->u.i; + if (r->u.i != 0 && l->u.i != res / r->u.i) + errx(3, "overflow"); + l->u.i = res; } else { if (r->u.i == 0) { errx(2, "division by zero"); @@ -324,6 +313,8 @@ eval4(void) if (op == DIV) { if (l->u.i != INT64_MIN || r->u.i != -1) l->u.i /= r->u.i; + else + errx(3, "overflow"); } else { if (l->u.i != INT64_MIN || r->u.i != -1) l->u.i %= r->u.i; @@ -342,22 +333,32 @@ eval4(void) struct val * eval3(void) { - struct val *l, *r; - enum token op; + const char *errstr; + struct val *l, *r; + enum token op; + volatile int64_t res; l = eval4(); while ((op = token) == ADD || op == SUB) { nexttoken(0); r = eval4(); - if (!to_integer(l) || !to_integer(r)) { - errx(2, "non-numeric argument"); + if (!to_integer(l, &errstr) || !to_integer(r, &errstr)) { + errx(2, "%s", errstr); } if (op == ADD) { - l->u.i += r->u.i; + res = l->u.i + r->u.i; + if ((l->u.i > 0 && r->u.i > 0 && res <= 0) || + (l->u.i < 0 && r->u.i < 0 && res >= 0)) + errx(3, "overflow"); + l->u.i = res; } else { - l->u.i -= r->u.i; + res = l->u.i - r->u.i; + if ((l->u.i >= 0 && r->u.i < 0 && res <= 0) || + (l->u.i < 0 && r->u.i > 0 && res >= 0)) + errx(3, "overflow"); + l->u.i = res; } free_value(r); @@ -380,7 +381,7 @@ eval2(void) nexttoken(0); r = eval3(); - if (is_integer(l, &li) && is_integer(r, &ri)) { + if (is_integer(l, &li, NULL) && is_integer(r, &ri, NULL)) { switch (op) { case GT: v = (li > ri); Index: regress/bin/expr/Makefile =================================================================== RCS file: regress/bin/expr/Makefile diff -N regress/bin/expr/Makefile --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ regress/bin/expr/Makefile 31 Mar 2018 11:22:05 -0000 @@ -0,0 +1,8 @@ +# $OpenBSD$ + +REGRESS_TARGETS = expr + +expr: + sh ${.CURDIR}/expr.sh + +.include <bsd.regress.mk> Index: regress/bin/expr/expr.sh =================================================================== RCS file: regress/bin/expr/expr.sh diff -N regress/bin/expr/expr.sh --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ regress/bin/expr/expr.sh 31 Mar 2018 11:22:05 -0000 @@ -0,0 +1,107 @@ +#!/bin/ksh + +: ${EXPR=expr} + +function test_expr { + #echo "Testing: `eval echo $1`" + res=`eval $EXPR $1 2>&1` + if [ "$res" != "$2" ]; then + echo "Expected $2, got $res from expression: `eval echo $1`" + exit 1 + fi +} + +# The first arg will get eval'd so escape any meta characters +# The 2nd arg is an expected string/response from expr for that op. + +# Test overflow cases +test_expr '4611686018427387904 + 4611686018427387903' '9223372036854775807' +test_expr '4611686018427387904 + 4611686018427387904' "expr: overflow" +test_expr '4611686018427387904 - -4611686018427387904' "expr: overflow" +test_expr '-4611686018427387904 - 4611686018427387903' '-9223372036854775807' +test_expr '-4611686018427387904 - 4611686018427387905' "expr: overflow" +test_expr '-4611686018427387904 \* 1' '-4611686018427387904' +test_expr '-4611686018427387904 \* -1' '4611686018427387904' +test_expr '-4611686018427387904 \* 2' '-9223372036854775808' +test_expr '-4611686018427387904 \* 3' "expr: overflow" +test_expr '-4611686018427387904 \* -2' "expr: overflow" +test_expr '4611686018427387904 \* 1' '4611686018427387904' +test_expr '4611686018427387904 \* 2' "expr: overflow" +test_expr '4611686018427387904 \* 3' "expr: overflow" + +# Test from gtk-- configure that cause problems on old expr +test_expr '3 \> 3 \| 3 = 3 \& 4 \> 4 \| 3 = 3 \& 4 = 4 \& 5 \>= 5' '1' +test_expr '3 \> 3 \| 3 = 3 \& 4 \> 4 \| 3 = 3 \& 4 = 4 \& 5 \>= 6' '0' +test_expr '3 \> 3 \| 3 = 3 \& 4 \> 4 \| 3 = 3 \& 4 = 3 \& 5 \>= 5' '0' +test_expr '3 \> 3 \| 3 = 3 \& 4 \> 4 \| 3 = 2 \& 4 = 4 \& 5 \>= 5' '0' +test_expr '3 \> 2 \| 3 = 3 \& 4 \> 4 \| 3 = 3 \& 4 = 4 \& 5 \>= 6' '1' +test_expr '3 \> 3 \| 3 = 3 \& 4 \> 3 \| 3 = 3 \& 4 = 4 \& 5 \>= 5' '1' + +# Basic precendence test with the : operator vs. math +test_expr '2 : 4 / 2' '0' +test_expr '4 : 4 % 3' '1' + +# Dangling arithemtic operator +test_expr '.java_wrapper : /' '0' +test_expr '4 : \*' '0' +test_expr '4 : +' '0' +test_expr '4 : -' '0' +test_expr '4 : /' '0' +test_expr '4 : %' '0' + +# Basic math test +test_expr '2 + 4 \* 5' '22' + +# Basic functional tests +test_expr '2' '2' +test_expr '-4' '-4' +test_expr 'hello' 'hello' + +# Compare operator precendence test +test_expr '2 \> 1 \* 17' '0' + +# Compare operator tests +test_expr '2 \!= 5' '1' +test_expr '2 \!= 2' '0' +test_expr '2 \<= 3' '1' +test_expr '2 \<= 2' '1' +test_expr '2 \<= 1' '0' +test_expr '2 \< 3' '1' +test_expr '2 \< 2' '0' +test_expr '2 = 2' '1' +test_expr '2 = 4' '0' +test_expr '2 \>= 1' '1' +test_expr '2 \>= 2' '1' +test_expr '2 \>= 3' '0' +test_expr '2 \> 1' '1' +test_expr '2 \> 2' '0' + +# Known failure once +test_expr '1 \* -1' '-1' + +# Test a known case that should fail +test_expr '- 1 + 5' 'expr: syntax error' + +# Double check negative numbers +test_expr '1 - -5' '6' + +# More complex math test for precedence +test_expr '-3 + -1 \* 4 + 3 / -6' '-7' + +# The next two are messy but the shell escapes cause that. +# Test precendence +test_expr 'X1/2/3 : X\\\(.\*[^/]\\\)//\*[^/][^/]\*/\*$ \| . : \\\(.\\\)' '1/2' + +# Test proper () returning \1 from a regex +test_expr '1/2 : .\*/\\\(.\*\\\)' '2' + +# Test integer edge cases +test_expr '-9223372036854775807 + 0' '-9223372036854775807' +test_expr '-9223372036854775807 - 1' '-9223372036854775808' +test_expr '-9223372036854775808 + 0' '-9223372036854775808' +test_expr '-9223372036854775807 - 2' 'expr: overflow' +test_expr '-9223372036854775808 - 1' 'expr: overflow' +test_expr '-9223372036854775809 + 0' 'expr: too small' +test_expr '-36854775808 - \( -9223372036854775807 - 1 \)' '9223372000000000000' + +exit 0