This fixes 3 tests, and the delta in expr.c is +11 lines -- all comments. The actual code is shorter.
It's a bit unnatural to me but I tried to compress the code, following the existing style. I'm going to fix the type coercion problems too, which will piggyback on this change. Basically the OPS lookup table needs have another field to specify coercion rules for the operation. The type conversion has to be done with respect to the operation being evaluated rather than globally up front. # fails with type error, '21' should be coerced to integer and then added to 3 expr ab21xx : '[^0-9]*\([0-9]*\)' + 3 # segfaults because '3' is coerced to integer at parse time, and we pass NULL to regexec expr 3 : '\(.\)' ----- expr now uses the precedence table specified by POSIX, implemented using the "precedence climbing" algorithm. See the references at the top of eval_expr(). This fixes 3 of 4 failing tests. I also added more tests for correct behavior and for syntax errors. This includes a new test exposing a segfault, related to type coercion.
From 0034aecd19302f138e0ad9b15fc08a97fec564c3 Mon Sep 17 00:00:00 2001 From: Andy Chu <[email protected]> Date: Mon, 14 Mar 2016 21:05:16 -0700 Subject: [PATCH] Fix the operator precedence in expr. expr now uses the precedence table specified by POSIX, implemented using the "precedence climbing" algorithm. See the references at the top of eval_expr(). This fixes 3 of 4 failing tests. I also added more tests for correct behavior and for syntax errors. This includes a new test exposing a segfault, related to type coercion. --- tests/expr.test | 33 +++++++++++++ toys/pending/expr.c | 135 ++++++++++++++++++++++++++++------------------------ 2 files changed, 106 insertions(+), 62 deletions(-) diff --git a/tests/expr.test b/tests/expr.test index c95d030..69e4bc4 100755 --- a/tests/expr.test +++ b/tests/expr.test @@ -17,5 +17,38 @@ testing "% * same priority" "expr 3 % 2 \* 4" "4\n" "" "" testing "* % same priority" "expr 3 \* 2 % 4" "2\n" "" "" testing "= > same priority" "expr 0 = 2 \> 3" "0\n" "" "" testing "> = same priority" "expr 3 \> 2 = 1" "1\n" "" "" + +testing "/ by zero" "expr 1 / 0; echo \$?" "2\n" "" "" +testing "% by zero" "expr 1 % 0; echo \$?" "2\n" "" "" + +testing "regex position" "expr ab21xx : '[^0-9]*[0-9]*'" "4\n" "" "" +testing "regex extraction" "expr ab21xx : '[^0-9]*\([0-9]*\).*'" "21\n" "" "" +testing "regex no match" "expr ab21xx : x" "0\n" "" "" + +# result of ':' regex match can subsequently be used for arithmetic testing "string becomes integer" "expr ab21xx : '[^0-9]*\([0-9]*\)' + 3" \ "24\n" "" "" + +testing "integer comparison" "expr -3 \< -2" "1\n" "" "" +testing "string comparison" "expr -3 \< -2s" "0\n" "" "" +testing "integer expr comparison" "expr 2 - 5 \< -2" "1\n" "" "" +# result of arithmetic can subsequently be compared as a string +testing "string expr comparison" "expr 2 - 5 \< -2s" "0\n" "" "" + +testing "parens around literal" "expr \( a \)" "a\n" "" "" + +testing "exit code when true" "expr a; echo \$?" "a\n0\n" "" "" +testing "exit code when false" "expr 0; echo \$?" "0\n1\n" "" "" +testing "exit code with syntax error" "expr \(; echo \$?" "2\n" "" "" +testing "exit code when evaluating to 0" "expr -1 + 1; echo \$?" "0\n1\n" "" "" + +# BUG: segfaults because '3' is coerced to integer and regexc gets NULL +testing "regex segfault" "expr 3 : '\(.\)'" "3\n" "" "" + +# syntax errors +testing "no expression" "expr; echo \$?" "2\n" "" "" +testing "empty ()" "expr \( \); echo \$?" "2\n" "" "" +testing "missing )" "expr \( 1; echo \$?" "2\n" "" "" +testing "missing outer )" "expr \( 1 + \( 2 \* 3 \); echo \$?" "2\n" "" "" +testing "bad operator" "expr 1 @ 2; echo \$?" "2\n" "" "" +testing "adjacent literals" "expr 1 2; echo \$?" "2\n" "" "" diff --git a/toys/pending/expr.c b/toys/pending/expr.c index 763ad02..ac7edac 100644 --- a/toys/pending/expr.c +++ b/toys/pending/expr.c @@ -45,9 +45,8 @@ config EXPR #define FOR_expr #include "toys.h" - GLOBALS( - int argidx; + char* tok; // current token, not on the stack since recursive calls mutate it ) // Scalar value. @@ -87,6 +86,7 @@ static void re(struct value *lhs, struct value *rhs) regmatch_t rm[2]; xregcomp(&rp, rhs->s, 0); + // BUG: lhs->s is NULL when it looks like an integer, causing a segfault. if (!regexec(&rp, lhs->s, 2, rm, 0) && rm[0].rm_so == 0) { if (rp.re_nsub > 0 && rm[1].rm_so >= 0) lhs->s = xmprintf("%.*s", rm[1].rm_eo - rm[1].rm_so, lhs->s+rm[1].rm_so); @@ -183,91 +183,102 @@ static void or(struct value *lhs, struct value *rhs) if (is_zero(lhs)) *lhs = *rhs; } -static void get_value(struct value *v) +// Converts an arg string to a value struct. Assumes arg != NULL. +static void parse_value(char* arg, struct value *v) { - char *endp, *arg; - - if (TT.argidx == toys.optc) { - v->i = 0; - v->s = ""; // signal end of expression - return; - } - -// can't happen, the increment is after the == test -// if (TT.argidx >= toys.optc) error_exit("syntax error"); - - arg = toys.optargs[TT.argidx++]; - + char *endp; v->i = strtoll(arg, &endp, 10); + // if non-NULL, there's still stuff left, and it's a string. Otherwise no + // string. v->s = *endp ? arg : NULL; } -// check if v matches a token, and consume it if so -static int match(struct value *v, char *tok) -{ - if (v->s && !strcmp(v->s, tok)) { - get_value(v); - return 1; - } - - return 0; +void syntax_error(char *msg, ...) { + if (0) { // detailed message for debugging. TODO: add CFG_ var to enable + va_list va; + va_start(va, msg); + verror_msg(msg, 0, va); + va_end(va); + xexit(); + } else + error_exit("syntax error"); } -// operators in order of increasing precedence +// operators grouped by precedence static struct op { char *tok; - + char prec; // calculate "lhs op rhs" (e.g. lhs + rhs) and store result in lhs void (*calc)(struct value *lhs, struct value *rhs); -} ops[] = { - {"|", or }, {"&", and }, {"=", eq }, {"==", eq }, {">", gt }, - {">=", gte }, {"<", lt }, {"<=", lte }, {"!=", ne }, {"+", add }, - {"-", sub }, {"*", mul }, {"/", divi}, {"%", mod }, {":", re }, - {"(", NULL}, // special case - must be last +} OPS[] = { + {"|", 1, or }, + {"&", 2, and }, + {"=", 3, eq }, {"==", 3, eq }, {">", 3, gt }, {">=", 3, gte }, + {"<", 3, lt }, {"<=", 3, lte }, {"!=", 3, ne }, + {"+", 4, add }, {"-", 4, sub }, + {"*", 5, mul }, {"/", 5, divi }, {"%", 5, mod }, + {":", 6, re }, + {"", 0, NULL}, // sentinel }; -// "|,&,= ==> >=< <= !=,+-,*/%,:" +// Point TT.tok at the next token. It's NULL when there are no more tokens. +void advance() { + TT.tok = *toys.optargs++; +} -static void parse_op(struct value *lhs, struct value *tok, struct op *op) +// Evalute a compound expression, setting 'ret'. +// +// This function uses the recursive "Precedence Climbing" algorithm: +// +// Clarke, Keith. "The top-down parsing of expressions." University of London. +// Queen Mary College. Department of Computer Science and Statistics, 1986. +// +// http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf +// +// Nice explanation and Python implementation: +// http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing +static void eval_expr(struct value *ret, int min_prec) { - if (!op) op = ops; - - // special case parsing for parentheses - if (*op->tok == '(') { - if (match(tok, "(")) { - parse_op(lhs, tok, 0); - if (!match(tok, ")")) error_exit("syntax error"); // missing closing paren - } else { - // tok is a string or integer - return it and get the next token - *lhs = *tok; - get_value(tok); - } - - return; + if (!TT.tok) syntax_error("Unexpected end of input"); + + // Evaluate LHS atom, setting 'ret'. + if (!strcmp(TT.tok, "(")) { // parenthesized expression + advance(); // consume ( + eval_expr(ret, 1); // We're inside ( ), so start with min_prec = 1 + if (!TT.tok) syntax_error("Expected )"); + if (strcmp(TT.tok, ")")) syntax_error("Expected ) but got %s", TT.tok); + advance(); // consume ) + } else { // simple literal + parse_value(TT.tok, ret); + advance(); } - parse_op(lhs, tok, op + 1); - while (match(tok, op->tok)) { - struct value rhs; - parse_op(&rhs, tok, op + 1); - if (rhs.s && !*rhs.s) error_exit("syntax error"); // premature end of expression - op->calc(lhs, &rhs); + // Evaluate RHS and apply operator until precedence is too low. + struct value rhs; + while (TT.tok) { + struct op *o = OPS; + while (o->calc) { // Look up the precedence of operator TT.tok + if (!strcmp(TT.tok, o->tok)) break; + o++; + } + if (!o->calc) break; // Not an operator (extra input will fail later) + if (o->prec < min_prec) break; // Precedence too low, pop a stack frame + advance(); + + eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence + o->calc(ret, &rhs); // Apply operator, setting 'ret'. } } void expr_main(void) { - struct value tok, ret = {0}; - + struct value ret = {0}; toys.exitval = 2; // if exiting early, indicate invalid expression - TT.argidx = 0; - - get_value(&tok); // warm up the parser with the initial value - parse_op(&ret, &tok, 0); + advance(); // initialize global token + eval_expr(&ret, 1); - // final token should be end of expression - if (!tok.s || *tok.s) error_exit("syntax error"); + if (TT.tok) syntax_error("Unexpected extra input '%s'\n", TT.tok); if (ret.s) printf("%s\n", ret.s); else printf("%lld\n", ret.i); -- 1.9.1
_______________________________________________ Toybox mailing list [email protected] http://lists.landley.net/listinfo.cgi/toybox-landley.net
