[Toybox] [PATCH] Fix the operator precedence in expr.

Andy Chu Mon, 14 Mar 2016 21:45:07 -0700

This fixes 3 tests, and the delta in expr.c is +11 lines -- all
comments.  The actual code is shorter.


It's a bit unnatural to me but I tried to compress the code, following
the existing style.

I'm going to fix the type coercion problems too, which will piggyback
on this change.  Basically the OPS lookup table needs have another
field to specify coercion rules for the operation.  The type
conversion has to be done with respect to the operation being
evaluated rather than globally up front.

# fails with type error, '21' should be coerced to integer and then added to 3
expr ab21xx : '[^0-9]*\([0-9]*\)' + 3

# segfaults because '3' is coerced to integer at parse time, and we
pass NULL to regexec
expr 3 : '\(.\)'

-----

expr now uses the precedence table specified by POSIX, implemented using
the "precedence climbing" algorithm.  See the references at the top of
eval_expr().

This fixes 3 of 4 failing tests.

I also added more tests for correct behavior and for syntax errors.
This includes a new test exposing a segfault, related to type coercion.

From 0034aecd19302f138e0ad9b15fc08a97fec564c3 Mon Sep 17 00:00:00 2001
From: Andy Chu <[email protected]>
Date: Mon, 14 Mar 2016 21:05:16 -0700
Subject: [PATCH] Fix the operator precedence in expr.

expr now uses the precedence table specified by POSIX, implemented using
the "precedence climbing" algorithm.  See the references at the top of
eval_expr().

This fixes 3 of 4 failing tests.

I also added more tests for correct behavior and for syntax errors.
This includes a new test exposing a segfault, related to type coercion.
---
 tests/expr.test     |  33 +++++++++++++
 toys/pending/expr.c | 135 ++++++++++++++++++++++++++++------------------------
 2 files changed, 106 insertions(+), 62 deletions(-)

diff --git a/tests/expr.test b/tests/expr.test
index c95d030..69e4bc4 100755
--- a/tests/expr.test
+++ b/tests/expr.test
@@ -17,5 +17,38 @@ testing "% * same priority" "expr 3 % 2 \* 4" "4\n" "" ""
 testing "* % same priority" "expr 3 \* 2 % 4" "2\n" "" ""
 testing "= > same priority" "expr 0 = 2 \> 3" "0\n" "" ""
 testing "> = same priority" "expr 3 \> 2 = 1" "1\n" "" ""
+
+testing "/ by zero" "expr 1 / 0; echo \$?" "2\n" "" ""
+testing "% by zero" "expr 1 % 0; echo \$?" "2\n" "" ""
+
+testing "regex position" "expr ab21xx : '[^0-9]*[0-9]*'" "4\n" "" ""
+testing "regex extraction" "expr ab21xx : '[^0-9]*\([0-9]*\).*'" "21\n" "" ""
+testing "regex no match" "expr ab21xx : x" "0\n" "" ""
+
+# result of ':' regex match can subsequently be used for arithmetic
 testing "string becomes integer" "expr ab21xx : '[^0-9]*\([0-9]*\)' + 3" \
 	"24\n" "" ""
+
+testing "integer comparison" "expr -3 \< -2" "1\n" "" ""
+testing "string comparison" "expr -3 \< -2s" "0\n" "" ""
+testing "integer expr comparison" "expr 2 - 5 \< -2" "1\n" "" ""
+# result of arithmetic can subsequently be compared as a string
+testing "string expr comparison" "expr 2 - 5 \< -2s" "0\n" "" ""
+
+testing "parens around literal" "expr \( a \)" "a\n" "" ""
+
+testing "exit code when true" "expr a; echo \$?" "a\n0\n" "" ""
+testing "exit code when false" "expr 0; echo \$?" "0\n1\n" "" ""
+testing "exit code with syntax error" "expr \(; echo \$?" "2\n" "" ""
+testing "exit code when evaluating to 0" "expr -1 + 1; echo \$?" "0\n1\n" "" ""
+
+# BUG: segfaults because '3' is coerced to integer and regexc gets NULL
+testing "regex segfault" "expr 3 : '\(.\)'" "3\n" "" ""
+
+# syntax errors
+testing "no expression" "expr; echo \$?" "2\n" "" ""
+testing "empty ()" "expr \( \); echo \$?" "2\n" "" ""
+testing "missing )" "expr \( 1; echo \$?" "2\n" "" ""
+testing "missing outer )" "expr \( 1 + \( 2 \* 3 \); echo \$?" "2\n" "" ""
+testing "bad operator" "expr 1 @ 2; echo \$?" "2\n" "" ""
+testing "adjacent literals" "expr 1 2; echo \$?" "2\n" "" ""
diff --git a/toys/pending/expr.c b/toys/pending/expr.c
index 763ad02..ac7edac 100644
--- a/toys/pending/expr.c
+++ b/toys/pending/expr.c
@@ -45,9 +45,8 @@ config EXPR
 #define FOR_expr
 #include "toys.h"
 
-
 GLOBALS(
-  int argidx;
+  char* tok; // current token, not on the stack since recursive calls mutate it
 )
 
 // Scalar value.
@@ -87,6 +86,7 @@ static void re(struct value *lhs, struct value *rhs)
   regmatch_t rm[2];
 
   xregcomp(&rp, rhs->s, 0);
+  // BUG: lhs->s is NULL when it looks like an integer, causing a segfault.
   if (!regexec(&rp, lhs->s, 2, rm, 0) && rm[0].rm_so == 0) {
     if (rp.re_nsub > 0 && rm[1].rm_so >= 0) 
       lhs->s = xmprintf("%.*s", rm[1].rm_eo - rm[1].rm_so, lhs->s+rm[1].rm_so);
@@ -183,91 +183,102 @@ static void or(struct value *lhs, struct value *rhs)
   if (is_zero(lhs)) *lhs = *rhs;
 }
 
-static void get_value(struct value *v)
+// Converts an arg string to a value struct.  Assumes arg != NULL.
+static void parse_value(char* arg, struct value *v)
 {
-  char *endp, *arg;
-
-  if (TT.argidx == toys.optc) {
-    v->i = 0;
-    v->s = ""; // signal end of expression
-    return;
-  }
-
-//  can't happen, the increment is after the == test
-//  if (TT.argidx >= toys.optc) error_exit("syntax error");
-
-  arg = toys.optargs[TT.argidx++];
-
+  char *endp;
   v->i = strtoll(arg, &endp, 10);
+  // if non-NULL, there's still stuff left, and it's a string.  Otherwise no
+  // string.
   v->s = *endp ? arg : NULL;
 }
 
-// check if v matches a token, and consume it if so
-static int match(struct value *v, char *tok)
-{
-  if (v->s && !strcmp(v->s, tok)) {
-    get_value(v);
-    return 1;
-  }
-
-  return 0;
+void syntax_error(char *msg, ...) {
+  if (0) { // detailed message for debugging.  TODO: add CFG_ var to enable
+    va_list va;
+    va_start(va, msg);
+    verror_msg(msg, 0, va);
+    va_end(va);
+    xexit();
+  } else
+    error_exit("syntax error");
 }
 
-// operators in order of increasing precedence
+// operators grouped by precedence
 static struct op {
   char *tok;
-
+  char prec;
   // calculate "lhs op rhs" (e.g. lhs + rhs) and store result in lhs
   void (*calc)(struct value *lhs, struct value *rhs);
-} ops[] = {
-  {"|",   or  }, {"&",   and }, {"=",   eq  }, {"==",  eq  }, {">",   gt  },
-  {">=",  gte }, {"<",   lt  }, {"<=",  lte }, {"!=",  ne  }, {"+",   add },
-  {"-",   sub }, {"*",   mul }, {"/",   divi}, {"%",   mod }, {":",   re  },
-  {"(",   NULL}, // special case - must be last
+} OPS[] = {
+  {"|", 1, or  },
+  {"&", 2, and },
+  {"=", 3, eq  }, {"==", 3, eq  }, {">",  3, gt  }, {">=", 3, gte },
+  {"<", 3, lt  }, {"<=", 3, lte }, {"!=", 3, ne  },
+  {"+", 4, add }, {"-",  4, sub },
+  {"*", 5, mul }, {"/",  5, divi }, {"%", 5, mod },
+  {":", 6, re  },
+  {"",  0, NULL}, // sentinel
 };
 
-// "|,&,= ==> >=< <= !=,+-,*/%,:"
+// Point TT.tok at the next token.  It's NULL when there are no more tokens.
+void advance() {
+  TT.tok = *toys.optargs++;
+}
 
-static void parse_op(struct value *lhs, struct value *tok, struct op *op)
+// Evalute a compound expression, setting 'ret'.
+//
+// This function uses the recursive "Precedence Climbing" algorithm:
+//
+// Clarke, Keith. "The top-down parsing of expressions." University of London.
+// Queen Mary College. Department of Computer Science and Statistics, 1986.
+//
+// http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
+//
+// Nice explanation and Python implementation:
+// http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
+static void eval_expr(struct value *ret, int min_prec)
 {
-  if (!op) op = ops;
-
-  // special case parsing for parentheses
-  if (*op->tok == '(') {
-    if (match(tok, "(")) {
-      parse_op(lhs, tok, 0);
-      if (!match(tok, ")")) error_exit("syntax error"); // missing closing paren
-    } else {
-      // tok is a string or integer - return it and get the next token
-      *lhs = *tok;
-      get_value(tok);
-    }
-
-    return;
+  if (!TT.tok) syntax_error("Unexpected end of input");
+
+  // Evaluate LHS atom, setting 'ret'.
+  if (!strcmp(TT.tok, "(")) { // parenthesized expression
+    advance(); // consume (
+    eval_expr(ret, 1); // We're inside ( ), so start with min_prec = 1
+    if (!TT.tok)             syntax_error("Expected )");
+    if (strcmp(TT.tok, ")")) syntax_error("Expected ) but got %s", TT.tok);
+    advance(); // consume )
+  } else { // simple literal
+    parse_value(TT.tok, ret);
+    advance();
   }
 
-  parse_op(lhs, tok, op + 1);
-  while (match(tok, op->tok)) {
-    struct value rhs;
-    parse_op(&rhs, tok, op + 1);
-    if (rhs.s && !*rhs.s) error_exit("syntax error"); // premature end of expression
-    op->calc(lhs, &rhs);
+  // Evaluate RHS and apply operator until precedence is too low.
+  struct value rhs;
+  while (TT.tok) {
+    struct op *o = OPS;
+    while (o->calc) {  // Look up the precedence of operator TT.tok
+      if (!strcmp(TT.tok, o->tok)) break;
+      o++;
+    }
+    if (!o->calc) break; // Not an operator (extra input will fail later)
+    if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
+    advance();
+
+    eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
+    o->calc(ret, &rhs); // Apply operator, setting 'ret'.
   }
 }
 
 void expr_main(void)
 {
-  struct value tok, ret = {0};
-
+  struct value ret = {0};
   toys.exitval = 2; // if exiting early, indicate invalid expression
 
-  TT.argidx = 0;
-
-  get_value(&tok); // warm up the parser with the initial value
-  parse_op(&ret, &tok, 0);
+  advance(); // initialize global token
+  eval_expr(&ret, 1);
 
-  // final token should be end of expression
-  if (!tok.s || *tok.s) error_exit("syntax error");
+  if (TT.tok) syntax_error("Unexpected extra input '%s'\n", TT.tok);
 
   if (ret.s) printf("%s\n", ret.s);
   else printf("%lld\n", ret.i);
-- 
1.9.1

_______________________________________________
Toybox mailing list
[email protected]
http://lists.landley.net/listinfo.cgi/toybox-landley.net

[Toybox] [PATCH] Fix the operator precedence in expr.

Reply via email to