commit 6240d26beb52747638ed274ff6a60a8b4b8eb5b7
Author: Evan Gates <[email protected]>
Date:   Thu Nov 13 15:07:15 2014 -0800

    new expr using shunting-yard instead of recursive descent (this time with 
tabs)

diff --git a/expr.c b/expr.c
index 9457d39..28ca387 100644
--- a/expr.c
+++ b/expr.c
@@ -1,517 +1,241 @@
-/*     $OpenBSD: src/bin/expr/expr.c,v 1.19 2013/11/21 15:54:45 deraadt Exp $  
*/
-/*     $NetBSD: expr.c,v 1.3.6.1 1996/06/04 20:41:47 cgd Exp $ */
-
-/*
- * Written by J.T. Conklin <[email protected]>.
- * Public domain.
- */
-
+#include <inttypes.h>
+#include <limits.h>
+#include <regex.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
-#include <limits.h>
-#include <locale.h>
-#include <ctype.h>
-#include <regex.h>
-#include <err.h>
-
-static struct val      *make_int(int);
-static struct val      *make_str(char *);
-static void            free_value(struct val *);
-static int             is_integer(struct val *, int *);
-static int             to_integer(struct val *);
-static void            to_string(struct val *);
-static int             is_zero_or_null(struct val *);
-static void            nexttoken(int);
-static void            error(void);
-static struct val      *eval6(void);
-static struct val      *eval5(void);
-static struct val      *eval4(void);
-static struct val      *eval3(void);
-static struct val      *eval2(void);
-static struct val      *eval1(void);
-static struct val      *eval0(void);
-
-enum token {
-       OR, AND, EQ, LT, GT, ADD, SUB, MUL, DIV, MOD, MATCH, RP, LP,
-       NE, LE, GE, OPERAND, EOI
-};
-
-struct val {
-       enum {
-               integer,
-               string
-       } type;
+#include "util.h"
 
-       union {
-               char           *s;
-               int             i;
-       } u;
+enum {
+       VAL = CHAR_MAX + 1, GE, LE, NE
 };
 
-static enum token      token;
-static struct val     *tokval;
-static char          **av;
-
-static struct val *
-make_int(int i)
-{
-       struct val     *vp;
+typedef struct {
+       char    *s;
+       intmax_t n;
+} Val;
 
-       vp = (struct val *) malloc(sizeof(*vp));
-       if (vp == NULL) {
-               err(3, NULL);
-       }
-       vp->type = integer;
-       vp->u.i = i;
-       return vp;
-}
 
-static struct val *
-make_str(char *s)
-{
-       struct val     *vp;
+static void doop(int*, int**, Val*, Val**);
+static Val match(Val, Val);
+static void num(Val);
+static int valcmp(Val, Val);
+static char *valstr(Val, char*);
+static int yylex(void);
+static int yyparse(int);
 
-       vp = (struct val *) malloc(sizeof(*vp));
-       if (vp == NULL || ((vp->u.s = strdup(s)) == NULL)) {
-               err(3, NULL);
-       }
-       vp->type = string;
-       return vp;
-}
+static char **args;
+static size_t intlen;
+static Val yylval;
 
+// otop points to one past last op
+// vtop points to one past last val
+// guaranteed otop != ops
+// pop two vals, pop op, apply op, push val
 static void
-free_value(struct val *vp)
+doop(int *ops, int **otop, Val *vals, Val **vtop)
 {
-       if (vp->type == string)
-               free(vp->u.s);
-       free(vp);
-}
+       Val ret, a, b;
+       int op;
 
-/* determine if vp is an integer; if so, return it's value in *r */
-static int
-is_integer(struct val *vp, int *r)
-{
-       char           *s;
-       int             neg;
-       int             i;
+       if((*otop)[-1] == '(')
+               enprintf(2, "syntax error: extra (\n");
+       if(*vtop - vals < 2)
+               enprintf(2, "syntax error: missing expression or extra 
operator\n");
 
-       if (vp->type == integer) {
-               *r = vp->u.i;
-               return 1;
-       }
+       a  = (*vtop)[-2];
+       b  = (*vtop)[-1];
+       op = (*otop)[-1];
 
-       /*
-        * POSIX.2 defines an "integer" as an optional unary minus
-        * followed by digits.
-        */
-       s = vp->u.s;
-       i = 0;
+       switch (op) {
+               case '|':
+                       if     ( a.s && *a.s) ret = (Val){ a.s ,   0 };
+                       else if(!a.s &&  a.n) ret = (Val){ NULL, a.n };
+                       else if( b.s && *b.s) ret = (Val){ b.s ,   0 };
+                       else                  ret = (Val){ NULL, b.n };
+                       break;
 
-       neg = (*s == '-');
-       if (neg)
-               s++;
+               case '&':
+                       if(((a.s && *a.s) || a.n) &&
+                          ((b.s && *b.s) || b.n)) ret = a;
+                       else                       ret = (Val){ NULL, 0 };
+                       break;
 
-       while (*s) {
-               if (!isdigit((unsigned char)*s))
-                       return 0;
+               case '=': ret = (Val){ NULL, valcmp(a, b) == 0 }; break;
+               case '>': ret = (Val){ NULL, valcmp(a, b) >  0 }; break;
+               case GE : ret = (Val){ NULL, valcmp(a, b) >= 0 }; break;
+               case '<': ret = (Val){ NULL, valcmp(a, b) <  0 }; break;
+               case LE : ret = (Val){ NULL, valcmp(a, b) <= 0 }; break;
+               case NE : ret = (Val){ NULL, valcmp(a, b) != 0 }; break;
 
-               i *= 10;
-               i += *s - '0';
+               case '+': num(a); num(b); ret = (Val){ NULL, a.n + b.n }; break;
+               case '-': num(a); num(b); ret = (Val){ NULL, a.n - b.n }; break;
+               case '*': num(a); num(b); ret = (Val){ NULL, a.n * b.n }; break;
+               case '/': num(a); num(b); ret = (Val){ NULL, a.n / b.n }; break;
+               case '%': num(a); num(b); ret = (Val){ NULL, a.n % b.n }; break;
 
-               s++;
+               case ':': ret = match(a, b); break;
        }
 
-       if (neg)
-               i *= -1;
-
-       *r = i;
-       return 1;
+       (*vtop)[-2] = ret;
+       (*otop)--;
+       (*vtop)--;
 }
 
-/* coerce to vp to an integer */
-static int
-to_integer(struct val *vp)
+static Val
+match(Val vstr, Val vregx)
 {
-       int             r;
+       char b1[intlen], *str  = valstr(vstr , b1);
+       char b2[intlen], *regx = valstr(vregx, b2);
 
-       if (vp->type == integer)
-               return 1;
+       regex_t    re;
+       regmatch_t matches[2];
+       char       anchreg[strlen(regx) + 2];
 
-       if (is_integer(vp, &r)) {
-               free(vp->u.s);
-               vp->u.i = r;
-               vp->type = integer;
-               return 1;
-       }
+       sprintf(anchreg, "^%s", regx);
 
-       return 0;
-}
+       if(regcomp(&re, anchreg, 0))
+               enprintf(3, "regcomp failed\n");
 
-/* coerce to vp to an string */
-static void
-to_string(struct val *vp)
-{
-       char           *tmp;
-
-       if (vp->type == string)
-               return;
-
-       if (asprintf(&tmp, "%d", vp->u.i) == -1)
-               err(3, NULL);
+       if(regexec(&re, str, 2, matches, 0))
+               return (Val){ (re.re_nsub ? "" : NULL), 0 };
 
-       vp->type = string;
-       vp->u.s = tmp;
-}
+       if(re.re_nsub) {
+               intmax_t d;
+               char    *ret, *p;
+               regoff_t len = matches[1].rm_eo - matches[1].rm_so + 1;
 
-static int
-is_zero_or_null(struct val *vp)
-{
-       if (vp->type == integer) {
-               return (vp->u.i == 0);
-       } else {
-               return (*vp->u.s == 0 || (to_integer(vp) && vp->u.i == 0));
-       }
-       /* NOTREACHED */
-}
+               if(!(ret = malloc(len))) // FIXME: free
+                       enprintf(3, "malloc failed\n");
 
-static void
-nexttoken(int pat)
-{
-       char           *p;
+               d = strtoimax(ret, &p, 10);
+               strlcpy(ret, str + matches[1].rm_so, len);
 
-       if ((p = *av) == NULL) {
-               token = EOI;
-               return;
-       }
-       av++;
-
-       if (pat == 0 && p[0] != '\0') {
-               if (p[1] == '\0') {
-                       const char     *x = "|&=<>+-*/%:()";
-                       char           *i;      /* index */
-
-                       if ((i = strchr(x, *p)) != NULL) {
-                               token = i - x;
-                               return;
-                       }
-               } else if (p[1] == '=' && p[2] == '\0') {
-                       switch (*p) {
-                       case '<':
-                               token = LE;
-                               return;
-                       case '>':
-                               token = GE;
-                               return;
-                       case '!':
-                               token = NE;
-                               return;
-                       }
-               }
+               if(*ret && !*p)
+                       return (Val){ NULL, d };
+               return (Val){ ret, 0 };
        }
-       tokval = make_str(p);
-       token = OPERAND;
-       return;
+       return (Val){ NULL, matches[0].rm_eo - matches[0].rm_so };
 }
 
 static void
-error(void)
+num(Val v)
 {
-       errx(2, "syntax error");
-       /* NOTREACHED */
+       if(v.s)
+               enprintf(2, "syntax error: expected integer got `%s'\n", v.s);
 }
 
-static struct val *
-eval6(void)
+static int
+valcmp(Val a, Val b)
 {
-       struct val     *v;
-
-       if (token == OPERAND) {
-               nexttoken(0);
-               return tokval;
-
-       } else if (token == RP) {
-               nexttoken(0);
-               v = eval0();
+       char b1[intlen], *p = valstr(a, b1);
+       char b2[intlen], *q = valstr(b, b2);
 
-               if (token != LP) {
-                       error();
-                       /* NOTREACHED */
-               }
-               nexttoken(0);
-               return v;
-       } else {
-               error();
-       }
-       /* NOTREACHED */
-       return NULL;
+       if(!a.s && !b.s)
+               return (a.n > b.n) - (a.n < b.n);
+       return strcmp(p, q);
 }
 
-/* Parse and evaluate match (regex) expressions */
-static struct val *
-eval5(void)
+static char *
+valstr(Val val, char *buf)
 {
-       regex_t         rp;
-       regmatch_t      rm[2];
-       char            errbuf[256];
-       int             eval;
-       struct val     *l, *r;
-       struct val     *v;
-
-       l = eval6();
-       while (token == MATCH) {
-               nexttoken(1);
-               r = eval6();
-
-               /* coerce to both arguments to strings */
-               to_string(l);
-               to_string(r);
-
-               /* compile regular expression */
-               if ((eval = regcomp(&rp, r->u.s, 0)) != 0) {
-                       regerror(eval, &rp, errbuf, sizeof(errbuf));
-                       errx(2, "%s", errbuf);
-               }
-
-               /* compare string against pattern --  remember that patterns
-                  are anchored to the beginning of the line */
-               if (regexec(&rp, l->u.s, 2, rm, 0) == 0 && rm[0].rm_so == 0) {
-                       if (rm[1].rm_so >= 0) {
-                               *(l->u.s + rm[1].rm_eo) = '\0';
-                               v = make_str(l->u.s + rm[1].rm_so);
-
-                       } else {
-                               v = make_int((int)(rm[0].rm_eo - rm[0].rm_so));
-                       }
-               } else {
-                       if (rp.re_nsub == 0) {
-                               v = make_int(0);
-                       } else {
-                               v = make_str("");
-                       }
-               }
-
-               /* free arguments and pattern buffer */
-               free_value(l);
-               free_value(r);
-               regfree(&rp);
-
-               l = v;
+       char *p = val.s;
+       if(!p) {
+               sprintf(buf, "%"PRIdMAX, val.n);
+               p = buf;
        }
-
-       return l;
+       return p;
 }
 
-/* Parse and evaluate multiplication and division expressions */
-static struct val *
-eval4(void)
+static int
+yylex(void)
 {
-       struct val     *l, *r;
-       enum token      op;
-
-       l = eval5();
-       while ((op = token) == MUL || op == DIV || op == MOD) {
-               nexttoken(0);
-               r = eval5();
-
-               if (!to_integer(l) || !to_integer(r)) {
-                       errx(2, "non-numeric argument");
-               }
+       intmax_t d;
+       char *q, *p, *ops = "|&=><+-*/%():";
 
-               if (op == MUL) {
-                       l->u.i *= r->u.i;
-               } else {
-                       if (r->u.i == 0) {
-                               errx(2, "division by zero");
-                       }
-                       if (op == DIV) {
-                               if (l->u.i != INT_MIN || r->u.i != -1)
-                                       l->u.i /= r->u.i;
-                       } else {
-                               if (l->u.i != INT_MIN || r->u.i != -1)
-                                       l->u.i %= r->u.i;
-                               else
-                                       l->u.i = 0;
-                       }
-               }
+       if(!(p = *args++))
+               return 0;
 
-               free_value(r);
+       d = strtoimax(p, &q, 10);
+       if(*p && !*q) {
+               yylval = (Val){ NULL, d };
+               return VAL;
        }
 
-       return l;
-}
-
-/* Parse and evaluate addition and subtraction expressions */
-static struct val *
-eval3(void)
-{
-       struct val     *l, *r;
-       enum token      op;
-
-       l = eval4();
-       while ((op = token) == ADD || op == SUB) {
-               nexttoken(0);
-               r = eval4();
+       if(*p && !p[1] && strchr(ops, *p))
+               return *p;
 
-               if (!to_integer(l) || !to_integer(r)) {
-                       errx(2, "non-numeric argument");
-               }
-
-               if (op == ADD) {
-                       l->u.i += r->u.i;
-               } else {
-                       l->u.i -= r->u.i;
-               }
+       if(strcmp(p, ">=") == 0) return GE;
+       if(strcmp(p, "<=") == 0) return LE;
+       if(strcmp(p, "!=") == 0) return NE;
 
-               free_value(r);
-       }
-
-       return l;
+       yylval = (Val){ p, 0 };
+       return VAL;
 }
 
-/* Parse and evaluate comparison expressions */
-static struct val *
-eval2(void)
-{
-       struct val     *l, *r;
-       enum token      op;
-       int             v = 0, li, ri;
-
-       l = eval3();
-       while ((op = token) == EQ || op == NE || op == LT || op == GT ||
-           op == LE || op == GE) {
-               nexttoken(0);
-               r = eval3();
-
-               if (is_integer(l, &li) && is_integer(r, &ri)) {
-                       switch (op) {
-                       case GT:
-                               v = (li >  ri);
-                               break;
-                       case GE:
-                               v = (li >= ri);
-                               break;
-                       case LT:
-                               v = (li <  ri);
-                               break;
-                       case LE:
-                               v = (li <= ri);
-                               break;
-                       case EQ:
-                               v = (li == ri);
-                               break;
-                       case NE:
-                               v = (li != ri);
-                               break;
-                       default:
-                               break;
-                       }
-               } else {
-                       to_string(l);
-                       to_string(r);
-
-                       switch (op) {
-                       case GT:
-                               v = (strcoll(l->u.s, r->u.s) > 0);
-                               break;
-                       case GE:
-                               v = (strcoll(l->u.s, r->u.s) >= 0);
-                               break;
-                       case LT:
-                               v = (strcoll(l->u.s, r->u.s) < 0);
-                               break;
-                       case LE:
-                               v = (strcoll(l->u.s, r->u.s) <= 0);
-                               break;
-                       case EQ:
-                               v = (strcoll(l->u.s, r->u.s) == 0);
-                               break;
-                       case NE:
-                               v = (strcoll(l->u.s, r->u.s) != 0);
+static int
+yyparse(int argc)
+{
+       Val vals[argc], *vtop = vals;
+       int ops [argc], *otop = ops;
+       int type, last = 0;
+       char prec[] = {
+               ['|'] = 1,
+               ['&'] = 2,
+               ['='] = 3, ['>'] = 3, [GE] = 3, ['<'] = 3, [LE] = 3, [NE] = 3,
+               ['+'] = 4, ['-'] = 4,
+               ['*'] = 5, ['/'] = 5, ['%'] = 5,
+               [':'] = 6,
+       };
+
+       while((type = yylex())) {
+               switch (type) {
+                       case VAL: *vtop++ = yylval; break;
+                       case '(': *otop++ = '('   ; break;
+                       case ')':
+                               if(last == '(')
+                                       enprintf(2, "syntax error: empty ( 
)\n");
+                               while(otop > ops && otop[-1] != '(')
+                                       doop(ops, &otop, vals, &vtop);
+                               if(otop == ops)
+                                       enprintf(2, "syntax error: extra )\n");
+                               otop--;
                                break;
-                       default:
+                       default :
+                               if(prec[last])
+                                       enprintf(2, "syntax error: extra 
operator\n");
+                               while(otop > ops && prec[otop[-1]] >= 
prec[type])
+                                       doop(ops, &otop, vals, &vtop);
+                               *otop++ = type;
                                break;
-                       }
                }
-
-               free_value(l);
-               free_value(r);
-               l = make_int(v);
+               last = type;
        }
+       while(otop > ops)
+               doop(ops, &otop, vals, &vtop);
 
-       return l;
-}
+       if(vtop == vals)
+               enprintf(2, "syntax error: missing expression\n");
+       if(vtop - vals > 1)
+               enprintf(2, "syntax error: extra expression\n");
 
-/* Parse and evaluate & expressions */
-static struct val *
-eval1(void)
-{
-       struct val     *l, *r;
-
-       l = eval2();
-       while (token == AND) {
-               nexttoken(0);
-               r = eval2();
-
-               if (is_zero_or_null(l) || is_zero_or_null(r)) {
-                       free_value(l);
-                       free_value(r);
-                       l = make_int(0);
-               } else {
-                       free_value(r);
-               }
-       }
+       vtop--;
+       if(vtop->s) printf("%s\n"        , vtop->s);
+       else        printf("%"PRIdMAX"\n", vtop->n);
 
-       return l;
+       return (vtop->s && *vtop->s) || vtop->n;
 }
 
-/* Parse and evaluate | expressions */
-static struct val *
-eval0(void)
-{
-       struct val     *l, *r;
-
-       l = eval1();
-       while (token == OR) {
-               nexttoken(0);
-               r = eval1();
-
-               if (is_zero_or_null(l)) {
-                       free_value(l);
-                       l = r;
-               } else {
-                       free_value(r);
-               }
-       }
-
-       return l;
-}
-
-
 int
-main(int argc, char *argv[])
+main(int argc, char **argv)
 {
-       struct val     *vp;
-
-       (void) setlocale(LC_ALL, "");
-
-       if (argc > 1 && !strcmp(argv[1], "--"))
-               argv++;
-
-       av = argv + 1;
-
-       nexttoken(0);
-       vp = eval0();
-
-       if (token != EOI) {
-               error();
-               /* NOTREACHED */
-       }
+       if(!(intlen = snprintf(NULL, 0, "%"PRIdMAX, INTMAX_MIN) + 1))
+               enprintf(3, "failed to get max digits\n");
 
-       if (vp->type == integer)
-               printf("%d\n", vp->u.i);
-       else
-               printf("%s\n", vp->u.s);
+       args = argv + 1;
+       if(*args && !strcmp("--", *args))
+               ++args;
 
-       exit(is_zero_or_null(vp));
+       return !yyparse(argc);
 }


Reply via email to