After the troubles with yaac and the makefile last time around I wrote
another expr, this time in C, using Dijkstra's shunting-yard
algorithm[0]. The end result is still less than half the length of the
existing recursive descent expr, and provides slightly better error
messages than my last attempt. Included patch: sbase-new_expr2.diff

While working on it I found out that building a single tool in sbase
(e.g. make expr) would fail if util.a was not yet built, so I'm
including a patch to fix that, which adds a recipe for $(BIN) that
depends on util.a. Included patch: sbase-build_single.diff

After that I removed the binlib recipe and just made all depend on
$(BIN). This works for me and I think it should work with other makes
as well, but as I demonstrated last time please make sure to test it.
This patch depends on the previous one. Included patch:
sbase-remove_binlib.diff

And lastly I added -D_DEFAULT_SOURCE in config.mk so gcc will shutup
about -D_BSD_SOURCE being deprecated. Included patch:
sbase-default_source.diff

emg

[0]https://en.wikipedia.org/wiki/Shunting-yard_algorithm
From c19ec21790e6402125349fbc9e7b4eab0cea4144 Mon Sep 17 00:00:00 2001
From: Evan Gates <evan.ga...@gmail.com>
Date: Thu, 13 Nov 2014 13:43:46 -0800
Subject: [PATCH] add $(BIN): util.a in order to build single binaries

---
 Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Makefile b/Makefile
index c85d485..eddb0e9 100644
--- a/Makefile
+++ b/Makefile
@@ -128,6 +128,8 @@ binlib: util.a
 
 bin: $(BIN)
 
+$(BIN): util.a
+
 $(OBJ): $(HDR) config.mk
 
 .o:
-- 
2.1.3

From 00a8c127377d315f4a03e59c29510d3f244a5746 Mon Sep 17 00:00:00 2001
From: Evan Gates <evan.ga...@gmail.com>
Date: Thu, 13 Nov 2014 13:42:09 -0800
Subject: [PATCH] add -D_DEFAULT_SOURCE to placate gcc

---
 config.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config.mk b/config.mk
index 2967eed..c605648 100644
--- a/config.mk
+++ b/config.mk
@@ -8,7 +8,7 @@ MANPREFIX = $(PREFIX)/share/man
 #CC = gcc
 #CC = musl-gcc
 LD = $(CC)
-CPPFLAGS = -D_BSD_SOURCE -D_GNU_SOURCE
+CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_GNU_SOURCE
 CFLAGS   = -g -std=c99 -Wall -pedantic $(CPPFLAGS)
 LDFLAGS  = -g
 
-- 
2.1.3

From 215fdf567786e9f9cd1491d5a10343009e70d4c5 Mon Sep 17 00:00:00 2001
From: Evan Gates <evan.ga...@gmail.com>
Date: Thu, 13 Nov 2014 13:40:14 -0800
Subject: [PATCH] new expr using shunting-yard instead of recursive descent

---
 expr.c | 640 +++++++++++++++++++----------------------------------------------
 1 file changed, 181 insertions(+), 459 deletions(-)

diff --git a/expr.c b/expr.c
index 9457d39..0f1e684 100644
--- a/expr.c
+++ b/expr.c
@@ -1,517 +1,239 @@
-/*     $OpenBSD: src/bin/expr/expr.c,v 1.19 2013/11/21 15:54:45 deraadt Exp $  
*/
-/*     $NetBSD: expr.c,v 1.3.6.1 1996/06/04 20:41:47 cgd Exp $ */
-
-/*
- * Written by J.T. Conklin <j...@netbsd.org>.
- * Public domain.
- */
-
+#include <inttypes.h>
+#include <limits.h>
+#include <regex.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
-#include <limits.h>
-#include <locale.h>
-#include <ctype.h>
-#include <regex.h>
-#include <err.h>
-
-static struct val      *make_int(int);
-static struct val      *make_str(char *);
-static void            free_value(struct val *);
-static int             is_integer(struct val *, int *);
-static int             to_integer(struct val *);
-static void            to_string(struct val *);
-static int             is_zero_or_null(struct val *);
-static void            nexttoken(int);
-static void            error(void);
-static struct val      *eval6(void);
-static struct val      *eval5(void);
-static struct val      *eval4(void);
-static struct val      *eval3(void);
-static struct val      *eval2(void);
-static struct val      *eval1(void);
-static struct val      *eval0(void);
-
-enum token {
-       OR, AND, EQ, LT, GT, ADD, SUB, MUL, DIV, MOD, MATCH, RP, LP,
-       NE, LE, GE, OPERAND, EOI
-};
-
-struct val {
-       enum {
-               integer,
-               string
-       } type;
+#include "util.h"
 
-       union {
-               char           *s;
-               int             i;
-       } u;
+enum {
+    VAL = CHAR_MAX + 1, GE, LE, NE
 };
 
-static enum token      token;
-static struct val     *tokval;
-static char          **av;
+typedef struct {
+    char    *s;
+    intmax_t n;
+} Val;
 
-static struct val *
-make_int(int i)
-{
-       struct val     *vp;
-
-       vp = (struct val *) malloc(sizeof(*vp));
-       if (vp == NULL) {
-               err(3, NULL);
-       }
-       vp->type = integer;
-       vp->u.i = i;
-       return vp;
-}
 
-static struct val *
-make_str(char *s)
-{
-       struct val     *vp;
-
-       vp = (struct val *) malloc(sizeof(*vp));
-       if (vp == NULL || ((vp->u.s = strdup(s)) == NULL)) {
-               err(3, NULL);
-       }
-       vp->type = string;
-       return vp;
-}
+static void doop(int*, int**, Val*, Val**);
+static Val match(Val, Val);
+static void num(Val);
+static int valcmp(Val, Val);
+static char *valstr(Val, char*);
+static int yylex(void);
+static int yyparse(int);
 
-static void
-free_value(struct val *vp)
-{
-       if (vp->type == string)
-               free(vp->u.s);
-       free(vp);
-}
+static char **args;
+static size_t intlen;
+static Val yylval;
 
-/* determine if vp is an integer; if so, return it's value in *r */
-static int
-is_integer(struct val *vp, int *r)
+// otop points to one past last op
+// vtop points to one past last val
+// guaranteed otop != ops
+// pop two vals, pop op, apply op, push val
+static void
+doop(int *ops, int **otop, Val *vals, Val **vtop)
 {
-       char           *s;
-       int             neg;
-       int             i;
+    Val ret, a, b;
+    int op;
 
-       if (vp->type == integer) {
-               *r = vp->u.i;
-               return 1;
-       }
+    if((*otop)[-1] == '(')
+        enprintf(2, "syntax error: extra (\n");
+    if(*vtop - vals < 2)
+        enprintf(2, "syntax error: missing expression or extra operator\n");
 
-       /*
-        * POSIX.2 defines an "integer" as an optional unary minus
-        * followed by digits.
-        */
-       s = vp->u.s;
-       i = 0;
+    a  = (*vtop)[-2];
+    b  = (*vtop)[-1];
+    op = (*otop)[-1];
 
-       neg = (*s == '-');
-       if (neg)
-               s++;
+    switch (op) {
+        case '|': if     ( a.s && *a.s) ret = (Val){ a.s ,   0 };
+                  else if(!a.s &&  a.n) ret = (Val){ NULL, a.n };
+                  else if( b.s && *b.s) ret = (Val){ b.s ,   0 };
+                  else                  ret = (Val){ NULL, b.n };
+                  break;
 
-       while (*s) {
-               if (!isdigit((unsigned char)*s))
-                       return 0;
+        case '&': if(((a.s && *a.s) || a.n) &&
+                     ((b.s && *b.s) || b.n)) ret = a;
+                  else                       ret = (Val){ NULL, 0 };
+                  break;
 
-               i *= 10;
-               i += *s - '0';
+        case '=': ret = (Val){ NULL, valcmp(a, b) == 0 }; break;
+        case '>': ret = (Val){ NULL, valcmp(a, b) >  0 }; break;
+        case GE : ret = (Val){ NULL, valcmp(a, b) >= 0 }; break;
+        case '<': ret = (Val){ NULL, valcmp(a, b) <  0 }; break;
+        case LE : ret = (Val){ NULL, valcmp(a, b) <= 0 }; break;
+        case NE : ret = (Val){ NULL, valcmp(a, b) != 0 }; break;
 
-               s++;
-       }
+        case '+': num(a); num(b); ret = (Val){ NULL, a.n + b.n }; break;
+        case '-': num(a); num(b); ret = (Val){ NULL, a.n - b.n }; break;
+        case '*': num(a); num(b); ret = (Val){ NULL, a.n * b.n }; break;
+        case '/': num(a); num(b); ret = (Val){ NULL, a.n / b.n }; break;
+        case '%': num(a); num(b); ret = (Val){ NULL, a.n % b.n }; break;
 
-       if (neg)
-               i *= -1;
+        case ':': ret = match(a, b); break;
+    }
 
-       *r = i;
-       return 1;
+    (*vtop)[-2] = ret;
+    (*otop)--;
+    (*vtop)--;
 }
 
-/* coerce to vp to an integer */
-static int
-to_integer(struct val *vp)
+static Val
+match(Val vstr, Val vregx)
 {
-       int             r;
+    char b1[intlen], *str  = valstr(vstr , b1);
+    char b2[intlen], *regx = valstr(vregx, b2);
 
-       if (vp->type == integer)
-               return 1;
+    regex_t    re;
+    regmatch_t matches[2];
+    char       anchreg[strlen(regx) + 2];
 
-       if (is_integer(vp, &r)) {
-               free(vp->u.s);
-               vp->u.i = r;
-               vp->type = integer;
-               return 1;
-       }
+    sprintf(anchreg, "^%s", regx);
 
-       return 0;
-}
+    if(regcomp(&re, anchreg, 0))
+        enprintf(3, "regcomp failed\n");
 
-/* coerce to vp to an string */
-static void
-to_string(struct val *vp)
-{
-       char           *tmp;
+    if(regexec(&re, str, 2, matches, 0))
+        return (Val){ (re.re_nsub ? "" : NULL), 0 };
 
-       if (vp->type == string)
-               return;
+    if(re.re_nsub) {
+        intmax_t d;
+        char    *ret, *p;
+        regoff_t len = matches[1].rm_eo - matches[1].rm_so + 1;
 
-       if (asprintf(&tmp, "%d", vp->u.i) == -1)
-               err(3, NULL);
-
-       vp->type = string;
-       vp->u.s = tmp;
-}
+        if(!(ret = malloc(len))) // FIXME: free
+            enprintf(3, "malloc failed\n");
 
-static int
-is_zero_or_null(struct val *vp)
-{
-       if (vp->type == integer) {
-               return (vp->u.i == 0);
-       } else {
-               return (*vp->u.s == 0 || (to_integer(vp) && vp->u.i == 0));
-       }
-       /* NOTREACHED */
-}
+        d = strtoimax(ret, &p, 10);
+        strlcpy(ret, str + matches[1].rm_so, len);
 
-static void
-nexttoken(int pat)
-{
-       char           *p;
-
-       if ((p = *av) == NULL) {
-               token = EOI;
-               return;
-       }
-       av++;
-
-       if (pat == 0 && p[0] != '\0') {
-               if (p[1] == '\0') {
-                       const char     *x = "|&=<>+-*/%:()";
-                       char           *i;      /* index */
-
-                       if ((i = strchr(x, *p)) != NULL) {
-                               token = i - x;
-                               return;
-                       }
-               } else if (p[1] == '=' && p[2] == '\0') {
-                       switch (*p) {
-                       case '<':
-                               token = LE;
-                               return;
-                       case '>':
-                               token = GE;
-                               return;
-                       case '!':
-                               token = NE;
-                               return;
-                       }
-               }
-       }
-       tokval = make_str(p);
-       token = OPERAND;
-       return;
+        if(*ret && !*p)
+            return (Val){ NULL, d };
+        return (Val){ ret, 0 };
+    }
+    return (Val){ NULL, matches[0].rm_eo - matches[0].rm_so };
 }
 
 static void
-error(void)
+num(Val v)
 {
-       errx(2, "syntax error");
-       /* NOTREACHED */
+    if(v.s)
+        enprintf(2, "syntax error: expected integer got `%s'\n", v.s);
 }
 
-static struct val *
-eval6(void)
+static int
+valcmp(Val a, Val b)
 {
-       struct val     *v;
-
-       if (token == OPERAND) {
-               nexttoken(0);
-               return tokval;
-
-       } else if (token == RP) {
-               nexttoken(0);
-               v = eval0();
-
-               if (token != LP) {
-                       error();
-                       /* NOTREACHED */
-               }
-               nexttoken(0);
-               return v;
-       } else {
-               error();
-       }
-       /* NOTREACHED */
-       return NULL;
-}
+    char b1[intlen], *p = valstr(a, b1);
+    char b2[intlen], *q = valstr(b, b2);
 
-/* Parse and evaluate match (regex) expressions */
-static struct val *
-eval5(void)
-{
-       regex_t         rp;
-       regmatch_t      rm[2];
-       char            errbuf[256];
-       int             eval;
-       struct val     *l, *r;
-       struct val     *v;
-
-       l = eval6();
-       while (token == MATCH) {
-               nexttoken(1);
-               r = eval6();
-
-               /* coerce to both arguments to strings */
-               to_string(l);
-               to_string(r);
-
-               /* compile regular expression */
-               if ((eval = regcomp(&rp, r->u.s, 0)) != 0) {
-                       regerror(eval, &rp, errbuf, sizeof(errbuf));
-                       errx(2, "%s", errbuf);
-               }
-
-               /* compare string against pattern --  remember that patterns
-                  are anchored to the beginning of the line */
-               if (regexec(&rp, l->u.s, 2, rm, 0) == 0 && rm[0].rm_so == 0) {
-                       if (rm[1].rm_so >= 0) {
-                               *(l->u.s + rm[1].rm_eo) = '\0';
-                               v = make_str(l->u.s + rm[1].rm_so);
-
-                       } else {
-                               v = make_int((int)(rm[0].rm_eo - rm[0].rm_so));
-                       }
-               } else {
-                       if (rp.re_nsub == 0) {
-                               v = make_int(0);
-                       } else {
-                               v = make_str("");
-                       }
-               }
-
-               /* free arguments and pattern buffer */
-               free_value(l);
-               free_value(r);
-               regfree(&rp);
-
-               l = v;
-       }
-
-       return l;
+    if(!a.s && !b.s)
+        return (a.n > b.n) - (a.n < b.n);
+    return strcmp(p, q);
 }
 
-/* Parse and evaluate multiplication and division expressions */
-static struct val *
-eval4(void)
+static char *
+valstr(Val val, char *buf)
 {
-       struct val     *l, *r;
-       enum token      op;
-
-       l = eval5();
-       while ((op = token) == MUL || op == DIV || op == MOD) {
-               nexttoken(0);
-               r = eval5();
-
-               if (!to_integer(l) || !to_integer(r)) {
-                       errx(2, "non-numeric argument");
-               }
-
-               if (op == MUL) {
-                       l->u.i *= r->u.i;
-               } else {
-                       if (r->u.i == 0) {
-                               errx(2, "division by zero");
-                       }
-                       if (op == DIV) {
-                               if (l->u.i != INT_MIN || r->u.i != -1)
-                                       l->u.i /= r->u.i;
-                       } else {
-                               if (l->u.i != INT_MIN || r->u.i != -1)
-                                       l->u.i %= r->u.i;
-                               else
-                                       l->u.i = 0;
-                       }
-               }
-
-               free_value(r);
-       }
-
-       return l;
+    char *p = val.s;
+    if(!p) {
+        sprintf(buf, "%"PRIdMAX, val.n);
+        p = buf;
+    }
+    return p;
 }
 
-/* Parse and evaluate addition and subtraction expressions */
-static struct val *
-eval3(void)
+static int
+yylex(void)
 {
-       struct val     *l, *r;
-       enum token      op;
+    intmax_t d;
+    char *q, *p, *ops = "|&=><+-*/%():";
 
-       l = eval4();
-       while ((op = token) == ADD || op == SUB) {
-               nexttoken(0);
-               r = eval4();
+    if(!(p = *args++))
+        return 0;
 
-               if (!to_integer(l) || !to_integer(r)) {
-                       errx(2, "non-numeric argument");
-               }
+    d = strtoimax(p, &q, 10);
+    if(*p && !*q) {
+        yylval = (Val){ NULL, d };
+        return VAL;
+    }
 
-               if (op == ADD) {
-                       l->u.i += r->u.i;
-               } else {
-                       l->u.i -= r->u.i;
-               }
+    if(*p && !p[1] && strchr(ops, *p))
+        return *p;
 
-               free_value(r);
-       }
+    if(strcmp(p, ">=") == 0) return GE;
+    if(strcmp(p, "<=") == 0) return LE;
+    if(strcmp(p, "!=") == 0) return NE;
 
-       return l;
-}
-
-/* Parse and evaluate comparison expressions */
-static struct val *
-eval2(void)
-{
-       struct val     *l, *r;
-       enum token      op;
-       int             v = 0, li, ri;
-
-       l = eval3();
-       while ((op = token) == EQ || op == NE || op == LT || op == GT ||
-           op == LE || op == GE) {
-               nexttoken(0);
-               r = eval3();
-
-               if (is_integer(l, &li) && is_integer(r, &ri)) {
-                       switch (op) {
-                       case GT:
-                               v = (li >  ri);
-                               break;
-                       case GE:
-                               v = (li >= ri);
-                               break;
-                       case LT:
-                               v = (li <  ri);
-                               break;
-                       case LE:
-                               v = (li <= ri);
-                               break;
-                       case EQ:
-                               v = (li == ri);
-                               break;
-                       case NE:
-                               v = (li != ri);
-                               break;
-                       default:
-                               break;
-                       }
-               } else {
-                       to_string(l);
-                       to_string(r);
-
-                       switch (op) {
-                       case GT:
-                               v = (strcoll(l->u.s, r->u.s) > 0);
-                               break;
-                       case GE:
-                               v = (strcoll(l->u.s, r->u.s) >= 0);
-                               break;
-                       case LT:
-                               v = (strcoll(l->u.s, r->u.s) < 0);
-                               break;
-                       case LE:
-                               v = (strcoll(l->u.s, r->u.s) <= 0);
-                               break;
-                       case EQ:
-                               v = (strcoll(l->u.s, r->u.s) == 0);
-                               break;
-                       case NE:
-                               v = (strcoll(l->u.s, r->u.s) != 0);
-                               break;
-                       default:
-                               break;
-                       }
-               }
-
-               free_value(l);
-               free_value(r);
-               l = make_int(v);
-       }
-
-       return l;
+    yylval = (Val){ p, 0 };
+    return VAL;
 }
 
-/* Parse and evaluate & expressions */
-static struct val *
-eval1(void)
-{
-       struct val     *l, *r;
-
-       l = eval2();
-       while (token == AND) {
-               nexttoken(0);
-               r = eval2();
-
-               if (is_zero_or_null(l) || is_zero_or_null(r)) {
-                       free_value(l);
-                       free_value(r);
-                       l = make_int(0);
-               } else {
-                       free_value(r);
-               }
-       }
-
-       return l;
-}
-
-/* Parse and evaluate | expressions */
-static struct val *
-eval0(void)
-{
-       struct val     *l, *r;
-
-       l = eval1();
-       while (token == OR) {
-               nexttoken(0);
-               r = eval1();
-
-               if (is_zero_or_null(l)) {
-                       free_value(l);
-                       l = r;
-               } else {
-                       free_value(r);
-               }
-       }
-
-       return l;
+static int
+yyparse(int argc)
+{
+    Val vals[argc], *vtop = vals;
+    int ops [argc], *otop = ops;
+    int type, last = 0;
+    char prec[] = {
+        ['|'] = 1,
+        ['&'] = 2,
+        ['='] = 3, ['>'] = 3, [GE] = 3, ['<'] = 3, [LE] = 3, [NE] = 3,
+        ['+'] = 4, ['-'] = 4,
+        ['*'] = 5, ['/'] = 5, ['%'] = 5,
+        [':'] = 6,
+    };
+
+    while((type = yylex())) {
+        switch (type) {
+            case VAL: *vtop++ = yylval; break;
+            case '(': *otop++ = '('   ; break;
+            case ')':
+                if(last == '(')
+                    enprintf(2, "syntax error: empty ( )\n");
+                while(otop > ops && otop[-1] != '(')
+                    doop(ops, &otop, vals, &vtop);
+                if(otop == ops)
+                    enprintf(2, "syntax error: extra )\n");
+                otop--;
+                break;
+            default :
+                if(prec[last])
+                    enprintf(2, "syntax error: extra operator\n");
+                while(otop > ops && prec[otop[-1]] >= prec[type])
+                    doop(ops, &otop, vals, &vtop);
+                *otop++ = type;
+                break;
+        }
+        last = type;
+    }
+    while(otop > ops)
+        doop(ops, &otop, vals, &vtop);
+
+    if(vtop == vals)
+        enprintf(2, "syntax error: missing expression\n");
+    if(vtop - vals > 1)
+        enprintf(2, "syntax error: extra expression\n");
+
+    vtop--;
+    if(vtop->s) printf("%s\n"        , vtop->s);
+    else        printf("%"PRIdMAX"\n", vtop->n);
+
+    return (vtop->s && *vtop->s) || vtop->n;
 }
 
-
 int
-main(int argc, char *argv[])
+main(int argc, char **argv)
 {
-       struct val     *vp;
-
-       (void) setlocale(LC_ALL, "");
-
-       if (argc > 1 && !strcmp(argv[1], "--"))
-               argv++;
-
-       av = argv + 1;
-
-       nexttoken(0);
-       vp = eval0();
-
-       if (token != EOI) {
-               error();
-               /* NOTREACHED */
-       }
+    if(!(intlen = snprintf(NULL, 0, "%"PRIdMAX, INTMAX_MIN) + 1))
+        enprintf(3, "failed to get max digits\n");
 
-       if (vp->type == integer)
-               printf("%d\n", vp->u.i);
-       else
-               printf("%s\n", vp->u.s);
+    args = argv + 1;
+    if(*args && !strcmp("--", *args))
+        ++args;
 
-       exit(is_zero_or_null(vp));
+    return !yyparse(argc);
 }
-- 
2.1.3

From e7a04e9b94fb5ee4101b792c66dd471d2abd8e2d Mon Sep 17 00:00:00 2001
From: Evan Gates <evan.ga...@gmail.com>
Date: Thu, 13 Nov 2014 13:45:34 -0800
Subject: [PATCH] remove binlib recipe

---
 Makefile | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index eddb0e9..8505bdb 100644
--- a/Makefile
+++ b/Makefile
@@ -121,12 +121,7 @@ OBJ = $(SRC:.c=.o) $(LIB)
 BIN = $(SRC:.c=)
 MAN = $(SRC:.c=.1)
 
-all: binlib
-
-binlib: util.a
-       $(MAKE) bin
-
-bin: $(BIN)
+all: $(BIN)
 
 $(BIN): util.a
 
-- 
2.1.3

Reply via email to