Hi, over the last few days I worked on the netbeans port and tried to fix the heapsize problem. My patch always crashed, and I noticed the following behaviour:
$ mem=`sysctl -n hw.usermem` $ echo $mem 8302469120 $ expr $mem + 1 -287465471 OpenBSD's expr can only process values between -2147483648 and +2147483647. It only handle/convert simple int. I looked at the other BSDs and found NetBSD's expr. "The reimplement expr using lexical parser generated by yacc highlights: - use 64 bit arithmetic, so expr is able to process integer values from (2**63) to (2**63 - 1) - checks for integer over- & underflows added - error messages improved, more error checking added" - http://cvsweb.netbsd.org/bsdweb.cgi/src/bin/expr/expr.y?only_with_tag=MAIN This patch contains NetBSD reimplemented expr with following changes: - pledge - remove whitespaces - remove __printflike(1, 2) - remove __dead from yyerror() - add "int yyparse(void);" declaration - remove special NetBSD information in manpage Any chance for ok? Best regards, Rafael Sadowski Index: Makefile =================================================================== RCS file: /cvs/src/bin/expr/Makefile,v retrieving revision 1.3 diff -u -p -r1.3 Makefile --- Makefile 21 Sep 1997 11:35:07 -0000 1.3 +++ Makefile 3 Jan 2016 14:38:39 -0000 @@ -1,6 +1,6 @@ # $OpenBSD: Makefile,v 1.3 1997/09/21 11:35:07 deraadt Exp $ PROG= expr -SRCS= expr.c +SRCS= expr.y .include <bsd.prog.mk> Index: expr.1 =================================================================== RCS file: /cvs/src/bin/expr/expr.1,v retrieving revision 1.23 diff -u -p -r1.23 expr.1 --- expr.1 16 Jan 2015 15:30:10 -0000 1.23 +++ expr.1 3 Jan 2016 14:38:39 -0000 @@ -1,17 +1,41 @@ -.\" $OpenBSD: expr.1,v 1.23 2015/01/16 15:30:10 schwarze Exp $ -.\" $NetBSD: expr.1,v 1.9 1995/04/28 23:27:13 jtc Exp $ +.\" $OpenBSD: expr.1,v 1.23 2015/01/16 15:30:10 schwarze Exp $ +.\" $NetBSD: expr.1,v 1.33 2012/08/12 17:27:04 wiz Exp $ .\" -.\" Written by J.T. Conklin <j...@netbsd.org>. -.\" Public domain. +.\" Copyright (c) 2000,2003 The NetBSD Foundation, Inc. +.\" All rights reserved. 
.\" -.Dd $Mdocdate: January 16 2015 $ +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by J.T. Conklin <j...@netbsd.org> and Jaromir Dolecek <jdole...@netbsd.org>. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd April 20, 2004 .Dt EXPR 1 .Os .Sh NAME .Nm expr .Nd evaluate expression .Sh SYNOPSIS -.Nm expr +.Nm .Ar expression .Sh DESCRIPTION The @@ -19,6 +43,7 @@ The utility evaluates .Ar expression and writes the result on standard output. +.Pp All operators are separate arguments to the .Nm utility. @@ -27,18 +52,18 @@ Characters special to the command interp Operators are listed below in order of increasing precedence. 
Operators with equal precedence are grouped within { } symbols. .Bl -tag -width indent -.It Ar expr1 | expr2 +.It Ar expr1 Li \&| Ar expr2 Returns the evaluation of .Ar expr1 if it is neither an empty string nor zero; otherwise, returns the evaluation of .Ar expr2 . -.It Ar expr1 Li & Ar expr2 +.It Ar expr1 Li \*[Am] Ar expr2 Returns the evaluation of .Ar expr1 if neither expression evaluates to an empty string or zero; otherwise, returns zero. -.It Ar expr1 Li "{=, >, >=, <, <=, !=}" Ar expr2 +.It Ar expr1 Li "{=, \*[Gt], \*[Ge], \*[Lt], \*[Le], !=}" Ar expr2 Returns the results of integer comparison if both arguments are integers; otherwise, returns the results of string comparison using the locale-specific collation sequence. @@ -47,19 +72,18 @@ or 0 if the relation is false. .It Ar expr1 Li "{+, -}" Ar expr2 Returns the results of addition or subtraction of integer-valued arguments. .It Ar expr1 Li "{*, /, %}" Ar expr2 -Returns the results of multiplication, integer division, or remainder of -integer-valued arguments. +Returns the results of multiplication, integer division, or remainder of integer-valued arguments. .It Ar expr1 Li \&: Ar expr2 The -.Ql \&: +.Dq \&: operator matches .Ar expr1 against .Ar expr2 , -which must be a basic regular expression. +which must be a regular expression. The regular expression is anchored to the beginning of the string with an implicit -.Ql ^ . +.Dq ^ . .Pp If the match succeeds and the pattern contains at least one regular expression subexpression @@ -67,90 +91,174 @@ expression subexpression the string corresponding to .Dq "\e1" is returned; -otherwise, the matching operator returns the number of characters matched. +otherwise the matching operator returns the number of characters matched. If the match fails and the pattern contains a regular expression subexpression the null string is returned; -otherwise, returns 0. 
-.Pp -Note: the empty string cannot be matched using -.Bd -literal -offset indent -expr '' : '$' -.Ed +otherwise 0. +.It "( " Ar expr No " )" +Parentheses are used for grouping in the usual manner. +.El .Pp -This is because the returned number of matched characters -.Pq zero -is indistinguishable from a failed match, so -.Nm -returns failure -.Pq 0 . -To match the empty string, use a structure such as: -.Bd -literal -offset indent -expr X'' : 'X$' -.Ed +Additionally, the following keywords are recognized: +.Bl -tag -width indent +.It length Ar expr +Returns the length of the specified string in bytes. .El .Pp -Parentheses are used for grouping in the usual manner. +Operator precedence (from highest to lowest): +.Bl -enum -compact -offset indent +.It +parentheses +.It +length +.It +.Dq \&: +.It +.Dq "*" , +.Dq "/" , +and +.Dq "%" +.It +.Dq "+" +and +.Dq "-" +.It +compare operators +.It +.Dq \*[Am] +.It +.Dq \&| +.El .Sh EXIT STATUS The .Nm utility exits with one of the following values: -.Pp -.Bl -tag -width Ds -offset indent -compact +.Bl -tag -width Ds -compact .It 0 -The expression is neither an empty string nor 0. +the expression is neither an empty string nor 0. .It 1 -The expression is an empty string or 0. +the expression is an empty string or 0. .It 2 -The expression is invalid. -.It \*(Gt2 -An error occurred (such as memory allocation failure). +the expression is invalid. +.It \*[Gt]2 +an error occurred (such as memory allocation failure). .El .Sh EXAMPLES -Add 1 to the variable -.Va a : -.Bd -literal -offset indent -$ a=`expr $a + 1` -.Ed -.Pp -Return the filename portion of a pathname stored +.Bl -enum +.It +The following example adds one to variable +.Dq a : +.Dl a=`expr $a + 1` +.It +The following example returns zero, due to subtraction having higher precedence +than the +.Dq \*[Am] +operator: +.Dl expr 1 '\*[Am]' 1 - 1 +.It +The following example returns the filename portion of a pathname stored in variable -.Va a . 
-The -.Ql // -characters act to eliminate ambiguity with the division operator: +.Dq a : +.Dl expr "/$a" Li : '.*/\e(.*\e)' +.It +The following example returns the number of characters in variable +.Dq a : +.Dl expr $a Li : '.*' +.El +.Sh COMPATIBILITY +This implementation of +.Nm +internally uses 64 bit representation of integers and checks for +over- and underflows. +It also treats +.Dq / +(the division mark) and option +.Dq -- +correctly depending upon context. +.Pp +.Nm +on other systems might not be so graceful. +Arithmetic results might be arbitrarily +limited on such systems, most commonly to 32 bit quantities. +This means such +.Nm +can only process values between -2147483648 and +2147483647. +.Pp +On other systems, +.Nm +might also not work correctly for regular expressions where +either side contains +.Dq / +(a single forward slash), like this: .Bd -literal -offset indent -$ expr "//$a" \&: '.*/\e(.*\e)' +expr / : '.*/\e(.*\e)' .Ed .Pp -Return the number of characters in variable -.Va a : +If this is the case, you might use +.Dq // +(a double forward slash) +to avoid confusion with the division operator: .Bd -literal -offset indent -$ expr $a \&: '.*' +expr "//$a" : '.*/\e(.*\e)' .Ed -.Sh SEE ALSO -.Xr test 1 , -.Xr re_format 7 +.Pp +According to +.St -p1003.2 , +.Nm +has to recognize special option +.Dq -- , +treat it as a delimiter to mark the end of command +line options, and ignore it. +Some +.Nm +implementations don't recognize it at all; others +might ignore it even in cases where doing so results in syntax +error. +There should be same result for both following examples, +but it might not always be: +.Bl -enum -compact -offset indent +.It +expr -- : . +.It +expr -- -- : . +.El +Although +.Nx +.Nm +handles both cases correctly, you should not depend on this behavior +for portability reasons and avoid passing a bare +.Dq -- +as the first +argument. .Sh STANDARDS The .Nm -utility is compliant with the -.St -p1003.1-2008 -specification. 
-.Sh HISTORY +utility conforms to +.St -p1003.2 . The -.Nm -utility first appeared in the Programmer's Workbench (PWB/UNIX) -and has supported regular expressions since -.At v7 . -It was rewritten from scratch for -.Bx 386 0.1 -and again for -.Nx 1.1 . +.Ar length +keyword is an extension for compatibility with GNU +.Nm . .Sh AUTHORS -.An -nosplit -The first free version was written by -.An Pace Willisson -in 1992. -This version was written by -.An John T. Conklin -in 1994. +Original implementation was written by +.An J.T. Conklin +.Aq j...@netbsd.org . +It was rewritten for +.Nx 1.6 +by +.An Jaromir Dolecek +.Aq jdole...@netbsd.org . +.Sh NOTES +The empty string +.Do Dc +cannot be matched with the intuitive: +.Bd -literal -offset indent +expr '' : '$' +.Ed +.Pp +The reason is that the returned number of matched characters (zero) +is indistinguishable from a failed match, so this returns failure. +To match the empty string, use something like: +.Bd -literal -offset indent +expr x'' : 'x$' +.Ed Index: expr.c =================================================================== RCS file: expr.c diff -N expr.c --- expr.c 29 Dec 2015 19:06:16 -0000 1.23 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,526 +0,0 @@ -/* $OpenBSD: expr.c,v 1.23 2015/12/29 19:06:16 gsoares Exp $ */ -/* $NetBSD: expr.c,v 1.3.6.1 1996/06/04 20:41:47 cgd Exp $ */ - -/* - * Written by J.T. Conklin <j...@netbsd.org>. - * Public domain. 
- */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <limits.h> -#include <locale.h> -#include <ctype.h> -#include <unistd.h> -#include <regex.h> -#include <err.h> - -struct val *make_int(int); -struct val *make_str(char *); -void free_value(struct val *); -int is_integer(struct val *, int *); -int to_integer(struct val *); -void to_string(struct val *); -int is_zero_or_null(struct val *); -void nexttoken(int); -__dead void error(void); -struct val *eval6(void); -struct val *eval5(void); -struct val *eval4(void); -struct val *eval3(void); -struct val *eval2(void); -struct val *eval1(void); -struct val *eval0(void); - -enum token { - OR, AND, EQ, LT, GT, ADD, SUB, MUL, DIV, MOD, MATCH, RP, LP, - NE, LE, GE, OPERAND, EOI -}; - -struct val { - enum { - integer, - string - } type; - - union { - char *s; - int i; - } u; -}; - -enum token token; -struct val *tokval; -char **av; - -struct val * -make_int(int i) -{ - struct val *vp; - - vp = malloc(sizeof(*vp)); - if (vp == NULL) { - err(3, NULL); - } - vp->type = integer; - vp->u.i = i; - return vp; -} - - -struct val * -make_str(char *s) -{ - struct val *vp; - - vp = malloc(sizeof(*vp)); - if (vp == NULL || ((vp->u.s = strdup(s)) == NULL)) { - err(3, NULL); - } - vp->type = string; - return vp; -} - - -void -free_value(struct val *vp) -{ - if (vp->type == string) - free(vp->u.s); - free(vp); -} - - -/* determine if vp is an integer; if so, return it's value in *r */ -int -is_integer(struct val *vp, int *r) -{ - char *s; - int neg; - int i; - - if (vp->type == integer) { - *r = vp->u.i; - return 1; - } - - /* - * POSIX.2 defines an "integer" as an optional unary minus - * followed by digits. 
- */ - s = vp->u.s; - i = 0; - - neg = (*s == '-'); - if (neg) - s++; - - while (*s) { - if (!isdigit((unsigned char)*s)) - return 0; - - i *= 10; - i += *s - '0'; - - s++; - } - - if (neg) - i *= -1; - - *r = i; - return 1; -} - - -/* coerce to vp to an integer */ -int -to_integer(struct val *vp) -{ - int r; - - if (vp->type == integer) - return 1; - - if (is_integer(vp, &r)) { - free(vp->u.s); - vp->u.i = r; - vp->type = integer; - return 1; - } - - return 0; -} - - -/* coerce to vp to an string */ -void -to_string(struct val *vp) -{ - char *tmp; - - if (vp->type == string) - return; - - if (asprintf(&tmp, "%d", vp->u.i) == -1) - err(3, NULL); - - vp->type = string; - vp->u.s = tmp; -} - -int -is_zero_or_null(struct val *vp) -{ - if (vp->type == integer) { - return (vp->u.i == 0); - } else { - return (*vp->u.s == 0 || (to_integer(vp) && vp->u.i == 0)); - } - /* NOTREACHED */ -} - -void -nexttoken(int pat) -{ - char *p; - - if ((p = *av) == NULL) { - token = EOI; - return; - } - av++; - - - if (pat == 0 && p[0] != '\0') { - if (p[1] == '\0') { - const char *x = "|&=<>+-*/%:()"; - char *i; /* index */ - - if ((i = strchr(x, *p)) != NULL) { - token = i - x; - return; - } - } else if (p[1] == '=' && p[2] == '\0') { - switch (*p) { - case '<': - token = LE; - return; - case '>': - token = GE; - return; - case '!': - token = NE; - return; - } - } - } - tokval = make_str(p); - token = OPERAND; - return; -} - -__dead void -error(void) -{ - errx(2, "syntax error"); - /* NOTREACHED */ -} - -struct val * -eval6(void) -{ - struct val *v; - - if (token == OPERAND) { - nexttoken(0); - return tokval; - - } else if (token == RP) { - nexttoken(0); - v = eval0(); - - if (token != LP) { - error(); - /* NOTREACHED */ - } - nexttoken(0); - return v; - } else { - error(); - } - /* NOTREACHED */ -} - -/* Parse and evaluate match (regex) expressions */ -struct val * -eval5(void) -{ - regex_t rp; - regmatch_t rm[2]; - char errbuf[256]; - int eval; - struct val *l, *r; - struct val *v; - 
- l = eval6(); - while (token == MATCH) { - nexttoken(1); - r = eval6(); - - /* coerce to both arguments to strings */ - to_string(l); - to_string(r); - - /* compile regular expression */ - if ((eval = regcomp(&rp, r->u.s, 0)) != 0) { - regerror(eval, &rp, errbuf, sizeof(errbuf)); - errx(2, "%s", errbuf); - } - - /* compare string against pattern -- remember that patterns - are anchored to the beginning of the line */ - if (regexec(&rp, l->u.s, 2, rm, 0) == 0 && rm[0].rm_so == 0) { - if (rm[1].rm_so >= 0) { - *(l->u.s + rm[1].rm_eo) = '\0'; - v = make_str(l->u.s + rm[1].rm_so); - - } else { - v = make_int((int)(rm[0].rm_eo - rm[0].rm_so)); - } - } else { - if (rp.re_nsub == 0) { - v = make_int(0); - } else { - v = make_str(""); - } - } - - /* free arguments and pattern buffer */ - free_value(l); - free_value(r); - regfree(&rp); - - l = v; - } - - return l; -} - -/* Parse and evaluate multiplication and division expressions */ -struct val * -eval4(void) -{ - struct val *l, *r; - enum token op; - - l = eval5(); - while ((op = token) == MUL || op == DIV || op == MOD) { - nexttoken(0); - r = eval5(); - - if (!to_integer(l) || !to_integer(r)) { - errx(2, "non-numeric argument"); - } - - if (op == MUL) { - l->u.i *= r->u.i; - } else { - if (r->u.i == 0) { - errx(2, "division by zero"); - } - if (op == DIV) { - if (l->u.i != INT_MIN || r->u.i != -1) - l->u.i /= r->u.i; - } else { - if (l->u.i != INT_MIN || r->u.i != -1) - l->u.i %= r->u.i; - else - l->u.i = 0; - } - } - - free_value(r); - } - - return l; -} - -/* Parse and evaluate addition and subtraction expressions */ -struct val * -eval3(void) -{ - struct val *l, *r; - enum token op; - - l = eval4(); - while ((op = token) == ADD || op == SUB) { - nexttoken(0); - r = eval4(); - - if (!to_integer(l) || !to_integer(r)) { - errx(2, "non-numeric argument"); - } - - if (op == ADD) { - l->u.i += r->u.i; - } else { - l->u.i -= r->u.i; - } - - free_value(r); - } - - return l; -} - -/* Parse and evaluate comparison expressions 
*/ -struct val * -eval2(void) -{ - struct val *l, *r; - enum token op; - int v = 0, li, ri; - - l = eval3(); - while ((op = token) == EQ || op == NE || op == LT || op == GT || - op == LE || op == GE) { - nexttoken(0); - r = eval3(); - - if (is_integer(l, &li) && is_integer(r, &ri)) { - switch (op) { - case GT: - v = (li > ri); - break; - case GE: - v = (li >= ri); - break; - case LT: - v = (li < ri); - break; - case LE: - v = (li <= ri); - break; - case EQ: - v = (li == ri); - break; - case NE: - v = (li != ri); - break; - default: - break; - } - } else { - to_string(l); - to_string(r); - - switch (op) { - case GT: - v = (strcoll(l->u.s, r->u.s) > 0); - break; - case GE: - v = (strcoll(l->u.s, r->u.s) >= 0); - break; - case LT: - v = (strcoll(l->u.s, r->u.s) < 0); - break; - case LE: - v = (strcoll(l->u.s, r->u.s) <= 0); - break; - case EQ: - v = (strcoll(l->u.s, r->u.s) == 0); - break; - case NE: - v = (strcoll(l->u.s, r->u.s) != 0); - break; - default: - break; - } - } - - free_value(l); - free_value(r); - l = make_int(v); - } - - return l; -} - -/* Parse and evaluate & expressions */ -struct val * -eval1(void) -{ - struct val *l, *r; - - l = eval2(); - while (token == AND) { - nexttoken(0); - r = eval2(); - - if (is_zero_or_null(l) || is_zero_or_null(r)) { - free_value(l); - free_value(r); - l = make_int(0); - } else { - free_value(r); - } - } - - return l; -} - -/* Parse and evaluate | expressions */ -struct val * -eval0(void) -{ - struct val *l, *r; - - l = eval1(); - while (token == OR) { - nexttoken(0); - r = eval1(); - - if (is_zero_or_null(l)) { - free_value(l); - l = r; - } else { - free_value(r); - } - } - - return l; -} - - -int -main(int argc, char *argv[]) -{ - struct val *vp; - - (void) setlocale(LC_ALL, ""); - - if (pledge("stdio", NULL) == -1) - err(2, "pledge"); - - if (argc > 1 && !strcmp(argv[1], "--")) - argv++; - - av = argv + 1; - - nexttoken(0); - vp = eval0(); - - if (token != EOI) { - error(); - /* NOTREACHED */ - } - - if (vp->type == 
integer) - printf("%d\n", vp->u.i); - else - printf("%s\n", vp->u.s); - - exit(is_zero_or_null(vp)); -} Index: expr.y =================================================================== RCS file: expr.y diff -N expr.y --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ expr.y 3 Jan 2016 14:38:39 -0000 @@ -0,0 +1,462 @@ +/* $OpenBSD: */ +/* $NetBSD: expr.y,v 1.38 2012/03/15 02:02:20 joerg Exp $ */ + +/*_ + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Jaromir Dolecek <jdole...@netbsd.org> and J.T. Conklin <j...@netbsd.org>. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +%{ +#include <sys/cdefs.h> +#include <sys/types.h> + +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <regex.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +static const char * const *av; + +int yyparse(void); +static void yyerror(const char *, ...); +static int yylex(void); +static int is_zero_or_null(const char *); +static int is_integer(const char *); +static int64_t perform_arith_op(const char *, const char *, const char *); + +int main(int, const char * const *); + +#define YYSTYPE const char * + +%} +%token STRING +%left SPEC_OR +%left SPEC_AND +%left COMPARE +%left ADD_SUB_OPERATOR +%left MUL_DIV_MOD_OPERATOR +%left SPEC_REG +%left LENGTH +%left LEFT_PARENT RIGHT_PARENT + +%% + +exp: expr = { + (void) printf("%s\n", $1); + return (is_zero_or_null($1)); + } + ; + +expr: item { $$ = $1; } + | expr SPEC_OR expr = { + /* + * Return evaluation of first expression if it is neither + * an empty string nor zero; otherwise, returns the evaluation + * of second expression. + */ + if (!is_zero_or_null($1)) + $$ = $1; + else + $$ = $3; + } + | expr SPEC_AND expr = { + /* + * Returns the evaluation of first expr if neither expression + * evaluates to an empty string or zero; otherwise, returns + * zero. + */ + if (!is_zero_or_null($1) && !is_zero_or_null($3)) + $$ = $1; + else + $$ = "0"; + } + | expr SPEC_REG expr = { + /* + * The ``:'' operator matches first expr against the second, + * which must be a regular expression. 
+ */ + regex_t rp; + regmatch_t rm[2]; + int eval; + + /* compile regular expression */ + if ((eval = regcomp(&rp, $3, REG_BASIC)) != 0) { + char errbuf[256]; + (void)regerror(eval, &rp, errbuf, sizeof(errbuf)); + yyerror("%s", errbuf); + /* NOT REACHED */ + } + + /* compare string against pattern -- remember that patterns + are anchored to the beginning of the line */ + if (regexec(&rp, $1, 2, rm, 0) == 0 && rm[0].rm_so == 0) { + char *val; + if (rm[1].rm_so >= 0) { + (void) asprintf(&val, "%.*s", + (int) (rm[1].rm_eo - rm[1].rm_so), + $1 + rm[1].rm_so); + } else { + (void) asprintf(&val, "%d", + (int)(rm[0].rm_eo - rm[0].rm_so)); + } + if (val == NULL) + err(1, NULL); + $$ = val; + } else { + if (rp.re_nsub == 0) { + $$ = "0"; + } else { + $$ = ""; + } + } + + } + | expr ADD_SUB_OPERATOR expr = { + /* Returns the results of addition, subtraction */ + char *val; + int64_t res; + + res = perform_arith_op($1, $2, $3); + (void) asprintf(&val, "%lld", (long long int) res); + if (val == NULL) + err(1, NULL); + $$ = val; + } + + | expr MUL_DIV_MOD_OPERATOR expr = { + /* + * Returns the results of multiply, divide or remainder of + * numeric-valued arguments. + */ + char *val; + int64_t res; + + res = perform_arith_op($1, $2, $3); + (void) asprintf(&val, "%lld", (long long int) res); + if (val == NULL) + err(1, NULL); + $$ = val; + + } + | expr COMPARE expr = { + /* + * Returns the results of integer comparison if both arguments + * are integers; otherwise, returns the results of string + * comparison using the locale-specific collation sequence. + * The result of each comparison is 1 if the specified relation + * is true, or 0 if the relation is false. + */ + + int64_t l, r; + int res; + + res = 0; + + /* + * Slight hack to avoid differences in the compare code + * between string and numeric compare. 
+		 */
+		if (is_integer($1) && is_integer($3)) {
+			/* numeric comparison */
+			l = strtoll($1, NULL, 10);
+			r = strtoll($3, NULL, 10);
+		} else {
+			/* string comparison */
+			l = strcoll($1, $3);
+			r = 0;
+		}
+
+		switch($2[0]) {
+		case '=': /* equal */
+			res = (l == r);
+			break;
+		case '>': /* greater or greater-equal */
+			if ($2[1] == '=')
+				res = (l >= r);
+			else
+				res = (l > r);
+			break;
+		case '<': /* lower or lower-equal */
+			if ($2[1] == '=')
+				res = (l <= r);
+			else
+				res = (l < r);
+			break;
+		case '!': /* not equal */
+			/* the check if this is != was done in yylex() */
+			res = (l != r);
+		}
+
+		$$ = (res) ? "1" : "0";
+
+		}
+	| LEFT_PARENT expr RIGHT_PARENT { $$ = $2; }
+	| LENGTH expr {
+		/*
+		 * Return length of 'expr' in bytes.
+		 */
+		char *ln;
+
+		asprintf(&ln, "%ld", (long) strlen($2));
+		if (ln == NULL)
+			err(1, NULL);
+		$$ = ln;
+		}
+	;
+
+item:	STRING
+	| ADD_SUB_OPERATOR
+	| MUL_DIV_MOD_OPERATOR
+	| COMPARE
+	| SPEC_OR
+	| SPEC_AND
+	| SPEC_REG
+	| LENGTH
+	;
+%%
+
+/*
+ * Returns 1 if the string is empty or contains only numeric zero.
+ */
+static int
+is_zero_or_null(const char *str)
+{
+	char *endptr;
+
+	return str[0] == '\0'
+		|| ( strtoll(str, &endptr, 10) == 0LL
+			&& endptr[0] == '\0');
+}
+
+/*
+ * Returns 1 if the string is an integer.
+ */
+static int
+is_integer(const char *str)
+{
+	char *endptr;
+
+	(void) strtoll(str, &endptr, 10);
+	/* note we treat empty string as valid number */
+	return (endptr[0] == '\0');
+}
+
+/*
+ * Apply the arithmetic operator op[0] to the integer operands left and
+ * right using 64-bit arithmetic.  Non-integer operands, out-of-range
+ * values, division by zero and overflow are reported through yyerror().
+ */
+static int64_t
+perform_arith_op(const char *left, const char *op, const char *right)
+{
+	int64_t res, l, r;
+	u_int64_t temp;
+
+	res = 0;
+
+	if (!is_integer(left)) {
+		yyerror("non-integer argument '%s'", left);
+		/* NOTREACHED */
+	}
+	if (!is_integer(right)) {
+		yyerror("non-integer argument '%s'", right);
+		/* NOTREACHED */
+	}
+
+	errno = 0;
+	l = strtoll(left, NULL, 10);
+	if (errno == ERANGE) {
+		yyerror("value '%s' is %s is %lld", left,
+		    (l > 0) ? "too big, maximum" : "too small, minimum",
+		    (l > 0) ? LLONG_MAX : LLONG_MIN);
+		/* NOTREACHED */
+	}
+
+	errno = 0;
+	r = strtoll(right, NULL, 10);
+	if (errno == ERANGE) {
+		yyerror("value '%s' is %s is %lld", right,
+		    (r > 0) ? "too big, maximum" : "too small, minimum",
+		    (r > 0) ? LLONG_MAX : LLONG_MIN);
+		/* NOTREACHED */
+	}
+
+	switch(op[0]) {
+	case '+':
+		/*
+		 * Do the op in unsigned arithmetic, where wrap-around is
+		 * well defined, then cast back to check the resulting sign.
+		 */
+		temp = (u_int64_t)l + (u_int64_t)r;
+		res = (int64_t) temp;
+		/* same-sign operands must give a result of that sign */
+		if ((res < 0 && l > 0 && r > 0)
+		    || (res >= 0 && l < 0 && r < 0))
+			yyerror("integer overflow or underflow occurred for "
+			    "operation '%s %s %s'", left, op, right);
+		break;
+	case '-':
+		/*
+		 * Do the op in unsigned arithmetic, where wrap-around is
+		 * well defined, then cast back to check the resulting sign.
+		 */
+		temp = (u_int64_t)l - (u_int64_t)r;
+		res = (int64_t) temp;
+		/* l > r needs a positive result, l < r a negative one */
+		if ((res < 0 && l > r)
+		    || (res > 0 && l < r))
+			yyerror("integer overflow or underflow occurred for "
+			    "operation '%s %s %s'", left, op, right);
+		break;
+	case '/':
+		if (r == 0)
+			yyerror("second argument to '%s' must not be zero", op);
+		/* LLONG_MIN / -1 is not representable in 64 bits */
+		if (l == LLONG_MIN && r == -1)
+			yyerror("integer overflow or underflow occurred for "
+			    "operation '%s %s %s'", left, op, right);
+		res = l / r;
+		break;
+	case '%':
+		if (r == 0)
+			yyerror("second argument to '%s' must not be zero", op);
+		res = (r == -1) ? 0 : l % r;	/* avoid LLONG_MIN % -1 */
+		break;
+	case '*':
+		if ((l == 0) || (r == 0)) {
+			res = 0;
+			break;
+		}
+		/*
+		 * Multiply in unsigned arithmetic, where wrap-around is well
+		 * defined, then divide back: the wrapped product is exact if
+		 * and only if res / r == l.  r == -1 is tested separately
+		 * because LLONG_MIN / -1 would itself overflow.
+		 */
+		temp = (u_int64_t)l * (u_int64_t)r;
+		res = (int64_t) temp;
+		if ((r == -1) ? (l == LLONG_MIN) : (res / r != l))
+			yyerror("integer overflow or underflow occurred for "
+			    "operation '%s %s %s'", left, op, right);
+		break;
+	}
+	return res;
+}
+
+static const char *x = "|&=<>+-*/%:()";
+static const int x_token[] = {
+	SPEC_OR, SPEC_AND, COMPARE, COMPARE, COMPARE, ADD_SUB_OPERATOR,
+	ADD_SUB_OPERATOR, MUL_DIV_MOD_OPERATOR, MUL_DIV_MOD_OPERATOR,
+	MUL_DIV_MOD_OPERATOR, SPEC_REG, LEFT_PARENT, RIGHT_PARENT
+};
+
+static int handle_ddash = 1;
+
+/*
+ * Return the token for the next argument (0 when none are left); the
+ * argument text itself is left in yylval.
+ */
+int
+yylex(void)
+{
+	const char *p = *av++;
+	int retval;
+
+	if (!p)
+		retval = 0;
+	else if (p[0] != '\0' && p[1] == '\0') {
+		/* "" is kept out: strchr() would match x's NUL terminator */
+		const char *w = strchr(x, p[0]);
+		retval = w ? x_token[w - x] : STRING;
+	} else if ((p[0] == '>' || p[0] == '<' || p[0] == '!')
+			&& p[1] == '=' && p[2] == '\0')
+		retval = COMPARE;
+	else if (handle_ddash && p[0] == '-' && p[1] == '-' && p[2] == '\0') {
+		/* ignore "--" if passed as first argument and isn't followed
+		 * by another STRING */
+		retval = yylex();
+		if (retval != STRING && retval != LEFT_PARENT
+		    && retval != RIGHT_PARENT) {
+			/* is not followed by string or parenthesis, use as
+			 * STRING */
+			retval = STRING;
+			av--;	/* was increased in call to yylex() above */
+			p = "--";
+		} else {
+			/* "--" is to be ignored */
+			p = yylval;
+		}
+	} else if (strcmp(p, "length") == 0)
+		retval = LENGTH;
+	else
+		retval = STRING;
+
+	handle_ddash = 0;
+	yylval = p;
+
+	return retval;
+}
+
+/*
+ * Print error message and exit with error 2 (syntax error).
+ */
+static void
+yyerror(const char *fmt, ...)
+{
+	va_list arg;
+
+	va_start(arg, fmt);
+	verrx(2, fmt, arg);
+	va_end(arg);
+}
+
+int
+main(int argc, const char * const *argv)
+{
+	setprogname(argv[0]);
+	(void)setlocale(LC_ALL, "");
+
+	if (pledge("stdio", NULL) == -1)
+		err(2, "pledge");
+
+	if (argc == 1) {
+		(void)fprintf(stderr, "usage: %s expression\n",
+		    getprogname());
+		exit(2);
+	}
+
+	av = argv + 1;
+
+	exit(yyparse());
+	/* NOTREACHED */
+}