Hi folks,

Here's a diff to expr(1) that does three things I feel make it more useful:

+ Makes it 64-bit capable on 64-bit architectures by changing the relevant ints to longs. I often use expr as a quick calculator, for example when partitioning disks and such, and frequently find myself up against the 32-bit signed integer limits.

+ Makes it able to use C-style radix prefixes on integers in order to do calculations in octal and hexadecimal. In the olden days, the early '80s to be specific, I worked at a company that produced a Unix flavour called D-NIX, which had this neat feature in its expr(1). It makes it a breeze to do quick radix conversions, for example. And I guess the habit stuck with me, for to this day it frustrates me not to be able to use expr for that. :-) I hope others find it useful too. For example:

    skynet:/usr/src/bin/expr# expr 0x1ffff + 15
    131086
    skynet:/usr/src/bin/expr#

+ Makes it able to output calculation results in hex and octal. This is of course the reverse functionality to the previous. Works like this:

    skynet:/usr/src/bin/expr# expr -x 16383
    3fff
    skynet:/usr/src/bin/expr# expr -o 16383
    37777
    skynet:/usr/src/bin/expr# expr -X 16383
    3FFF
    skynet:/usr/src/bin/expr# expr -cx 16383
    0x3fff
    skynet:/usr/src/bin/expr#

Now, the downside is that the latter two changes will subtly change expr's semantics, in the following ways:

- Hexadecimal numbers (starting with 0x or 0X) are no longer recognized as strings. (The common construct expr 'X'$FOO = 'X' would still work as expected, of course.) Probably not a big concern.

- Since expr previously ignored switches altogether, strings starting with '-' will no longer be recognized as strings, but will probably generate a "usage:" error message instead. This can be circumvented by the -- notation, and that particular form was actually supported before as well. But still, this might be a problem since there is a slim chance it can break current shell scripts. I can't think of a better / more backwards compatible way to implement different output radixes though.

- And of course the 32-bit signed integer edge cases will instead be 64-bit signed integer edge cases on 64-bit architectures. Probably not a problem either.

- Oh, and I don't know if these changes break POSIX compliance.

(And as I'm about to post this at 04:00 in the morning, it comes to mind that I might have read somewhere that submitted diffs should only change one behaviour at a time. If that's the case I apologize.)

If these are worthy changes, they need testing of course (on both 32- and 64-bit architectures), and if so I'll make a diff for the man page as well. (And hopefully I didn't f*ck up the enclosed diff...)


Regards,

/Benny


----8<--------8<--------8<--------8<--------8<---- (cut)
Index: expr.c
===================================================================
RCS file: /cvs/src/bin/expr/expr.c,v
retrieving revision 1.17
diff -u -r1.17 expr.c
--- expr.c      21 Jun 2006 18:28:24 -0000      1.17
+++ expr.c      15 Jan 2011 03:00:26 -0000
@@ -14,11 +14,11 @@
 #include <regex.h>
 #include <err.h>

-struct val     *make_int(int);
+struct val     *make_int(long);
 struct val     *make_str(char *);
 void            free_value(struct val *);
-int             is_integer(struct val *, int *);
-int             to_integer(struct val *);
+int             is_integer(struct val *, long *);
+long            to_integer(struct val *);
 void            to_string(struct val *);
 int             is_zero_or_null(struct val *);
 void            nexttoken(int);
@@ -44,7 +44,7 @@

        union {
                char           *s;
-               int             i;
+               long            i;
        } u;
 };

@@ -53,7 +53,7 @@
 char         **av;

 struct val *
-make_int(int i)
+make_int(long i)
 {
        struct val     *vp;

@@ -92,11 +92,14 @@

 /* determine if vp is an integer; if so, return it's value in *r */
 int
-is_integer(struct val *vp, int *r)
+is_integer(struct val *vp, long *r)
 {
        char           *s;
-       int             neg;
-       int             i;
+       long            neg;
+       long            i, d;
+       long            radix=10;
+       static char     *digits="0123456789abcdef";
+       char            *dp;

        if (vp->type == integer) {
                *r = vp->u.i;
@@ -105,7 +108,8 @@

        /*
         * POSIX.2 defines an "integer" as an optional unary minus
-        * followed by digits.
+        * followed by digits. We also consider C style octal and
+        * hexadecimal prefix notation part of an integer.
         */
        s = vp->u.s;
        i = 0;
@@ -113,14 +117,21 @@
        neg = (*s == '-');
        if (neg)
                s++;
+       if (*s=='0') {
+               radix=8; s++;
+               if (*s=='x' || *s=='X') {
+                       radix=16; s++;
+               }
+       }

        while (*s) {
-               if (!isdigit(*s))
+               if ((dp = strchr(digits, tolower(*s)))==NULL)
                        return 0;
+               if ((d = dp-digits) >= radix)
+                       return 0; /* Invalid digit for radix -> string */

-               i *= 10;
-               i += *s - '0';
-
+               i *= radix;
+               i += d;
                s++;
        }

@@ -133,10 +144,10 @@


 /* coerce to vp to an integer */
-int
+long
 to_integer(struct val *vp)
 {
-       int             r;
+       long            r;

        if (vp->type == integer)
                return 1;
@@ -375,7 +386,7 @@
 {
        struct val     *l, *r;
        enum token      op;
-       int             v = 0, li, ri;
+       long            v = 0, li, ri;

        l = eval3();
        while ((op = token) == EQ || op == NE || op == LT || op == GT ||
@@ -491,16 +502,48 @@
 int
 main(int argc, char *argv[])
 {
-       struct val     *vp;
+       struct val      *vp;
+       int             oradix=10;
+       int             cprefix=0;
+       int             c;
+       extern char     *__progname;

        (void) setlocale(LC_ALL, "");

-       if (argc > 1 && !strcmp(argv[1], "--"))
-               argv++;
+       while ((c=getopt(argc, argv, "coxX")) != -1) {
+               switch (c) {
+                       case 'c':
+                               cprefix=1;
+                               break;
+                       case 'o':
+                               oradix=8;
+                               break;
+                       case 'x':
+                               oradix=16;
+                               break;
+                       case 'X':
+                               oradix=-16;
+                               break;
+                       default:
+                               fprintf(stderr,
+                                       "usage: %s [-coxX] <expression>\n",
+                                       __progname);
+                               exit(2); /* Tell user invalid expression */
+               }
+       }
+       argc -= optind;
+       argv += optind;

-       av = argv + 1;
+       av = argv;

        nexttoken(0);
+
+       /* In case there is only one operand, make sure it is validated
+        * as an integer as well, in case we for example want to print it
+        * in a different radix.
+        */
+       (void) to_integer(tokval);
+
        vp = eval0();

        if (token != EOI) {
@@ -508,9 +551,21 @@
                /* NOTREACHED */
        }

-       if (vp->type == integer)
-               printf("%d\n", vp->u.i);
-       else
+       if (vp->type == integer) {
+               switch (oradix) {
+                       case 8:
+                               printf("%s%lo\n", cprefix ? "0" : "", vp->u.i);
+                               break;
+                       case 16:
+                               printf("%s%lx\n", cprefix ? "0x" : "", vp->u.i);
+                               break;
+                       case -16:
+                               printf("%s%lX\n", cprefix ? "0X" : "", vp->u.i);
+                               break;
+                       default:
+                               printf("%ld\n", vp->u.i); break;
+               }
+       } else
                printf("%s\n", vp->u.s);

        exit(is_zero_or_null(vp));
----8<--------8<--------8<--------8<--------8<---- (cut)


--
internetlabbet.se     / work:   +46 8 551 124 80      / "Words must
Benny Löfgren        /  mobile: +46 70 718 11 90     /   be weighed,
                    /   fax:    +46 8 551 124 89    /    not counted."
                   /    email:  benny -at- internetlabbet.se

Reply via email to