Re: test(1): Fix integer overflows

2018-03-27 Thread Theo Buehler
On Tue, Mar 27, 2018 at 10:48:51PM +0200, Tobias Stoeckmann wrote:
> On Tue, Mar 27, 2018 at 10:36:17PM +0200, Ingo Schwarze wrote:
> > > + int sig;
> > 
> > Drop this variable, it does no good but only harm.
> 
> Yeah, too bad that I didn't notice this left-over from a previous
> development step. Strange enough that regression tests didn't
> choke on it...
> 
> > > + /* skip leading zeros */
> > > + while (*p == '0' && isdigit((unsigned char)p[1]))
> > > + p++;
> > > +
> > > + /* turn 0 always positive */
> > > + if (*p == '0' && !isdigit((unsigned char)p[1]))
> > > + sig = 1;
> 
> As tb@ pointed out, the !isdigit check can be removed as well.
> 
> > > + c = strncmp(p1, p2, len1);
> > 
> > That's another bug:
> > 
> >   schwarze@isnote $ /bin/test -1 -gt -2 ; echo $?
> >   1
> >   schwarze@isnote $ /bin/test 1 -gt 2 ; echo $?
> >   1
> 
> It's really great to have people who do extensive reviews. Thanks
> a lot for spotting this. I have added it to the regression tests.
> 
> > With these changes, OK schwarze@.  See below for the patch that
> > i tested.
> 
> Modified with tb@'s remark. Also I changed the error message
> "bad number" to "invalid", so we stay fully in sync with strtonum's
> error messages.
> 
> > Given that breaking test(1) might cause hard-to-find breakage
> > deep down in build systems and scripts, you might with a to
> > have a second OK for commit, though.
> 
> Definitely, I'm not out to rush this one in.

ok tb

> 
> 
> Index: bin/test/test.c
> ===
> RCS file: /cvs/src/bin/test/test.c,v
> retrieving revision 1.18
> diff -u -p -u -p -r1.18 test.c
> --- bin/test/test.c   24 Jul 2017 22:15:52 -  1.18
> +++ bin/test/test.c   27 Mar 2018 20:43:38 -
> @@ -16,6 +16,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -145,6 +146,8 @@ static int aexpr(enum token n);
>  static int nexpr(enum token n);
>  static int binop(void);
>  static int primary(enum token n);
> +static const char *getnstr(const char *, int *, size_t *);
> +static int intcmp(const char *, const char *);
>  static int filstat(char *nm, enum token mode);
>  static int getn(const char *s);
>  static int newerf(const char *, const char *);
> @@ -290,6 +293,70 @@ primary(enum token n)
>   return strlen(*t_wp) > 0;
>  }
>  
> +static const char *
> +getnstr(const char *s, int *signum, size_t *len)
> +{
> + const char *p, *start;
> +
> + /* skip leading whitespaces */
> + p = s;
> + while (isspace((unsigned char)*p))
> + p++;
> +
> + /* accept optional sign */
> + if (*p == '-') {
> + *signum = -1;
> + p++;
> + } else {
> + *signum = 1;
> + if (*p == '+')
> + p++;
> + }
> +
> + /* skip leading zeros */
> + while (*p == '0' && isdigit((unsigned char)p[1]))
> + p++;
> +
> + /* turn 0 always positive */
> + if (*p == '0')
> + *signum = 1;
> +
> + start = p;
> + while (isdigit((unsigned char)*p))
> + p++;
> + *len = p - start;
> +
> + /* allow trailing whitespaces */
> + while (isspace((unsigned char)*p))
> + p++;
> +
> + /* validate number */
> + if (*p != '\0' || *start == '\0')
> + errx(2, "%s: invalid", s);
> +
> + return start;
> +}
> +
> +static int
> +intcmp(const char *opnd1, const char *opnd2)
> +{
> + const char *p1, *p2;
> + size_t len1, len2;
> + int c, sig1, sig2;
> +
> + p1 = getnstr(opnd1, &sig1, &len1);
> + p2 = getnstr(opnd2, &sig2, &len2);
> +
> + if (sig1 != sig2)
> + c = sig1;
> + else if (len1 != len2)
> + c = (len1 < len2) ? -sig1 : sig1;
> + else
> + c = strncmp(p1, p2, len1) * sig1;
> +
> + return c;
> +}
> +
>  static int
>  binop(void)
>  {
> @@ -313,17 +380,17 @@ binop(void)
>   case STRGT:
>   return strcmp(opnd1, opnd2) > 0;
>   case INTEQ:
> - return getn(opnd1) == getn(opnd2);
> + return intcmp(opnd1, opnd2) == 0;
>   case INTNE:
> - return getn(opnd1) != getn(opnd2);
> + return intcmp(opnd1, opnd2) != 0;
>   case INTGE:
> - return getn(opnd1) >= getn(opnd2);
> + return intcmp(opnd1, opnd2) >= 0;
>   case INTGT:
> - return getn(opnd1) > getn(opnd2);
> + return intcmp(opnd1, opnd2) > 0;
>   case INTLE:
> - return getn(opnd1) <= getn(opnd2);
> + return intcmp(opnd1, opnd2) <= 0;
>   case INTLT:
> - return getn(opnd1) < getn(opnd2);
> + return intcmp(opnd1, opnd2) < 0;
>   case FILNT:
>   return newerf(opnd1, opnd2);
>   case FILOT:
> @@ -455,22 +522,26 @@ t_lex(char *s)
>  static int
>  getn(const char *s)
>  {
> - char *p;
> - long r;
> -
> - errno = 0;
> -

Re: test(1): Fix integer overflows

2018-03-27 Thread Tobias Stoeckmann
On Tue, Mar 27, 2018 at 10:36:17PM +0200, Ingo Schwarze wrote:
> > +   int sig;
> 
> Drop this variable, it does no good but only harm.

Yeah, too bad that I didn't notice this left-over from a previous
development step. Strange enough that regression tests didn't
choke on it...

> > +   /* skip leading zeros */
> > +   while (*p == '0' && isdigit((unsigned char)p[1]))
> > +   p++;
> > +
> > +   /* turn 0 always positive */
> > +   if (*p == '0' && !isdigit((unsigned char)p[1]))
> > +   sig = 1;

As tb@ pointed out, the !isdigit check can be removed as well.

> > +   c = strncmp(p1, p2, len1);
> 
> That's another bug:
> 
>   schwarze@isnote $ /bin/test -1 -gt -2 ; echo $?
>   1
>   schwarze@isnote $ /bin/test 1 -gt 2 ; echo $?
>   1

It's really great to have people who do extensive reviews. Thanks
a lot for spotting this. I have added it to the regression tests.

> With these changes, OK schwarze@.  See below for the patch that
> i tested.

Modified with tb@'s remark. Also I changed the error message
"bad number" to "invalid", so we stay fully in sync with strtonum's
error messages.

> Given that breaking test(1) might cause hard-to-find breakage
> deep down in build systems and scripts, you might with a to
> have a second OK for commit, though.

Definitely, I'm not out to rush this one in.


Index: bin/test/test.c
===
RCS file: /cvs/src/bin/test/test.c,v
retrieving revision 1.18
diff -u -p -u -p -r1.18 test.c
--- bin/test/test.c 24 Jul 2017 22:15:52 -  1.18
+++ bin/test/test.c 27 Mar 2018 20:43:38 -
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -145,6 +146,8 @@ static int aexpr(enum token n);
 static int nexpr(enum token n);
 static int binop(void);
 static int primary(enum token n);
+static const char *getnstr(const char *, int *, size_t *);
+static int intcmp(const char *, const char *);
 static int filstat(char *nm, enum token mode);
 static int getn(const char *s);
 static int newerf(const char *, const char *);
@@ -290,6 +293,70 @@ primary(enum token n)
return strlen(*t_wp) > 0;
 }
 
+static const char *
+getnstr(const char *s, int *signum, size_t *len)
+{
+   const char *p, *start;
+
+   /* skip leading whitespaces */
+   p = s;
+   while (isspace((unsigned char)*p))
+   p++;
+
+   /* accept optional sign */
+   if (*p == '-') {
+   *signum = -1;
+   p++;
+   } else {
+   *signum = 1;
+   if (*p == '+')
+   p++;
+   }
+
+   /* skip leading zeros */
+   while (*p == '0' && isdigit((unsigned char)p[1]))
+   p++;
+
+   /* turn 0 always positive */
+   if (*p == '0')
+   *signum = 1;
+
+   start = p;
+   while (isdigit((unsigned char)*p))
+   p++;
+   *len = p - start;
+
+   /* allow trailing whitespaces */
+   while (isspace((unsigned char)*p))
+   p++;
+
+   /* validate number */
+   if (*p != '\0' || *start == '\0')
+   errx(2, "%s: invalid", s);
+
+   return start;
+}
+
+static int
+intcmp(const char *opnd1, const char *opnd2)
+{
+   const char *p1, *p2;
+   size_t len1, len2;
+   int c, sig1, sig2;
+
+   p1 = getnstr(opnd1, &sig1, &len1);
+   p2 = getnstr(opnd2, &sig2, &len2);
+
+   if (sig1 != sig2)
+   c = sig1;
+   else if (len1 != len2)
+   c = (len1 < len2) ? -sig1 : sig1;
+   else
+   c = strncmp(p1, p2, len1) * sig1;
+
+   return c;
+}
+
 static int
 binop(void)
 {
@@ -313,17 +380,17 @@ binop(void)
case STRGT:
return strcmp(opnd1, opnd2) > 0;
case INTEQ:
-   return getn(opnd1) == getn(opnd2);
+   return intcmp(opnd1, opnd2) == 0;
case INTNE:
-   return getn(opnd1) != getn(opnd2);
+   return intcmp(opnd1, opnd2) != 0;
case INTGE:
-   return getn(opnd1) >= getn(opnd2);
+   return intcmp(opnd1, opnd2) >= 0;
case INTGT:
-   return getn(opnd1) > getn(opnd2);
+   return intcmp(opnd1, opnd2) > 0;
case INTLE:
-   return getn(opnd1) <= getn(opnd2);
+   return intcmp(opnd1, opnd2) <= 0;
case INTLT:
-   return getn(opnd1) < getn(opnd2);
+   return intcmp(opnd1, opnd2) < 0;
case FILNT:
return newerf(opnd1, opnd2);
case FILOT:
@@ -455,22 +522,26 @@ t_lex(char *s)
 static int
 getn(const char *s)
 {
-   char *p;
-   long r;
-
-   errno = 0;
-   r = strtol(s, &p, 10);
-
-   if (errno != 0)
-   errx(2, "%s: out of range", s);
-
-   while (isspace((unsigned char)*p))
-   p++;
+   char buf[32];
+   const char *errstr, *p;
+   size_t len;
+   int r, si

Re: test(1): Fix integer overflows

2018-03-27 Thread Ingo Schwarze
Hi Tobias,

Tobias Stoeckmann wrote on Tue, Mar 27, 2018 at 07:37:32PM +0200:

> While at it, -t has a proper 0-INT_MAX limitation now as well.

Looks correct.

> Please note that we have overflows in pdksh's builtin test as well.
> If bin/test is refactored, I'll try to unify both implementations
> much closer to each other where possible.

Makes sense to me, including doing one step at a time.

> +static const char *
> +getnstr(const char *s, int *signum, size_t *len)
> +{
> + const char *p, *start;
> + int sig;

Drop this variable, it does no good but only harm.

> + /* skip leading whitespaces */
> + p = s;
> + while (isspace((unsigned char)*p))
> + p++;
> +
> + /* accept optional sign */
> + if (*p == '-') {
> + *signum = -1;
> + p++;
> + } else {
> + *signum = 1;
> + if (*p == '+')
> + p++;
> + }
> +
> + /* skip leading zeros */
> + while (*p == '0' && isdigit((unsigned char)p[1]))
> + p++;
> +
> + /* turn 0 always positive */
> + if (*p == '0' && !isdigit((unsigned char)p[1]))
> + sig = 1;

Make that:
*signum = 1;

> + start = p;
> + while (isdigit((unsigned char)*p))
> + p++;
> + *len = p - start;
> +
> + /* allow trailing whitespaces */
> + while (isspace((unsigned char)*p))
> + p++;
> +
> + /* validate number */
> + if (*p != '\0' || *start == '\0')
> + errx(2, "%s: bad number", s);
> +
> + *signum = sig;

That's bad, "sig" is usually uninitialized here.
Just delete this line.

> + return start;
> +}
> +
> +static int
> +intcmp(const char *opnd1, const char *opnd2)
> +{
> + const char *p1, *p2;
> + size_t len1, len2;
> + int c, sig1, sig2;
> +
> + p1 = getnstr(opnd1, &sig1, &len1);
> + p2 = getnstr(opnd2, &sig2, &len2);
> +
> + if (sig1 != sig2)
> + c = sig1;
> + else if (len1 != len2)
> + c = (len1 < len2) ? -sig1 : sig1;
> + else
> + c = strncmp(p1, p2, len1);

That's another bug:

  schwarze@isnote $ /bin/test -1 -gt -2 ; echo $?
  1
  schwarze@isnote $ /bin/test 1 -gt 2 ; echo $?
  1

I think what you meant is:

c = strncmp(p1, p2, len1) * sig1;

With the above changes, i get the expected:

  schwarze@isnote $ /bin/test -1 -gt -2 ; echo $? 
  0
  schwarze@isnote $ /obin/test 1 -gt 2 ; echo $?  
  1

With these changes, OK schwarze@.  See below for the patch that
i tested.

Given that breaking test(1) might cause hard-to-find breakage
deep down in build systems and scripts, you might with a to
have a second OK for commit, though.

Yours,
  Ingo


Index: test.c
===
RCS file: /cvs/src/bin/test/test.c,v
retrieving revision 1.18
diff -u -p -r1.18 test.c
--- test.c  24 Jul 2017 22:15:52 -  1.18
+++ test.c  27 Mar 2018 20:30:47 -
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -145,6 +146,8 @@ static int aexpr(enum token n);
 static int nexpr(enum token n);
 static int binop(void);
 static int primary(enum token n);
+static const char *getnstr(const char *, int *, size_t *);
+static int intcmp(const char *, const char *);
 static int filstat(char *nm, enum token mode);
 static int getn(const char *s);
 static int newerf(const char *, const char *);
@@ -290,6 +293,70 @@ primary(enum token n)
return strlen(*t_wp) > 0;
 }
 
+static const char *
+getnstr(const char *s, int *signum, size_t *len)
+{
+   const char *p, *start;
+
+   /* skip leading whitespaces */
+   p = s;
+   while (isspace((unsigned char)*p))
+   p++;
+
+   /* accept optional sign */
+   if (*p == '-') {
+   *signum = -1;
+   p++;
+   } else {
+   *signum = 1;
+   if (*p == '+')
+   p++;
+   }
+
+   /* skip leading zeros */
+   while (*p == '0' && isdigit((unsigned char)p[1]))
+   p++;
+
+   /* turn 0 always positive */
+   if (*p == '0' && !isdigit((unsigned char)p[1]))
+   *signum = 1;
+
+   start = p;
+   while (isdigit((unsigned char)*p))
+   p++;
+   *len = p - start;
+
+   /* allow trailing whitespaces */
+   while (isspace((unsigned char)*p))
+   p++;
+
+   /* validate number */
+   if (*p != '\0' || *start == '\0')
+   errx(2, "%s: bad number", s);
+
+   return start;
+}
+
+static int
+intcmp(const char *opnd1, const char *opnd2)
+{
+   const char *p1, *p2;
+   size_t len1, len2;
+   int c, sig1, sig2;
+
+   p1 = getnstr(opnd1, &sig1, &len1);
+   p2 = getnstr(opnd2, &sig2, &len2);
+
+   if (sig1 != sig2)
+   c = sig1;
+   else if (len1 != len2)
+   c = (len1 < len2) ? -sig1 : sig1;
+   else
+ 

Re: test(1): Fix integer overflows

2018-03-27 Thread Tobias Stoeckmann
Hi,

On Tue, Mar 27, 2018 at 09:05:12PM +0200, Klemens Nanni wrote:
> FWIW, see "test: Catch integer overflow, fail on trailing whitespaces"
> from 24.07.2017 on tech@:
>   
>   https://marc.info/?l=openbsd-tech&m=150091968903042

sorry I didn't intend to ignore your mail. I didn't follow tech@
closely the last months. Your proposed patch is actually close to what
I came up first -- never thought about dropping this whitespace support.

Just as Ingo did, I have checked the specs here closely again and
found this one:

http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html

You can find in there this section:

For the commands:
x=010; echo $((x += 1))
the output must be 9.

For the commands:
x=' 1'; echo $((x += 1))
the results are unspecified.

So, POSIX would allow us to do what we want, which means that your
proposal could be done as well.

I tried to see what ksh does, and if there is any way to trigger a
situation in which ' 1' wouldn't be considered a number.

But even this one works:

$ x=' 1 '
$ echo $((x+1))
2
$ _

Here are my two cents and feedback to your mail:

If test(1) stops accepting ' 1 ' as a number, it's in conflict with
ksh. I would prefer if test(1) and built-in test get closer to each
other, not splitting their way of functioning further. But it's no
strong opinion of mine. Having a very strict POSIX test support sounds
appealing as well.


Tobias



Re: test(1): Fix integer overflows

2018-03-27 Thread Ingo Schwarze
Hi Tobias,

one quick remark regarding POSIX:

The relevant text is

  http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap12.html

section 12.1 Utility Argument Syntax, number 6.

Together with

  http://pubs.opengroup.org/onlinepubs/9699919799/utilities/test.html

we have the following consequences:

 * The minimum range we must support is 0 to 2147483647.
 * We are allowed to support a larger range if we want to.
 * Behaviour for integers out of the supported range
   appears to be unspecified.

If i read that correctly, both our current implementation, and the
new syntax and semantics with unlimited range that you propose, are
allowed by POSIX.

I agree that supporting an unlimited range is better and safer,
in particular if it can be done without excessive complication
of the code.


> - Numbers are always base 10, regardless of starting with 0

Yes, that is definitely required.

> - A number can start with arbitrary amounts of whitespaces
> - Then a sign char is allowed (POSIX only mentions -)

I don't think also accepting '+' does any harm - quite to the contrary,
given that we currently accept it, we should continue doing so.

> - An arbitrary amount of digits follows
> - Then an arbitrary amount of whitespaces can follow

I agree that discarding leading and trailing whitespace can do
no harm.  When unquoted, such whitespace is already stripped by
argument parsing, so it seems less surprising to also ignore it
when quoted.

Note that i did not inspect your patch yet.

Yours,
  Ingo



Re: test(1): Fix integer overflows

2018-03-27 Thread Klemens Nanni
FWIW, see "test: Catch integer overflow, fail on trailing whitespaces"
from 24.07.2017 on tech@:

https://marc.info/?l=openbsd-tech&m=150091968903042



test(1): Fix integer overflows

2018-03-27 Thread Tobias Stoeckmann
The test(1) utility is prone to integer overflows on 64 bit systems.
By using strtol and casting the result to an int, it is possible to
run tests which succeed even though they should fail:

$ arch
OpenBSD.amd64
$ /bin/test 4294967296 -eq 0; echo $?
0

I could have chosen the way of FreeBSD, NetBSD, or built-in test of
pdksh and extend the number to intmax_t, but I actually prefered the
way of coreutils, i.e. validating the number as strings.

This version allows numbers based on the rules which I figured out
by reading strtol's man page and POSIX's rather vague specification:

- Numbers are always base 10, regardless of starting with 0
- A number can start with arbitrary amounts of whitespaces
- Then a sign char is allowed (POSIX only mentions -)
- An arbitrary amount of digits follows
- Then an arbitrary amount of whitespaces can follow

Therefore, comparing numbers can be done in a human-understandable
form as strings:

1. If signum differs, the smaller number is the one having a -
2. Leading zeros are skipped
3. If both numbers are positive, the one with more digits is larger
4. If both numbers are negative, the one with more digits is smaller
5. Equality is based on a simple string comparison

I have extended the regression test suite. The last two additions
fail with current implementation.

While at it, -t has a proper 0-INT_MAX limitation now as well.

This is quite a refactoring so tests are very appreciated. Please note
that we have overflows in pdksh's builtin test as well. If bin/test
is refactored, I'll try to unify both implementations much closer to
each other where possible.


Tobias

Index: bin/test/test.c
===
RCS file: /cvs/src/bin/test/test.c,v
retrieving revision 1.18
diff -u -p -u -p -r1.18 test.c
--- bin/test/test.c 24 Jul 2017 22:15:52 -  1.18
+++ bin/test/test.c 27 Mar 2018 17:23:51 -
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -145,6 +146,8 @@ static int aexpr(enum token n);
 static int nexpr(enum token n);
 static int binop(void);
 static int primary(enum token n);
+static const char *getnstr(const char *, int *, size_t *);
+static int intcmp(const char *, const char *);
 static int filstat(char *nm, enum token mode);
 static int getn(const char *s);
 static int newerf(const char *, const char *);
@@ -290,6 +293,72 @@ primary(enum token n)
return strlen(*t_wp) > 0;
 }
 
+static const char *
+getnstr(const char *s, int *signum, size_t *len)
+{
+   const char *p, *start;
+   int sig;
+
+   /* skip leading whitespaces */
+   p = s;
+   while (isspace((unsigned char)*p))
+   p++;
+
+   /* accept optional sign */
+   if (*p == '-') {
+   *signum = -1;
+   p++;
+   } else {
+   *signum = 1;
+   if (*p == '+')
+   p++;
+   }
+
+   /* skip leading zeros */
+   while (*p == '0' && isdigit((unsigned char)p[1]))
+   p++;
+
+   /* turn 0 always positive */
+   if (*p == '0' && !isdigit((unsigned char)p[1]))
+   sig = 1;
+
+   start = p;
+   while (isdigit((unsigned char)*p))
+   p++;
+   *len = p - start;
+
+   /* allow trailing whitespaces */
+   while (isspace((unsigned char)*p))
+   p++;
+
+   /* validate number */
+   if (*p != '\0' || *start == '\0')
+   errx(2, "%s: bad number", s);
+
+   *signum = sig;
+   return start;
+}
+
+static int
+intcmp(const char *opnd1, const char *opnd2)
+{
+   const char *p1, *p2;
+   size_t len1, len2;
+   int c, sig1, sig2;
+
+   p1 = getnstr(opnd1, &sig1, &len1);
+   p2 = getnstr(opnd2, &sig2, &len2);
+
+   if (sig1 != sig2)
+   c = sig1;
+   else if (len1 != len2)
+   c = (len1 < len2) ? -sig1 : sig1;
+   else
+   c = strncmp(p1, p2, len1);
+
+   return c;
+}
+
 static int
 binop(void)
 {
@@ -313,17 +382,17 @@ binop(void)
case STRGT:
return strcmp(opnd1, opnd2) > 0;
case INTEQ:
-   return getn(opnd1) == getn(opnd2);
+   return intcmp(opnd1, opnd2) == 0;
case INTNE:
-   return getn(opnd1) != getn(opnd2);
+   return intcmp(opnd1, opnd2) != 0;
case INTGE:
-   return getn(opnd1) >= getn(opnd2);
+   return intcmp(opnd1, opnd2) >= 0;
case INTGT:
-   return getn(opnd1) > getn(opnd2);
+   return intcmp(opnd1, opnd2) > 0;
case INTLE:
-   return getn(opnd1) <= getn(opnd2);
+   return intcmp(opnd1, opnd2) <= 0;
case INTLT:
-   return getn(opnd1) < getn(opnd2);
+   return intcmp(opnd1, opnd2) < 0;
case FILNT:
return newerf(opnd1, opnd2);
case FILOT:
@@ -455,22 +524,26 @@ t_lex(char *