Re: ksh: fix buffer overflow in u64ton

2018-04-12 Thread Scott Cheloha
On Mon, Apr 09, 2018 at 08:56:28PM +0200, Tobias Stoeckmann wrote:
> As tb@ pointed out, u64ton can overflow on ULONG_MAX. It could also
> happen on systems with 64 bit int and INT_MIN, although we don't have
> such a system supported by our code base.
> 
> You can reach the u64ton function by printing the length of a string
> within a variable like this:
> 
> $ a=string
> $ echo ${#a}
> 6
> $ _
> 
> Technically there is no need to use BITS(uint64_t) because 20+2 would
> be enough. But it seems that there is a strong preference towards
> going "long long" instead of int64_t due to return types of time()
> and strtonum() as well as a highly theoretical support of larger types
> than 64 bit in long long. Although that probably will never happen.

"Never" is an awful long time...

> So, if we go with a data type like long long, I would prefer to have
> the code prepared to scale its buffer with the data type, therefore
> BITS(). And let's be honest -- nobody will notice this waste of extra
> 44 bytes.

Depending on the compiler, it might have been 64-byte aligned anyway,
so we might just be making use of space we "had" anyway.

Ditto what tb@ said about dropping the hex numerals from the loop,
otherwise this is ok with me.

... though I would like to circle back and modify this function again
after you're done with your conversions.  Given the callers remaining
for u64ton() (nee ulton()), the second argument seems like a hack.

Could it eventually look like this?

long long
llton(long long n)
{
static char buf [BITS(uint64_t) + 2];
char *p;
int negative;

negative = (n < 0) ? 1 : 0;
p = [sizeof(buf)];
*--p = '\0';
do {
*--p = "0123456789"[n % 10];
n /= 10;
} while (n != 0);
if (negative)
*--p = '-';
return p;
}

I'm even tempted to just snprintf(3) the buffer and be done with it,
but I'm reluctant to advocate deoptimizing this function, given that
it has worked fine since import.

-Scott

> Tobias
> 
> Index: eval.c
> ===
> RCS file: /cvs/src/bin/ksh/eval.c,v
> retrieving revision 1.60
> diff -u -p -u -p -r1.60 eval.c
> --- eval.c9 Apr 2018 17:53:36 -   1.60
> +++ eval.c9 Apr 2018 18:47:45 -
> @@ -732,7 +732,7 @@ varsub(Expand *xp, char *sp, char *word,
>   if (Flag(FNOUNSET) && c == 0 && !zero_ok)
>   errorf("%s: parameter not set", sp);
>   *stypep = 0; /* unqualified variable/string substitution */
> - xp->str = str_save(u64ton((uint64_t)c, 10), ATEMP);
> + xp->str = str_save(u64ton((uint64_t)c, '\0'), ATEMP);
>   return XSUB;
>   }
>  
> Index: misc.c
> ===
> RCS file: /cvs/src/bin/ksh/misc.c,v
> retrieving revision 1.70
> diff -u -p -u -p -r1.70 misc.c
> --- misc.c9 Apr 2018 17:53:36 -   1.70
> +++ misc.c9 Apr 2018 18:47:45 -
> @@ -56,20 +56,22 @@ initctypes(void)
>   setctypes(" \n\t\"#$&'()*;<>?[\\`|", C_QUOTE);
>  }
>  
> -/* convert uint64_t to base N string */
> +/* convert uint64_t N to a base 10 number as string with optional PREFIX */
>  
>  char *
> -u64ton(uint64_t n, int base)
> +u64ton(uint64_t n, char prefix)
>  {
>   char *p;
> - static char buf [20];
> + static char buf [BITS(uint64_t) + 2];
>  
>   p = [sizeof(buf)];
>   *--p = '\0';
>   do {
> - *--p = "0123456789ABCDEF"[n%base];
> - n /= base;
> + *--p = "0123456789ABCDEF"[n % 10];
> + n /= 10;
>   } while (n != 0);
> + if (prefix != '\0')
> + *--p = prefix;
>   return p;
>  }
>  
> Index: sh.h
> ===
> RCS file: /cvs/src/bin/ksh/sh.h,v
> retrieving revision 1.72
> diff -u -p -u -p -r1.72 sh.h
> --- sh.h  9 Apr 2018 17:53:36 -   1.72
> +++ sh.h  9 Apr 2018 18:47:45 -
> @@ -527,7 +527,7 @@ void  cleanup_proc_env(void);
>  /* misc.c */
>  void setctypes(const char *, int);
>  void initctypes(void);
> -char *   u64ton(uint64_t, int);
> +char *   u64ton(uint64_t, char);
>  char *   str_save(const char *, Area *);
>  char *   str_nsave(const char *, int, Area *);
>  int  option(const char *);
> Index: tree.c
> ===
> RCS file: /cvs/src/bin/ksh/tree.c,v
> retrieving revision 1.34
> diff -u -p -u -p -r1.34 tree.c
> --- tree.c9 Apr 2018 17:53:36 -   1.34
> +++ tree.c9 Apr 2018 18:47:45 -
> @@ -376,9 +376,7 @@ vfptreef(struct shf *shf, int indent, co
>   case 'd': /* decimal */
>   n = va_arg(va, int);
>   neg = n < 0;
> - p = u64ton(neg ? -n : n, 10);
> - if 

Re: ksh: fix buffer overflow in u64ton

2018-04-09 Thread Theo Buehler
On Mon, Apr 09, 2018 at 08:56:28PM +0200, Tobias Stoeckmann wrote:
> As tb@ pointed out, u64ton can overflow on ULONG_MAX. It could also
> happen on systems with 64 bit int and INT_MIN, although we don't have
> such a system supported by our code base.
> 
> You can reach the u64ton function by printing the length of a string
> within a variable like this:
> 
> $ a=string
> $ echo ${#a}
> 6
> $ _
> 
> Technically there is no need to use BITS(uint64_t) because 20+2 would
> be enough. But it seems that there is a strong preference towards
> going "long long" instead of int64_t due to return types of time()
> and strtonum() as well as a highly theoretical support of larger types
> than 64 bit in long long. Although that probably will never happen.
> 
> So, if we go with a data type like long long, I would prefer to have
> the code prepared to scale its buffer with the data type, therefore
> BITS(). And let's be honest -- nobody will notice this waste of extra
> 44 bytes.
> 

I'm ok with the diff, except for this detail:

> +u64ton(uint64_t n, char prefix)
>  {
>   char *p;
> - static char buf [20];
> + static char buf [BITS(uint64_t) + 2];
>  
>   p = [sizeof(buf)];
>   *--p = '\0';
>   do {
> - *--p = "0123456789ABCDEF"[n%base];
> - n /= base;
> + *--p = "0123456789ABCDEF"[n % 10];

I think you should drop ABCDEF in the string literal here.



ksh: fix buffer overflow in u64ton

2018-04-09 Thread Tobias Stoeckmann
As tb@ pointed out, u64ton can overflow on ULONG_MAX. It could also
happen on systems with 64 bit int and INT_MIN, although we don't have
such a system supported by our code base.

You can reach the u64ton function by printing the length of a string
within a variable like this:

$ a=string
$ echo ${#a}
6
$ _

Technically there is no need to use BITS(uint64_t) because 20+2 would
be enough. But it seems that there is a strong preference towards
going "long long" instead of int64_t due to return types of time()
and strtonum() as well as a highly theoretical support of larger types
than 64 bit in long long. Although that probably will never happen.

So, if we go with a data type like long long, I would prefer to have
the code prepared to scale its buffer with the data type, therefore
BITS(). And let's be honest -- nobody will notice this waste of extra
44 bytes.


Tobias

Index: eval.c
===
RCS file: /cvs/src/bin/ksh/eval.c,v
retrieving revision 1.60
diff -u -p -u -p -r1.60 eval.c
--- eval.c  9 Apr 2018 17:53:36 -   1.60
+++ eval.c  9 Apr 2018 18:47:45 -
@@ -732,7 +732,7 @@ varsub(Expand *xp, char *sp, char *word,
if (Flag(FNOUNSET) && c == 0 && !zero_ok)
errorf("%s: parameter not set", sp);
*stypep = 0; /* unqualified variable/string substitution */
-   xp->str = str_save(u64ton((uint64_t)c, 10), ATEMP);
+   xp->str = str_save(u64ton((uint64_t)c, '\0'), ATEMP);
return XSUB;
}
 
Index: misc.c
===
RCS file: /cvs/src/bin/ksh/misc.c,v
retrieving revision 1.70
diff -u -p -u -p -r1.70 misc.c
--- misc.c  9 Apr 2018 17:53:36 -   1.70
+++ misc.c  9 Apr 2018 18:47:45 -
@@ -56,20 +56,22 @@ initctypes(void)
setctypes(" \n\t\"#$&'()*;<>?[\\`|", C_QUOTE);
 }
 
-/* convert uint64_t to base N string */
+/* convert uint64_t N to a base 10 number as string with optional PREFIX */
 
 char *
-u64ton(uint64_t n, int base)
+u64ton(uint64_t n, char prefix)
 {
char *p;
-   static char buf [20];
+   static char buf [BITS(uint64_t) + 2];
 
p = [sizeof(buf)];
*--p = '\0';
do {
-   *--p = "0123456789ABCDEF"[n%base];
-   n /= base;
+   *--p = "0123456789ABCDEF"[n % 10];
+   n /= 10;
} while (n != 0);
+   if (prefix != '\0')
+   *--p = prefix;
return p;
 }
 
Index: sh.h
===
RCS file: /cvs/src/bin/ksh/sh.h,v
retrieving revision 1.72
diff -u -p -u -p -r1.72 sh.h
--- sh.h9 Apr 2018 17:53:36 -   1.72
+++ sh.h9 Apr 2018 18:47:45 -
@@ -527,7 +527,7 @@ voidcleanup_proc_env(void);
 /* misc.c */
 void   setctypes(const char *, int);
 void   initctypes(void);
-char * u64ton(uint64_t, int);
+char * u64ton(uint64_t, char);
 char * str_save(const char *, Area *);
 char * str_nsave(const char *, int, Area *);
 intoption(const char *);
Index: tree.c
===
RCS file: /cvs/src/bin/ksh/tree.c,v
retrieving revision 1.34
diff -u -p -u -p -r1.34 tree.c
--- tree.c  9 Apr 2018 17:53:36 -   1.34
+++ tree.c  9 Apr 2018 18:47:45 -
@@ -376,9 +376,7 @@ vfptreef(struct shf *shf, int indent, co
case 'd': /* decimal */
n = va_arg(va, int);
neg = n < 0;
-   p = u64ton(neg ? -n : n, 10);
-   if (neg)
-   *--p = '-';
+   p = u64ton(neg ? -n : n, neg ? '-' : '\0');
while (*p)
tputc(*p++, shf);
break;
@@ -392,7 +390,7 @@ vfptreef(struct shf *shf, int indent, co
tputS(p, shf);
break;
case 'u': /* unsigned decimal */
-   p = u64ton(va_arg(va, unsigned int), 10);
+   p = u64ton(va_arg(va, unsigned int), '\0');
while (*p)
tputc(*p++, shf);
break;