Re: [PATCH] Revert 78a00a7 and 3cd5386, they are not ready yet.

2018-11-15 Thread Michael Greenberg
Seconded! A test suite for dash would have saved me a great deal of 
trouble. I have a number of tests I've written that I'd be happy to 
contribute.

Is dash ever tested on the POSIX test suite? Certification is expensive, 
but their test suite seems to be quite thorough... and it may even be 
possible to use their tests even if dash isn't going to be officially 
POSIX compliant.

Cheers,
Michael

On 2018-11-15 9:03 AM, Devin Hussey wrote:
>  From 65f9c258658b50aeb67947f5b54e926538560488 Mon Sep 17 00:00:00 2001
> From: Devin Hussey 
> Date: Thu, 15 Nov 2018 08:29:33 -0500
> Subject: [PATCH] Revert 78a00a7 and 3cd5386, they are not ready yet.
>
> They are not ready, as simply running dash on its own configure
> script will reveal (note: config.cache does not exist):
>
>  checking for a BSD-compatible install... (cached)
>  checking whether build environment is sane... yes
>  configure: 1: eval: --is-lightweight: not found
>  configure: WARNING: 'missing' script is too old or missing
>  checking for a thread-safe mkdir -p... (cached)  -p
>  checking for gawk... (cached) no
>  checking for mawk... (cached) no
>  checking for nawk... (cached) no
>  checking for awk... (cached) no
>  checking whether make sets $(MAKE)... (cached) configure: 1: test:
> =: unexpected operator
>  no
>  checking whether make supports nested variables... (cached)
>  configure: 2878: test: =: unexpected operator
>  checking for gcc... (cached) no
>  checking for cc... (cached) no
>  checking for cl.exe... (cached) no
>  configure: error: in `/Users/Devin/dash2':
>  configure: error: no acceptable C compiler found in $PATH
>  See `config.log' for more details
>
> 3cd5386 completely broke expansion.
>
> I got the configure script to run by changing
>
>  return p - 1;
>
> to
>
>  return p;
>
> in argstr, but variable expansion is still broken.
>
> The most broken thing is ${FOO+BAR}:
>
>  $ bash -c 'unset FOO; echo ${FOO+BAR}'
>
>  $ ./dash -c 'unset FOO; echo ${FOO+BAR}'
>  BAR
>  $
>
> We should really set up a test suite, as that isn't the first
> time some obvious bugs have made their way into the code base.
> I wonder if FreeBSD will let us use their tests, as they have
> a very (too?) expansive set of tests.
>
> At the very least, we should make sure dash can run its own
> configure script, as that is the most common shell script
> to run.
>
> Signed-off-by: Devin Hussey 
> ---
>   src/expand.c | 321 ---
>   src/expand.h |   2 +-
>   2 files changed, 176 insertions(+), 147 deletions(-)
>
> diff --git a/src/expand.c b/src/expand.c
> index 14daa63..7a51766 100644
> --- a/src/expand.c
> +++ b/src/expand.c
> @@ -110,13 +110,13 @@ static struct ifsregion *ifslastp;
>   /* holds expanded arg list */
>   static struct arglist exparg;
>
> -static char *argstr(char *p, int flag);
> -static char *exptilde(char *startp, int flag);
> -static char *expari(char *start, int flag);
> +STATIC void argstr(char *, int);
> +STATIC char *exptilde(char *, char *, int);
>   STATIC void expbackq(union node *, int);
> +STATIC const char *subevalvar(char *, char *, int, int, int, int, int);
>   STATIC char *evalvar(char *, int);
>   static size_t strtodest(const char *p, int flags);
> -static size_t memtodest(const char *p, size_t len, int flags);
> +static void memtodest(const char *p, size_t len, int flags);
>   STATIC ssize_t varvalue(char *, int, int, int);
>   STATIC void expandmeta(struct strlist *, int);
>   #ifdef HAVE_GLOB
> @@ -133,7 +133,7 @@ STATIC int pmatch(const char *, const char *);
>   #else
>   #define pmatch(a, b) !fnmatch((a), (b), 0)
>   #endif
> -static size_t cvtnum(intmax_t num, int flags);
> +STATIC int cvtnum(intmax_t);
>   STATIC size_t esclen(const char *, const char *);
>   STATIC char *scanleft(char *, char *, char *, char *, int, int);
>   STATIC char *scanright(char *, char *, char *, char *, int, int);
> @@ -192,11 +192,13 @@ expandarg(union node *arg, struct arglist
> *arglist, int flag)
>   argbackq = arg->narg.backquote;
>   STARTSTACKSTR(expdest);
>   argstr(arg->narg.text, flag);
> +p = _STPUTC('\0', expdest);
> +expdest = p - 1;
>   if (arglist == NULL) {
>   /* here document expanded */
>   goto out;
>   }
> -p = grabstackstr(expdest);
> +p = grabstackstr(p);
>   exparg.lastp = 
>   /*
>* TODO - EXP_REDIR
> @@ -230,7 +232,8 @@ out:
>* $@ like $* since no splitting will be performed.
>*/
>
> -static char *argstr(char *p, int flag)
> +STATIC void
> +argstr(char *p, int flag)
>   {
>   static const char spclchars[] = {
>   '=',
> @@ -240,7 +243,6 @@ static char *argstr(char *p, int flag)
>   CTLESC,
>   CTLVAR,
>   CTLBACKQ,
> -CTLARI,
>   CTLENDARI,
>   0
>   };
> @@ -251,41 +253,35 @@ static char *argstr(char *p, int flag)

Re: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with $(...).

2018-11-15 Thread Devin Hussey
>From 31afca233f67dde67181efd7ed594cd2c25fefa6 Mon Sep 17 00:00:00 2001
From: Devin Hussey 
Date: Thu, 15 Nov 2018 10:30:05 -0500
Subject: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with
 $(...).

Sorry about the multiple commits at once.

Signed-off-by: Devin Hussey 
---
 src/mktokens | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mktokens b/src/mktokens
index cd52241..9e56c10 100644
--- a/src/mktokens
+++ b/src/mktokens
@@ -37,7 +37,10 @@
 # token marks the end of a list.  The third column is the name to print in
 # error messages.

-cat > /tmp/ka$$ <<\!
+# set TMPDIR if it isn't already
+: "${TMPDIR:=/tmp}"
+
+cat > "${TMPDIR}/ka$$" <<\!
 TEOF1end of file
 TNL0newline
 TSEMI0";"
@@ -68,28 +71,28 @@ TWHILE0"while"
 TBEGIN0"{"
 TEND1"}"
 !
-nl=`wc -l /tmp/ka$$`
+nl=$(wc -l "${TMPDIR}/ka$$")
 exec > token.h
-awk '{print "#define " $1 " " NR-1}' /tmp/ka$$
+awk '{print "#define " $1 " " NR-1}' "${TMPDIR}/ka$$"

 exec > token_vars.h

 echo '
 /* Array indicating which tokens mark the end of a list */
 static const char tokendlist[] = {'
-awk '{print "\t" $2 ","}' /tmp/ka$$
+awk '{print "\t" $2 ","}' "${TMPDIR}/ka$$"
 echo '};

 static const char *const tokname[] = {'
 sed -e 's/"/\\"/g' \
 -e 's/[^ ]*[ ][ ]*[^ ]*[ ][ ]*\(.*\)/"\1",/' \
-/tmp/ka$$
+"${TMPDIR}/ka$$"
 echo '};
 '
-sed 's/"//g' /tmp/ka$$ | awk '
+sed 's/"//g' "${TMPDIR}/ka$$" | awk '
 /TNOT/{print "#define KWDOFFSET " NR-1; print "";
   print "static const char *const parsekwd[] = {"}
 /TNOT/,/neverfound/{if (last) print "\"" last "\","; last = $3}
 END{print "\"" last "\"\n};"}'

-rm /tmp/ka$$
+rm "${TMPDIR}/ka$$"
-- 
2.19.1


Re: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with $(...).

2018-11-15 Thread Eric Blake

On 11/15/18 9:32 AM, Devin Hussey wrote:

From b9724fc82eda2b0d164c33ad3e871d38b298d1ad Mon Sep 17 00:00:00 2001

From: Devin Hussey 
Date: Thu, 15 Nov 2018 10:30:05 -0500
Subject: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with
  $(...).

Sorry about the multiple commits at once.

Signed-off-by: Devin Hussey 
---
  src/mktokens | 17 ++---
  1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mktokens b/src/mktokens
index cd52241..ec801cc 100644
--- a/src/mktokens
+++ b/src/mktokens
@@ -37,7 +37,10 @@
  # token marks the end of a list.  The third column is the name to print in
  # error messages.

-cat > /tmp/ka$$ <<\!
+# set TMPDIR if it isn't already
+[ -z "${TMPDIR}" ] && TMPDIR="/tmp"


Shorter as:

: "${TMPDIR:=/tmp}"

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org


[PATCH] Make mktokens accept a random TMPDIR, replace `...` with $(...).

2018-11-15 Thread Devin Hussey
>From b9724fc82eda2b0d164c33ad3e871d38b298d1ad Mon Sep 17 00:00:00 2001
From: Devin Hussey 
Date: Thu, 15 Nov 2018 10:30:05 -0500
Subject: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with
 $(...).

Sorry about the multiple commits at once.

Signed-off-by: Devin Hussey 
---
 src/mktokens | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mktokens b/src/mktokens
index cd52241..ec801cc 100644
--- a/src/mktokens
+++ b/src/mktokens
@@ -37,7 +37,10 @@
 # token marks the end of a list.  The third column is the name to print in
 # error messages.

-cat > /tmp/ka$$ <<\!
+# set TMPDIR if it isn't already
+[ -z "${TMPDIR}" ] && TMPDIR="/tmp"
+
+cat > "${TMPDIR}/ka$$" <<\!
 TEOF1end of file
 TNL0newline
 TSEMI0";"
@@ -68,28 +71,28 @@ TWHILE0"while"
 TBEGIN0"{"
 TEND1"}"
 !
-nl=`wc -l /tmp/ka$$`
+nl=$(wc -l "${TMPDIR}/ka$$")
 exec > token.h
-awk '{print "#define " $1 " " NR-1}' /tmp/ka$$
+awk '{print "#define " $1 " " NR-1}' "${TMPDIR}/ka$$"

 exec > token_vars.h

 echo '
 /* Array indicating which tokens mark the end of a list */
 static const char tokendlist[] = {'
-awk '{print "\t" $2 ","}' /tmp/ka$$
+awk '{print "\t" $2 ","}' "${TMPDIR}/ka$$"
 echo '};

 static const char *const tokname[] = {'
 sed -e 's/"/\\"/g' \
 -e 's/[^ ]*[ ][ ]*[^ ]*[ ][ ]*\(.*\)/"\1",/' \
-/tmp/ka$$
+"${TMPDIR}/ka$$"
 echo '};
 '
-sed 's/"//g' /tmp/ka$$ | awk '
+sed 's/"//g' "${TMPDIR}/ka$$" | awk '
 /TNOT/{print "#define KWDOFFSET " NR-1; print "";
   print "static const char *const parsekwd[] = {"}
 /TNOT/,/neverfound/{if (last) print "\"" last "\","; last = $3}
 END{print "\"" last "\"\n};"}'

-rm /tmp/ka$$
+rm "${TMPDIR}/ka$$"
-- 
2.19.1


[PATCH] Use a real non-cryptographic hash algorithm

2018-11-15 Thread Devin Hussey
>From 502cbd61e386eb014035df66b032e90a9de926b7 Mon Sep 17 00:00:00 2001
From: Devin Hussey 
Date: Thu, 15 Nov 2018 09:12:24 -0500
Subject: [PATCH] Use a real non-cryptographic hash algorithm

dash previously used a modified version of the LoseLose algorithm.

LoseLose is a TERRIBLE algorithm.

It has terrible distribution, leaving many hash entries unused.

However, more importantly, it is incredibly easy to make a
collision; even something as simple as rearranging the letters.

For example. these all produce the hash 1652:

Hello
Holle
Hlleo
Hoell
Haxed\n
HASH\tBAD
Hater

Collsions in hash tables are very bad because it requires
looking through a linked list, and the benefits of O(1) time
in a hash table are completely lost.

The FNV-1a algorithm has comparable performance and code size
while having a much better collision rate.

While there are some other faster and more resistant hashes out there,
namely xxHash, Murmur2, and CityHash, the benefits are minimal on
short strings and they expect a length argument, unlike FNV-1a which
simply uses null-termination, and they are not in the public domain
like FNV-1a.

Signed-off-by: Devin Hussey 
---
 src/alias.c | 14 --
 src/exec.c  | 11 +--
 src/shell.h |  3 +++
 src/var.c   |  5 ++---
 4 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/src/alias.c b/src/alias.c
index daeacbb..f625199 100644
--- a/src/alias.c
+++ b/src/alias.c
@@ -202,19 +202,13 @@ printalias(const struct alias *ap) {

 STATIC struct alias **
 __lookupalias(const char *name) {
-unsigned int hashval;
 struct alias **app;
-const char *p;
-unsigned int ch;
+unsigned int hashval = FNV_OFFSET_BASIS;
+const char *p = name;

-p = name;
+while (*p)
+hashval = (hashval ^ (unsigned char)*p++) * FNV_PRIME;

-ch = (unsigned char)*p;
-hashval = ch << 4;
-while (ch) {
-hashval += ch;
-ch = (unsigned char)*++p;
-}
 app = [hashval % ATABSIZE];

 for (; *app; app = &(*app)->next) {
diff --git a/src/exec.c b/src/exec.c
index 9d0215a..c6b515d 100644
--- a/src/exec.c
+++ b/src/exec.c
@@ -638,16 +638,15 @@ struct tblentry **lastcmdentry;
 STATIC struct tblentry *
 cmdlookup(const char *name, int add)
 {
-unsigned int hashval;
-const char *p;
 struct tblentry *cmdp;
 struct tblentry **pp;

-p = name;
-hashval = (unsigned char)*p << 4;
+unsigned int hashval = FNV_OFFSET_BASIS;
+const char *p = name;
+
 while (*p)
-hashval += (unsigned char)*p++;
-hashval &= 0x7FFF;
+hashval = (hashval ^ (unsigned char)*p++) * FNV_PRIME;
+
 pp = [hashval % CMDTABLESIZE];
 for (cmdp = *pp ; cmdp ; cmdp = cmdp->next) {
 if (equal(cmdp->cmdname, name))
diff --git a/src/shell.h b/src/shell.h
index 98edc8b..d08d0d5 100644
--- a/src/shell.h
+++ b/src/shell.h
@@ -82,6 +82,9 @@ extern char nullstr[1];/* null string */
 #define TRACEV(param)
 #endif

+#define FNV_PRIME 16777619u
+#define FNV_OFFSET_BASIS 2166136261u
+
 #if defined(__GNUC__) && __GNUC__ < 3
 #define va_copy __va_copy
 #endif
diff --git a/src/var.c b/src/var.c
index 0d7e1db..be34731 100644
--- a/src/var.c
+++ b/src/var.c
@@ -637,11 +637,10 @@ void unsetvar(const char *s)
 STATIC struct var **
 hashvar(const char *p)
 {
-unsigned int hashval;
+unsigned int hashval = FNV_OFFSET_BASIS;

-hashval = ((unsigned char) *p) << 4;
 while (*p && *p != '=')
-hashval += (unsigned char) *p++;
+hashval = (hashval ^ (unsigned char) *p++) + FNV_PRIME;
 return [hashval % VTABSIZE];
 }

-- 
2.19.1


[PATCH] Revert 78a00a7 and 3cd5386, they are not ready yet.

2018-11-15 Thread Devin Hussey
>From 65f9c258658b50aeb67947f5b54e926538560488 Mon Sep 17 00:00:00 2001
From: Devin Hussey 
Date: Thu, 15 Nov 2018 08:29:33 -0500
Subject: [PATCH] Revert 78a00a7 and 3cd5386, they are not ready yet.

They are not ready, as simply running dash on its own configure
script will reveal (note: config.cache does not exist):

checking for a BSD-compatible install... (cached)
checking whether build environment is sane... yes
configure: 1: eval: --is-lightweight: not found
configure: WARNING: 'missing' script is too old or missing
checking for a thread-safe mkdir -p... (cached)  -p
checking for gawk... (cached) no
checking for mawk... (cached) no
checking for nawk... (cached) no
checking for awk... (cached) no
checking whether make sets $(MAKE)... (cached) configure: 1: test:
=: unexpected operator
no
checking whether make supports nested variables... (cached)
configure: 2878: test: =: unexpected operator
checking for gcc... (cached) no
checking for cc... (cached) no
checking for cl.exe... (cached) no
configure: error: in `/Users/Devin/dash2':
configure: error: no acceptable C compiler found in $PATH
See `config.log' for more details

3cd5386 completely broke expansion.

I got the configure script to run by changing

return p - 1;

to

return p;

in argstr, but variable expansion is still broken.

The most broken thing is ${FOO+BAR}:

$ bash -c 'unset FOO; echo ${FOO+BAR}'

$ ./dash -c 'unset FOO; echo ${FOO+BAR}'
BAR
$

We should really set up a test suite, as that isn't the first
time some obvious bugs have made their way into the code base.
I wonder if FreeBSD will let us use their tests, as they have
a very (too?) expansive set of tests.

At the very least, we should make sure dash can run its own
configure script, as that is the most common shell script
to run.

Signed-off-by: Devin Hussey 
---
 src/expand.c | 321 ---
 src/expand.h |   2 +-
 2 files changed, 176 insertions(+), 147 deletions(-)

diff --git a/src/expand.c b/src/expand.c
index 14daa63..7a51766 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -110,13 +110,13 @@ static struct ifsregion *ifslastp;
 /* holds expanded arg list */
 static struct arglist exparg;

-static char *argstr(char *p, int flag);
-static char *exptilde(char *startp, int flag);
-static char *expari(char *start, int flag);
+STATIC void argstr(char *, int);
+STATIC char *exptilde(char *, char *, int);
 STATIC void expbackq(union node *, int);
+STATIC const char *subevalvar(char *, char *, int, int, int, int, int);
 STATIC char *evalvar(char *, int);
 static size_t strtodest(const char *p, int flags);
-static size_t memtodest(const char *p, size_t len, int flags);
+static void memtodest(const char *p, size_t len, int flags);
 STATIC ssize_t varvalue(char *, int, int, int);
 STATIC void expandmeta(struct strlist *, int);
 #ifdef HAVE_GLOB
@@ -133,7 +133,7 @@ STATIC int pmatch(const char *, const char *);
 #else
 #define pmatch(a, b) !fnmatch((a), (b), 0)
 #endif
-static size_t cvtnum(intmax_t num, int flags);
+STATIC int cvtnum(intmax_t);
 STATIC size_t esclen(const char *, const char *);
 STATIC char *scanleft(char *, char *, char *, char *, int, int);
 STATIC char *scanright(char *, char *, char *, char *, int, int);
@@ -192,11 +192,13 @@ expandarg(union node *arg, struct arglist
*arglist, int flag)
 argbackq = arg->narg.backquote;
 STARTSTACKSTR(expdest);
 argstr(arg->narg.text, flag);
+p = _STPUTC('\0', expdest);
+expdest = p - 1;
 if (arglist == NULL) {
 /* here document expanded */
 goto out;
 }
-p = grabstackstr(expdest);
+p = grabstackstr(p);
 exparg.lastp = 
 /*
  * TODO - EXP_REDIR
@@ -230,7 +232,8 @@ out:
  * $@ like $* since no splitting will be performed.
  */

-static char *argstr(char *p, int flag)
+STATIC void
+argstr(char *p, int flag)
 {
 static const char spclchars[] = {
 '=',
@@ -240,7 +243,6 @@ static char *argstr(char *p, int flag)
 CTLESC,
 CTLVAR,
 CTLBACKQ,
-CTLARI,
 CTLENDARI,
 0
 };
@@ -251,41 +253,35 @@ static char *argstr(char *p, int flag)
 size_t length;
 int startloc;

-reject += !!(flag & EXP_VARTILDE2);
-reject += flag & EXP_VARTILDE ? 0 : 2;
+if (!(flag & EXP_VARTILDE)) {
+reject += 2;
+} else if (flag & EXP_VARTILDE2) {
+reject++;
+}
 inquotes = 0;
 length = 0;
 if (flag & EXP_TILDE) {
+char *q;
+
 flag &= ~EXP_TILDE;
 tilde:
-if (*p == '~')
-p = exptilde(p, flag);
+q = p;
+if (*q == '~')
+p = exptilde(p, q, flag);
 }
 start:
 startloc = expdest - (char *)stackblock();
 for (;;) {
-int end;
-
 length += strcspn(p + length, reject);
-end = 0;
 c = (signed char)p[length];
-if (!(c & 0x80) || c ==