Re: [PATCH] Revert 78a00a7 and 3cd5386, they are not ready yet.
Seconded! A test suite for dash would have saved me a great deal of trouble. I have a number of tests I've written that I'd be happy to contribute. Is dash ever tested on the POSIX test suite? Certification is expensive, but their test suite seems to be quite thorough... and it may even be possible to use their tests even if dash isn't going to be officially POSIX compliant. Cheers, Michael On 2018-11-15 9:03 AM, Devin Hussey wrote: > From 65f9c258658b50aeb67947f5b54e926538560488 Mon Sep 17 00:00:00 2001 > From: Devin Hussey > Date: Thu, 15 Nov 2018 08:29:33 -0500 > Subject: [PATCH] Revert 78a00a7 and 3cd5386, they are not ready yet. > > They are not ready, as simply running dash on its own configure > script will reveal (note: config.cache does not exist): > > checking for a BSD-compatible install... (cached) > checking whether build environment is sane... yes > configure: 1: eval: --is-lightweight: not found > configure: WARNING: 'missing' script is too old or missing > checking for a thread-safe mkdir -p... (cached) -p > checking for gawk... (cached) no > checking for mawk... (cached) no > checking for nawk... (cached) no > checking for awk... (cached) no > checking whether make sets $(MAKE)... (cached) configure: 1: test: > =: unexpected operator > no > checking whether make supports nested variables... (cached) > configure: 2878: test: =: unexpected operator > checking for gcc... (cached) no > checking for cc... (cached) no > checking for cl.exe... (cached) no > configure: error: in `/Users/Devin/dash2': > configure: error: no acceptable C compiler found in $PATH > See `config.log' for more details > > 3cd5386 completely broke expansion. > > I got the configure script to run by changing > > return p - 1; > > to > > return p; > > in argstr, but variable expansion is still broken. > > The most broken thing is ${FOO+BAR}: > > $ bash -c 'unset FOO; echo ${FOO+BAR}' > > $ ./dash -c 'unset FOO; echo ${FOO+BAR}' > BAR > $ > > We should really set up a test suite, as that isn't the first > time some obvious bugs have made their way into the code base. > I wonder if FreeBSD will let us use their tests, as they have > a very (too?) expansive set of tests. > > At the very least, we should make sure dash can run its own > configure script, as that is the most common shell script > to run. > > Signed-off-by: Devin Hussey > --- > src/expand.c | 321 --- > src/expand.h | 2 +- > 2 files changed, 176 insertions(+), 147 deletions(-) > > diff --git a/src/expand.c b/src/expand.c > index 14daa63..7a51766 100644 > --- a/src/expand.c > +++ b/src/expand.c > @@ -110,13 +110,13 @@ static struct ifsregion *ifslastp; > /* holds expanded arg list */ > static struct arglist exparg; > > -static char *argstr(char *p, int flag); > -static char *exptilde(char *startp, int flag); > -static char *expari(char *start, int flag); > +STATIC void argstr(char *, int); > +STATIC char *exptilde(char *, char *, int); > STATIC void expbackq(union node *, int); > +STATIC const char *subevalvar(char *, char *, int, int, int, int, int); > STATIC char *evalvar(char *, int); > static size_t strtodest(const char *p, int flags); > -static size_t memtodest(const char *p, size_t len, int flags); > +static void memtodest(const char *p, size_t len, int flags); > STATIC ssize_t varvalue(char *, int, int, int); > STATIC void expandmeta(struct strlist *, int); > #ifdef HAVE_GLOB > @@ -133,7 +133,7 @@ STATIC int pmatch(const char *, const char *); > #else > #define pmatch(a, b) !fnmatch((a), (b), 0) > #endif > -static size_t cvtnum(intmax_t num, int flags); > +STATIC int cvtnum(intmax_t); > STATIC size_t esclen(const char *, const char *); > STATIC char *scanleft(char *, char *, char *, char *, int, int); > STATIC char *scanright(char *, char *, char *, char *, int, int); > @@ -192,11 +192,13 @@ expandarg(union node *arg, struct arglist > *arglist, int flag) > argbackq = arg->narg.backquote; > STARTSTACKSTR(expdest); > argstr(arg->narg.text, flag); > +p = _STPUTC('\0', expdest); > +expdest = p - 1; > if (arglist == NULL) { > /* here document expanded */ > goto out; > } > -p = grabstackstr(expdest); > +p = grabstackstr(p); > exparg.lastp = > /* >* TODO - EXP_REDIR > @@ -230,7 +232,8 @@ out: >* $@ like $* since no splitting will be performed. >*/ > > -static char *argstr(char *p, int flag) > +STATIC void > +argstr(char *p, int flag) > { > static const char spclchars[] = { > '=', > @@ -240,7 +243,6 @@ static char *argstr(char *p, int flag) > CTLESC, > CTLVAR, > CTLBACKQ, > -CTLARI, > CTLENDARI, > 0 > }; > @@ -251,41 +253,35 @@ static char *argstr(char *p, int flag)
Re: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with $(...).
>From 31afca233f67dde67181efd7ed594cd2c25fefa6 Mon Sep 17 00:00:00 2001 From: Devin Hussey Date: Thu, 15 Nov 2018 10:30:05 -0500 Subject: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with $(...). Sorry about the multiple commits at once. Signed-off-by: Devin Hussey --- src/mktokens | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/mktokens b/src/mktokens index cd52241..9e56c10 100644 --- a/src/mktokens +++ b/src/mktokens @@ -37,7 +37,10 @@ # token marks the end of a list. The third column is the name to print in # error messages. -cat > /tmp/ka$$ <<\! +# set TMPDIR if it isn't already +: "${TMPDIR:=/tmp}" + +cat > "${TMPDIR}/ka$$" <<\! TEOF1end of file TNL0newline TSEMI0";" @@ -68,28 +71,28 @@ TWHILE0"while" TBEGIN0"{" TEND1"}" ! -nl=`wc -l /tmp/ka$$` +nl=$(wc -l "${TMPDIR}/ka$$") exec > token.h -awk '{print "#define " $1 " " NR-1}' /tmp/ka$$ +awk '{print "#define " $1 " " NR-1}' "${TMPDIR}/ka$$" exec > token_vars.h echo ' /* Array indicating which tokens mark the end of a list */ static const char tokendlist[] = {' -awk '{print "\t" $2 ","}' /tmp/ka$$ +awk '{print "\t" $2 ","}' "${TMPDIR}/ka$$" echo '}; static const char *const tokname[] = {' sed -e 's/"/\\"/g' \ -e 's/[^ ]*[ ][ ]*[^ ]*[ ][ ]*\(.*\)/"\1",/' \ -/tmp/ka$$ +"${TMPDIR}/ka$$" echo '}; ' -sed 's/"//g' /tmp/ka$$ | awk ' +sed 's/"//g' "${TMPDIR}/ka$$" | awk ' /TNOT/{print "#define KWDOFFSET " NR-1; print ""; print "static const char *const parsekwd[] = {"} /TNOT/,/neverfound/{if (last) print "\"" last "\","; last = $3} END{print "\"" last "\"\n};"}' -rm /tmp/ka$$ +rm "${TMPDIR}/ka$$" -- 2.19.1
Re: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with $(...).
On 11/15/18 9:32 AM, Devin Hussey wrote: From b9724fc82eda2b0d164c33ad3e871d38b298d1ad Mon Sep 17 00:00:00 2001 From: Devin Hussey Date: Thu, 15 Nov 2018 10:30:05 -0500 Subject: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with $(...). Sorry about the multiple commits at once. Signed-off-by: Devin Hussey --- src/mktokens | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/mktokens b/src/mktokens index cd52241..ec801cc 100644 --- a/src/mktokens +++ b/src/mktokens @@ -37,7 +37,10 @@ # token marks the end of a list. The third column is the name to print in # error messages. -cat > /tmp/ka$$ <<\! +# set TMPDIR if it isn't already +[ -z "${TMPDIR}" ] && TMPDIR="/tmp" Shorter as: : "${TMPDIR:=/tmp}" -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
[PATCH] Make mktokens accept a random TMPDIR, replace `...` with $(...).
>From b9724fc82eda2b0d164c33ad3e871d38b298d1ad Mon Sep 17 00:00:00 2001 From: Devin Hussey Date: Thu, 15 Nov 2018 10:30:05 -0500 Subject: [PATCH] Make mktokens accept a random TMPDIR, replace `...` with $(...). Sorry about the multiple commits at once. Signed-off-by: Devin Hussey --- src/mktokens | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/mktokens b/src/mktokens index cd52241..ec801cc 100644 --- a/src/mktokens +++ b/src/mktokens @@ -37,7 +37,10 @@ # token marks the end of a list. The third column is the name to print in # error messages. -cat > /tmp/ka$$ <<\! +# set TMPDIR if it isn't already +[ -z "${TMPDIR}" ] && TMPDIR="/tmp" + +cat > "${TMPDIR}/ka$$" <<\! TEOF1end of file TNL0newline TSEMI0";" @@ -68,28 +71,28 @@ TWHILE0"while" TBEGIN0"{" TEND1"}" ! -nl=`wc -l /tmp/ka$$` +nl=$(wc -l "${TMPDIR}/ka$$") exec > token.h -awk '{print "#define " $1 " " NR-1}' /tmp/ka$$ +awk '{print "#define " $1 " " NR-1}' "${TMPDIR}/ka$$" exec > token_vars.h echo ' /* Array indicating which tokens mark the end of a list */ static const char tokendlist[] = {' -awk '{print "\t" $2 ","}' /tmp/ka$$ +awk '{print "\t" $2 ","}' "${TMPDIR}/ka$$" echo '}; static const char *const tokname[] = {' sed -e 's/"/\\"/g' \ -e 's/[^ ]*[ ][ ]*[^ ]*[ ][ ]*\(.*\)/"\1",/' \ -/tmp/ka$$ +"${TMPDIR}/ka$$" echo '}; ' -sed 's/"//g' /tmp/ka$$ | awk ' +sed 's/"//g' "${TMPDIR}/ka$$" | awk ' /TNOT/{print "#define KWDOFFSET " NR-1; print ""; print "static const char *const parsekwd[] = {"} /TNOT/,/neverfound/{if (last) print "\"" last "\","; last = $3} END{print "\"" last "\"\n};"}' -rm /tmp/ka$$ +rm "${TMPDIR}/ka$$" -- 2.19.1
[PATCH] Use a real non-cryptographic hash algorithm
>From 502cbd61e386eb014035df66b032e90a9de926b7 Mon Sep 17 00:00:00 2001 From: Devin Hussey Date: Thu, 15 Nov 2018 09:12:24 -0500 Subject: [PATCH] Use a real non-cryptographic hash algorithm dash previously used a modified version of the LoseLose algorithm. LoseLose is a TERRIBLE algorithm. It has terrible distribution, leaving many hash entries unused. However, more importantly, it is incredibly easy to make a collision; even something as simple as rearranging the letters. For example. these all produce the hash 1652: Hello Holle Hlleo Hoell Haxed\n HASH\tBAD Hater Collsions in hash tables are very bad because it requires looking through a linked list, and the benefits of O(1) time in a hash table are completely lost. The FNV-1a algorithm has comparable performance and code size while having a much better collision rate. While there are some other faster and more resistant hashes out there, namely xxHash, Murmur2, and CityHash, the benefits are minimal on short strings and they expect a length argument, unlike FNV-1a which simply uses null-termination, and they are not in the public domain like FNV-1a. Signed-off-by: Devin Hussey --- src/alias.c | 14 -- src/exec.c | 11 +-- src/shell.h | 3 +++ src/var.c | 5 ++--- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/alias.c b/src/alias.c index daeacbb..f625199 100644 --- a/src/alias.c +++ b/src/alias.c @@ -202,19 +202,13 @@ printalias(const struct alias *ap) { STATIC struct alias ** __lookupalias(const char *name) { -unsigned int hashval; struct alias **app; -const char *p; -unsigned int ch; +unsigned int hashval = FNV_OFFSET_BASIS; +const char *p = name; -p = name; +while (*p) +hashval = (hashval ^ (unsigned char)*p++) * FNV_PRIME; -ch = (unsigned char)*p; -hashval = ch << 4; -while (ch) { -hashval += ch; -ch = (unsigned char)*++p; -} app = [hashval % ATABSIZE]; for (; *app; app = &(*app)->next) { diff --git a/src/exec.c b/src/exec.c index 9d0215a..c6b515d 100644 --- a/src/exec.c +++ b/src/exec.c @@ -638,16 +638,15 @@ struct tblentry **lastcmdentry; STATIC struct tblentry * cmdlookup(const char *name, int add) { -unsigned int hashval; -const char *p; struct tblentry *cmdp; struct tblentry **pp; -p = name; -hashval = (unsigned char)*p << 4; +unsigned int hashval = FNV_OFFSET_BASIS; +const char *p = name; + while (*p) -hashval += (unsigned char)*p++; -hashval &= 0x7FFF; +hashval = (hashval ^ (unsigned char)*p++) * FNV_PRIME; + pp = [hashval % CMDTABLESIZE]; for (cmdp = *pp ; cmdp ; cmdp = cmdp->next) { if (equal(cmdp->cmdname, name)) diff --git a/src/shell.h b/src/shell.h index 98edc8b..d08d0d5 100644 --- a/src/shell.h +++ b/src/shell.h @@ -82,6 +82,9 @@ extern char nullstr[1];/* null string */ #define TRACEV(param) #endif +#define FNV_PRIME 16777619u +#define FNV_OFFSET_BASIS 2166136261u + #if defined(__GNUC__) && __GNUC__ < 3 #define va_copy __va_copy #endif diff --git a/src/var.c b/src/var.c index 0d7e1db..be34731 100644 --- a/src/var.c +++ b/src/var.c @@ -637,11 +637,10 @@ void unsetvar(const char *s) STATIC struct var ** hashvar(const char *p) { -unsigned int hashval; +unsigned int hashval = FNV_OFFSET_BASIS; -hashval = ((unsigned char) *p) << 4; while (*p && *p != '=') -hashval += (unsigned char) *p++; +hashval = (hashval ^ (unsigned char) *p++) + FNV_PRIME; return [hashval % VTABSIZE]; } -- 2.19.1
[PATCH] Revert 78a00a7 and 3cd5386, they are not ready yet.
>From 65f9c258658b50aeb67947f5b54e926538560488 Mon Sep 17 00:00:00 2001 From: Devin Hussey Date: Thu, 15 Nov 2018 08:29:33 -0500 Subject: [PATCH] Revert 78a00a7 and 3cd5386, they are not ready yet. They are not ready, as simply running dash on its own configure script will reveal (note: config.cache does not exist): checking for a BSD-compatible install... (cached) checking whether build environment is sane... yes configure: 1: eval: --is-lightweight: not found configure: WARNING: 'missing' script is too old or missing checking for a thread-safe mkdir -p... (cached) -p checking for gawk... (cached) no checking for mawk... (cached) no checking for nawk... (cached) no checking for awk... (cached) no checking whether make sets $(MAKE)... (cached) configure: 1: test: =: unexpected operator no checking whether make supports nested variables... (cached) configure: 2878: test: =: unexpected operator checking for gcc... (cached) no checking for cc... (cached) no checking for cl.exe... (cached) no configure: error: in `/Users/Devin/dash2': configure: error: no acceptable C compiler found in $PATH See `config.log' for more details 3cd5386 completely broke expansion. I got the configure script to run by changing return p - 1; to return p; in argstr, but variable expansion is still broken. The most broken thing is ${FOO+BAR}: $ bash -c 'unset FOO; echo ${FOO+BAR}' $ ./dash -c 'unset FOO; echo ${FOO+BAR}' BAR $ We should really set up a test suite, as that isn't the first time some obvious bugs have made their way into the code base. I wonder if FreeBSD will let us use their tests, as they have a very (too?) expansive set of tests. At the very least, we should make sure dash can run its own configure script, as that is the most common shell script to run. Signed-off-by: Devin Hussey --- src/expand.c | 321 --- src/expand.h | 2 +- 2 files changed, 176 insertions(+), 147 deletions(-) diff --git a/src/expand.c b/src/expand.c index 14daa63..7a51766 100644 --- a/src/expand.c +++ b/src/expand.c @@ -110,13 +110,13 @@ static struct ifsregion *ifslastp; /* holds expanded arg list */ static struct arglist exparg; -static char *argstr(char *p, int flag); -static char *exptilde(char *startp, int flag); -static char *expari(char *start, int flag); +STATIC void argstr(char *, int); +STATIC char *exptilde(char *, char *, int); STATIC void expbackq(union node *, int); +STATIC const char *subevalvar(char *, char *, int, int, int, int, int); STATIC char *evalvar(char *, int); static size_t strtodest(const char *p, int flags); -static size_t memtodest(const char *p, size_t len, int flags); +static void memtodest(const char *p, size_t len, int flags); STATIC ssize_t varvalue(char *, int, int, int); STATIC void expandmeta(struct strlist *, int); #ifdef HAVE_GLOB @@ -133,7 +133,7 @@ STATIC int pmatch(const char *, const char *); #else #define pmatch(a, b) !fnmatch((a), (b), 0) #endif -static size_t cvtnum(intmax_t num, int flags); +STATIC int cvtnum(intmax_t); STATIC size_t esclen(const char *, const char *); STATIC char *scanleft(char *, char *, char *, char *, int, int); STATIC char *scanright(char *, char *, char *, char *, int, int); @@ -192,11 +192,13 @@ expandarg(union node *arg, struct arglist *arglist, int flag) argbackq = arg->narg.backquote; STARTSTACKSTR(expdest); argstr(arg->narg.text, flag); +p = _STPUTC('\0', expdest); +expdest = p - 1; if (arglist == NULL) { /* here document expanded */ goto out; } -p = grabstackstr(expdest); +p = grabstackstr(p); exparg.lastp = /* * TODO - EXP_REDIR @@ -230,7 +232,8 @@ out: * $@ like $* since no splitting will be performed. */ -static char *argstr(char *p, int flag) +STATIC void +argstr(char *p, int flag) { static const char spclchars[] = { '=', @@ -240,7 +243,6 @@ static char *argstr(char *p, int flag) CTLESC, CTLVAR, CTLBACKQ, -CTLARI, CTLENDARI, 0 }; @@ -251,41 +253,35 @@ static char *argstr(char *p, int flag) size_t length; int startloc; -reject += !!(flag & EXP_VARTILDE2); -reject += flag & EXP_VARTILDE ? 0 : 2; +if (!(flag & EXP_VARTILDE)) { +reject += 2; +} else if (flag & EXP_VARTILDE2) { +reject++; +} inquotes = 0; length = 0; if (flag & EXP_TILDE) { +char *q; + flag &= ~EXP_TILDE; tilde: -if (*p == '~') -p = exptilde(p, flag); +q = p; +if (*q == '~') +p = exptilde(p, q, flag); } start: startloc = expdest - (char *)stackblock(); for (;;) { -int end; - length += strcspn(p + length, reject); -end = 0; c = (signed char)p[length]; -if (!(c & 0x80) || c ==