Re: dash bug: double-quoted "\" breaks glob protection for next char

2018-03-07 Thread Herbert Xu
On Thu, Mar 08, 2018 at 01:40:32AM +0100, Harald van Dijk wrote:
>
> If the syntax stack is to be stored on the actual stack, then real recursion
> could be used instead, as attached. I tried to avoid recursion for the cases
> that unpatched dash already handled properly.

It does look a lot simpler than I expected.  However, this patch
is still 30% bigger than my patch :)

As real recursion doesn't seem to buy us much I think I'll stick
with the syntax stack for now.

> @@ -941,20 +1042,25 @@ readtoken1(int firstc, char const *syntax, char 
> *eofmark, int striptabs)
>   c != '\\' && c != '`' &&
>   c != '$' && (
>   c != '"' ||
> - eofmark != NULL
> + (eofmark != NULL && 
> !varnest)
> + ) && (
> + c != '}' ||
> + !varnest ||
> + (dqvarnest ? innerdq : 
> dblquote)

So this seems to be the only substantial difference between your
patch and mine.  I have looked at the behaviour of other shells
and I think I will stick with my patch's behaviour for now.

In general, if there are disagreements between shells and the
standard is not specific enough, I'll pick the approach that
results in simpler code.

Thanks,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line "unsubscribe dash" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] parser: Add syntax stack for recursive parsing

2018-03-07 Thread Herbert Xu
On Wed, Mar 07, 2018 at 08:25:07PM +, Martijn Dekker wrote:
>
> This version introduces a parsing bug:
> 
> $ src/dash -c 'x=0; x=$((${x}+1))'
> src/dash: 1: Syntax error: Unterminated quoted string
> 
> It is triggered by the ${x} (with braces) within an arithmetic expression.

Thanks for testing!

Indeed, I wasn't careful enough when changing the syntax to exclude
the simple VSNORMAL cases.

This patch should fix this problem as well as the one Harald identified:

---8<---
Without a stack of syntaxes we cannot correctly these two cases
together:

"${a#'$$'}"
"${a#"${b-'$$'}"}"

A recursive parser also helps in some other corner cases such
as nested arithmetic expansion with paratheses.

This patch adds a syntax stack allocated from the stack using
alloca.  As a side-effect this allows us to remove the naked
backslashes for patterns within double-quotes, which means that
EXP_QPAT also has to go.

This patch also fixes removes any backslashes that precede right
braces when they are present within a parameter expansion context.

The idea of a recursive parser is based on a patch by Harald van
Dijk.

Signed-off-by: Herbert Xu 

diff --git a/src/expand.c b/src/expand.c
index 2a50830..903e250 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -83,7 +83,7 @@
 #define RMESCAPE_HEAP  0x10/* Malloc strings instead of stalloc */
 
 /* Add CTLESC when necessary. */
-#define QUOTES_ESC (EXP_FULL | EXP_CASE | EXP_QPAT)
+#define QUOTES_ESC (EXP_FULL | EXP_CASE)
 /* Do not skip NUL characters. */
 #define QUOTES_KEEPNUL EXP_TILDE
 
@@ -333,16 +333,6 @@ addquote:
case CTLESC:
startloc++;
length++;
-
-   /*
-* Quoted parameter expansion pattern: remove quote
-* unless inside inner quotes or we have a literal
-* backslash.
-*/
-   if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) ==
-   EXP_QPAT && *p != '\\')
-   break;
-
goto addquote;
case CTLVAR:
p = evalvar(p, flag | inquotes);
@@ -651,8 +641,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int 
startloc, int varfla
char *(*scan)(char *, char *, char *, char *, int , int);
 
argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ?
-  (flag & (EXP_QUOTED | EXP_QPAT) ?
-   EXP_QPAT : EXP_CASE) : 0));
+  EXP_CASE : 0));
STPUTC('\0', expdest);
argbackq = saveargbackq;
startp = stackblock() + startloc;
@@ -1644,7 +1633,6 @@ char *
 _rmescapes(char *str, int flag)
 {
char *p, *q, *r;
-   unsigned inquotes;
int notescaped;
int globbing;
 
@@ -1674,24 +1662,23 @@ _rmescapes(char *str, int flag)
q = mempcpy(q, str, len);
}
}
-   inquotes = 0;
globbing = flag & RMESCAPE_GLOB;
notescaped = globbing;
while (*p) {
if (*p == (char)CTLQUOTEMARK) {
-   inquotes = ~inquotes;
p++;
notescaped = globbing;
continue;
}
+   if (*p == '\\') {
+   /* naked back slash */
+   notescaped = 0;
+   goto copy;
+   }
if (*p == (char)CTLESC) {
p++;
if (notescaped)
*q++ = '\\';
-   } else if (*p == '\\' && !inquotes) {
-   /* naked back slash */
-   notescaped = 0;
-   goto copy;
}
notescaped = globbing;
 copy:
diff --git a/src/expand.h b/src/expand.h
index 26dc5b4..90f5328 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -55,7 +55,6 @@ struct arglist {
 #defineEXP_VARTILDE0x4 /* expand tildes in an assignment */
 #defineEXP_REDIR   0x8 /* file glob for a redirection (1 match 
only) */
 #define EXP_CASE   0x10/* keeps quotes around for CASE pattern */
-#define EXP_QPAT   0x20/* pattern in quoted parameter expansion */
 #define EXP_VARTILDE2  0x40/* expand tildes after colons only */
 #define EXP_WORD   0x80/* expand word in parameter expansion */
 #define EXP_QUOTED 0x100   /* expand word in double quotes */
diff --git a/src/parser.c b/src/parser.c
index 382658e..f329c69 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -80,6 +80,18 @@ struct heredoc {
int striptabs;  /* if set, strip leading tabs */
 };
 
+struct synstack {
+   const char *syntax;
+   struct synstack *prev;
+   struct synstack 

Re: dash tested against ash testsuite: 17 failures

2018-03-07 Thread Harald van Dijk

On 3/8/18 7:30 AM, Herbert Xu wrote:

Could you please resend these patches as two separate emails please?
Patchwork cannot handle two patches in one email:

https://patchwork.kernel.org/patch/10264661/


Ah, didn't realise that. I'll keep that in mind for future mails.

Actually, I'll withdraw my second patch for now, I spotted another 
problem in alias handling (already present in 0.5.9.1) and want to 
double-check that my patch didn't make things worse.



Also it would be nice if you could include the patch descriptions
in each email as these will go into the git tree.


parser: use pgetc_eatbnl() in more places.

dash has a pgetc_eatbnl function in parser.c which skips any 
backslash-newline combinations. It's not used everywhere it could be. 
There is also some duplicated backslash-newline handling elsewhere in 
parser.c. Replace most of the calls to pgetc() with calls to 
pgetc_eatbnl() and remove the duplicated backslash-newline handling.
diff --git a/src/parser.c b/src/parser.c
index 382658e..8b945e3 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -106,6 +106,7 @@ STATIC void parseheredoc(void);
 STATIC int peektoken(void);
 STATIC int readtoken(void);
 STATIC int xxreadtoken(void);
+STATIC int pgetc_eatbnl();
 STATIC int readtoken1(int, char const *, char *, int);
 STATIC void synexpect(int) __attribute__((__noreturn__));
 STATIC void synerror(const char *) __attribute__((__noreturn__));
@@ -656,8 +657,10 @@ parseheredoc(void)
 		if (needprompt) {
 			setprompt(2);
 		}
-		readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
-here->eofmark, here->striptabs);
+		if (here->here->type == NHERE)
+			readtoken1(pgetc(), SQSYNTAX, here->eofmark, here->striptabs);
+		else
+			readtoken1(pgetc_eatbnl(), DQSYNTAX, here->eofmark, here->striptabs);
 		n = (union node *)stalloc(sizeof (struct narg));
 		n->narg.type = NARG;
 		n->narg.next = NULL;
@@ -782,7 +785,7 @@ xxreadtoken(void)
 		setprompt(2);
 	}
 	for (;;) {	/* until token or start of word found */
-		c = pgetc();
+		c = pgetc_eatbnl();
 		switch (c) {
 		case ' ': case '\t':
 		case PEOA:
@@ -791,30 +794,23 @@ xxreadtoken(void)
 			while ((c = pgetc()) != '\n' && c != PEOF);
 			pungetc();
 			continue;
-		case '\\':
-			if (pgetc() == '\n') {
-nlprompt();
-continue;
-			}
-			pungetc();
-			goto breakloop;
 		case '\n':
 			nlnoprompt();
 			RETURN(TNL);
 		case PEOF:
 			RETURN(TEOF);
 		case '&':
-			if (pgetc() == '&')
+			if (pgetc_eatbnl() == '&')
 RETURN(TAND);
 			pungetc();
 			RETURN(TBACKGND);
 		case '|':
-			if (pgetc() == '|')
+			if (pgetc_eatbnl() == '|')
 RETURN(TOR);
 			pungetc();
 			RETURN(TPIPE);
 		case ';':
-			if (pgetc() == ';')
+			if (pgetc_eatbnl() == ';')
 RETURN(TENDCASE);
 			pungetc();
 			RETURN(TSEMI);
@@ -822,11 +818,9 @@ xxreadtoken(void)
 			RETURN(TLP);
 		case ')':
 			RETURN(TRP);
-		default:
-			goto breakloop;
 		}
+		break;
 	}
-breakloop:
 	return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
 #undef RETURN
 }
@@ -836,7 +830,7 @@ static int pgetc_eatbnl(void)
 	int c;
 
 	while ((c = pgetc()) == '\\') {
-		if (pgetc() != '\n') {
+		if (pgetc2() != '\n') {
 			pungetc();
 			break;
 		}
@@ -903,7 +897,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 			attyline();
 			if (syntax == BASESYNTAX)
 return readtoken();
-			c = pgetc();
+			c = syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl();
 			goto loop;
 		}
 #endif
@@ -916,7 +910,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 	goto endword;	/* exit outer loop */
 USTPUTC(c, out);
 nlprompt();
-c = pgetc();
+c = syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl();
 goto loop;		/* continue outer loop */
 			case CWORD:
 USTPUTC(c, out);
@@ -933,8 +927,6 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 	USTPUTC(CTLESC, out);
 	USTPUTC('\\', out);
 	pungetc();
-} else if (c == '\n') {
-	nlprompt();
 } else {
 	if (
 		dblquote &&
@@ -997,7 +989,7 @@ quotemark:
 	USTPUTC(c, out);
 	--parenlevel;
 } else {
-	if (pgetc() == ')') {
+	if (pgetc_eatbnl() == ')') {
 		USTPUTC(CTLENDARI, out);
 		if (!--arinest)
 			syntax = prevsyntax;
@@ -1025,7 +1017,7 @@ quotemark:
 	USTPUTC(c, out);
 }
 			}
-			c = pgetc();
+			c = syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl();
 		}
 	}
 endword:
@@ -1132,7 +1124,7 @@ parseredir: {
 	np = (union node *)stalloc(sizeof (struct nfile));
 	if (c == '>') {
 		np->nfile.fd = 1;
-		c = pgetc();
+		c = pgetc_eatbnl();
 		if (c == '>')
 			np->type = NAPPEND;
 		else if (c == '|')
@@ -1145,7 +1137,7 @@ parseredir: {
 		}
 	} else {	/* c == '<' */
 		np->nfile.fd = 0;
-		switch (c = pgetc()) {
+		switch (c = pgetc_eatbnl()) {
 		case '<':
 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
 np = (union node *)stalloc(sizeof (struct nhere));
@@ -1154,7 +1146,7 @@ parseredir: {
 			np->type = NHERE;
 	

Re: dash tested against ash testsuite: 17 failures

2018-03-07 Thread Herbert Xu
On Wed, Mar 07, 2018 at 07:19:56PM +0100, Harald van Dijk wrote:
> On 3/7/18 7:51 AM, Herbert Xu wrote:
> >On Wed, Mar 07, 2018 at 07:49:16AM +0100, Harald van Dijk wrote:
> >>
> >>This was wrong in the original patch, but I'm not seeing it in the updated
> >>patch that you replied to. When parsing a heredoc where part of delimiter is
> >>quoted, syntax==SQSYNTAX. Since the calls to pgetc_eatbnl() are conditional
> >>on syntax!=SQSYNTAX, there shouldn't be a problem. It would be a different
> >>story if the delimiter could be an unquoted backslash, but thankfully that
> >>is not possible.
> >
> >Good point.  In that case please resend it with the pgetc2 change
> >and it should be good to go.
> 
> Here you go.
> 
> The problem with
> 
>   dash -c 'alias x=
>   x'
> 
> and
> 
>   dash -c 'alias bs=\\
>   bs
>   '
> 
> looks like it just needs one extra line, so also attached as a separate
> patch.

Could you please resend these patches as two separate emails please?
Patchwork cannot handle two patches in one email:

https://patchwork.kernel.org/patch/10264661/

Also it would be nice if you could include the patch descriptions
in each email as these will go into the git tree.

Thanks,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line "unsubscribe dash" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: dash bug: double-quoted "\" breaks glob protection for next char

2018-03-07 Thread Harald van Dijk

On 3/7/18 8:04 PM, Harald van Dijk wrote:

On 3/7/18 5:29 PM, Herbert Xu wrote:

On Sun, Mar 04, 2018 at 10:29:25PM +0100, Harald van Dijk wrote:


Another pre-existing dash parsing bug that I encountered now is $(( ( 
$(( 1
)) ) )). This should expand to 1, but gives a hard error in dash, 
again due
to the non-recursive nature of dash's parser. A small generalisation 
of what
I've been doing so far could handle this, so it makes sense to me to 
try to

achieve this at the same time.


Thanks for the patches.  You have convinced that a stacked syntax
is indeed necessary.  However, I'd like to do the reversion of the
run-time quote book-keeping patch in a separate step.

I also didn't quite like the idea of scanning the string backwards
to find the previous syntax.  So here is my attempt at the recursive
parsing using alloca.


If the syntax stack is to be stored on the actual stack, then real 
recursion could be used instead, as attached. I tried to avoid recursion 
for the cases that unpatched dash already handled properly.



It's not completely recursive in that I've
maintained the existing behaviour of double-quotes inside parameter
expansion inside double-quotes.  However, it does seem to address
most of the issues that you have raised.


One thing I found it doesn't handle, although it does look like you try 
to support it, is ${$-"}"}, which should expand to the same thing as $$. 
This doesn't work because the first " is CDQUOTE, not CENDQUOTE, so 
synstack->innerdq doesn't get set, and there's nothing else that 
prevents the quoted } from being treated as the end of the variable 
substitution.


To handle that it can just look at dblquote instead, included in the 
attached.


In this patch, I managed let "${$+\}}" expand to }, but "${$+"\}"}" to 
\}, for the reasons I gave earlier.


Martijn Dekker's test case of 'x=0; x=$((${x}+1))' also works with this.

Cheers,
Harald van Dijk
diff --git a/src/expand.c b/src/expand.c
index 2a50830..903e250 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -83,7 +83,7 @@
 #define RMESCAPE_HEAP	0x10	/* Malloc strings instead of stalloc */
 
 /* Add CTLESC when necessary. */
-#define QUOTES_ESC	(EXP_FULL | EXP_CASE | EXP_QPAT)
+#define QUOTES_ESC	(EXP_FULL | EXP_CASE)
 /* Do not skip NUL characters. */
 #define QUOTES_KEEPNUL	EXP_TILDE
 
@@ -333,16 +333,6 @@ addquote:
 		case CTLESC:
 			startloc++;
 			length++;
-
-			/*
-			 * Quoted parameter expansion pattern: remove quote
-			 * unless inside inner quotes or we have a literal
-			 * backslash.
-			 */
-			if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) ==
-			EXP_QPAT && *p != '\\')
-break;
-
 			goto addquote;
 		case CTLVAR:
 			p = evalvar(p, flag | inquotes);
@@ -651,8 +641,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varfla
 	char *(*scan)(char *, char *, char *, char *, int , int);
 
 	argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ?
-			   (flag & (EXP_QUOTED | EXP_QPAT) ?
-			EXP_QPAT : EXP_CASE) : 0));
+			   EXP_CASE : 0));
 	STPUTC('\0', expdest);
 	argbackq = saveargbackq;
 	startp = stackblock() + startloc;
@@ -1644,7 +1633,6 @@ char *
 _rmescapes(char *str, int flag)
 {
 	char *p, *q, *r;
-	unsigned inquotes;
 	int notescaped;
 	int globbing;
 
@@ -1674,24 +1662,23 @@ _rmescapes(char *str, int flag)
 			q = mempcpy(q, str, len);
 		}
 	}
-	inquotes = 0;
 	globbing = flag & RMESCAPE_GLOB;
 	notescaped = globbing;
 	while (*p) {
 		if (*p == (char)CTLQUOTEMARK) {
-			inquotes = ~inquotes;
 			p++;
 			notescaped = globbing;
 			continue;
 		}
+		if (*p == '\\') {
+			/* naked back slash */
+			notescaped = 0;
+			goto copy;
+		}
 		if (*p == (char)CTLESC) {
 			p++;
 			if (notescaped)
 *q++ = '\\';
-		} else if (*p == '\\' && !inquotes) {
-			/* naked back slash */
-			notescaped = 0;
-			goto copy;
 		}
 		notescaped = globbing;
 copy:
diff --git a/src/expand.h b/src/expand.h
index 26dc5b4..90f5328 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -55,7 +55,6 @@ struct arglist {
 #define	EXP_VARTILDE	0x4	/* expand tildes in an assignment */
 #define	EXP_REDIR	0x8	/* file glob for a redirection (1 match only) */
 #define EXP_CASE	0x10	/* keeps quotes around for CASE pattern */
-#define EXP_QPAT	0x20	/* pattern in quoted parameter expansion */
 #define EXP_VARTILDE2	0x40	/* expand tildes after colons only */
 #define EXP_WORD	0x80	/* expand word in parameter expansion */
 #define EXP_QUOTED	0x100	/* expand word in double quotes */
diff --git a/src/parser.c b/src/parser.c
index 382658e..17d60b0 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -827,7 +827,8 @@ xxreadtoken(void)
 		}
 	}
 breakloop:
-	return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
+	readtoken1(c, BASESYNTAX, (char *)NULL, 0);
+	return lasttoken;
 #undef RETURN
 }
 
@@ -855,47 +856,147 @@ static int pgetc_eatbnl(void)
  * word which marks the end of the document and striptabs is true if
  * leading tabs should be stripped from the document.  The 

Re: dash bug: double-quoted "\" breaks glob protection for next char

2018-03-07 Thread Martijn Dekker
Op 07-03-18 om 16:29 schreef Herbert Xu:
> I also didn't quite like the idea of scanning the string backwards
> to find the previous syntax.  So here is my attempt at the recursive
> parsing using alloca. 

This version introduces a parsing bug:

$ src/dash -c 'x=0; x=$((${x}+1))'
src/dash: 1: Syntax error: Unterminated quoted string

It is triggered by the ${x} (with braces) within an arithmetic expression.

- M.

--
To unsubscribe from this list: send the line "unsubscribe dash" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: dash bug: double-quoted "\" breaks glob protection for next char

2018-03-07 Thread Harald van Dijk

On 3/7/18 5:29 PM, Herbert Xu wrote:

On Sun, Mar 04, 2018 at 10:29:25PM +0100, Harald van Dijk wrote:


Another pre-existing dash parsing bug that I encountered now is $(( ( $(( 1
)) ) )). This should expand to 1, but gives a hard error in dash, again due
to the non-recursive nature of dash's parser. A small generalisation of what
I've been doing so far could handle this, so it makes sense to me to try to
achieve this at the same time.


Thanks for the patches.  You have convinced that a stacked syntax
is indeed necessary.  However, I'd like to do the reversion of the
run-time quote book-keeping patch in a separate step.

I also didn't quite like the idea of scanning the string backwards
to find the previous syntax.  So here is my attempt at the recursive
parsing using alloca.


Nice approach.


It's not completely recursive in that I've
maintained the existing behaviour of double-quotes inside parameter
expansion inside double-quotes.  However, it does seem to address
most of the issues that you have raised.


One thing I found it doesn't handle, although it does look like you try 
to support it, is ${$-"}"}, which should expand to the same thing as $$. 
This doesn't work because the first " is CDQUOTE, not CENDQUOTE, so 
synstack->innerdq doesn't get set, and there's nothing else that 
prevents the quoted } from being treated as the end of the variable 
substitution.



As I have not reverted the run-time quote book-keeping, the handling
of $@/$* remains unchanged.


I'll try to re-do those fixes based on the approach you're going for here.

Cheers,
Harald van Dijk
--
To unsubscribe from this list: send the line "unsubscribe dash" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: dash tested against ash testsuite: 17 failures

2018-03-07 Thread Harald van Dijk

On 3/7/18 7:51 AM, Herbert Xu wrote:

On Wed, Mar 07, 2018 at 07:49:16AM +0100, Harald van Dijk wrote:


This was wrong in the original patch, but I'm not seeing it in the updated
patch that you replied to. When parsing a heredoc where part of delimiter is
quoted, syntax==SQSYNTAX. Since the calls to pgetc_eatbnl() are conditional
on syntax!=SQSYNTAX, there shouldn't be a problem. It would be a different
story if the delimiter could be an unquoted backslash, but thankfully that
is not possible.


Good point.  In that case please resend it with the pgetc2 change
and it should be good to go.


Here you go.

The problem with

  dash -c 'alias x=
  x'

and

  dash -c 'alias bs=\\
  bs
  '

looks like it just needs one extra line, so also attached as a separate 
patch.


Cheers,
Harald van Dijk
diff --git a/src/parser.c b/src/parser.c
index 382658e..8b945e3 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -106,6 +106,7 @@ STATIC void parseheredoc(void);
 STATIC int peektoken(void);
 STATIC int readtoken(void);
 STATIC int xxreadtoken(void);
+STATIC int pgetc_eatbnl();
 STATIC int readtoken1(int, char const *, char *, int);
 STATIC void synexpect(int) __attribute__((__noreturn__));
 STATIC void synerror(const char *) __attribute__((__noreturn__));
@@ -656,8 +657,10 @@ parseheredoc(void)
 		if (needprompt) {
 			setprompt(2);
 		}
-		readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
-here->eofmark, here->striptabs);
+		if (here->here->type == NHERE)
+			readtoken1(pgetc(), SQSYNTAX, here->eofmark, here->striptabs);
+		else
+			readtoken1(pgetc_eatbnl(), DQSYNTAX, here->eofmark, here->striptabs);
 		n = (union node *)stalloc(sizeof (struct narg));
 		n->narg.type = NARG;
 		n->narg.next = NULL;
@@ -782,7 +785,7 @@ xxreadtoken(void)
 		setprompt(2);
 	}
 	for (;;) {	/* until token or start of word found */
-		c = pgetc();
+		c = pgetc_eatbnl();
 		switch (c) {
 		case ' ': case '\t':
 		case PEOA:
@@ -791,30 +794,23 @@ xxreadtoken(void)
 			while ((c = pgetc()) != '\n' && c != PEOF);
 			pungetc();
 			continue;
-		case '\\':
-			if (pgetc() == '\n') {
-nlprompt();
-continue;
-			}
-			pungetc();
-			goto breakloop;
 		case '\n':
 			nlnoprompt();
 			RETURN(TNL);
 		case PEOF:
 			RETURN(TEOF);
 		case '&':
-			if (pgetc() == '&')
+			if (pgetc_eatbnl() == '&')
 RETURN(TAND);
 			pungetc();
 			RETURN(TBACKGND);
 		case '|':
-			if (pgetc() == '|')
+			if (pgetc_eatbnl() == '|')
 RETURN(TOR);
 			pungetc();
 			RETURN(TPIPE);
 		case ';':
-			if (pgetc() == ';')
+			if (pgetc_eatbnl() == ';')
 RETURN(TENDCASE);
 			pungetc();
 			RETURN(TSEMI);
@@ -822,11 +818,9 @@ xxreadtoken(void)
 			RETURN(TLP);
 		case ')':
 			RETURN(TRP);
-		default:
-			goto breakloop;
 		}
+		break;
 	}
-breakloop:
 	return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
 #undef RETURN
 }
@@ -836,7 +830,7 @@ static int pgetc_eatbnl(void)
 	int c;
 
 	while ((c = pgetc()) == '\\') {
-		if (pgetc() != '\n') {
+		if (pgetc2() != '\n') {
 			pungetc();
 			break;
 		}
@@ -903,7 +897,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 			attyline();
 			if (syntax == BASESYNTAX)
 return readtoken();
-			c = pgetc();
+			c = syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl();
 			goto loop;
 		}
 #endif
@@ -916,7 +910,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 	goto endword;	/* exit outer loop */
 USTPUTC(c, out);
 nlprompt();
-c = pgetc();
+c = syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl();
 goto loop;		/* continue outer loop */
 			case CWORD:
 USTPUTC(c, out);
@@ -933,8 +927,6 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 	USTPUTC(CTLESC, out);
 	USTPUTC('\\', out);
 	pungetc();
-} else if (c == '\n') {
-	nlprompt();
 } else {
 	if (
 		dblquote &&
@@ -997,7 +989,7 @@ quotemark:
 	USTPUTC(c, out);
 	--parenlevel;
 } else {
-	if (pgetc() == ')') {
+	if (pgetc_eatbnl() == ')') {
 		USTPUTC(CTLENDARI, out);
 		if (!--arinest)
 			syntax = prevsyntax;
@@ -1025,7 +1017,7 @@ quotemark:
 	USTPUTC(c, out);
 }
 			}
-			c = pgetc();
+			c = syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl();
 		}
 	}
 endword:
@@ -1132,7 +1124,7 @@ parseredir: {
 	np = (union node *)stalloc(sizeof (struct nfile));
 	if (c == '>') {
 		np->nfile.fd = 1;
-		c = pgetc();
+		c = pgetc_eatbnl();
 		if (c == '>')
 			np->type = NAPPEND;
 		else if (c == '|')
@@ -1145,7 +1137,7 @@ parseredir: {
 		}
 	} else {	/* c == '<' */
 		np->nfile.fd = 0;
-		switch (c = pgetc()) {
+		switch (c = pgetc_eatbnl()) {
 		case '<':
 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
 np = (union node *)stalloc(sizeof (struct nhere));
@@ -1154,7 +1146,7 @@ parseredir: {
 			np->type = NHERE;
 			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
 			heredoc->here = np;
-			if ((c = pgetc()) == '-') {
+			if ((c = pgetc_eatbnl()) == '-') {
 

Re: Greater resolution in test -nt / test -ot

2018-03-07 Thread Martijn Dekker
Op 07-03-18 om 15:46 schreef Martijn Dekker:
> Op 06-03-18 om 09:19 schreef Herbert Xu:
>> On Thu, Jun 22, 2017 at 10:30:02AM +0200, Petr Skočík wrote:
>>> would you be willing to pull something like this?
> [...]
>>> I could use greater resolution in `test -nt` / `test -ot`, and st_mtim
>>> field is standardized under POSIX.1-2008 (or so stat(2) says).
>>
>> Sure.  But your patch is corrupted.
> 
> Fixed patch attached.
> 
> But I wouldn't apply it as is. My system does not have st_mtim. So I
> think it needs a configure test and a fallback to the old method.

Here's an attempt to make that happen. See attached.

- M.
diff --git a/configure.ac b/configure.ac
index 9c4ced8..0417fa2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -149,6 +149,20 @@ AC_CHECK_FUNC(open64,, [
AC_DEFINE(open64, open, [64-bit operations are the same as 32-bit])
 ])
 
+dnl Check if struct stat has st_mtim.
+AC_MSG_CHECKING(for stat::st_mtim)
+AC_COMPILE_IFELSE(
+[AC_LANG_PROGRAM([#include 
+#include 
+#include ],
+[struct stat foo; return sizeof(foo.st_mtim.tv_sec)])],
+have_st_mtim=yes, have_st_mtim=no)
+AC_MSG_RESULT($have_st_mtim)
+if test "$have_st_mtim" = "yes"; then
+   AC_DEFINE([HAVE_ST_MTIM], [1],
+   [Define if your `struct stat' has `st_mtim'])
+fi
+
 AC_ARG_WITH(libedit, AS_HELP_STRING(--with-libedit, [Compile with libedit 
support]))
 use_libedit=
 if test "$with_libedit" = "yes"; then
diff --git a/src/bltin/test.c b/src/bltin/test.c
index 58c05fe..d1458df 100644
--- a/src/bltin/test.c
+++ b/src/bltin/test.c
@@ -476,9 +476,17 @@ newerf (const char *f1, const char *f2)
 {
struct stat b1, b2;
 
+#ifdef HAVE_ST_MTIM
+   return (stat (f1, ) == 0 &&
+   stat (f2, ) == 0 &&
+   ( b1.st_mtim.tv_sec > b2.st_mtim.tv_sec ||
+(b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && (b1.st_mtim.tv_nsec 
> b2.st_mtim.tv_nsec )))
+   );
+#else
return (stat (f1, ) == 0 &&
stat (f2, ) == 0 &&
b1.st_mtime > b2.st_mtime);
+#endif
 }
 
 static int
@@ -486,9 +494,17 @@ olderf (const char *f1, const char *f2)
 {
struct stat b1, b2;
 
+#ifdef HAVE_ST_MTIM
+   return (stat (f1, ) == 0 &&
+   stat (f2, ) == 0 &&
+   (b1.st_mtim.tv_sec < b2.st_mtim.tv_sec ||
+(b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && (b1.st_mtim.tv_nsec 
< b2.st_mtim.tv_nsec )))
+   );
+#else
return (stat (f1, ) == 0 &&
stat (f2, ) == 0 &&
b1.st_mtime < b2.st_mtime);
+#endif
 }
 
 static int


Re: dash bug: double-quoted "\" breaks glob protection for next char

2018-03-07 Thread Herbert Xu
On Sun, Mar 04, 2018 at 10:29:25PM +0100, Harald van Dijk wrote:
>
> Another pre-existing dash parsing bug that I encountered now is $(( ( $(( 1
> )) ) )). This should expand to 1, but gives a hard error in dash, again due
> to the non-recursive nature of dash's parser. A small generalisation of what
> I've been doing so far could handle this, so it makes sense to me to try to
> achieve this at the same time.

Thanks for the patches.  You have convinced that a stacked syntax
is indeed necessary.  However, I'd like to do the reversion of the
run-time quote book-keeping patch in a separate step.

I also didn't quite like the idea of scanning the string backwards
to find the previous syntax.  So here is my attempt at the recursive
parsing using alloca.  It's not completely recursive in that I've
maintained the existing behaviour of double-quotes inside parameter
expansion inside double-quotes.  However, it does seem to address
most of the issues that you have raised.

As I have not reverted the run-time quote book-keeping, the handling
of $@/$* remains unchanged.

I will explore reverting it and will decide whether to do so
depending on how the code size/performance changes.

---8<---
Subject: parser: Add syntax stack for recursive parsing

Without a stack of syntaxes we cannot correctly these two cases
together:

"${a#'$$'}"
"${a#"${b-'$$'}"}"

A recursive parser also helps in some other corner cases such
as nested arithmetic expansion with paratheses.

This patch adds a syntax stack allocated from the stack using
alloca.  As a side-effect this allows us to remove the naked
backslashes for patterns within double-quotes, which means that
EXP_QPAT also has to go.

This patch also fixes removes any backslashes that precede right
braces when they are present within a parameter expansion context.

The idea of a recursive parser is based on a patch by Harald van
Dijk.

Signed-off-by: Herbert Xu 

diff --git a/src/expand.c b/src/expand.c
index 2a50830..903e250 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -83,7 +83,7 @@
 #define RMESCAPE_HEAP  0x10/* Malloc strings instead of stalloc */
 
 /* Add CTLESC when necessary. */
-#define QUOTES_ESC (EXP_FULL | EXP_CASE | EXP_QPAT)
+#define QUOTES_ESC (EXP_FULL | EXP_CASE)
 /* Do not skip NUL characters. */
 #define QUOTES_KEEPNUL EXP_TILDE
 
@@ -333,16 +333,6 @@ addquote:
case CTLESC:
startloc++;
length++;
-
-   /*
-* Quoted parameter expansion pattern: remove quote
-* unless inside inner quotes or we have a literal
-* backslash.
-*/
-   if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) ==
-   EXP_QPAT && *p != '\\')
-   break;
-
goto addquote;
case CTLVAR:
p = evalvar(p, flag | inquotes);
@@ -651,8 +641,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int 
startloc, int varfla
char *(*scan)(char *, char *, char *, char *, int , int);
 
argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ?
-  (flag & (EXP_QUOTED | EXP_QPAT) ?
-   EXP_QPAT : EXP_CASE) : 0));
+  EXP_CASE : 0));
STPUTC('\0', expdest);
argbackq = saveargbackq;
startp = stackblock() + startloc;
@@ -1644,7 +1633,6 @@ char *
 _rmescapes(char *str, int flag)
 {
char *p, *q, *r;
-   unsigned inquotes;
int notescaped;
int globbing;
 
@@ -1674,24 +1662,23 @@ _rmescapes(char *str, int flag)
q = mempcpy(q, str, len);
}
}
-   inquotes = 0;
globbing = flag & RMESCAPE_GLOB;
notescaped = globbing;
while (*p) {
if (*p == (char)CTLQUOTEMARK) {
-   inquotes = ~inquotes;
p++;
notescaped = globbing;
continue;
}
+   if (*p == '\\') {
+   /* naked back slash */
+   notescaped = 0;
+   goto copy;
+   }
if (*p == (char)CTLESC) {
p++;
if (notescaped)
*q++ = '\\';
-   } else if (*p == '\\' && !inquotes) {
-   /* naked back slash */
-   notescaped = 0;
-   goto copy;
}
notescaped = globbing;
 copy:
diff --git a/src/expand.h b/src/expand.h
index 26dc5b4..90f5328 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -55,7 +55,6 @@ struct arglist {
 #defineEXP_VARTILDE0x4 /* expand tildes in an assignment 

Re: [PATCH] fix "Illegal number" on FreeBSD & macOS for x=; echo $((x))

2018-03-07 Thread Martijn Dekker
Op 07-03-18 om 06:26 schreef Herbert Xu:
> Martijn Dekker  wrote:
>>
>>> Since base is always a constant 0 or a constant 10, never a
>>> user-provided value, the only error that strtoimax will ever report on
>>> glibc systems is ERANGE. Checking only ERANGE therefore preserves the
>>> glibc behaviour, and allows the exact same set of errors to be detected
>>> on non-glibc systems.
>>
>> That makes sense, thanks.
> 
> Could you resend your patch with this change please?

OK, see below.

- M.

diff --git a/src/mystring.c b/src/mystring.c
index 0106bd2..de624b8 100644
--- a/src/mystring.c
+++ b/src/mystring.c
@@ -125,7 +125,7 @@ intmax_t atomax(const char *s, int base)
errno = 0;
r = strtoimax(s, , base);

-   if (errno != 0)
+   if (errno == ERANGE)
badnum(s);

/*

--
To unsubscribe from this list: send the line "unsubscribe dash" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html