On Wed, Mar 07, 2018 at 08:25:07PM +0000, Martijn Dekker wrote:
>
> This version introduces a parsing bug:
> 
> $ src/dash -c 'x=0; x=$((${x}+1))'
> src/dash: 1: Syntax error: Unterminated quoted string
> 
> It is triggered by the ${x} (with braces) within an arithmetic expression.

Thanks for testing!

Indeed, I wasn't careful enough when changing the syntax to exclude
the simple VSNORMAL cases.

This patch should fix this problem as well as the one Harald identified:

---8<---
Without a stack of syntaxes we cannot correctly parse these two cases
together:

        "${a#'$$'}"
        "${a#"${b-'$$'}"}"

A recursive parser also helps in some other corner cases such
as nested arithmetic expansion with parentheses.

This patch adds a syntax stack allocated from the stack using
alloca.  As a side-effect this allows us to remove the naked
backslashes for patterns within double-quotes, which means that
EXP_QPAT also has to go.

This patch also removes any backslashes that precede right
braces when they are present within a parameter expansion context.

The idea of a recursive parser is based on a patch by Harald van
Dijk.

Signed-off-by: Herbert Xu <herb...@gondor.apana.org.au>

diff --git a/src/expand.c b/src/expand.c
index 2a50830..903e250 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -83,7 +83,7 @@
 #define RMESCAPE_HEAP  0x10    /* Malloc strings instead of stalloc */
 
 /* Add CTLESC when necessary. */
-#define QUOTES_ESC     (EXP_FULL | EXP_CASE | EXP_QPAT)
+#define QUOTES_ESC     (EXP_FULL | EXP_CASE)
 /* Do not skip NUL characters. */
 #define QUOTES_KEEPNUL EXP_TILDE
 
@@ -333,16 +333,6 @@ addquote:
                case CTLESC:
                        startloc++;
                        length++;
-
-                       /*
-                        * Quoted parameter expansion pattern: remove quote
-                        * unless inside inner quotes or we have a literal
-                        * backslash.
-                        */
-                       if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) ==
-                           EXP_QPAT && *p != '\\')
-                               break;
-
                        goto addquote;
                case CTLVAR:
                        p = evalvar(p, flag | inquotes);
@@ -651,8 +641,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int 
startloc, int varfla
        char *(*scan)(char *, char *, char *, char *, int , int);
 
        argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ?
-                              (flag & (EXP_QUOTED | EXP_QPAT) ?
-                               EXP_QPAT : EXP_CASE) : 0));
+                              EXP_CASE : 0));
        STPUTC('\0', expdest);
        argbackq = saveargbackq;
        startp = stackblock() + startloc;
@@ -1644,7 +1633,6 @@ char *
 _rmescapes(char *str, int flag)
 {
        char *p, *q, *r;
-       unsigned inquotes;
        int notescaped;
        int globbing;
 
@@ -1674,24 +1662,23 @@ _rmescapes(char *str, int flag)
                        q = mempcpy(q, str, len);
                }
        }
-       inquotes = 0;
        globbing = flag & RMESCAPE_GLOB;
        notescaped = globbing;
        while (*p) {
                if (*p == (char)CTLQUOTEMARK) {
-                       inquotes = ~inquotes;
                        p++;
                        notescaped = globbing;
                        continue;
                }
+               if (*p == '\\') {
+                       /* naked back slash */
+                       notescaped = 0;
+                       goto copy;
+               }
                if (*p == (char)CTLESC) {
                        p++;
                        if (notescaped)
                                *q++ = '\\';
-               } else if (*p == '\\' && !inquotes) {
-                       /* naked back slash */
-                       notescaped = 0;
-                       goto copy;
                }
                notescaped = globbing;
 copy:
diff --git a/src/expand.h b/src/expand.h
index 26dc5b4..90f5328 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -55,7 +55,6 @@ struct arglist {
 #define        EXP_VARTILDE    0x4     /* expand tildes in an assignment */
 #define        EXP_REDIR       0x8     /* file glob for a redirection (1 match 
only) */
 #define EXP_CASE       0x10    /* keeps quotes around for CASE pattern */
-#define EXP_QPAT       0x20    /* pattern in quoted parameter expansion */
 #define EXP_VARTILDE2  0x40    /* expand tildes after colons only */
 #define EXP_WORD       0x80    /* expand word in parameter expansion */
 #define EXP_QUOTED     0x100   /* expand word in double quotes */
diff --git a/src/parser.c b/src/parser.c
index 382658e..f329c69 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -80,6 +80,18 @@ struct heredoc {
        int striptabs;          /* if set, strip leading tabs */
 };
 
+struct synstack {
+       const char *syntax;
+       struct synstack *prev;
+       struct synstack *next;
+       int innerdq;
+       int varpushed;
+       int dblquote;
+       int varnest;            /* levels of variables expansion */
+       int parenlevel;         /* levels of parens in arithmetic */
+       int dqvarnest;          /* levels of variables expansion within double 
quotes */
+};
+
 
 
 struct heredoc *heredoclist;   /* list of here documents to read */
@@ -847,6 +859,21 @@ static int pgetc_eatbnl(void)
        return c;
 }
 
+static void synstack_push(struct synstack **stack, struct synstack *next,
+                         const char *syntax)
+{
+       memset(next, 0, sizeof(*next));
+       next->syntax = syntax;
+       next->next = *stack;
+       (*stack)->prev = next;
+       *stack = next;
+}
+
+static void synstack_pop(struct synstack **stack)
+{
+       *stack = (*stack)->next;
+}
+
 
 
 /*
@@ -876,24 +903,15 @@ readtoken1(int firstc, char const *syntax, char *eofmark, 
int striptabs)
        size_t len;
        struct nodelist *bqlist;
        int quotef;
-       int dblquote;
-       int varnest;    /* levels of variables expansion */
-       int arinest;    /* levels of arithmetic expansion */
-       int parenlevel; /* levels of parens in arithmetic */
-       int dqvarnest;  /* levels of variables expansion within double quotes */
        int oldstyle;
-       /* syntax before arithmetic */
-       char const *uninitialized_var(prevsyntax);
+       /* syntax stack */
+       struct synstack synbase = { .syntax = syntax };
+       struct synstack *synstack = &synbase;
 
-       dblquote = 0;
        if (syntax == DQSYNTAX)
-               dblquote = 1;
+               synstack->dblquote = 1;
        quotef = 0;
        bqlist = NULL;
-       varnest = 0;
-       arinest = 0;
-       parenlevel = 0;
-       dqvarnest = 0;
 
        STARTSTACKSTR(out);
        loop: { /* for each line, until end of word */
@@ -901,7 +919,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, 
int striptabs)
                if (c == '\034' && doprompt
                 && attyset() && ! equal(termval(), "emacs")) {
                        attyline();
-                       if (syntax == BASESYNTAX)
+                       if (synstack->syntax == BASESYNTAX)
                                return readtoken();
                        c = pgetc();
                        goto loop;
@@ -910,9 +928,9 @@ readtoken1(int firstc, char const *syntax, char *eofmark, 
int striptabs)
                CHECKEND();     /* set c to PEOF if at end of here document */
                for (;;) {      /* until end of line or end of word */
                        CHECKSTRSPACE(4, out);  /* permit 4 calls to USTPUTC */
-                       switch(syntax[c]) {
+                       switch(synstack->syntax[c]) {
                        case CNL:       /* '\n' */
-                               if (syntax == BASESYNTAX)
+                               if (synstack->syntax == BASESYNTAX)
                                        goto endword;   /* exit outer loop */
                                USTPUTC(c, out);
                                nlprompt();
@@ -922,7 +940,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, 
int striptabs)
                                USTPUTC(c, out);
                                break;
                        case CCTL:
-                               if (eofmark == NULL || dblquote)
+                               if (eofmark == NULL || synstack->dblquote)
                                        USTPUTC(CTLESC, out);
                                USTPUTC(c, out);
                                break;
@@ -937,13 +955,17 @@ readtoken1(int firstc, char const *syntax, char *eofmark, 
int striptabs)
                                        nlprompt();
                                } else {
                                        if (
-                                               dblquote &&
+                                               synstack->dblquote &&
                                                c != '\\' && c != '`' &&
                                                c != '$' && (
                                                        c != '"' ||
                                                        eofmark != NULL
+                                               ) && (
+                                                       c != '}' ||
+                                                       !synstack->varnest
                                                )
                                        ) {
+                                               USTPUTC(CTLESC, out);
                                                USTPUTC('\\', out);
                                        }
                                        USTPUTC(CTLESC, out);
@@ -952,55 +974,64 @@ readtoken1(int firstc, char const *syntax, char *eofmark, 
int striptabs)
                                }
                                break;
                        case CSQUOTE:
-                               syntax = SQSYNTAX;
+                               synstack->syntax = SQSYNTAX;
 quotemark:
                                if (eofmark == NULL) {
                                        USTPUTC(CTLQUOTEMARK, out);
                                }
                                break;
                        case CDQUOTE:
-                               syntax = DQSYNTAX;
-                               dblquote = 1;
+                               synstack->syntax = DQSYNTAX;
+                               synstack->dblquote = 1;
+toggledq:
+                               if (synstack->varnest)
+                                       synstack->innerdq ^= 1;
                                goto quotemark;
                        case CENDQUOTE:
-                               if (eofmark && !varnest)
+                               if (eofmark && !synstack->varnest) {
                                        USTPUTC(c, out);
-                               else {
-                                       if (dqvarnest == 0) {
-                                               syntax = BASESYNTAX;
-                                               dblquote = 0;
-                                       }
-                                       quotef++;
-                                       goto quotemark;
+                                       break;
                                }
-                               break;
+
+                               if (synstack->dqvarnest == 0) {
+                                       synstack->syntax = BASESYNTAX;
+                                       synstack->dblquote = 0;
+                               }
+
+                               quotef++;
+
+                               if (c == '"')
+                                       goto toggledq;
+
+                               goto quotemark;
                        case CVAR:      /* '$' */
                                PARSESUB();             /* parse substitution */
                                break;
                        case CENDVAR:   /* '}' */
-                               if (varnest > 0) {
-                                       varnest--;
-                                       if (dqvarnest > 0) {
-                                               dqvarnest--;
-                                       }
+                               if (!synstack->innerdq &&
+                                   synstack->varnest > 0) {
+                                       if (!--synstack->varnest &&
+                                           synstack->varpushed)
+                                               synstack_pop(&synstack);
+                                       else if (synstack->dqvarnest > 0)
+                                               synstack->dqvarnest--;
                                        USTPUTC(CTLENDVAR, out);
                                } else {
                                        USTPUTC(c, out);
                                }
                                break;
                        case CLP:       /* '(' in arithmetic */
-                               parenlevel++;
+                               synstack->parenlevel++;
                                USTPUTC(c, out);
                                break;
                        case CRP:       /* ')' in arithmetic */
-                               if (parenlevel > 0) {
+                               if (synstack->parenlevel > 0) {
                                        USTPUTC(c, out);
-                                       --parenlevel;
+                                       --synstack->parenlevel;
                                } else {
                                        if (pgetc() == ')') {
                                                USTPUTC(CTLENDARI, out);
-                                               if (!--arinest)
-                                                       syntax = prevsyntax;
+                                               synstack_pop(&synstack);
                                        } else {
                                                /*
                                                 * unbalanced parens
@@ -1019,7 +1050,7 @@ quotemark:
                        case CIGN:
                                break;
                        default:
-                               if (varnest == 0)
+                               if (synstack->varnest == 0)
                                        goto endword;   /* exit outer loop */
                                if (c != PEOA) {
                                        USTPUTC(c, out);
@@ -1029,11 +1060,11 @@ quotemark:
                }
        }
 endword:
-       if (syntax == ARISYNTAX)
+       if (synstack->syntax == ARISYNTAX)
                synerror("Missing '))'");
-       if (syntax != BASESYNTAX && eofmark == NULL)
+       if (synstack->syntax != BASESYNTAX && eofmark == NULL)
                synerror("Unterminated quoted string");
-       if (varnest != 0) {
+       if (synstack->varnest != 0) {
                /* { */
                synerror("Missing '}'");
        }
@@ -1210,6 +1241,8 @@ parsesub: {
                        PARSEBACKQNEW();
                }
        } else {
+               const char *newsyn = synstack->syntax;
+
                USTPUTC(CTLVAR, out);
                typeloc = out - (char *)stackblock();
                STADJUST(1, out);
@@ -1260,6 +1293,8 @@ varname:
                }
 
                if (subtype == 0) {
+                       int cc = c;
+
                        switch (c) {
                        case ':':
                                subtype = VSNUL;
@@ -1273,27 +1308,40 @@ varname:
                                break;
                        case '%':
                        case '#':
-                               {
-                                       int cc = c;
-                                       subtype = c == '#' ? VSTRIMLEFT :
-                                                            VSTRIMRIGHT;
-                                       c = pgetc_eatbnl();
-                                       if (c == cc)
-                                               subtype++;
-                                       else
-                                               pungetc();
-                                       break;
-                               }
+                               subtype = c == '#' ? VSTRIMLEFT :
+                                                    VSTRIMRIGHT;
+                               c = pgetc_eatbnl();
+                               if (c == cc)
+                                       subtype++;
+                               else
+                                       pungetc();
+
+                               newsyn = BASESYNTAX;
+                               break;
                        }
                } else {
 badsub:
                        pungetc();
                }
+
+               if (newsyn == ARISYNTAX && subtype > VSNORMAL)
+                       newsyn = DQSYNTAX;
+
+               if (newsyn != synstack->syntax) {
+                       synstack_push(&synstack,
+                                     synstack->prev ?:
+                                     alloca(sizeof(*synstack)),
+                                     newsyn);
+
+                       synstack->varpushed++;
+                       synstack->dblquote = newsyn != BASESYNTAX;
+               }
+
                *((char *)stackblock() + typeloc) = subtype;
                if (subtype != VSNORMAL) {
-                       varnest++;
-                       if (dblquote)
-                               dqvarnest++;
+                       synstack->varnest++;
+                       if (synstack->dblquote)
+                               synstack->dqvarnest++;
                }
                STPUTC('=', out);
        }
@@ -1352,7 +1400,7 @@ parsebackq: {
                                        continue;
                                }
                                 if (pc != '\\' && pc != '`' && pc != '$'
-                                    && (!dblquote || pc != '"'))
+                                    && (!synstack->dblquote || pc != '"'))
                                         STPUTC('\\', pout);
                                if (pc > PEOA) {
                                        break;
@@ -1428,10 +1476,10 @@ done:
  */
 parsearith: {
 
-       if (++arinest == 1) {
-               prevsyntax = syntax;
-               syntax = ARISYNTAX;
-       }
+       synstack_push(&synstack,
+                     synstack->prev ?: alloca(sizeof(*synstack)),
+                     ARISYNTAX);
+       synstack->dblquote = 1;
        USTPUTC(CTLARI, out);
        goto parsearith_return;
 }
-- 
Email: Herbert Xu <herb...@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line "unsubscribe dash" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to