On 11/20/22 7:50 AM, Koichi Murase wrote:

   This difference is caused because the slash after the backslash is
   only checked after a matching character is found
   (lib/glob/sm_loop.c:703).  The same check should be applied also
   before a matching character is found (lib/glob/sm_loop.c:573).  I
   attach a patch for this [r0037.brackmatch6.remaining-slash.patch].

I agree with this; it was an inadvertent omission.


There is another related inconsistency.  I just modified my new
extglob engine to follow Bash's choice described above, but then the
behavior became different from that of the actual implementation of
Bash of the current devel.

"If a pattern ends with an unescaped <backslash>, it is unspecified whether
the pattern does not match anything or the pattern is treated as invalid."
        [...]
b. If this is the behavior for the unescaped backslashes outside the
   bracket expressions, which is intensionally different from those in
   the bracket expressions, would it be possible to change the
   treatment of the unescaped backslashes inside the bracket
   expression the same as that of outside so the bracket `[' matches
   literally (as expected in cases #28..#31 of my previous reply [1])?
   The attached [r0037.brackmatch7.unescaped-backslash-option-b.patch]
   is the corresponding patch.

I have changed my mind, and I agree with this. I think the latest draft
of the POSIX standard requires it unambiguously:

"A <left-square-bracket> that does not introduce a valid bracket expression
shall match the character itself."

and while it does not say so explicitly (the description defers to the
regular expression description, which describes valid bracket expressions
as having a closing bracket), I think it's reasonable to conclude that an
incomplete bracket expression without a `]' is invalid, this text from the
regular expression description aside:

"An expression containing a '[' that is unescaped and is not part of a
bracket expression produces undefined results."

since the text in "Patterns Matching a Single Character" supersedes it.

I attached the latest patch against bash-5.2.9.

Chet
--
``The lyf so short, the craft so long to lerne.'' - Chaucer
                 ``Ars longa, vita brevis'' - Hippocrates
Chet Ramey, UTech, CWRU    c...@case.edu    http://tiswww.cwru.edu/~chet/
*** ../bash-5.2-patched/lib/glob/sm_loop.c      2021-08-03 10:24:49.000000000 -0400
--- lib/glob/sm_loop.c  2022-11-22 14:51:12.000000000 -0500
***************
*** 344,347 ****
--- 344,352 ----
              return (FNM_NOMATCH);
  
+           /* If we are matching pathnames, we can't match a slash with a
+              bracket expression. */
+           if (sc == L('/') && (flags & FNM_PATHNAME))
+             return (FNM_NOMATCH);
+ 
            /* `?' cannot match `.' or `..' if it is the first character of the
               string or if it is the first character following a slash and
***************
*** 404,407 ****
--- 409,414 ----
  }
  
+ #define SLASH_PATHNAME(c)     (c == L('/') && (flags & FNM_PATHNAME))
+ 
  /* Use prototype definition here because of type promotion. */
  static CHAR *
***************
*** 452,455 ****
--- 459,468 ----
          pc = FOLD (p[1]);
          p += 4;
+ 
+         /* Finding a slash in a bracket expression means you have to
+            match the bracket as an ordinary character (see below). */
+         if (pc == L('/') && (flags & FNM_PATHNAME))
+           return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
+ 
          if (COLLEQUIV (test, pc))
            {
***************
*** 464,467 ****
--- 477,484 ----
              if (c == L('\0'))
                return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
+             else if (c == L('/') && (flags & FNM_PATHNAME))
+               return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
+             else if (c == L(']'))
+               break;
              c = FOLD (c);
              continue;
***************
*** 476,484 ****
          pc = 0;       /* make sure invalid char classes don't match. */
          /* Find end of character class name */
!         for (close = p + 1; *close != '\0'; close++)
            if (*close == L(':') && *(close+1) == L(']'))
              break;
  
!         if (*close != L('\0'))
            {
              ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
--- 493,501 ----
          pc = 0;       /* make sure invalid char classes don't match. */
          /* Find end of character class name */
!         for (close = p + 1; *close != '\0' && SLASH_PATHNAME(*close) == 0; close++)
            if (*close == L(':') && *(close+1) == L(']'))
              break;
  
!         if (*close != L('\0') && SLASH_PATHNAME(*close) == 0)
            {
              ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
***************
*** 527,530 ****
--- 544,549 ----
              if (c == L('\0'))
                return ((test == L('[')) ? savep : (CHAR *)0);
+             else if (c == L('/') && (flags & FNM_PATHNAME))
+               return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
              else if (c == L(']'))
                break;
***************
*** 551,555 ****
        {
          if (*p == '\0')
!           return (CHAR *)0;
          cstart = cend = *p++;
        }
--- 570,576 ----
        {
          if (*p == '\0')
!           return ((test == L('[')) ? savep : (CHAR *)0);
!         else if (*p == L('/') && (flags & FNM_PATHNAME))
!           return ((test == L('[')) ? savep : (CHAR *)0);
          cstart = cend = *p++;
        }
***************
*** 566,569 ****
--- 587,600 ----
        return ((test == L('[')) ? savep : (CHAR *)0);
  
+       /* POSIX.2 2.13.3 says: `If a <slash> character is found following an
+          unescaped <left-square-bracket> character before a corresponding
+          <right-square-bracket> is found, the open bracket shall be treated
+          as an ordinary character.' If we find a slash in a bracket
+          expression and the flags indicate we're supposed to be treating the
+          string like a pathname, we have to treat the `[' as just a character
+          to be matched. */
+       if (c == L('/') && (flags & FNM_PATHNAME))
+       return ((test == L('[')) ? savep : (CHAR *)0);
+ 
        c = *p++;
        c = FOLD (c);
***************
*** 571,578 ****
        if (c == L('\0'))
        return ((test == L('[')) ? savep : (CHAR *)0);
! 
!       if ((flags & FNM_PATHNAME) && c == L('/'))
!       /* [/] can never match when matching a pathname.  */
!       return (CHAR *)0;
  
        /* This introduces a range, unless the `-' is the last
--- 602,607 ----
        if (c == L('\0'))
        return ((test == L('[')) ? savep : (CHAR *)0);
!       else if (c == L('/') && (flags & FNM_PATHNAME))
!       return ((test == L('[')) ? savep : (CHAR *)0);
  
        /* This introduces a range, unless the `-' is the last
***************
*** 585,589 ****
            cend = *p++;
          if (cend == L('\0'))
!           return (CHAR *)0;
          if (cend == L('[') && *p == L('.'))
            {
--- 614,620 ----
            cend = *p++;
          if (cend == L('\0'))
!           return ((test == L('[')) ? savep : (CHAR *)0);
!         else if (cend == L('/') && (flags & FNM_PATHNAME))
!           return ((test == L('[')) ? savep : (CHAR *)0);
          if (cend == L('[') && *p == L('.'))
            {
***************
*** 637,640 ****
--- 668,673 ----
        if (c == L('\0'))
        return ((test == L('[')) ? savep : (CHAR *)0);
+       else if (c == L('/') && (flags & FNM_PATHNAME))
+       return ((test == L('[')) ? savep : (CHAR *)0);
  
        oc = c;
***************
*** 644,648 ****
          brcnt++;
          brchrp = p++;         /* skip over the char after the left bracket */
!         if ((c = *p) == L('\0'))
            return ((test == L('[')) ? savep : (CHAR *)0);
          /* If *brchrp == ':' we should check that the rest of the characters
--- 677,684 ----
          brcnt++;
          brchrp = p++;         /* skip over the char after the left bracket */
!         c = *p;
!         if (c == L('\0'))
!           return ((test == L('[')) ? savep : (CHAR *)0);
!         else if (c == L('/') && (flags & FNM_PATHNAME))
            return ((test == L('[')) ? savep : (CHAR *)0);
          /* If *brchrp == ':' we should check that the rest of the characters
***************
*** 666,671 ****
        {
          if (*p == '\0')
!           return (CHAR *)0;
!         /* XXX 1003.2d11 is unclear if this is right. */
          ++p;
        }
--- 702,710 ----
        {
          if (*p == '\0')
!           return ((test == L('[')) ? savep : (CHAR *)0);
!         /* We don't allow backslash to quote slash if we're matching pathnames */
!         else if (*p == L('/') && (flags & FNM_PATHNAME))
!           return ((test == L('[')) ? savep : (CHAR *)0);
!         /* Posix issue 8 leaves this unspecified for the shell. */
          ++p;
        }

Reply via email to