On 11/17/22 6:05 AM, Koichi Murase wrote:

Thank you for the patch.  I applied it locally and tried it.  I attach
a test script that I used: [bracket-slash.sh].  Now I switched to a
loadable builtin that directly calls strmatch instead of hacking
GLOBIGNORE.  Here are the results:

   ---Tests for a slash in bracket expressions---
   #1: pat=ab[/]ef          str=ab[/]ef          yes/yes
   #2: pat=ab[/]ef          str=ab/ef            no/no
   #3: pat=ab[c/d]ef        str=ab[c/d]ef        yes/yes
   #4: pat=ab[c/d]ef        str=abcef            yes/no
   #5: pat=ab[.-/]ef        str=ab[.-/]ef        no/yes
   #6: pat=ab[.-/]ef        str=ab.ef            yes/no
   #7: pat=ab[[=/=]]ef      str=ab[[=/=]]ef      yes/yes
   #8: pat=ab[[=/=]]ef      str=ab/ef            no/no
   #9: pat=ab[[=c=]/]ef     str=ab[=/]ef         yes/yes
   #10: pat=ab[[=c=]/]ef     str=abcef            yes/no
   #11: pat=ab[[:alpha:]/]ef str=ab[:/]ef         yes/yes
   #12: pat=ab[[:alpha:]/]ef str=abxef            yes/no
   #13: pat=ab[/[abc]]ef     str=ab[/c]ef         yes/yes
   #14: pat=ab[/[abc]]ef     str=abc]ef           no/no
   #15: pat=ab[c[=/=]]ef     str=ab[c[=/=]]ef     yes/yes
   #16: pat=ab[c[=/=]]ef     str=abc[=/=]ef       no/no
   #17: pat=ab[c[=/=]]ef     str=abcef            yes/no
   ---Tests for incomplete bracket expressions---
   #18: pat=ab[c             str=ab[c             yes/yes
   #19: pat=ab[c             str=abc              no/no
   #20: pat=ab[c[=d=         str=ab[c[=d=         yes/yes
   #21: pat=ab[c[=d=         str=abc              no/no
   #22: pat=ab[c[.d          str=ab[c[.d          yes/yes
   #23: pat=ab[c[.d          str=abc              no/no
   #24: pat=ab[c[:alpha:     str=ab[c[:alpha:     yes/yes
   #25: pat=ab[c[:alpha:     str=abc              no/no
   #26: pat=ab[c-            str=ab[c-            no/yes
   #27: pat=ab[c-            str=abc              no/no
   #28: pat=ab[c\            str=ab[c\            no/yes
   #29: pat=ab[c\            str=abc              no/no
   #30: pat=ab[[\            str=ab[[\            no/yes
   #31: pat=ab[[\            str=ab[              no/no

"yes" and "no" on the left of / in the fourth column are the results
after applying the patch you provided. "yes" and "no" on the right of
/ are the results that *I* expect (see below paragraphs).

The new treatment seems to only handle a slash that directly appears
as an independent character in bracket expressions (cases
#1,#2,#13,#14), but if I literally read the standard you quoted, I
feel we should also handle other slashes (#3..#6,#9..#12,#15..#17).

Yes, I was undecided at the time whether character classes and equivalence
classes should have the same treatment or fall back to invalid classes. I
think your interpretation there is the right one.

The same is true for ranges. If either the first character or the second
character in the range is a slash it should cause the bracket to be matched
literally.

Incidentally, I made an additional change so that an incomplete range
expression causes the bracket to be matched literally.

Thanks for the discussion. I attached an updated version of the patch I
sent yesterday. I wasn't quite as aggressive as you with macros.

Chet

--
``The lyf so short, the craft so long to lerne.'' - Chaucer
                 ``Ars longa, vita brevis'' - Hippocrates
Chet Ramey, UTech, CWRU    c...@case.edu    http://tiswww.cwru.edu/~chet/
*** ../bash-5.2-patched/lib/glob/sm_loop.c      2021-08-03 10:24:49.000000000 
-0400
--- lib/glob/sm_loop.c  2022-11-17 15:42:42.000000000 -0500
***************
*** 344,347 ****
--- 344,352 ----
              return (FNM_NOMATCH);
  
+           /* If we are matching pathnames, we can't match a slash with a
+              bracket expression. */
+           if (sc == L('/') && (flags & FNM_PATHNAME))
+             return (FNM_NOMATCH);
+ 
            /* `?' cannot match `.' or `..' if it is the first character of the
               string or if it is the first character following a slash and
***************
*** 404,407 ****
--- 409,414 ----
  }
  
+ #define SLASH_PATHNAME(c)     (c == L('/') && (flags & FNM_PATHNAME))
+ 
  /* Use prototype definition here because of type promotion. */
  static CHAR *
***************
*** 452,455 ****
--- 459,468 ----
          pc = FOLD (p[1]);
          p += 4;
+ 
+         /* Finding a slash in a bracket expression means you have to
+            match the bracket as an ordinary character (see below). */
+         if (pc == L('/') && (flags & FNM_PATHNAME))
+           return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
+ 
          if (COLLEQUIV (test, pc))
            {
***************
*** 464,467 ****
--- 477,484 ----
              if (c == L('\0'))
                return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
+             else if (c == L('/') && (flags & FNM_PATHNAME))
+               return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
+             else if (c == L(']'))
+               break;
              c = FOLD (c);
              continue;
***************
*** 476,484 ****
          pc = 0;       /* make sure invalid char classes don't match. */
          /* Find end of character class name */
!         for (close = p + 1; *close != '\0'; close++)
            if (*close == L(':') && *(close+1) == L(']'))
              break;
  
!         if (*close != L('\0'))
            {
              ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
--- 493,501 ----
          pc = 0;       /* make sure invalid char classes don't match. */
          /* Find end of character class name */
!         for (close = p + 1; *close != '\0' && SLASH_PATHNAME(*close) == 0; 
close++)
            if (*close == L(':') && *(close+1) == L(']'))
              break;
  
!         if (*close != L('\0') && SLASH_PATHNAME(*close) == 0)
            {
              ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
***************
*** 527,530 ****
--- 544,549 ----
              if (c == L('\0'))
                return ((test == L('[')) ? savep : (CHAR *)0);
+             else if (c == L('/') && (flags & FNM_PATHNAME))
+               return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
              else if (c == L(']'))
                break;
***************
*** 566,569 ****
--- 585,598 ----
        return ((test == L('[')) ? savep : (CHAR *)0);
  
+       /* POSIX.2 2.13.3 says: `If a <slash> character is found following an
+          unescaped <left-square-bracket> character before a corresponding
+          <right-square-bracket> is found, the open bracket shall be treated
+          as an ordinary character.' If we find a slash in a bracket
+          expression and the flags indicate we're supposed to be treating the
+          string like a pathname, we have to treat the `[' as just a character
+          to be matched. */
+       if (c == L('/') && (flags & FNM_PATHNAME))
+       return ((test == L('[')) ? savep : (CHAR *)0);
+ 
        c = *p++;
        c = FOLD (c);
***************
*** 571,578 ****
        if (c == L('\0'))
        return ((test == L('[')) ? savep : (CHAR *)0);
! 
!       if ((flags & FNM_PATHNAME) && c == L('/'))
!       /* [/] can never match when matching a pathname.  */
!       return (CHAR *)0;
  
        /* This introduces a range, unless the `-' is the last
--- 600,605 ----
        if (c == L('\0'))
        return ((test == L('[')) ? savep : (CHAR *)0);
!       else if (c == L('/') && (flags & FNM_PATHNAME))
!       return ((test == L('[')) ? savep : (CHAR *)0);
  
        /* This introduces a range, unless the `-' is the last
***************
*** 585,589 ****
            cend = *p++;
          if (cend == L('\0'))
!           return (CHAR *)0;
          if (cend == L('[') && *p == L('.'))
            {
--- 612,618 ----
            cend = *p++;
          if (cend == L('\0'))
!           return ((test == L('[')) ? savep : (CHAR *)0);
!         else if (cend == L('/') && (flags & FNM_PATHNAME))
!           return ((test == L('[')) ? savep : (CHAR *)0);
          if (cend == L('[') && *p == L('.'))
            {
***************
*** 637,640 ****
--- 666,671 ----
        if (c == L('\0'))
        return ((test == L('[')) ? savep : (CHAR *)0);
+       else if (c == L('/') && (flags & FNM_PATHNAME))
+       return ((test == L('[')) ? savep : (CHAR *)0);
  
        oc = c;
***************
*** 644,648 ****
          brcnt++;
          brchrp = p++;         /* skip over the char after the left bracket */
!         if ((c = *p) == L('\0'))
            return ((test == L('[')) ? savep : (CHAR *)0);
          /* If *brchrp == ':' we should check that the rest of the characters
--- 675,682 ----
          brcnt++;
          brchrp = p++;         /* skip over the char after the left bracket */
!         c = *p;
!         if (c == L('\0'))
!           return ((test == L('[')) ? savep : (CHAR *)0);
!         else if (c == L('/') && (flags & FNM_PATHNAME))
            return ((test == L('[')) ? savep : (CHAR *)0);
          /* If *brchrp == ':' we should check that the rest of the characters
***************
*** 667,670 ****
--- 701,706 ----
          if (*p == '\0')
            return (CHAR *)0;
+         else if (*p == L('/') && (flags & FNM_PATHNAME))
+           return ((test == L('[')) ? savep : (CHAR *)0);
          /* XXX 1003.2d11 is unclear if this is right. */
          ++p;

Reply via email to