On 11/17/22 6:05 AM, Koichi Murase wrote:
Thank you for the patch. I applied it locally and tried it. I attach
a test script that I used: [bracket-slash.sh]. Now I switched to a
loadable builtin that directly calls strmatch instead of hacking
GLOBIGNORE. Here are the results:
---Tests for a slash in bracket expressions---
#1: pat=ab[/]ef str=ab[/]ef yes/yes
#2: pat=ab[/]ef str=ab/ef no/no
#3: pat=ab[c/d]ef str=ab[c/d]ef yes/yes
#4: pat=ab[c/d]ef str=abcef yes/no
#5: pat=ab[.-/]ef str=ab[.-/]ef no/yes
#6: pat=ab[.-/]ef str=ab.ef yes/no
#7: pat=ab[[=/=]]ef str=ab[[=/=]]ef yes/yes
#8: pat=ab[[=/=]]ef str=ab/ef no/no
#9: pat=ab[[=c=]/]ef str=ab[=/]ef yes/yes
#10: pat=ab[[=c=]/]ef str=abcef yes/no
#11: pat=ab[[:alpha:]/]ef str=ab[:/]ef yes/yes
#12: pat=ab[[:alpha:]/]ef str=abxef yes/no
#13: pat=ab[/[abc]]ef str=ab[/c]ef yes/yes
#14: pat=ab[/[abc]]ef str=abc]ef no/no
#15: pat=ab[c[=/=]]ef str=ab[c[=/=]]ef yes/yes
#16: pat=ab[c[=/=]]ef str=abc[=/=]ef no/no
#17: pat=ab[c[=/=]]ef str=abcef yes/no
---Tests for incomplete bracket expressions---
#18: pat=ab[c str=ab[c yes/yes
#19: pat=ab[c str=abc no/no
#20: pat=ab[c[=d= str=ab[c[=d= yes/yes
#21: pat=ab[c[=d= str=abc no/no
#22: pat=ab[c[.d str=ab[c[.d yes/yes
#23: pat=ab[c[.d str=abc no/no
#24: pat=ab[c[:alpha: str=ab[c[:alpha: yes/yes
#25: pat=ab[c[:alpha: str=abc no/no
#26: pat=ab[c- str=ab[c- no/yes
#27: pat=ab[c- str=abc no/no
#28: pat=ab[c\ str=ab[c\ no/yes
#29: pat=ab[c\ str=abc no/no
#30: pat=ab[[\ str=ab[[\ no/yes
#31: pat=ab[[\ str=ab[ no/no
"yes" and "no" on the left of / in the fourth column are the results
after applying the patch you provided. "yes" and "no" on the right of
/ are the results that *I* expect (see below paragraphs).
The new treatment seems to only handle a slash that directly appears
as an independent character in bracket expressions (cases
#1,#2,#13,#14), but if I literally read the standard you quoted, I
feel we should also handle other slashes (#3..#6,#9..#12,#15..#17).
Yes, I was undecided at the time whether character classes and equivalence
classes should have the same treatment or fall back to invalid classes. I
think your interpretation there is the right one.
The same is true for ranges. If either the first character or the second
character in the range is a slash it should cause the bracket to be matched
literally.
Incidentally, I made an additional change so that an incomplete range
expression causes the bracket to be matched literally.
Thanks for the discussion. I attached an updated version of the patch I
sent yesterday. I wasn't quite as aggressive as you with macros.
Chet
--
``The lyf so short, the craft so long to lerne.'' - Chaucer
``Ars longa, vita brevis'' - Hippocrates
Chet Ramey, UTech, CWRU c...@case.edu http://tiswww.cwru.edu/~chet/
*** ../bash-5.2-patched/lib/glob/sm_loop.c 2021-08-03 10:24:49.000000000
-0400
--- lib/glob/sm_loop.c 2022-11-17 15:42:42.000000000 -0500
***************
*** 344,347 ****
--- 344,352 ----
return (FNM_NOMATCH);
+ /* If we are matching pathnames, we can't match a slash with a
+ bracket expression. */
+ if (sc == L('/') && (flags & FNM_PATHNAME))
+ return (FNM_NOMATCH);
+
/* `?' cannot match `.' or `..' if it is the first character of the
string or if it is the first character following a slash and
***************
*** 404,407 ****
--- 409,414 ----
}
+ #define SLASH_PATHNAME(c) (c == L('/') && (flags & FNM_PATHNAME))
+
/* Use prototype definition here because of type promotion. */
static CHAR *
***************
*** 452,455 ****
--- 459,468 ----
pc = FOLD (p[1]);
p += 4;
+
+ /* Finding a slash in a bracket expression means you have to
+ match the bracket as an ordinary character (see below). */
+ if (pc == L('/') && (flags & FNM_PATHNAME))
+ return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
+
if (COLLEQUIV (test, pc))
{
***************
*** 464,467 ****
--- 477,484 ----
if (c == L('\0'))
return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
+ else if (c == L('/') && (flags & FNM_PATHNAME))
+ return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
+ else if (c == L(']'))
+ break;
c = FOLD (c);
continue;
***************
*** 476,484 ****
pc = 0; /* make sure invalid char classes don't match. */
/* Find end of character class name */
! for (close = p + 1; *close != '\0'; close++)
if (*close == L(':') && *(close+1) == L(']'))
break;
! if (*close != L('\0'))
{
ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
--- 493,501 ----
pc = 0; /* make sure invalid char classes don't match. */
/* Find end of character class name */
! for (close = p + 1; *close != '\0' && SLASH_PATHNAME(*close) == 0;
close++)
if (*close == L(':') && *(close+1) == L(']'))
break;
! if (*close != L('\0') && SLASH_PATHNAME(*close) == 0)
{
ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
***************
*** 527,530 ****
--- 544,549 ----
if (c == L('\0'))
return ((test == L('[')) ? savep : (CHAR *)0);
+ else if (c == L('/') && (flags & FNM_PATHNAME))
+ return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
else if (c == L(']'))
break;
***************
*** 566,569 ****
--- 585,598 ----
return ((test == L('[')) ? savep : (CHAR *)0);
+ /* POSIX.2 2.13.3 says: `If a <slash> character is found following an
+ unescaped <left-square-bracket> character before a corresponding
+ <right-square-bracket> is found, the open bracket shall be treated
+ as an ordinary character.' If we find a slash in a bracket
+ expression and the flags indicate we're supposed to be treating the
+ string like a pathname, we have to treat the `[' as just a character
+ to be matched. */
+ if (c == L('/') && (flags & FNM_PATHNAME))
+ return ((test == L('[')) ? savep : (CHAR *)0);
+
c = *p++;
c = FOLD (c);
***************
*** 571,578 ****
if (c == L('\0'))
return ((test == L('[')) ? savep : (CHAR *)0);
!
! if ((flags & FNM_PATHNAME) && c == L('/'))
! /* [/] can never match when matching a pathname. */
! return (CHAR *)0;
/* This introduces a range, unless the `-' is the last
--- 600,605 ----
if (c == L('\0'))
return ((test == L('[')) ? savep : (CHAR *)0);
! else if (c == L('/') && (flags & FNM_PATHNAME))
! return ((test == L('[')) ? savep : (CHAR *)0);
/* This introduces a range, unless the `-' is the last
***************
*** 585,589 ****
cend = *p++;
if (cend == L('\0'))
! return (CHAR *)0;
if (cend == L('[') && *p == L('.'))
{
--- 612,618 ----
cend = *p++;
if (cend == L('\0'))
! return ((test == L('[')) ? savep : (CHAR *)0);
! else if (cend == L('/') && (flags & FNM_PATHNAME))
! return ((test == L('[')) ? savep : (CHAR *)0);
if (cend == L('[') && *p == L('.'))
{
***************
*** 637,640 ****
--- 666,671 ----
if (c == L('\0'))
return ((test == L('[')) ? savep : (CHAR *)0);
+ else if (c == L('/') && (flags & FNM_PATHNAME))
+ return ((test == L('[')) ? savep : (CHAR *)0);
oc = c;
***************
*** 644,648 ****
brcnt++;
brchrp = p++; /* skip over the char after the left bracket */
! if ((c = *p) == L('\0'))
return ((test == L('[')) ? savep : (CHAR *)0);
/* If *brchrp == ':' we should check that the rest of the characters
--- 675,682 ----
brcnt++;
brchrp = p++; /* skip over the char after the left bracket */
! c = *p;
! if (c == L('\0'))
! return ((test == L('[')) ? savep : (CHAR *)0);
! else if (c == L('/') && (flags & FNM_PATHNAME))
return ((test == L('[')) ? savep : (CHAR *)0);
/* If *brchrp == ':' we should check that the rest of the characters
***************
*** 667,670 ****
--- 701,706 ----
if (*p == '\0')
return (CHAR *)0;
+ else if (*p == L('/') && (flags & FNM_PATHNAME))
+ return ((test == L('[')) ? savep : (CHAR *)0);
/* XXX 1003.2d11 is unclear if this is right. */
++p;