                             BASH PATCH REPORT
                             =================

Bash-Release:   5.3
Patch-ID:       bash53-008

Bug-Reported-by:        Grisha Levit <[email protected]>
Bug-Reference-ID:       <[email protected]>
Bug-Reference-URL:      https://lists.gnu.org/archive/html/bug-bash/2025-10/msg00145.html

Bug-Description:

Bash tries to consume entire multibyte characters when looking for backslash
escapes in $'...' strings, and treats too many characters as potentially
beginning a multibyte character in UTF-8 locales. Being more selective about
when to call mbrtowc() can lead to optimized string processing and script
speedups. This patch also handles the unlikely situation of a locale
encoding null wide characters with non-null bytes.

Patch (apply with `patch -p0'):

*** ../bash-5.3-patched/lib/sh/strtrans.c       Fri Oct 13 11:57:46 2023
--- lib/sh/strtrans.c   Mon Oct 27 14:30:35 2025
***************
*** 56,60 ****
    unsigned long v;
    size_t clen;
!   int mb_cur_max;
  #if defined (HANDLE_MULTIBYTE)
    wchar_t wc;
--- 56,60 ----
    unsigned long v;
    size_t clen;
!   size_t mb_cur_max;
  #if defined (HANDLE_MULTIBYTE)
    wchar_t wc;
***************
*** 64,68 ****
      return ((char *)0);
  
!   mb_cur_max = MB_CUR_MAX;
  #if defined (HANDLE_MULTIBYTE)
    temp = 4*len + 4;
--- 64,68 ----
      return ((char *)0);
  
!   mb_cur_max = locale_mb_cur_max;
  #if defined (HANDLE_MULTIBYTE)
    temp = 4*len + 4;
***************
*** 80,87 ****
          clen = 1;
  #if defined (HANDLE_MULTIBYTE)
!         if ((locale_utf8locale && (c & 0x80)) ||
!             (locale_utf8locale == 0 && mb_cur_max > 0 && is_basic (c) == 0))
            {
              clen = mbrtowc (&wc, s - 1, mb_cur_max, 0);
              if (MB_INVALIDCH (clen))
                clen = 1;
--- 80,91 ----
          clen = 1;
  #if defined (HANDLE_MULTIBYTE)
!         /* We read an entire multibyte character at a time if we are in a
!            locale where a backslash can possibly appear as part of a
!            multibyte character. UTF-8 encodings prohibit this. */
!         if (locale_utf8locale == 0 && mb_cur_max > 1 && is_basic (c) == 0)
            {
              clen = mbrtowc (&wc, s - 1, mb_cur_max, 0);
+             if (MB_NULLWCH (clen))
+               break;                  /* it apparently can happen */
              if (MB_INVALIDCH (clen))
                clen = 1;
***************
*** 228,237 ****
    char *r, *ret;
    const char  *s;
-   size_t l, rsize;
    unsigned char c;
    size_t clen;
    int b;
- #if defined (HANDLE_MULTIBYTE)
    wchar_t wc;
  #endif
  
--- 232,241 ----
    char *r, *ret;
    const char  *s;
    unsigned char c;
+ #if defined (HANDLE_MULTIBYTE)
    size_t clen;
    int b;
    wchar_t wc;
+   DECLARE_MBSTATE;
  #endif
  
***************
*** 239,245 ****
      return ((char *)0);
  
!   l = strlen (str);
!   rsize = 4 * l + 4;
!   r = ret = (char *)xmalloc (rsize);
  
    *r++ = '$';
--- 243,247 ----
      return ((char *)0);
  
!   r = ret = (char *)xmalloc (4 * strlen (str) + 4);
  
    *r++ = '$';
***************
*** 248,255 ****
    for (s = str; c = *s; s++)
      {
-       b = 1;          /* 1 == add backslash; 0 == no backslash */
-       l = 1;
-       clen = 1;
- 
        switch (c)
        {
--- 250,253 ----
***************
*** 267,303 ****
        default:
  #if defined (HANDLE_MULTIBYTE)
!         b = is_basic (c);
!         /* XXX - clen comparison to 0 is dicey */
!         if ((b == 0 && ((clen = mbrtowc (&wc, s, MB_CUR_MAX, 0)) < 0 || MB_INVALIDCH (clen) || iswprint (wc) == 0)) ||
!             (b == 1 && ISPRINT (c) == 0))
! #else
!         if (ISPRINT (c) == 0)
! #endif
            {
!             *r++ = '\\';
!             *r++ = TOCHAR ((c >> 6) & 07);
!             *r++ = TOCHAR ((c >> 3) & 07);
!             *r++ = TOCHAR (c & 07);
!             continue;
            }
!         l = 0;
!         break;
!       }
!       if (b == 0 && clen == 0)
!       break;
  
!       if (l)
!       *r++ = '\\';
! 
!       if (clen == 1)
!       *r++ = c;
!       else
!       {
!         for (b = 0; b < (int)clen; b++)
!           *r++ = (unsigned char)s[b];
!         s += clen - 1;        /* -1 because of the increment above */
        }
      }
  
    *r++ = '\'';
    *r = '\0';
--- 265,304 ----
        default:
  #if defined (HANDLE_MULTIBYTE)
!         if ((locale_utf8locale && (c & 0x80)) ||
!             (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0))
            {
!             clen = mbrtowc (&wc, s, locale_mb_cur_max, &state);
!             if (MB_NULLWCH (clen))
!               goto quote_end;
!             if (MB_INVALIDCH (clen))
!               INITIALIZE_MBSTATE;
!             else if (iswprint (wc))
!               {
!                 for (b = 0; b < (int)clen; b++)
!                   *r++ = (unsigned char)s[b];
!                 s += clen - 1;        /* -1 because of the increment above */
!                 continue;
!               }
            }
!         else
! #endif
!           if (ISPRINT (c))
!             {
!               *r++ = c;
!               continue;
!             }
  
!         *r++ = '\\';
!         *r++ = TOCHAR ((c >> 6) & 07);
!         *r++ = TOCHAR ((c >> 3) & 07);
!         *r++ = TOCHAR (c & 07);
!         continue;
        }
+ 
+       *r++ = '\\';
+       *r++ = c;
      }
  
+ quote_end:
    *r++ = '\'';
    *r = '\0';
***************
*** 349,353 ****
      {
  #if defined (HANDLE_MULTIBYTE)
!       if (is_basic (c) == 0)
        return (ansic_wshouldquote (s));
  #endif
--- 350,355 ----
      {
  #if defined (HANDLE_MULTIBYTE)
!       if ((locale_utf8locale && (c & 0x80)) ||
!         (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0))
        return (ansic_wshouldquote (s));
  #endif

*** ../bash-5.3/patchlevel.h    2020-06-22 14:51:03.000000000 -0400
--- patchlevel.h        2020-10-01 11:01:28.000000000 -0400
***************
*** 26,30 ****
     looks for to find the patch level (for the sccs version string). */
  
! #define PATCHLEVEL 7
  
  #endif /* _PATCHLEVEL_H_ */
--- 26,30 ----
     looks for to find the patch level (for the sccs version string). */
  
! #define PATCHLEVEL 8
  
  #endif /* _PATCHLEVEL_H_ */

-- 
``The lyf so short, the craft so long to lerne.'' - Chaucer
                 ``Ars longa, vita brevis'' - Hippocrates
Chet Ramey, UTech, CWRU    [email protected]    http://tiswww.cwru.edu/~chet/

Reply via email to