Package: libc6
Version: 2.33-8
Severity: important

Dear Maintainer,

Consider the following reproducer:
-- >8 --
#include <stdio.h>
#include <wchar.h>
#include <locale.h>

/*
 * Reproducer: feed every possible single byte (0x00..0xFF) to mbrtowc()
 * with a freshly zeroed conversion state and print, per byte, the return
 * value followed by the wide character slot (which keeps its -1 sentinel
 * whenever mbrtowc() stores nothing).
 *
 * When invoked with at least one argument, the locale is first selected
 * from the environment and the resulting locale name is written to stderr.
 *
 * Returns 0.
 */
int main(int argc, char **argv) {
        (void)argv;  /* only the argument count is used */

        if(argc > 1) {
                /* setlocale() returns NULL on failure; do not pass that to %s */
                const char *loc = setlocale(LC_ALL, "");
                fprintf(stderr, "loc=%s\n", loc ? loc : "(null)");
        }

        for(int i = 0; i <= 0xFF; ++i) {
                char bs[] = {(char)i, 0};
                mbstate_t ctx = {0};
                wchar_t wc = -1;
                size_t ret = mbrtowc(&wc, bs, 1, &ctx);

                /*
                 * mbrtowc() returns size_t and wc is a wchar_t: cast both so
                 * the arguments really match %d / %ld.  The error return
                 * (size_t)-1 is still shown as -1 after the narrowing cast.
                 */
                printf("%02x: %d, ", i, (int)ret);
                printf("%ld\n", (long)wc);
        }

        return 0;
}
-- >8 --

Yielding the following output:
-- >8 --
$ ./b | paste - - - - - - - -
00: 0, 0        01: 1, 1        02: 1, 2        03: 1, 3        04: 1, 4        
05: 1, 5        06: 1, 6        07: 1, 7
08: 1, 8        09: 1, 9        0a: 1, 10       0b: 1, 11       0c: 1, 12       
0d: 1, 13       0e: 1, 14       0f: 1, 15
10: 1, 16       11: 1, 17       12: 1, 18       13: 1, 19       14: 1, 20       
15: 1, 21       16: 1, 22       17: 1, 23
18: 1, 24       19: 1, 25       1a: 1, 26       1b: 1, 27       1c: 1, 28       
1d: 1, 29       1e: 1, 30       1f: 1, 31
20: 1, 32       21: 1, 33       22: 1, 34       23: 1, 35       24: 1, 36       
25: 1, 37       26: 1, 38       27: 1, 39
28: 1, 40       29: 1, 41       2a: 1, 42       2b: 1, 43       2c: 1, 44       
2d: 1, 45       2e: 1, 46       2f: 1, 47
30: 1, 48       31: 1, 49       32: 1, 50       33: 1, 51       34: 1, 52       
35: 1, 53       36: 1, 54       37: 1, 55
38: 1, 56       39: 1, 57       3a: 1, 58       3b: 1, 59       3c: 1, 60       
3d: 1, 61       3e: 1, 62       3f: 1, 63
40: 1, 64       41: 1, 65       42: 1, 66       43: 1, 67       44: 1, 68       
45: 1, 69       46: 1, 70       47: 1, 71
48: 1, 72       49: 1, 73       4a: 1, 74       4b: 1, 75       4c: 1, 76       
4d: 1, 77       4e: 1, 78       4f: 1, 79
50: 1, 80       51: 1, 81       52: 1, 82       53: 1, 83       54: 1, 84       
55: 1, 85       56: 1, 86       57: 1, 87
58: 1, 88       59: 1, 89       5a: 1, 90       5b: 1, 91       5c: 1, 92       
5d: 1, 93       5e: 1, 94       5f: 1, 95
60: 1, 96       61: 1, 97       62: 1, 98       63: 1, 99       64: 1, 100      
65: 1, 101      66: 1, 102      67: 1, 103
68: 1, 104      69: 1, 105      6a: 1, 106      6b: 1, 107      6c: 1, 108      
6d: 1, 109      6e: 1, 110      6f: 1, 111
70: 1, 112      71: 1, 113      72: 1, 114      73: 1, 115      74: 1, 116      
75: 1, 117      76: 1, 118      77: 1, 119
78: 1, 120      79: 1, 121      7a: 1, 122      7b: 1, 123      7c: 1, 124      
7d: 1, 125      7e: 1, 126      7f: 1, 127
80: -1, -1      81: -1, -1      82: -1, -1      83: -1, -1      84: -1, -1      
85: -1, -1      86: -1, -1      87: -1, -1
88: -1, -1      89: -1, -1      8a: -1, -1      8b: -1, -1      8c: -1, -1      
8d: -1, -1      8e: -1, -1      8f: -1, -1
90: -1, -1      91: -1, -1      92: -1, -1      93: -1, -1      94: -1, -1      
95: -1, -1      96: -1, -1      97: -1, -1
98: -1, -1      99: -1, -1      9a: -1, -1      9b: -1, -1      9c: -1, -1      
9d: -1, -1      9e: -1, -1      9f: -1, -1
a0: -1, -1      a1: -1, -1      a2: -1, -1      a3: -1, -1      a4: -1, -1      
a5: -1, -1      a6: -1, -1      a7: -1, -1
a8: -1, -1      a9: -1, -1      aa: -1, -1      ab: -1, -1      ac: -1, -1      
ad: -1, -1      ae: -1, -1      af: -1, -1
b0: -1, -1      b1: -1, -1      b2: -1, -1      b3: -1, -1      b4: -1, -1      
b5: -1, -1      b6: -1, -1      b7: -1, -1
b8: -1, -1      b9: -1, -1      ba: -1, -1      bb: -1, -1      bc: -1, -1      
bd: -1, -1      be: -1, -1      bf: -1, -1
c0: -1, -1      c1: -1, -1      c2: -1, -1      c3: -1, -1      c4: -1, -1      
c5: -1, -1      c6: -1, -1      c7: -1, -1
c8: -1, -1      c9: -1, -1      ca: -1, -1      cb: -1, -1      cc: -1, -1      
cd: -1, -1      ce: -1, -1      cf: -1, -1
d0: -1, -1      d1: -1, -1      d2: -1, -1      d3: -1, -1      d4: -1, -1      
d5: -1, -1      d6: -1, -1      d7: -1, -1
d8: -1, -1      d9: -1, -1      da: -1, -1      db: -1, -1      dc: -1, -1      
dd: -1, -1      de: -1, -1      df: -1, -1
e0: -1, -1      e1: -1, -1      e2: -1, -1      e3: -1, -1      e4: -1, -1      
e5: -1, -1      e6: -1, -1      e7: -1, -1
e8: -1, -1      e9: -1, -1      ea: -1, -1      eb: -1, -1      ec: -1, -1      
ed: -1, -1      ee: -1, -1      ef: -1, -1
f0: -1, -1      f1: -1, -1      f2: -1, -1      f3: -1, -1      f4: -1, -1      
f5: -1, -1      f6: -1, -1      f7: -1, -1
f8: -1, -1      f9: -1, -1      fa: -1, -1      fb: -1, -1      fc: -1, -1      
fd: -1, -1      fe: -1, -1      ff: -1, -1

$ LC_ALL=POSIX ./b _ | paste - - - - - - - -
loc=C
00: 0, 0        01: 1, 1        02: 1, 2        03: 1, 3        04: 1, 4        
05: 1, 5        06: 1, 6        07: 1, 7
08: 1, 8        09: 1, 9        0a: 1, 10       0b: 1, 11       0c: 1, 12       
0d: 1, 13       0e: 1, 14       0f: 1, 15
10: 1, 16       11: 1, 17       12: 1, 18       13: 1, 19       14: 1, 20       
15: 1, 21       16: 1, 22       17: 1, 23
18: 1, 24       19: 1, 25       1a: 1, 26       1b: 1, 27       1c: 1, 28       
1d: 1, 29       1e: 1, 30       1f: 1, 31
20: 1, 32       21: 1, 33       22: 1, 34       23: 1, 35       24: 1, 36       
25: 1, 37       26: 1, 38       27: 1, 39
28: 1, 40       29: 1, 41       2a: 1, 42       2b: 1, 43       2c: 1, 44       
2d: 1, 45       2e: 1, 46       2f: 1, 47
30: 1, 48       31: 1, 49       32: 1, 50       33: 1, 51       34: 1, 52       
35: 1, 53       36: 1, 54       37: 1, 55
38: 1, 56       39: 1, 57       3a: 1, 58       3b: 1, 59       3c: 1, 60       
3d: 1, 61       3e: 1, 62       3f: 1, 63
40: 1, 64       41: 1, 65       42: 1, 66       43: 1, 67       44: 1, 68       
45: 1, 69       46: 1, 70       47: 1, 71
48: 1, 72       49: 1, 73       4a: 1, 74       4b: 1, 75       4c: 1, 76       
4d: 1, 77       4e: 1, 78       4f: 1, 79
50: 1, 80       51: 1, 81       52: 1, 82       53: 1, 83       54: 1, 84       
55: 1, 85       56: 1, 86       57: 1, 87
58: 1, 88       59: 1, 89       5a: 1, 90       5b: 1, 91       5c: 1, 92       
5d: 1, 93       5e: 1, 94       5f: 1, 95
60: 1, 96       61: 1, 97       62: 1, 98       63: 1, 99       64: 1, 100      
65: 1, 101      66: 1, 102      67: 1, 103
68: 1, 104      69: 1, 105      6a: 1, 106      6b: 1, 107      6c: 1, 108      
6d: 1, 109      6e: 1, 110      6f: 1, 111
70: 1, 112      71: 1, 113      72: 1, 114      73: 1, 115      74: 1, 116      
75: 1, 117      76: 1, 118      77: 1, 119
78: 1, 120      79: 1, 121      7a: 1, 122      7b: 1, 123      7c: 1, 124      
7d: 1, 125      7e: 1, 126      7f: 1, 127
80: -1, -1      81: -1, -1      82: -1, -1      83: -1, -1      84: -1, -1      
85: -1, -1      86: -1, -1      87: -1, -1
88: -1, -1      89: -1, -1      8a: -1, -1      8b: -1, -1      8c: -1, -1      
8d: -1, -1      8e: -1, -1      8f: -1, -1
90: -1, -1      91: -1, -1      92: -1, -1      93: -1, -1      94: -1, -1      
95: -1, -1      96: -1, -1      97: -1, -1
98: -1, -1      99: -1, -1      9a: -1, -1      9b: -1, -1      9c: -1, -1      
9d: -1, -1      9e: -1, -1      9f: -1, -1
a0: -1, -1      a1: -1, -1      a2: -1, -1      a3: -1, -1      a4: -1, -1      
a5: -1, -1      a6: -1, -1      a7: -1, -1
a8: -1, -1      a9: -1, -1      aa: -1, -1      ab: -1, -1      ac: -1, -1      
ad: -1, -1      ae: -1, -1      af: -1, -1
b0: -1, -1      b1: -1, -1      b2: -1, -1      b3: -1, -1      b4: -1, -1      
b5: -1, -1      b6: -1, -1      b7: -1, -1
b8: -1, -1      b9: -1, -1      ba: -1, -1      bb: -1, -1      bc: -1, -1      
bd: -1, -1      be: -1, -1      bf: -1, -1
c0: -1, -1      c1: -1, -1      c2: -1, -1      c3: -1, -1      c4: -1, -1      
c5: -1, -1      c6: -1, -1      c7: -1, -1
c8: -1, -1      c9: -1, -1      ca: -1, -1      cb: -1, -1      cc: -1, -1      
cd: -1, -1      ce: -1, -1      cf: -1, -1
d0: -1, -1      d1: -1, -1      d2: -1, -1      d3: -1, -1      d4: -1, -1      
d5: -1, -1      d6: -1, -1      d7: -1, -1
d8: -1, -1      d9: -1, -1      da: -1, -1      db: -1, -1      dc: -1, -1      
dd: -1, -1      de: -1, -1      df: -1, -1
e0: -1, -1      e1: -1, -1      e2: -1, -1      e3: -1, -1      e4: -1, -1      
e5: -1, -1      e6: -1, -1      e7: -1, -1
e8: -1, -1      e9: -1, -1      ea: -1, -1      eb: -1, -1      ec: -1, -1      
ed: -1, -1      ee: -1, -1      ef: -1, -1
f0: -1, -1      f1: -1, -1      f2: -1, -1      f3: -1, -1      f4: -1, -1      
f5: -1, -1      f6: -1, -1      f7: -1, -1
f8: -1, -1      f9: -1, -1      fa: -1, -1      fb: -1, -1      fc: -1, -1      
fd: -1, -1      fe: -1, -1      ff: -1, -1
-- >8 --

This breaks all programs that expect to process text/data portably,
since in LC_ALL=C every byte from 0x80 to 0xFF is rejected as invalid,
so half of all byte values collapse into one indistinguishable "character"
(for sort this means that they all collate equally, &c., &c.)!

Consider a diff of XBD 6.2 ("Character Encoding"), Issue 7 vs Issue 7 TC2:
-- >8 --
@@ -1768,9 +1664,13 @@

 <h3><a name="tag_06_02">   6.2 </a>Character Encoding</h3>

-<p>The POSIX locale contains the characters in <a href="#tagtcjh_3">Portable Character Set</a> , which have the properties listed
-in <a href="../basedefs/V1_chap07.html#tag_07_03_01"><i>LC_CTYPE</i></a> . In other locales, the presence, meaning, and
-representation of any additional characters are locale-specific.</p>
+<p>The POSIX locale shall contain 256 single-byte characters including the characters in <a href="#tagtcjh_3">Portable Character
+Set</a> and <a href="#tagtcjh_4">Non-Portable Control Characters</a>, which have the properties listed in <a href=
+"../basedefs/V1_chap07.html#tag_07_03_01"><i>LC_CTYPE</i></a>. It is unspecified whether characters not listed in those two tables
+are classified as <b>punct</b> or <b>cntrl</b>, or neither. Other locales shall contain the characters in <a href=
+"#tagtcjh_3">Portable Character Set</a> and may contain any or all of the control characters identified in <a href=
+"#tagtcjh_4">Non-Portable Control Characters</a>; the presence, meaning, and representation of any additional characters are
+locale-specific.</p>

 <p>In locales other than the POSIX locale, a character may have a state-dependent encoding. There are two types of these
 encodings:</p>
-- >8 --

This text is further backed up by the global changes that later originated
from bug 674, for example:
  > An invalid character sequence is detected. In the POSIX locale an EILSEQ error cannot occur since all byte values are valid characters.[/CX]
  > In the POSIX locale each byte is a valid single-byte character, and therefore this problem is avoided.
&c.
This text is unchanged in Issue 8 Draft 2.1.

Agonised,
наб

-- System Information:
Debian Release: bookworm/sid
  APT prefers unstable
  APT policy: (500, 'unstable')
Architecture: x32 (x86_64)
Foreign Architectures: amd64, i386

Kernel: Linux 5.18.0-3-amd64 (SMP w/2 CPU threads; PREEMPT)
Kernel taint flags: TAINT_PROPRIETARY_MODULE, TAINT_OOT_MODULE, 
TAINT_UNSIGNED_MODULE
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE not set
Shell: /bin/sh linked to /usr/bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

Versions of packages libc6 depends on:
ii  libgcc-s1  12.1.0-2

Versions of packages libc6 recommends:
ii  libidn2-0  2.3.3-1

Versions of packages libc6 suggests:
ii  debconf [debconf-2.0]  1.5.79
pn  glibc-doc              <none>
ii  libc-l10n              2.33-8
ii  libnss-nis             3.1-4
ii  libnss-nisplus         1.3-4
ii  locales                2.33-8

-- debconf information excluded

Attachment: signature.asc
Description: PGP signature

Reply via email to