https://github.com/python/cpython/commit/65b74228283d35771cfad41084f33631268615df commit: 65b74228283d35771cfad41084f33631268615df branch: 3.13 author: Miss Islington (bot) <[email protected]> committer: serhiy-storchaka <[email protected]> date: 2026-06-29T14:40:10+03:00 summary:
[3.13] gh-133031: Support non-ASCII characters in curses.textpad.Textbox (GH-152451) (GH-152469) Textbox mangled non-ASCII characters of an 8-bit locale encoding: it masked reads with curses.ascii.ascii(), which clears the 8th bit, and tested input with curses.ascii.isprint(), which rejects bytes above 127. Decode cells and input bytes with the window's encoding instead. Cells are read with instr() and the lower-right cell is written as text, since inch() and insch() with an int byte mishandle a non-ASCII character on a wide build. This uses only the byte-oriented curses API, so it works without wide-character support. (cherry picked from commit c253f0c14b046cf79881d0602620a3b2a15c2316) Co-authored-by: Serhiy Storchaka <[email protected]> Co-authored-by: Claude Opus 4.8 <[email protected]> files: A Misc/NEWS.d/next/Library/2026-06-27-12-30-00.gh-issue-133031.Na8Bit.rst M Lib/curses/textpad.py M Lib/test/test_curses.py diff --git a/Lib/curses/textpad.py b/Lib/curses/textpad.py index 57b2f4a523c95bc..c58a7174d194cf3 100644 --- a/Lib/curses/textpad.py +++ b/Lib/curses/textpad.py @@ -56,13 +56,42 @@ def _update_max_yx(self): self.maxy = maxy - 1 self.maxx = maxx - 1 + def _decode(self, ch): + # The text of a chtype cell or input byte, decoded with the window's + # encoding. A_CHARTEXT keeps the character byte, dropping the attributes. + return bytes([ch & curses.A_CHARTEXT]).decode(self.win.encoding, 'replace') + + def _char_at(self, *yx): + # The text of the cell at the given position (default: the cursor). + # instr() re-encodes it to the window's encoding; inch() cannot + # represent a non-ASCII 8-bit-locale character on a wide build. + return self.win.instr(*yx, 1).decode(self.win.encoding, 'replace') + + def _cell_at(self, *yx): + # The cell at the given position (default: the cursor) as a chtype + # addch() can write back with its rendition. inch() mangles a non-ASCII + # character on a wide build, so take the byte from instr() and the + # attributes from inch(). + return self.win.instr(*yx, 1)[0] | self.win.inch(*yx) & curses.A_ATTRIBUTES + + def _isprint(self, cell): + # Whether a chtype cell holds a printable character; _decode() drops the + # attribute bits. + return self._decode(cell).isprintable() + + def _printable_key(self, ch): + # Whether the integer keystroke is a printable character, not a key + # code. 0..255 are character bytes (decoded with the window's encoding); + # larger values are function and navigation keys. + return ch <= 0xff and self._decode(ch).isprintable() + def _end_of_line(self, y): """Go to the location of the first blank on the given line, returning the index of the last non-blank character.""" self._update_max_yx() last = self.maxx while True: - if curses.ascii.ascii(self.win.inch(y, last)) != curses.ascii.SP: + if self._char_at(y, last) != ' ': last = min(self.maxx, last+1) break elif last == 0: @@ -76,15 +105,16 @@ def _insert_printable_char(self, ch): backyx = None while True: if self.insert_mode: - oldch = self.win.inch() + oldch = self._cell_at() if y >= self.maxy and x >= self.maxx: # Use insch() in the lower-right cell: addch() there would move # the cursor out of the window, raising an error and scrolling - # a scrollable window. - self.win.insch(ch) + # a scrollable window. Pass it as text: insch() does not decode + # an int byte through the locale on a wide build. + self.win.insch(self._decode(ch), ch & curses.A_ATTRIBUTES) break self.win.addch(ch) - if not self.insert_mode or not curses.ascii.isprint(oldch): + if not self.insert_mode or not self._isprint(oldch): break ch = oldch (y, x) = self.win.getyx() @@ -100,7 +130,7 @@ def do_command(self, ch): self._update_max_yx() (y, x) = self.win.getyx() self.lastcmd = ch - if curses.ascii.isprint(ch): + if self._printable_key(ch): self._insert_printable_char(ch) elif ch == curses.ascii.SOH: # ^a self.win.move(y, 0) @@ -174,7 +204,7 @@ def gather(self): for x in range(self.maxx+1): if self.stripspaces and x > stop: break - result = result + chr(curses.ascii.ascii(self.win.inch(y, x))) + result = result + self._char_at(y, x) if self.maxy > 0: result = result + "\n" return result diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py index 4b2bcc29e7e0759..708c139505ddc89 100644 --- a/Lib/test/test_curses.py +++ b/Lib/test/test_curses.py @@ -1524,6 +1524,68 @@ def test_textbox_fill_last_cell_scrollok(self): self._type(box, 'def') self.assertEqual(box.gather(), 'abc\ndef\n') + def test_textbox_8bit(self): + # A character of an 8-bit locale encoding is entered and read back + # through the byte API. The byte path also runs on a wide build, so the + # test is not skipped there. Run the suite under an 8-bit locale + # (ISO-8859-1, ISO-8859-15 or KOI8-U) to reach the non-ASCII cases; each + # string is used only if the encoding maps it to single bytes. 'abc' is + # ASCII, 'café' is common to the Latin encodings, and the rest are + # distinctive (byte 0xA4 is '¤'/'€'/'є' in ISO-8859-1/-15/KOI8-U). + encoding = self.stdscr.encoding + for text in ['abc', 'café', 'naïve ¤¦', 'café €Šž', 'дякую єі']: + try: + data = text.encode(encoding) + except UnicodeEncodeError: + continue + if len(data) != len(text): + continue # a multibyte encoding is not the 8-bit byte path + with self.subTest(text=text): + box, win = self._make_textbox(1, 16) + for byte in data: + box.do_command(byte) + self.assertEqual(box.gather(), text + ' ') + + def test_textbox_8bit_insert(self): + # Insert mode shifts the rest of the line right by reading each cell back + # and rewriting it; a non-ASCII 8-bit-locale character must survive the + # shift, even on a wide build where inch() mangles it. See + # test_textbox_8bit for the character choices. + encoding = self.stdscr.encoding + for ch in ['é', '¤', '€', 'є']: + try: + data = ch.encode(encoding) + except UnicodeEncodeError: + continue + if len(data) != 1: + continue + with self.subTest(ch=ch): + box, win = self._make_textbox(1, 10, insert_mode=True) + for byte in ('a' + ch + 'c').encode(encoding): + box.do_command(byte) + win.move(0, 1) + box.do_command(ord('b')) # insert 'b', shifting ch and 'c' right + self.assertEqual(box.gather(), 'ab' + ch + 'c ') + + def test_textbox_8bit_fill_last_cell(self): + # A non-ASCII 8-bit-locale character must survive being written to the + # lower-right cell, which uses insch() rather than addch(). See + # test_textbox_8bit for the character choices. + encoding = self.stdscr.encoding + for ch in ['é', '¤', '€', 'є']: + try: + data = ch.encode(encoding) + except UnicodeEncodeError: + continue + if len(data) != 1: + continue + with self.subTest(ch=ch): + text = 'ab' + ch # the last character fills the corner + box, win = self._make_textbox(1, len(text), stripspaces=0) + for byte in text.encode(encoding): + box.do_command(byte) + self.assertEqual(box.gather(), text) + def test_textbox_movement(self): box, win = self._make_textbox(3, 10) self._type(box, 'abc') @@ -1879,6 +1941,11 @@ def setUp(self): self.mock_win = MagicMock(spec=curses.window) self.mock_win.getyx.return_value = (1, 1) self.mock_win.getmaxyx.return_value = (10, 20) + self.mock_win.encoding = 'utf-8' + # A non-blank cell so that _end_of_line() reports a full line: instr() + # backs the text reads, inch() the insert-mode shift. + self.mock_win.instr.return_value = b'x' + self.mock_win.inch.return_value = ord('x') self.textbox = curses.textpad.Textbox(self.mock_win) def test_init(self): diff --git a/Misc/NEWS.d/next/Library/2026-06-27-12-30-00.gh-issue-133031.Na8Bit.rst b/Misc/NEWS.d/next/Library/2026-06-27-12-30-00.gh-issue-133031.Na8Bit.rst new file mode 100644 index 000000000000000..96e9efe20e42f04 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-27-12-30-00.gh-issue-133031.Na8Bit.rst @@ -0,0 +1,3 @@ +:class:`curses.textpad.Textbox` now enters and reads back the non-ASCII +characters of an 8-bit locale encoding, instead of mangling them with a 7-bit +mask. _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
