https://github.com/python/cpython/commit/499d31f52d22bb753fcafb3cbc6c7d712666cc8c commit: 499d31f52d22bb753fcafb3cbc6c7d712666cc8c branch: main author: Serhiy Storchaka <[email protected]> committer: serhiy-storchaka <[email protected]> date: 2026-06-28T15:49:28+03:00 summary:
gh-133031: Support the full Unicode range in curses.textpad.Textbox (GH-152482) Read input with get_wch() and the window back with in_wch(), so combining characters and characters outside the locale encoding now work where curses has wide-character support. edit() passes non-ASCII characters to validate() as strings, keeping ASCII and key codes as integers so existing validators keep working. Co-authored-by: Claude Opus 4.8 <[email protected]> files: A Misc/NEWS.d/next/Library/2026-06-27-12-45-00.gh-issue-133031.Wide00.rst M Doc/library/curses.rst M Doc/whatsnew/3.16.rst M Lib/curses/textpad.py M Lib/test/test_curses.py diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index 8987e82ee5d026..8d069ed8b7d1c4 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -2645,6 +2645,11 @@ You can instantiate a :class:`Textbox` object as follows: upper-left corner of the containing window, with coordinates ``(0, 0)``. The instance's :attr:`stripspaces` flag is initially on. + .. versionchanged:: next + Entering and reading back the full Unicode range, including combining + characters, is now supported when curses is built with wide-character + support. + :class:`Textbox` objects have the following methods: @@ -2659,6 +2664,10 @@ You can instantiate a :class:`Textbox` object as follows: string; whether blanks in the window are included is affected by the :attr:`stripspaces` attribute. + .. versionchanged:: next + *validate* is now called with a non-ASCII character as a string; + other keystrokes are still passed as an integer. + .. method:: do_command(ch) diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst index cbe0df1c8a65ff..23b88c5d279c2d 100644 --- a/Doc/whatsnew/3.16.rst +++ b/Doc/whatsnew/3.16.rst @@ -192,6 +192,11 @@ curses against an ncurses with ``NCURSES_EXT_FUNCS``. (Contributed by Serhiy Storchaka in :gh:`152334`.) 
+* :class:`curses.textpad.Textbox` now supports entering and reading back the + full Unicode range, including combining characters, when curses is built with + wide-character support. + (Contributed by Serhiy Storchaka in :gh:`133031`.) + gzip ---- diff --git a/Lib/curses/textpad.py b/Lib/curses/textpad.py index c58a7174d194cf..70fa2c25f64632 100644 --- a/Lib/curses/textpad.py +++ b/Lib/curses/textpad.py @@ -57,32 +57,13 @@ def _update_max_yx(self): self.maxx = maxx - 1 def _decode(self, ch): - # The text of a chtype cell or input byte, decoded with the window's - # encoding. A_CHARTEXT keeps the character byte, dropping the attributes. + # Decode an integer keystroke or byte to text with the window's encoding. + # A_CHARTEXT drops any attribute bits. return bytes([ch & curses.A_CHARTEXT]).decode(self.win.encoding, 'replace') - def _char_at(self, *yx): - # The text of the cell at the given position (default: the cursor). - # instr() re-encodes it to the window's encoding; inch() cannot - # represent a non-ASCII 8-bit-locale character on a wide build. - return self.win.instr(*yx, 1).decode(self.win.encoding, 'replace') - - def _cell_at(self, *yx): - # The cell at the given position (default: the cursor) as a chtype - # addch() can write back with its rendition. inch() mangles a non-ASCII - # character on a wide build, so take the byte from instr() and the - # attributes from inch(). - return self.win.instr(*yx, 1)[0] | self.win.inch(*yx) & curses.A_ATTRIBUTES - - def _isprint(self, cell): - # Whether a chtype cell holds a printable character; _decode() drops the - # attribute bits. - return self._decode(cell).isprintable() - def _printable_key(self, ch): - # Whether the integer keystroke is a printable character, not a key - # code. 0..255 are character bytes (decoded with the window's encoding); - # larger values are function and navigation keys. 
+ # Whether the integer keystroke is a printable character, not a key code: + # 0..255 are character bytes, larger values are function keys. return ch <= 0xff and self._decode(ch).isprintable() def _end_of_line(self, y): @@ -91,7 +72,8 @@ def _end_of_line(self, y): self._update_max_yx() last = self.maxx while True: - if self._char_at(y, last) != ' ': + # The text of the cell at (y, last). + if str(self.win.in_wch(y, last)) != ' ': last = min(self.maxx, last+1) break elif last == 0: @@ -105,16 +87,22 @@ def _insert_printable_char(self, ch): backyx = None while True: if self.insert_mode: - oldch = self._cell_at() + # The displaced cell, as a complexchar so addch() can rewrite it + # with its rendition. + oldch = self.win.in_wch() if y >= self.maxy and x >= self.maxx: - # Use insch() in the lower-right cell: addch() there would move - # the cursor out of the window, raising an error and scrolling - # a scrollable window. Pass it as text: insch() does not decode - # an int byte through the locale on a wide build. - self.win.insch(self._decode(ch), ch & curses.A_ATTRIBUTES) + # Use insch() in the lower-right cell; addch() there would push + # the cursor out of the window (an error, and it scrolls a + # scrollable window). insch() does not decode an int byte + # through the locale on a wide build, so pass it as text. + if isinstance(ch, int): + self.win.insch(self._decode(ch), ch & curses.A_ATTRIBUTES) + else: + self.win.insch(ch) break self.win.addch(ch) - if not self.insert_mode or not self._isprint(oldch): + # In insert mode keep shifting cells right until a blank one. + if not self.insert_mode or not str(oldch).isprintable(): break ch = oldch (y, x) = self.win.getyx() @@ -130,9 +118,17 @@ def do_command(self, ch): self._update_max_yx() (y, x) = self.win.getyx() self.lastcmd = ch - if self._printable_key(ch): + if isinstance(ch, str): + # A character from get_wch(); a control character is dispatched + # below by its code point. 
+ if ch.isprintable(): + self._insert_printable_char(ch) + return 1 + ch = ord(ch) + elif self._printable_key(ch): self._insert_printable_char(ch) - elif ch == curses.ascii.SOH: # ^a + return 1 + if ch == curses.ascii.SOH: # ^a self.win.move(y, 0) elif ch in (curses.ascii.STX,curses.KEY_LEFT, curses.ascii.BS, @@ -204,7 +200,7 @@ def gather(self): for x in range(self.maxx+1): if self.stripspaces and x > stop: break - result = result + self._char_at(y, x) + result = result + str(self.win.in_wch(y, x)) if self.maxy > 0: result = result + "\n" return result @@ -212,7 +208,12 @@ def gather(self): def edit(self, validate=None): "Edit in the widget window and collect the results." while 1: - ch = self.win.getch() + ch = self.win.get_wch() + # Represent an ASCII keystroke by its code point, the way getch() + # always has, so that existing validators and the command dispatch + # keep working; only non-ASCII characters are passed as strings. + if isinstance(ch, str) and ch.isascii(): + ch = ord(ch) if validate: ch = validate(ch) if not ch: diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py index 079e69a52c1504..20b1441d98584c 100644 --- a/Lib/test/test_curses.py +++ b/Lib/test/test_curses.py @@ -2247,9 +2247,9 @@ def test_textbox_fill_last_cell_scrollok(self): self.assertEqual(box.gather(), 'abc\ndef\n') def test_textbox_8bit(self): - # A character of an 8-bit locale encoding is entered and read back - # through the byte API. The byte path also runs on a wide build, so the - # test is not skipped there. Run the suite under an 8-bit locale + # An 8-bit-locale character is entered as integer bytes -- the way + # do_command() receives getch() input -- and read back; runs on both + # builds. Run the suite under an 8-bit locale # (ISO-8859-1, ISO-8859-15 or KOI8-U) to reach the non-ASCII cases; each # string is used only if the encoding maps it to single bytes. 
'abc' is # ASCII, 'café' is common to the Latin encodings, and the rest are @@ -2270,9 +2270,8 @@ def test_textbox_8bit(self): def test_textbox_8bit_insert(self): # Insert mode shifts the rest of the line right by reading each cell back - # and rewriting it; a non-ASCII 8-bit-locale character must survive the - # shift, even on a wide build where inch() mangles it. See - # test_textbox_8bit for the character choices. + # and rewriting it; an 8-bit-locale character entered as bytes must + # survive the shift. See test_textbox_8bit for the character choices. encoding = self.stdscr.encoding for ch in ['é', '¤', '€', 'є']: try: @@ -2290,8 +2289,8 @@ def test_textbox_8bit_insert(self): self.assertEqual(box.gather(), 'ab' + ch + 'c ') def test_textbox_8bit_fill_last_cell(self): - # A non-ASCII 8-bit-locale character must survive being written to the - # lower-right cell, which uses insch() rather than addch(). See + # An 8-bit-locale character entered as bytes must survive being written + # to the lower-right cell, which uses insch() rather than addch(). See # test_textbox_8bit for the character choices. encoding = self.stdscr.encoding for ch in ['é', '¤', '€', 'є']: @@ -2308,6 +2307,53 @@ def test_textbox_8bit_fill_last_cell(self): box.do_command(byte) self.assertEqual(box.gather(), text) + def test_textbox_unicode(self): + # Like test_textbox_8bit, but characters are entered as strings -- the + # way do_command() receives get_wch() input -- rather than integer + # bytes. Each string is used only if encodable in the current locale. + for text in ['abc', 'héšλ', 'café', 'naïve ¤', 'soupçon €Š', 'дякую єі']: + if self._encodable(text): + with self.subTest(text=text): + box, win = self._make_textbox(1, 12) + for ch in text: + box.do_command(ch) + self.assertEqual(box.gather(), text + ' ') + + def test_textbox_unicode_insert_mode(self): + # Like test_textbox_8bit_insert, but the character is entered as a string + # (get_wch() input). Each string is used only if encodable. 
+ for text in ['abcd', 'aβλc', 'aéàc', 'a¤½c', 'a€Šc', 'aдві']: + if self._encodable(text): + with self.subTest(text=text): + box, win = self._make_textbox(1, 10, insert_mode=True) + for ch in text[0] + text[2:]: # all but the 2nd character + box.do_command(ch) + win.move(0, 1) + box.do_command(text[1]) # insert it at position 1 + self.assertEqual(box.gather(), text + ' ') + + @requires_wide_build + def test_textbox_combining(self): + # A spacing character plus a combining mark is a single cell, which + # needs the wide build (a narrow build stores one byte per cell). + text = 'e\u0301' # 'e' + COMBINING ACUTE ACCENT + if self._encodable(text): + box, win = self._make_textbox(1, 10) + for ch in text: + box.do_command(ch) + self.assertEqual(box.gather(), text + ' ') + + def test_textbox_edit_wide(self): + # edit() reads characters through get_wch(). Each is used only if + # encodable in the current locale. + for ch in ['A', 'é', '¤', '€', 'д']: + if self._encodable(ch): + with self.subTest(ch=ch): + box, win = self._make_textbox(1, 10) + for c in reversed(['a', ch, chr(curses.ascii.BEL)]): + curses.unget_wch(c) + self.assertEqual(box.edit(), 'a' + ch + ' ') + def test_textbox_movement(self): box, win = self._make_textbox(3, 10) self._type(box, 'abc') diff --git a/Misc/NEWS.d/next/Library/2026-06-27-12-45-00.gh-issue-133031.Wide00.rst b/Misc/NEWS.d/next/Library/2026-06-27-12-45-00.gh-issue-133031.Wide00.rst new file mode 100644 index 00000000000000..c33365910ae289 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-27-12-45-00.gh-issue-133031.Wide00.rst @@ -0,0 +1,3 @@ +:class:`curses.textpad.Textbox` now supports entering and reading back the full +Unicode range, including combining characters, when curses is built with +wide-character support. 
_______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3/lists/python-checkins.python.org Member address: [email protected]
