https://github.com/python/cpython/commit/65d110d987c4cd43a36138d164327aec1d4cfa1d
commit: 65d110d987c4cd43a36138d164327aec1d4cfa1d
branch: 3.13
author: Miss Islington (bot) <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-06-27T21:04:19Z
summary:
[3.13] gh-152415: Exercise curses non-ASCII tests under 8-bit locale encodings (GH-152416) (GH-152453) (GH-152457)
The non-ASCII tests only exercised what the runner's locale could encode (in
practice UTF-8). Add 8-bit-encoding cases to the character and string I/O
tests, each guarded by the existing encodability check: ASCII, a character
common to the Latin encodings ('é'), and ones distinctive to a single encoding
(byte 0xA4 is '¤' in ISO-8859-1, '€' in ISO-8859-15, 'є' in KOI8-U). Run the
whole suite under different locales to cover them; unrepresentable cases skip.
* gh-152415: Verify character output round-trips in test_output_character
Read each written character back with in_wch() or instr() rather than
inch(), which on a wide build returns the low byte of the code point
instead of the locale-encoded byte and so mangles a non-ASCII character
of an 8-bit locale. This lets the int-argument cases cover '€'/'є', and
adds matching coverage for the str argument.
insch() with an int byte > 127 is checked only for Latin-1: on a wide
build ncurses winsch stores a printable byte directly as a code point
instead of decoding it through the locale.
(cherry picked from commit 003d3620cc0f44caca7bf26c3e6964f5f379645f)
(cherry picked from commit a75aa418deeec926a10eef12cf4e8215fd67c947)
Co-authored-by: Serhiy Storchaka <[email protected]>
Co-authored-by: Claude Opus 4.8 <[email protected]>
files:
M Lib/test/test_curses.py
diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py
index 2bd84a8503e09f..4b2bcc29e7e075 100644
--- a/Lib/test/test_curses.py
+++ b/Lib/test/test_curses.py
@@ -255,6 +255,33 @@ def test_refresh_control(self):
self.assertIs(win.is_wintouched(), syncok)
self.assertIs(stdscr.is_wintouched(), syncok)
+ # Many tests below use a common set of non-ASCII cases, each applied only
+ # when the window encoding can represent it -- so the whole suite is meant to
+ # be run under several locales (e.g. ISO-8859-1, ISO-8859-15, KOI8-U):
+ # 'A'/'a' ASCII
+ # 'é' common to the Latin encodings
+ # '¤'/'€'/'є' byte 0xA4 in ISO-8859-1 / ISO-8859-15 / KOI8-U
+ # Precomposed characters are used so a round-trip does not depend on the form.
+
+ def _encodable(self, s):
+ # Wide characters are only supported in a locale that can encode them.
+ try:
+ s.encode(self.stdscr.encoding)
+ except UnicodeEncodeError:
+ return False
+ return True
+
+ def _read_char(self, y, x):
+ # The character written to a cell, read back for output checks. inch()
+ # is unusable here: on a wide build it returns the low 8 bits of the
+ # character's code point rather than its locale-encoded byte, mangling
+ # anything outside Latin-1. in_wch() reads the wide cell directly;
+ # without it, instr() re-encodes the cell to the window encoding.
+ stdscr = self.stdscr
+ if hasattr(stdscr, 'in_wch'):
+ return str(stdscr.in_wch(y, x))
+ return stdscr.instr(y, x, 1).decode(stdscr.encoding)
+
def test_output_character(self):
stdscr = self.stdscr
encoding = stdscr.encoding
@@ -264,32 +291,98 @@ def test_output_character(self):
stdscr.addch('A')
stdscr.addch(b'A')
stdscr.addch(65)
- c = '\u20ac'
- try:
- stdscr.addch(c)
- except UnicodeEncodeError:
- self.assertRaises(UnicodeEncodeError, c.encode, encoding)
- except OverflowError:
- encoded = c.encode(encoding)
- self.assertNotEqual(len(encoded), 1, repr(encoded))
+ # See _encodable for the character set. Each is either written (mapped
+ # to a single byte), or raises UnicodeEncodeError (not in the encoding)
+ # or OverflowError (a multibyte sequence, e.g. in UTF-8).
+ for c in ('A', '\u00e9', '\u00a4', '\u20ac', '\u0454'):
+ try:
+ stdscr.addch(c)
+ except UnicodeEncodeError:
+ self.assertRaises(UnicodeEncodeError, c.encode, encoding)
+ except OverflowError:
+ encoded = c.encode(encoding)
+ self.assertNotEqual(len(encoded), 1, repr(encoded))
stdscr.addch('A', curses.A_BOLD)
stdscr.addch(1, 2, 'A')
stdscr.addch(2, 3, 'A', curses.A_BOLD)
self.assertIs(stdscr.is_wintouched(), True)
+ # The same characters supplied as an int chtype (a byte > 127). The
+ # cell is read back with _read_char(), not inch(): on a wide build the
+ # int is stored through the locale as a wide character that inch()
+ # cannot represent for a character outside Latin-1.
+ for c in ('é', '¤', '€', 'є'):
+ try:
+ b = c.encode(encoding)
+ except UnicodeEncodeError:
+ continue
+ if len(b) != 1:
+ continue
+ # A wide build stores a character outside Latin-1 as a wide cell,
+ # not as its encoded byte, so it cannot round-trip here.
+ if ord(c) > 0xff and hasattr(stdscr, 'get_wch'):
+ continue
+ v = b[0]
+ with self.subTest(c=c):
+ stdscr.addch(0, 0, v)
+ self.assertEqual(self._read_char(0, 0), c)
+ stdscr.addch(0, 1, v, curses.A_BOLD)
+ self.assertEqual(self._read_char(0, 1), c)
+ self.assertTrue(stdscr.inch(0, 1) & curses.A_BOLD)
+ stdscr.move(2, 0)
+ stdscr.echochar(v)
+ self.assertEqual(self._read_char(2, 0), c)
+ # insch() round-trips a byte only where its code point equals
+ # the byte value (Latin-1): on a wide build ncurses winsch
+ # stores a printable byte directly as a code point instead of
+ # decoding it through the locale.
+ if ord(c) < 0x100:
+ stdscr.insch(1, 0, v)
+ self.assertEqual(self._read_char(1, 0), c)
+
+ # The same characters supplied as a str. Unlike the int path above, a
+ # str is stored as a wide-character cell on a wide build, so every
+ # encodable character round-trips, insch() included. A multibyte
+ # character does not fit a cell on a narrow build and is skipped.
+ wide = hasattr(stdscr, 'in_wch')
+ for c in ('é', '¤', '€', 'є'):
+ if not self._encodable(c):
+ continue
+ if not wide and len(c.encode(encoding)) != 1:
+ continue
+ # A wide build stores a character outside Latin-1 as a wide cell,
+ # not as its encoded byte, so it cannot round-trip here.
+ if ord(c) > 0xff and hasattr(stdscr, 'get_wch'):
+ continue
+ with self.subTest(c=c):
+ stdscr.addch(0, 0, c)
+ self.assertEqual(self._read_char(0, 0), c)
+ stdscr.addch(0, 1, c, curses.A_BOLD)
+ self.assertEqual(self._read_char(0, 1), c)
+ self.assertTrue(stdscr.inch(0, 1) & curses.A_BOLD)
+ stdscr.insch(1, 0, c)
+ self.assertEqual(self._read_char(1, 0), c)
+ stdscr.move(2, 0)
+ stdscr.echochar(c)
+ self.assertEqual(self._read_char(2, 0), c)
+
# echochar()
stdscr.refresh()
stdscr.move(0, 0)
stdscr.echochar('A')
stdscr.echochar(b'A')
stdscr.echochar(65)
- with self.assertRaises((UnicodeEncodeError, OverflowError)):
- # Unicode is not fully supported yet, but at least it does
- # not crash.
- # It is supposed to fail because either the character is
- # not encodable with the current encoding, or it is encoded to
- # a multibyte sequence.
- stdscr.echochar('\u0114')
+ # See _encodable for the character set; as in the addch() loop above.
+ for c in ('A', '\u00e9', '\u00a4', '\u20ac', '\u0454'):
+ try:
+ stdscr.echochar(c)
+ except UnicodeEncodeError:
+ # The character is not encodable with the current encoding.
+ self.assertRaises(UnicodeEncodeError, c.encode, encoding)
+ except OverflowError:
+ # The character is encoded to a multibyte sequence.
+ encoded = c.encode(encoding)
+ self.assertNotEqual(len(encoded), 1, repr(encoded))
stdscr.echochar('A', curses.A_BOLD)
self.assertIs(stdscr.is_wintouched(), False)
@@ -299,14 +392,18 @@ def test_output_string(self):
# addstr()/insstr()
for func in [stdscr.addstr, stdscr.insstr]:
with self.subTest(func.__qualname__):
- stdscr.move(0, 0)
func('abcd')
func(b'abcd')
- s = 'àßçđ'
- try:
- func(s)
- except UnicodeEncodeError:
- self.assertRaises(UnicodeEncodeError, s.encode, encoding)
+ # Common and encoding-distinctive strings (see _encodable for the
+ # 0xA4 set); 'àßçđ' is UTF-8-only. Each is written if the
+ # encoding allows, else raises UnicodeEncodeError.
+ for s in ('soupçon', 'àßçđ', 'soupçon ¤', 'soupçon €', 'дякую'):
+ stdscr.move(0, 0)
+ try:
+ func(s)
+ except UnicodeEncodeError:
+ self.assertRaises(UnicodeEncodeError, s.encode, encoding)
+ stdscr.move(0, 0)
func('abcd', curses.A_BOLD)
func(1, 2, 'abcd')
func(2, 3, 'abcd', curses.A_BOLD)
@@ -317,11 +414,14 @@ def test_output_string(self):
stdscr.move(0, 0)
func('1234', 3)
func(b'1234', 3)
- s = '\u0661\u0662\u0663\u0664'
- try:
- func(s, 3)
- except UnicodeEncodeError:
- self.assertRaises(UnicodeEncodeError, s.encode, encoding)
+ # As above (see _encodable); Arabic-Indic digits are UTF-8-only.
+ for s in ('caf\u00e9', '\u0661\u0662\u0663\u0664', 'caf\u00e9 \u00a4', 'caf\u00e9 \u20ac', '\u0434\u044f\u043a\u0443\u044e'):
+ stdscr.move(0, 0)
+ try:
+ func(s, 3)
+ except UnicodeEncodeError:
+ self.assertRaises(UnicodeEncodeError, s.encode, encoding)
+ stdscr.move(0, 0)
func('1234', 5)
func('1234', 3, curses.A_BOLD)
func(1, 2, '1234', 3)
@@ -411,6 +511,24 @@ def test_read_from_window(self):
self.assertEqual(stdscr.instr(0, 2, 4), b'BCD ')
self.assertRaises(ValueError, stdscr.instr, -2)
self.assertRaises(ValueError, stdscr.instr, 0, 2, -2)
+ # A non-ASCII character of an 8-bit locale reads back as its encoded
+ # byte (see _encodable for the set). instr() returns the locale bytes
+ # for any single-byte character; inch() packs the text into a chtype, so
+ # on a wide build it only round-trips a Latin-1 codepoint (byte ==
+ # codepoint).
+ encoding = stdscr.encoding
+ for ch in ('A', 'é', '¤', '€', 'є'):
+ try:
+ b = ch.encode(encoding)
+ except UnicodeEncodeError:
+ continue
+ if len(b) != 1:
+ continue
+ with self.subTest(ch=ch):
+ stdscr.addstr(2, 0, ch)
+ self.assertEqual(stdscr.instr(2, 0, 1), b)
+ if ord(ch) < 0x100:
+ self.assertEqual(stdscr.inch(2, 0) & curses.A_CHARTEXT, b[0])
def test_coordinate_errors(self):
# Addressing a cell outside the window raises curses.error.
@@ -447,6 +565,10 @@ def test_getch(self):
self.assertEqual(win.getch(), b'm'[0])
self.assertEqual(win.getch(), b'\n'[0])
+ # A key value > 127 is delivered unchanged (it is not locale text).
+ curses.ungetch(0xE9)
+ self.assertEqual(win.getch(), 0xE9)
+
def test_getstr(self):
win = curses.newwin(5, 12, 5, 2)
curses.echo()
@@ -619,6 +741,33 @@ def test_background(self):
self.assertEqual(win.inch(0, 0), b'L'[0] | curses.A_REVERSE)
self.assertEqual(win.inch(0, 5), b'#'[0] | curses.A_REVERSE)
+ # A non-ASCII background character of an 8-bit locale reads back as its
+ # encoded byte. See _encodable for the character set.
+ win.bkgd(' ')
+ encoding = win.encoding
+ for ch in ('é', '¤', '€', 'є'):
+ try:
+ b = ch.encode(encoding)
+ except UnicodeEncodeError:
+ continue
+ if len(b) != 1:
+ continue
+ # A wide build stores a character outside Latin-1 as a wide cell,
+ # not as its encoded byte, so it cannot round-trip here.
+ if ord(ch) > 0xff and hasattr(win, 'get_wch'):
+ continue
+ with self.subTest(ch=ch):
+ win.bkgd(ch)
+ self.assertEqual(win.getbkgd(), b[0])
+ if ord(ch) < 0x100:
+ # The same byte given as an int. A wide build stores it
+ # through the locale, so only a Latin-1 byte round-trips.
+ win.bkgd(' ')
+ win.bkgdset(b[0])
+ self.assertEqual(win.getbkgd(), b[0])
+ win.bkgd(b[0])
+ self.assertEqual(win.getbkgd(), b[0])
+
def test_overlay(self):
srcwin = curses.newwin(5, 18, 3, 4)
lorem_ipsum(srcwin)
@@ -711,6 +860,16 @@ def test_borders_and_lines(self):
win.border(65, 66)
win.border(65)
win.border()
+ # With no arguments, border() fills the edges with ACS line and corner
+ # characters.
+ chartext = curses.A_CHARTEXT
+ maxy, maxx = win.getmaxyx()
+ self.assertEqual(win.inch(0, 0) & chartext, curses.ACS_ULCORNER & chartext)
+ self.assertEqual(win.inch(0, maxx-1) & chartext, curses.ACS_URCORNER & chartext)
+ self.assertEqual(win.inch(maxy-1, 0) & chartext, curses.ACS_LLCORNER & chartext)
+ self.assertEqual(win.inch(maxy-1, maxx-1) & chartext, curses.ACS_LRCORNER & chartext)
+ self.assertEqual(win.inch(0, 1) & chartext, curses.ACS_HLINE & chartext)
+ self.assertEqual(win.inch(1, 0) & chartext, curses.ACS_VLINE & chartext)
win.box(':', '~')
self.assertEqual(win.instr(0, 1, 8), b'~~~~~~~~')
@@ -721,6 +880,11 @@ def test_borders_and_lines(self):
self.assertRaises(TypeError, win.box, 65, 66, 67)
self.assertRaises(TypeError, win.box, 65)
win.box()
+ # With no arguments, box() likewise draws ACS corners and lines.
+ self.assertEqual(win.inch(0, 0) & chartext, curses.ACS_ULCORNER & chartext)
+ self.assertEqual(win.inch(0, maxx-1) & chartext, curses.ACS_URCORNER & chartext)
+ self.assertEqual(win.inch(0, 1) & chartext, curses.ACS_HLINE & chartext)
+ self.assertEqual(win.inch(1, 0) & chartext, curses.ACS_VLINE & chartext)
win.move(1, 2)
win.hline('-', 5)
@@ -742,6 +906,43 @@ def test_borders_and_lines(self):
self.assertEqual(win.inch(2, 1), b';'[0] | curses.A_STANDOUT)
self.assertEqual(win.inch(3, 1), b'a'[0])
+ # A border or line character of an 8-bit locale round-trips as its
+ # encoded byte. See _encodable for the character set.
+ encoding = win.encoding
+ for ch in ('é', '¤', '€', 'є'):
+ try:
+ b = ch.encode(encoding)
+ except UnicodeEncodeError:
+ continue
+ if len(b) != 1:
+ continue
+ # A wide build stores a character outside Latin-1 as a wide cell,
+ # not as its encoded byte, so it cannot round-trip here.
+ if ord(ch) > 0xff and hasattr(win, 'get_wch'):
+ continue
+ with self.subTest(ch=ch):
+ win.erase()
+ win.hline(2, 0, ch, 5)
+ self.assertEqual(win.instr(2, 0, 5), b * 5)
+ win.vline(0, 0, ch, 3)
+ self.assertEqual(win.instr(0, 0, 1), b)
+ self.assertEqual(win.instr(1, 0, 1), b)
+ win.border(ch, ch, ch, ch, ch, ch, ch, ch)
+ self.assertEqual(win.instr(0, 0), b * maxx)
+ if ord(ch) < 0x100:
+ # The same byte given as an int. A wide build stores it
+ # through the locale, so only a Latin-1 byte round-trips.
+ v = b[0]
+ win.erase()
+ win.hline(2, 0, v, 5)
+ self.assertEqual(win.instr(2, 0, 5), b * 5)
+ win.vline(0, 0, v, 3)
+ self.assertEqual(win.instr(1, 0, 1), b)
+ win.border(v, v, v, v, v, v, v, v)
+ self.assertEqual(win.instr(0, 0), b * maxx)
+ win.box(v, v)
+ self.assertEqual(win.instr(0, 1, 1), b)
+
def test_unctrl(self):
# TODO: wunctrl()
self.assertEqual(curses.unctrl(b'A'), b'A')
@@ -750,6 +951,19 @@ def test_unctrl(self):
self.assertEqual(curses.unctrl(b'\n'), b'^J')
self.assertEqual(curses.unctrl('\n'), b'^J')
self.assertEqual(curses.unctrl(10), b'^J')
+ # A printable non-ASCII byte of an 8-bit locale is returned unchanged.
+ # See _encodable for the character set.
+ encoding = self.stdscr.encoding
+ for ch in ('é', '¤', '€', 'є'):
+ try:
+ b = ch.encode(encoding)
+ except UnicodeEncodeError:
+ continue
+ if len(b) != 1:
+ continue
+ with self.subTest(ch=ch):
+ self.assertEqual(curses.unctrl(ch), b)
+ self.assertEqual(curses.unctrl(b[0]), b) # the byte as an int
self.assertRaises(TypeError, curses.unctrl, b'')
self.assertRaises(TypeError, curses.unctrl, b'AB')
self.assertRaises(TypeError, curses.unctrl, '')
@@ -1449,7 +1663,8 @@ def test_issue6243(self):
def test_unget_wch(self):
stdscr = self.stdscr
encoding = stdscr.encoding
- for ch in ('a', '\xe9', '\u20ac', '\U0010FFFF'):
+ # See _encodable for the character set, plus a non-BMP character.
+ for ch in ('a', '\xe9', '\xa4', '\u20ac', '\u0454', '\U0010FFFF'):
try:
ch.encode(encoding)
except UnicodeEncodeError:
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]