https://github.com/python/cpython/commit/b52e8ce4af89503e1a375802c4902918c69fb1ec
commit: b52e8ce4af89503e1a375802c4902918c69fb1ec
branch: main
author: Stan Ulbrych <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-12-11T04:20:55Z
summary:
gh-142539: Fix `traceback` caret location calculation for `SyntaxError`s with wide chars (#142540)
files:
A Misc/NEWS.d/next/Library/2025-12-10-21-19-10.gh-issue-142539._8Vzr0.rst
M Lib/test/test_traceback.py
M Lib/traceback.py
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index d107ad925941fe..259f70f1ea0dbc 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -88,6 +88,12 @@ def syntax_error_bad_indentation2(self):
def tokenizer_error_with_caret_range(self):
compile("blech ( ", "?", "exec")
+ def syntax_error_with_caret_wide_char(self):
+ compile("女女女=1; 女女女/", "?", "exec")
+
+ def syntax_error_with_caret_wide_char_range(self):
+ compile("f(x, 女女女 for 女女女 in range(30), z)", "?", "exec")
+
def test_caret(self):
err = self.get_exception_format(self.syntax_error_with_caret,
SyntaxError)
@@ -125,6 +131,20 @@ def test_caret(self):
self.assertEqual(err[1].find("("), err[2].find("^")) # in the right place
self.assertEqual(err[2].count("^"), 1)
+ def test_caret_wide_char(self):
+ err = self.get_exception_format(self.syntax_error_with_caret_wide_char,
+ SyntaxError)
+ self.assertIn("^", err[2])
+ # "女女女=1; 女女女/" has display width 17
+ self.assertEqual(err[2].find("^"), 4 + 17)
+
+ err = self.get_exception_format(self.syntax_error_with_caret_wide_char_range,
+ SyntaxError)
+ self.assertIn("^", err[2])
+ self.assertEqual(err[2].find("^"), 4 + 5)
+ # "女女女 for 女女女 in range(30)" has display width 30
+ self.assertEqual(err[2].count("^"), 30)
+
def test_nocaret(self):
exc = SyntaxError("error", ("x.py", 23, None, "bad syntax"))
err = traceback.format_exception_only(SyntaxError, exc)
diff --git a/Lib/traceback.py b/Lib/traceback.py
index c1052adeed25a1..f95d6bdbd016ac 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -1464,10 +1464,11 @@ def _format_syntax_error(self, stype, **kwargs):
# Convert 1-based column offset to 0-based index into stripped text
colno = offset - 1 - spaces
end_colno = end_offset - 1 - spaces
- caretspace = ' '
if colno >= 0:
- # non-space whitespace (likes tabs) must be kept for alignment
- caretspace = ((c if c.isspace() else ' ') for c in ltext[:colno])
+ # Calculate display width to account for wide characters
+ dp_colno = _display_width(ltext, colno)
+ highlighted = ltext[colno:end_colno]
+ caret_count = _display_width(highlighted) if highlighted else (end_colno - colno)
start_color = end_color = ""
if colorize:
# colorize from colno to end_colno
@@ -1480,9 +1481,9 @@ def _format_syntax_error(self, stype, **kwargs):
end_color = theme.reset
yield ' {}\n'.format(ltext)
yield ' {}{}{}{}\n'.format(
- "".join(caretspace),
+ ' ' * dp_colno,
start_color,
- ('^' * (end_colno - colno)),
+ '^' * caret_count,
end_color,
)
else:
diff --git a/Misc/NEWS.d/next/Library/2025-12-10-21-19-10.gh-issue-142539._8Vzr0.rst b/Misc/NEWS.d/next/Library/2025-12-10-21-19-10.gh-issue-142539._8Vzr0.rst
new file mode 100644
index 00000000000000..ddebe9f3ed8f8c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-10-21-19-10.gh-issue-142539._8Vzr0.rst
@@ -0,0 +1,2 @@
+:mod:`traceback`: Fix location of carets in :exc:`SyntaxError`\s when the
+source contains wide characters.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]