https://github.com/python/cpython/commit/56eda256336310a08d4beb75b998488cb359444b
commit: 56eda256336310a08d4beb75b998488cb359444b
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-02-13T01:07:37Z
summary:
gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer (#116049)
files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-02-13-00-28-43.gh-issue-116042.861juq.rst
M Lib/test/test_cmd_line_script.py
M Lib/test/test_string_literals.py
M Parser/pegen_errors.c
M Parser/string_parser.c
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index e7f3e46c1868f7..53dc9b1a7effb5 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -660,7 +660,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
self.assertEqual(
stderr.splitlines()[-3:],
[ b' foo = """\\q"""',
- b' ^^^^^^^^',
+ b' ^^',
b'SyntaxError: "\\q" is an invalid escape sequence. '
b'Did you mean "\\\\q"? A raw string is also an option.'
],
diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py
index f56195ca27672c..9d57233eb0882a 100644
--- a/Lib/test/test_string_literals.py
+++ b/Lib/test/test_string_literals.py
@@ -120,7 +120,7 @@ def test_eval_str_invalid_escape(self):
r'Such sequences will not work in the future. '
r'Did you mean "\\z"? A raw string is also an
option.')
self.assertEqual(w[0].filename, '<string>')
- self.assertEqual(w[0].lineno, 1)
+ self.assertEqual(w[0].lineno, 2)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=SyntaxWarning)
@@ -131,7 +131,7 @@ def test_eval_str_invalid_escape(self):
self.assertEqual(exc.msg, r'"\z" is an invalid escape sequence. '
r'Did you mean "\\z"? A raw string is also an
option.')
self.assertEqual(exc.filename, '<string>')
- self.assertEqual(exc.lineno, 1)
+ self.assertEqual(exc.lineno, 2)
self.assertEqual(exc.offset, 1)
# Check that the warning is raised only once if there are syntax errors
@@ -160,7 +160,7 @@ def test_eval_str_invalid_octal_escape(self):
r'Such sequences will not work in the future. '
r'Did you mean "\\407"? A raw string is also an
option.')
self.assertEqual(w[0].filename, '<string>')
- self.assertEqual(w[0].lineno, 1)
+ self.assertEqual(w[0].lineno, 2)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=SyntaxWarning)
@@ -171,9 +171,32 @@ def test_eval_str_invalid_octal_escape(self):
self.assertEqual(exc.msg, r'"\407" is an invalid octal escape
sequence. '
r'Did you mean "\\407"? A raw string is also
an option.')
self.assertEqual(exc.filename, '<string>')
- self.assertEqual(exc.lineno, 1)
+ self.assertEqual(exc.lineno, 2)
self.assertEqual(exc.offset, 1)
+ def test_invalid_escape_locations_with_offset(self):
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter('always', category=SyntaxWarning)
+ eval("\"'''''''''''''''''''''invalid\ Escape\"")
+ self.assertEqual(len(w), 1)
+ self.assertEqual(str(w[0].message),
+ r'"\ " is an invalid escape sequence. Such sequences '
+ r'will not work in the future. Did you mean "\\ "? '
+ r'A raw string is also an option.')
+ self.assertEqual(w[0].filename, '<string>')
+ self.assertEqual(w[0].lineno, 1)
+
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter('always', category=SyntaxWarning)
+ eval("\"''Incorrect \ logic?\"")
+ self.assertEqual(len(w), 1)
+ self.assertEqual(str(w[0].message),
+ r'"\ " is an invalid escape sequence. Such sequences '
+ r'will not work in the future. Did you mean "\\ "? '
+ r'A raw string is also an option.')
+ self.assertEqual(w[0].filename, '<string>')
+ self.assertEqual(w[0].lineno, 1)
+
def test_eval_str_raw(self):
self.assertEqual(eval(""" r'x' """), 'x')
self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
@@ -215,7 +238,7 @@ def test_eval_bytes_invalid_escape(self):
r'Such sequences will not work in the future. '
r'Did you mean "\\z"? A raw string is also an
option.')
self.assertEqual(w[0].filename, '<string>')
- self.assertEqual(w[0].lineno, 1)
+ self.assertEqual(w[0].lineno, 2)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=SyntaxWarning)
@@ -226,7 +249,7 @@ def test_eval_bytes_invalid_escape(self):
self.assertEqual(exc.msg, r'"\z" is an invalid escape sequence. '
r'Did you mean "\\z"? A raw string is also an
option.')
self.assertEqual(exc.filename, '<string>')
- self.assertEqual(exc.lineno, 1)
+ self.assertEqual(exc.lineno, 2)
def test_eval_bytes_invalid_octal_escape(self):
for i in range(0o400, 0o1000):
@@ -241,7 +264,7 @@ def test_eval_bytes_invalid_octal_escape(self):
r'Such sequences will not work in the future. '
r'Did you mean "\\407"? A raw string is also an
option.')
self.assertEqual(w[0].filename, '<string>')
- self.assertEqual(w[0].lineno, 1)
+ self.assertEqual(w[0].lineno, 2)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=SyntaxWarning)
@@ -252,7 +275,7 @@ def test_eval_bytes_invalid_octal_escape(self):
self.assertEqual(exc.msg, r'"\407" is an invalid octal escape
sequence. '
r'Did you mean "\\407"? A raw string is also an
option.')
self.assertEqual(exc.filename, '<string>')
- self.assertEqual(exc.lineno, 1)
+ self.assertEqual(exc.lineno, 2)
def test_eval_bytes_raw(self):
self.assertEqual(eval(""" br'x' """), b'x')
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-13-00-28-43.gh-issue-116042.861juq.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-13-00-28-43.gh-issue-116042.861juq.rst
new file mode 100644
index 00000000000000..098804fa92e804
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-13-00-28-43.gh-issue-116042.861juq.rst
@@ -0,0 +1,2 @@
+Fix location for SyntaxErrors of invalid escapes in the tokenizer. Patch by
+Pablo Galindo
diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
index 6146f69912bfa3..f62b8695995617 100644
--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@@ -352,8 +352,8 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done ==
E_EOF);
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
- Py_ssize_t size = p->tok->inp - p->tok->buf;
- error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
+ Py_ssize_t size = p->tok->inp - p->tok->line_start;
+ error_line = PyUnicode_DecodeUTF8(p->tok->line_start, size, "replace");
}
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
error_line = get_error_line_from_tokenizer_buffers(p, lineno);
diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index 9dd8f9ef28bd4f..b93300b00a8545 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -11,7 +11,7 @@
//// STRING HANDLING FUNCTIONS ////
static int
-warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
+warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
{
if (p->call_invalid_rules) {
// Do not report warnings if we are in the second pass of the parser
@@ -48,8 +48,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
else {
category = PyExc_DeprecationWarning;
}
+
+ // Calculate the lineno and the col_offset of the invalid escape sequence
+ const char *start = buffer;
+ const char *end = first_invalid_escape;
+ int lineno = t->lineno;
+ int col_offset = t->col_offset;
+ while (start < end) {
+ if (*start == '\n') {
+ lineno++;
+ col_offset = 0;
+ }
+ else {
+ col_offset++;
+ }
+ start++;
+ }
+
+ // Count the number of quotes in the token
+ char first_quote = 0;
+ if (lineno == t->lineno) {
+ int quote_count = 0;
+ char* tok = PyBytes_AsString(t->bytes);
+ for (int i = 0; i < PyBytes_Size(t->bytes); i++) {
+ if (tok[i] == '\'' || tok[i] == '\"') {
+ if (quote_count == 0) {
+ first_quote = tok[i];
+ }
+ if (tok[i] == first_quote) {
+ quote_count++;
+ }
+ } else {
+ break;
+ }
+ }
+
+ col_offset += quote_count;
+ }
+
if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
- t->lineno, NULL, NULL) < 0) {
+ lineno, NULL, NULL) < 0) {
if (PyErr_ExceptionMatches(category)) {
/* Replace the Syntax/DeprecationWarning exception with a
SyntaxError
to get a more accurate error report */
@@ -60,13 +98,13 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
error location, if p->known_err_token is not set. */
p->known_err_token = t;
if (octal) {
- RAISE_SYNTAX_ERROR(
+ RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
"\"\\%.3s\" is an invalid octal escape sequence. "
"Did you mean \"\\\\%.3s\"? A raw string is also an
option.",
first_invalid_escape, first_invalid_escape);
}
else {
- RAISE_SYNTAX_ERROR(
+ RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
"\"\\%c\" is an invalid escape sequence. "
"Did you mean \"\\\\%c\"? A raw string is also an option.",
c, c);
@@ -163,7 +201,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
// HACK: later we can simply pass the line no, since we don't preserve the
tokens
// when we are decoding the string but we preserve the line numbers.
if (v != NULL && first_invalid_escape != NULL && t != NULL) {
- if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
+ if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
/* We have not decref u before because first_invalid_escape points
inside u. */
Py_XDECREF(u);
@@ -185,7 +223,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
}
if (first_invalid_escape != NULL) {
- if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
+ if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
Py_DECREF(result);
return NULL;
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]