https://github.com/python/cpython/commit/d9dafc790dc7d9c04693f4546e6f607cc334008a
commit: d9dafc790dc7d9c04693f4546e6f607cc334008a
branch: 3.13
author: Miss Islington (bot) <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-10-21T16:07:56+03:00
summary:

[3.13] gh-125660: Reject invalid unicode escapes for Python implementation of JSON decoder (GH-125683) (GH-125694)

(cherry picked from commit df751363e386d1f77c5ba9515a5539902457d386)

Co-authored-by: Nice Zombies <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst
M Lib/json/decoder.py
M Lib/test/test_json/test_scanstring.py

diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index d69a45d6793069..ff4bfcdcc407b9 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -50,17 +50,18 @@ def __reduce__(self):
 }
 
 
+HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
 BACKSLASH = {
     '"': '"', '\\': '\\', '/': '/',
     'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
 }
 
-def _decode_uXXXX(s, pos):
-    esc = s[pos + 1:pos + 5]
-    if len(esc) == 4 and esc[1] not in 'xX':
+def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
+    esc = _m(s, pos + 1)
+    if esc is not None:
         try:
-            return int(esc, 16)
+            return int(esc.group(), 16)
         except ValueError:
             pass
     msg = "Invalid \\uXXXX escape"
diff --git a/Lib/test/test_json/test_scanstring.py b/Lib/test/test_json/test_scanstring.py
index 2d3ee8a8bf0f92..cca556a3b95bab 100644
--- a/Lib/test/test_json/test_scanstring.py
+++ b/Lib/test/test_json/test_scanstring.py
@@ -116,6 +116,11 @@ def test_bad_escapes(self):
             '"\\u012z"',
             '"\\u0x12"',
             '"\\u0X12"',
+            '"\\u{0}"'.format("\uff10" * 4),
+            '"\\u 123"',
+            '"\\u-123"',
+            '"\\u+123"',
+            '"\\u1_23"',
             '"\\ud834\\"',
             '"\\ud834\\u"',
             '"\\ud834\\ud"',
@@ -127,6 +132,11 @@ def test_bad_escapes(self):
             '"\\ud834\\udd2z"',
             '"\\ud834\\u0x20"',
             '"\\ud834\\u0X20"',
+            '"\\ud834\\u{0}"'.format("\uff10" * 4),
+            '"\\ud834\\u 123"',
+            '"\\ud834\\u-123"',
+            '"\\ud834\\u+123"',
+            '"\\ud834\\u1_23"',
         ]
         for s in bad_escapes:
             with self.assertRaises(self.JSONDecodeError, msg=s):
diff --git a/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst
new file mode 100644
index 00000000000000..74d76c7bddae7d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst
@@ -0,0 +1 @@
+Reject invalid unicode escapes for Python implementation of :func:`json.loads`.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to