https://github.com/python/cpython/commit/3d69d1832209ef6e3fd9afacad3b5b8bd8a5f465
commit: 3d69d1832209ef6e3fd9afacad3b5b8bd8a5f465
branch: 3.14
author: Victor Stinner <vstin...@python.org>
committer: vstinner <vstin...@python.org>
date: 2025-06-09T14:37:26+02:00
summary:

[3.14] gh-133968: Add PyUnicodeWriter_WriteASCII() function (#133973) (#134974)

gh-133968: Add PyUnicodeWriter_WriteASCII() function (#133973)

Replace most PyUnicodeWriter_WriteUTF8() calls with
PyUnicodeWriter_WriteASCII().

(cherry picked from commit f49a07b531543dd8a42d90f5b1c89c0312fbf806)

Co-authored-by: Peter Bierma <zintensity...@gmail.com>
Co-authored-by: Bénédikt Tran <10796600+picn...@users.noreply.github.com>

files:
A Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst
M Doc/c-api/unicode.rst
M Doc/whatsnew/3.14.rst
M Include/cpython/unicodeobject.h
M Lib/test/test_capi/test_unicode.py
M Modules/_json.c
M Modules/_ssl.c
M Modules/_testcapi/unicode.c
M Objects/genericaliasobject.c
M Objects/typevarobject.c
M Objects/unicodeobject.c
M Objects/unionobject.c
M Parser/asdl_c.py
M Python/Python-ast.c
M Python/context.c
M Python/hamt.c

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 47f8b2dadc3bcc..45f50ba5f97d26 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1798,9 +1798,24 @@ object.
 
    See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
 
+.. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size)
+
+   Write the ASCII string *str* into *writer*.
+
+   *size* is the string length in bytes. If *size* is equal to ``-1``, call
+   ``strlen(str)`` to get the string length.
+
+   *str* must only contain ASCII characters. The behavior is undefined if
+   *str* contains non-ASCII characters.
+
+   On success, return ``0``.
+   On error, set an exception, leave the writer unchanged, and return ``-1``.
+
+   .. versionadded:: next
+
 .. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size)
 
-   Writer the wide string *str* into *writer*.
+   Write the wide string *str* into *writer*.
 
    *size* is a number of wide characters. If *size* is equal to ``-1``, call
    ``wcslen(str)`` to get the string length.
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 1896410919d6b0..2c7460fa159331 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -2584,6 +2584,7 @@ New features
   * :c:func:`PyUnicodeWriter_Discard`
   * :c:func:`PyUnicodeWriter_Finish`
   * :c:func:`PyUnicodeWriter_Format`
+  * :c:func:`PyUnicodeWriter_WriteASCII`
   * :c:func:`PyUnicodeWriter_WriteChar`
   * :c:func:`PyUnicodeWriter_WriteRepr`
   * :c:func:`PyUnicodeWriter_WriteStr`
@@ -2860,7 +2861,7 @@ Deprecated
     :c:func:`PyUnicodeWriter_WriteSubstring(writer, str, start, end) <PyUnicodeWriter_WriteSubstring>`.
   * :c:func:`!_PyUnicodeWriter_WriteASCIIString`:
     replace ``_PyUnicodeWriter_WriteASCIIString(&writer, str)`` with
-    :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) <PyUnicodeWriter_WriteUTF8>`.
+    :c:func:`PyUnicodeWriter_WriteASCII(writer, str) <PyUnicodeWriter_WriteASCII>`.
   * :c:func:`!_PyUnicodeWriter_WriteLatin1String`:
     replace ``_PyUnicodeWriter_WriteLatin1String(&writer, str)`` with
     :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) <PyUnicodeWriter_WriteUTF8>`.
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index 136f5d5c5f8425..3d0414f5291fe4 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -478,6 +478,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
     PyUnicodeWriter *writer,
     const char *str,
     Py_ssize_t size);
+PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII(
+    PyUnicodeWriter *writer,
+    const char *str,
+    Py_ssize_t size);
 PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
     PyUnicodeWriter *writer,
     const wchar_t *str,
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 3408c10f426058..c8be4f3faa9483 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1776,6 +1776,13 @@ def test_utf8(self):
         self.assertEqual(writer.finish(),
                          "ascii-latin1=\xE9-euro=\u20AC.")
 
+    def test_ascii(self):
+        writer = self.create_writer(0)
+        writer.write_ascii(b"Hello ", -1)
+        writer.write_ascii(b"", 0)
+        writer.write_ascii(b"Python! <truncated>", 6)
+        self.assertEqual(writer.finish(), "Hello Python")
+
     def test_invalid_utf8(self):
         writer = self.create_writer(0)
         with self.assertRaises(UnicodeDecodeError):
diff --git a/Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst b/Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst
new file mode 100644
index 00000000000000..47d5a3bda39942
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst
@@ -0,0 +1,4 @@
+Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string
+into a :c:type:`PyUnicodeWriter`. The function is faster than
+:c:func:`PyUnicodeWriter_WriteUTF8`, but has an undefined behavior if the
+input string contains non-ASCII characters. Patch by Victor Stinner.
diff --git a/Modules/_json.c b/Modules/_json.c
index 646bb6cdde8f03..6c381fab449db2 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -1476,13 +1476,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
     int rv;
 
     if (obj == Py_None) {
-      return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
+      return PyUnicodeWriter_WriteASCII(writer, "null", 4);
     }
     else if (obj == Py_True) {
-      return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
+      return PyUnicodeWriter_WriteASCII(writer, "true", 4);
     }
     else if (obj == Py_False) {
-      return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
+      return PyUnicodeWriter_WriteASCII(writer, "false", 5);
     }
     else if (PyUnicode_Check(obj)) {
         PyObject *encoded = encoder_encode_string(s, obj);
@@ -1655,7 +1655,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
 
     if (PyDict_GET_SIZE(dct) == 0) {
         /* Fast path */
-        return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
+        return PyUnicodeWriter_WriteASCII(writer, "{}", 2);
     }
 
     if (s->markers != Py_None) {
@@ -1756,7 +1756,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
         return -1;
     if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
         Py_DECREF(s_fast);
-        return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
+        return PyUnicodeWriter_WriteASCII(writer, "[]", 2);
     }
 
     if (s->markers != Py_None) {
diff --git a/Modules/_ssl.c b/Modules/_ssl.c
index 97a29f4d0e1830..1c1e15eb0e528a 100644
--- a/Modules/_ssl.c
+++ b/Modules/_ssl.c
@@ -563,7 +563,7 @@ fill_and_set_sslerror(_sslmodulestate *state,
                 goto fail;
             }
         }
-        if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) {
+        if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) {
             goto fail;
         }
     }
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index b8ecf53f4f8b9c..e70f5c68bc3b69 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -332,6 +332,27 @@ writer_write_utf8(PyObject *self_raw, PyObject *args)
 }
 
 
+static PyObject*
+writer_write_ascii(PyObject *self_raw, PyObject *args)
+{
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
+        return NULL;
+    }
+
+    char *str;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "yn", &str, &size)) {
+        return NULL;
+    }
+
+    if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+
 static PyObject*
 writer_write_widechar(PyObject *self_raw, PyObject *args)
 {
@@ -513,6 +534,7 @@ writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args))
 static PyMethodDef writer_methods[] = {
     {"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
     {"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
+    {"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS},
     {"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
     {"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
     {"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c
index ec3d01f00a3c3c..07b57f0c552ce9 100644
--- a/Objects/genericaliasobject.c
+++ b/Objects/genericaliasobject.c
@@ -65,7 +65,7 @@ ga_repr_items_list(PyUnicodeWriter *writer, PyObject *p)
 
     for (Py_ssize_t i = 0; i < len; i++) {
         if (i > 0) {
-            if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+            if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
                 return -1;
             }
         }
@@ -109,7 +109,7 @@ ga_repr(PyObject *self)
     }
     for (Py_ssize_t i = 0; i < len; i++) {
         if (i > 0) {
-            if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+            if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
                 goto error;
             }
         }
@@ -126,7 +126,7 @@ ga_repr(PyObject *self)
     }
     if (len == 0) {
         // for something like tuple[()] we should print a "()"
-        if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) {
+        if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) {
             goto error;
         }
     }
diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c
index 6c199a52aa0ae6..cead6e69af5451 100644
--- a/Objects/typevarobject.c
+++ b/Objects/typevarobject.c
@@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs)
             for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) {
                 PyObject *item = PyTuple_GET_ITEM(value, i);
                 if (i > 0) {
-                    if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+                    if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
                         PyUnicodeWriter_Discard(writer);
                         return NULL;
                     }
@@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p)
     }
 
     if (p == (PyObject *)&_PyNone_Type) {
-        return PyUnicodeWriter_WriteUTF8(writer, "None", 4);
+        return PyUnicodeWriter_WriteASCII(writer, "None", 4);
     }
 
     if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 &&
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 9b8e3bb5462d4b..1d01dad9e33e29 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -14108,6 +14108,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
     return 0;
 }
 
+
+int
+PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
+                           const char *str,
+                           Py_ssize_t size)
+{
+    assert(writer != NULL);
+    _Py_AssertHoldsTstate();
+
+    _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
+    return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
+}
+
+
 int
 PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
                           const char *str,
diff --git a/Objects/unionobject.c b/Objects/unionobject.c
index 66435924b6c6c3..00ca5b9bf80341 100644
--- a/Objects/unionobject.c
+++ b/Objects/unionobject.c
@@ -290,7 +290,7 @@ union_repr(PyObject *self)
     }
 
     for (Py_ssize_t i = 0; i < len; i++) {
-        if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) {
+        if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) {
             goto error;
         }
         PyObject *p = PyTuple_GET_ITEM(alias->args, i);
@@ -300,12 +300,12 @@ union_repr(PyObject *self)
     }
 
 #if 0
-    PyUnicodeWriter_WriteUTF8(writer, "|args=", 6);
+    PyUnicodeWriter_WriteASCII(writer, "|args=", 6);
     PyUnicodeWriter_WriteRepr(writer, alias->args);
-    PyUnicodeWriter_WriteUTF8(writer, "|h=", 3);
+    PyUnicodeWriter_WriteASCII(writer, "|h=", 3);
     PyUnicodeWriter_WriteRepr(writer, alias->hashable_args);
     if (alias->unhashable_args) {
-        PyUnicodeWriter_WriteUTF8(writer, "|u=", 3);
+        PyUnicodeWriter_WriteASCII(writer, "|u=", 3);
         PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args);
     }
 #endif
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 22dcfe1b0d99bf..dba20226c3283a 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -1512,7 +1512,7 @@ def visitModule(self, mod):
 
     for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) {
         if (i > 0) {
-            if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+            if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
                 goto error;
             }
         }
@@ -1536,7 +1536,7 @@ def visitModule(self, mod):
         }
 
         if (i == 0 && length > 2) {
-            if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) {
+            if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) {
                 goto error;
             }
         }
@@ -1640,7 +1640,7 @@ def visitModule(self, mod):
         }
 
         if (i > 0) {
-            if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+            if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
                 Py_DECREF(name);
                 Py_DECREF(value_repr);
                 goto error;
diff --git a/Python/Python-ast.c b/Python/Python-ast.c
index f7625ab1205bdc..660bc598a4862c 100644
--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -5796,7 +5796,7 @@ ast_repr_list(PyObject *list, int depth)
 
     for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) {
         if (i > 0) {
-            if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+            if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
                 goto error;
             }
         }
@@ -5820,7 +5820,7 @@ ast_repr_list(PyObject *list, int depth)
         }
 
         if (i == 0 && length > 2) {
-            if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) {
+            if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) {
                 goto error;
             }
         }
@@ -5924,7 +5924,7 @@ ast_repr_max_depth(AST_object *self, int depth)
         }
 
         if (i > 0) {
-            if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+            if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
                 Py_DECREF(name);
                 Py_DECREF(value_repr);
                 goto error;
diff --git a/Python/context.c b/Python/context.c
index dceaae9b42979d..9927cab915cae7 100644
--- a/Python/context.c
+++ b/Python/context.c
@@ -979,7 +979,7 @@ contextvar_tp_repr(PyObject *op)
         return NULL;
     }
 
-    if (PyUnicodeWriter_WriteUTF8(writer, "<ContextVar name=", 17) < 0) {
+    if (PyUnicodeWriter_WriteASCII(writer, "<ContextVar name=", 17) < 0) {
         goto error;
     }
     if (PyUnicodeWriter_WriteRepr(writer, self->var_name) < 0) {
@@ -987,7 +987,7 @@ contextvar_tp_repr(PyObject *op)
     }
 
     if (self->var_default != NULL) {
-        if (PyUnicodeWriter_WriteUTF8(writer, " default=", 9) < 0) {
+        if (PyUnicodeWriter_WriteASCII(writer, " default=", 9) < 0) {
             goto error;
         }
         if (PyUnicodeWriter_WriteRepr(writer, self->var_default) < 0) {
@@ -1182,15 +1182,15 @@ token_tp_repr(PyObject *op)
     if (writer == NULL) {
         return NULL;
     }
-    if (PyUnicodeWriter_WriteUTF8(writer, "<Token", 6) < 0) {
+    if (PyUnicodeWriter_WriteASCII(writer, "<Token", 6) < 0) {
         goto error;
     }
     if (self->tok_used) {
-        if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) {
+        if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) {
             goto error;
         }
     }
-    if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) {
+    if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) {
         goto error;
     }
     if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) {
diff --git a/Python/hamt.c b/Python/hamt.c
index f9bbf63961d8de..906149cc6cdbdc 100644
--- a/Python/hamt.c
+++ b/Python/hamt.c
@@ -1176,7 +1176,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
         }
 
         if (key_or_null == NULL) {
-            if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) {
+            if (PyUnicodeWriter_WriteASCII(writer, "NULL:\n", 6) < 0) {
                 goto error;
             }
 
@@ -1194,7 +1194,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
             }
         }
 
-        if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
+        if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
             goto error;
         }
     }
@@ -1915,7 +1915,7 @@ hamt_node_array_dump(PyHamtNode_Array *node,
             goto error;
         }
 
-        if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
+        if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
             goto error;
         }
     }

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: arch...@mail-archive.com

Reply via email to