[Python-checkins] gh-119182: Add PyUnicodeWriter C API (#119184)

vstinner Mon, 17 Jun 2024 08:11:15 -0700

https://github.com/python/cpython/commit/5c4235cd8ce00852cfcb2d3a2cb4c66c6c53c4bf
commit: 5c4235cd8ce00852cfcb2d3a2cb4c66c6c53c4bf
branch: main
author: Victor Stinner <[email protected]>
committer: vstinner <[email protected]>
date: 2024-06-17T17:10:52+02:00
summary:


 gh-119182: Add PyUnicodeWriter C API (#119184)

files:
A Misc/NEWS.d/next/C API/2024-06-07-22-12-30.gh-issue-119182.yt8Ar7.rst
M Doc/c-api/unicode.rst
M Doc/whatsnew/3.14.rst
M Include/cpython/unicodeobject.h
M Modules/_testcapi/unicode.c
M Objects/unicodeobject.c

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 7320d035bab513..02e696c303fa91 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1502,3 +1502,87 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
    :c:func:`PyUnicode_InternInPlace`, returning either a new Unicode string
    object that has been interned, or a new ("owned") reference to an earlier
    interned string object with the same value.
+
+PyUnicodeWriter
+^^^^^^^^^^^^^^^
+
+The :c:type:`PyUnicodeWriter` API can be used to create a Python :class:`str`
+object.
+
+.. versionadded:: 3.14
+
+.. c:type:: PyUnicodeWriter
+
+   A Unicode writer instance.
+
+   The instance must be destroyed by :c:func:`PyUnicodeWriter_Finish` on
+   success, or :c:func:`PyUnicodeWriter_Discard` on error.
+
+.. c:function:: PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length)
+
+   Create a Unicode writer instance.
+
+   Set an exception and return ``NULL`` on error.
+
+.. c:function:: PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
+
+   Return the final Python :class:`str` object and destroy the writer instance.
+
+   Set an exception and return ``NULL`` on error.
+
+.. c:function:: void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
+
+   Discard the internal Unicode buffer and destroy the writer instance.
+
+.. c:function:: int PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
+
+   Write the single Unicode character *ch* into *writer*.
+
+   On success, return ``0``.
+   On error, set an exception, leave the writer unchanged, and return ``-1``.
+
+.. c:function:: int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, Py_ssize_t size)
+
+   Decode the string *str* from UTF-8 in strict mode and write the output into *writer*.
+
+   *size* is the string length in bytes. If *size* is equal to ``-1``, call
+   ``strlen(str)`` to get the string length.
+
+   On success, return ``0``.
+   On error, set an exception, leave the writer unchanged, and return ``-1``.
+
+   To use a different error handler than ``strict``,
+   :c:func:`PyUnicode_DecodeUTF8` can be used with
+   :c:func:`PyUnicodeWriter_WriteStr`.
+
+.. c:function:: int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
+
+   Call :c:func:`PyObject_Str` on *obj* and write the output into *writer*.
+
+   On success, return ``0``.
+   On error, set an exception, leave the writer unchanged, and return ``-1``.
+
+.. c:function:: int PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
+
+   Call :c:func:`PyObject_Repr` on *obj* and write the output into *writer*.
+
+   On success, return ``0``.
+   On error, set an exception, leave the writer unchanged, and return ``-1``.
+
+.. c:function:: int PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str, Py_ssize_t start, Py_ssize_t end)
+
+   Write the substring ``str[start:end]`` into *writer*.
+
+   *str* must be a Python :class:`str` object. *start* must be greater than or
+   equal to 0, and less than or equal to *end*. *end* must be less than or
+   equal to *str* length.
+
+   On success, return ``0``.
+   On error, set an exception, leave the writer unchanged, and return ``-1``.
+
+.. c:function:: int PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
+
+   Similar to :c:func:`PyUnicode_FromFormat`, but write the output directly into *writer*.
+
+   On success, return ``0``.
+   On error, set an exception, leave the writer unchanged, and return ``-1``.
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index a102af13a08362..55541ff14d88ce 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -283,6 +283,21 @@ New Features
 * Add :c:func:`PyLong_GetSign` function to get the sign of :class:`int` objects.
   (Contributed by Sergey B Kirpichev in :gh:`116560`.)
 
+* Add a new :c:type:`PyUnicodeWriter` API to create a Python :class:`str`
+  object:
+
+  * :c:func:`PyUnicodeWriter_Create`.
+  * :c:func:`PyUnicodeWriter_Discard`.
+  * :c:func:`PyUnicodeWriter_Finish`.
+  * :c:func:`PyUnicodeWriter_WriteChar`.
+  * :c:func:`PyUnicodeWriter_WriteUTF8`.
+  * :c:func:`PyUnicodeWriter_WriteStr`.
+  * :c:func:`PyUnicodeWriter_WriteRepr`.
+  * :c:func:`PyUnicodeWriter_WriteSubstring`.
+  * :c:func:`PyUnicodeWriter_Format`.
+
+  (Contributed by Victor Stinner in :gh:`119182`.)
+
 Porting to Python 3.14
 ----------------------
 
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index d9b54bce83202d..e5e1b6be118588 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -444,7 +444,40 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
     Py_ssize_t size);
 
 
-/* --- _PyUnicodeWriter API ----------------------------------------------- */
+/* --- Public PyUnicodeWriter API ----------------------------------------- */
+
+typedef struct PyUnicodeWriter PyUnicodeWriter;
+
+PyAPI_FUNC(PyUnicodeWriter*) PyUnicodeWriter_Create(Py_ssize_t length);
+PyAPI_FUNC(void) PyUnicodeWriter_Discard(PyUnicodeWriter *writer);
+PyAPI_FUNC(PyObject*) PyUnicodeWriter_Finish(PyUnicodeWriter *writer);
+
+PyAPI_FUNC(int) PyUnicodeWriter_WriteChar(
+    PyUnicodeWriter *writer,
+    Py_UCS4 ch);
+PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
+    PyUnicodeWriter *writer,
+    const char *str,
+    Py_ssize_t size);
+
+PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
+    PyUnicodeWriter *writer,
+    PyObject *obj);
+PyAPI_FUNC(int) PyUnicodeWriter_WriteRepr(
+    PyUnicodeWriter *writer,
+    PyObject *obj);
+PyAPI_FUNC(int) PyUnicodeWriter_WriteSubstring(
+    PyUnicodeWriter *writer,
+    PyObject *str,
+    Py_ssize_t start,
+    Py_ssize_t end);
+PyAPI_FUNC(int) PyUnicodeWriter_Format(
+    PyUnicodeWriter *writer,
+    const char *format,
+    ...);
+
+
+/* --- Private _PyUnicodeWriter API --------------------------------------- */
 
 typedef struct {
     PyObject *buffer;
@@ -466,7 +499,7 @@ typedef struct {
     /* If readonly is 1, buffer is a shared string (cannot be modified)
        and size is set to 0. */
     unsigned char readonly;
-} _PyUnicodeWriter ;
+} _PyUnicodeWriter;
 
 // Initialize a Unicode writer.
 //
diff --git a/Misc/NEWS.d/next/C API/2024-06-07-22-12-30.gh-issue-119182.yt8Ar7.rst b/Misc/NEWS.d/next/C API/2024-06-07-22-12-30.gh-issue-119182.yt8Ar7.rst
new file mode 100644
index 00000000000000..3d1384c9f3252f
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2024-06-07-22-12-30.gh-issue-119182.yt8Ar7.rst     
@@ -0,0 +1,13 @@
+Add a new :c:type:`PyUnicodeWriter` API to create a Python :class:`str` object:
+
+* :c:func:`PyUnicodeWriter_Create`.
+* :c:func:`PyUnicodeWriter_Discard`.
+* :c:func:`PyUnicodeWriter_Finish`.
+* :c:func:`PyUnicodeWriter_WriteChar`.
+* :c:func:`PyUnicodeWriter_WriteUTF8`.
+* :c:func:`PyUnicodeWriter_WriteStr`.
+* :c:func:`PyUnicodeWriter_WriteRepr`.
+* :c:func:`PyUnicodeWriter_WriteSubstring`.
+* :c:func:`PyUnicodeWriter_Format`.
+
+Patch by Victor Stinner.
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index 015db9017139d0..79f99c404cd757 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -221,6 +221,221 @@ unicode_copycharacters(PyObject *self, PyObject *args)
 }
 
 
+static PyObject *
+test_unicodewriter(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(100);
+    if (writer == NULL) {
+        return NULL;
+    }
+
+    // test PyUnicodeWriter_WriteUTF8()
+    if (PyUnicodeWriter_WriteUTF8(writer, "var", -1) < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteChar()
+    if (PyUnicodeWriter_WriteChar(writer, '=') < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteSubstring()
+    PyObject *str = PyUnicode_FromString("[long]");
+    if (str == NULL) {
+        goto error;
+    }
+    int ret = PyUnicodeWriter_WriteSubstring(writer, str, 1, 5);
+    Py_CLEAR(str);
+    if (ret < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteStr()
+    str = PyUnicode_FromString(" value ");
+    if (str == NULL) {
+        goto error;
+    }
+    ret = PyUnicodeWriter_WriteStr(writer, str);
+    Py_CLEAR(str);
+    if (ret < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteRepr()
+    str = PyUnicode_FromString("repr");
+    if (str == NULL) {
+        goto error;
+    }
+    ret = PyUnicodeWriter_WriteRepr(writer, str);
+    Py_CLEAR(str);
+    if (ret < 0) {
+        goto error;
+    }
+
+    PyObject *result = PyUnicodeWriter_Finish(writer);
+    if (result == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_EqualToUTF8(result, "var=long value 'repr'"));
+    Py_DECREF(result);
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+
+static PyObject *
+test_unicodewriter_utf8(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    if (PyUnicodeWriter_WriteUTF8(writer, "ascii", -1) < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteUTF8(writer, "latin1=\xC3\xA9", -1) < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteUTF8(writer, "euro=\xE2\x82\xAC", -1) < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteChar(writer, '.') < 0) {
+        goto error;
+    }
+
+    PyObject *result = PyUnicodeWriter_Finish(writer);
+    if (result == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_EqualToUTF8(result,
+                                 "ascii-latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
+    Py_DECREF(result);
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+
+static PyObject *
+test_unicodewriter_invalid_utf8(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    assert(PyUnicodeWriter_WriteUTF8(writer, "invalid=\xFF", -1) < 0);
+    PyUnicodeWriter_Discard(writer);
+
+    assert(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
+    PyErr_Clear();
+
+    Py_RETURN_NONE;
+}
+
+
+static PyObject *
+test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // test recovering from PyUnicodeWriter_WriteUTF8() error
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    assert(PyUnicodeWriter_WriteUTF8(writer, "value=", -1) == 0);
+
+    // write fails with an invalid string
+    assert(PyUnicodeWriter_WriteUTF8(writer, "invalid\xFF", -1) < 0);
+    PyErr_Clear();
+
+    // retry write with a valid string
+    assert(PyUnicodeWriter_WriteUTF8(writer, "valid", -1) == 0);
+
+    PyObject *result = PyUnicodeWriter_Finish(writer);
+    if (result == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_EqualToUTF8(result, "value=valid"));
+    Py_DECREF(result);
+
+    Py_RETURN_NONE;
+}
+
+
+static PyObject *
+test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+
+    // test PyUnicodeWriter_Format()
+    if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteChar()
+    if (PyUnicodeWriter_WriteChar(writer, '.') < 0) {
+        goto error;
+    }
+
+    PyObject *result = PyUnicodeWriter_Finish(writer);
+    if (result == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_EqualToUTF8(result, "Hello 123."));
+    Py_DECREF(result);
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+
+static PyObject *
+test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // test recovering from PyUnicodeWriter_Format() error
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+
+    assert(PyUnicodeWriter_Format(writer, "%s ", "Hello") == 0);
+
+    // PyUnicodeWriter_Format() fails with an invalid format string
+    assert(PyUnicodeWriter_Format(writer, "%s\xff", "World") < 0);
+    PyErr_Clear();
+
+    // Retry PyUnicodeWriter_Format() with a valid format string
+    assert(PyUnicodeWriter_Format(writer, "%s.", "World") == 0);
+
+    PyObject *result = PyUnicodeWriter_Finish(writer);
+    if (result == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_EqualToUTF8(result, "Hello World."));
+    Py_DECREF(result);
+
+    Py_RETURN_NONE;
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"unicode_new",              unicode_new,                    METH_VARARGS},
     {"unicode_fill",             unicode_fill,                   METH_VARARGS},
@@ -229,6 +444,12 @@ static PyMethodDef TestMethods[] = {
     {"unicode_asucs4copy",       unicode_asucs4copy,             METH_VARARGS},
     {"unicode_asutf8",           unicode_asutf8,                 METH_VARARGS},
     {"unicode_copycharacters",   unicode_copycharacters,         METH_VARARGS},
+    {"test_unicodewriter",       test_unicodewriter,             METH_NOARGS},
+    {"test_unicodewriter_utf8",  test_unicodewriter_utf8,        METH_NOARGS},
+    {"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
+    {"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
+    {"test_unicodewriter_format", test_unicodewriter_format,     METH_NOARGS},
+    {"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
     {NULL},
 };
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3b0b4173408724..1f8c89dd12a528 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2872,23 +2872,21 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
     return f;
 }
 
-PyObject *
-PyUnicode_FromFormatV(const char *format, va_list vargs)
+static int
+unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
 {
+    writer->min_length += strlen(format) + 100;
+    writer->overallocate = 1;
+
     va_list vargs2;
     const char *f;
-    _PyUnicodeWriter writer;
-
-    _PyUnicodeWriter_Init(&writer);
-    writer.min_length = strlen(format) + 100;
-    writer.overallocate = 1;
 
     // Copy varags to be able to pass a reference to a subfunction.
     va_copy(vargs2, vargs);
 
     for (f = format; *f; ) {
         if (*f == '%') {
-            f = unicode_fromformat_arg(&writer, f, &vargs2);
+            f = unicode_fromformat_arg(writer, f, &vargs2);
             if (f == NULL)
                 goto fail;
         }
@@ -2912,21 +2910,33 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
             len = p - f;
 
             if (*p == '\0')
-                writer.overallocate = 0;
+                writer->overallocate = 0;
 
-            if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0)
+            if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0)
                 goto fail;
 
             f = p;
         }
     }
     va_end(vargs2);
-    return _PyUnicodeWriter_Finish(&writer);
+    return 0;
 
   fail:
     va_end(vargs2);
-    _PyUnicodeWriter_Dealloc(&writer);
-    return NULL;
+    return -1;
+}
+
+PyObject *
+PyUnicode_FromFormatV(const char *format, va_list vargs)
+{
+    _PyUnicodeWriter writer;
+    _PyUnicodeWriter_Init(&writer);
+
+    if (unicode_from_format(&writer, format, vargs) < 0) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
 }
 
 PyObject *
@@ -2941,6 +2951,23 @@ PyUnicode_FromFormat(const char *format, ...)
     return ret;
 }
 
+int
+PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
+{
+    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
+    Py_ssize_t old_pos = _writer->pos;
+
+    va_list vargs;
+    va_start(vargs, format);
+    int res = unicode_from_format(_writer, format, vargs);
+    va_end(vargs);
+
+    if (res < 0) {
+        _writer->pos = old_pos;
+    }
+    return res;
+}
+
 static Py_ssize_t
 unicode_get_widechar_size(PyObject *unicode)
 {
@@ -4927,6 +4954,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
 }
 
 
+// Used by PyUnicodeWriter_WriteUTF8() implementation
 static int
 unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
                            const char *s, Py_ssize_t size,
@@ -13080,6 +13108,7 @@ unicode_endswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start,
     return PyBool_FromLong(result);
 }
 
+
 static inline void
 _PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
 {
@@ -13103,6 +13132,7 @@ _PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
     }
 }
 
+
 void
 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
 {
@@ -13111,12 +13141,41 @@ _PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
     /* ASCII is the bare minimum */
     writer->min_char = 127;
 
-    /* use a value smaller than PyUnicode_1BYTE_KIND() so
+    /* use a kind value smaller than PyUnicode_1BYTE_KIND so
        _PyUnicodeWriter_PrepareKind() will copy the buffer. */
-    writer->kind = 0;
-    assert(writer->kind <= PyUnicode_1BYTE_KIND);
+    assert(writer->kind == 0);
+    assert(writer->kind < PyUnicode_1BYTE_KIND);
 }
 
+
+PyUnicodeWriter*
+PyUnicodeWriter_Create(Py_ssize_t length)
+{
+    const size_t size = sizeof(_PyUnicodeWriter);
+    PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
+    if (pub_writer == NULL) {
+        return (PyUnicodeWriter *)PyErr_NoMemory();
+    }
+    _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;
+
+    _PyUnicodeWriter_Init(writer);
+    if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
+        PyUnicodeWriter_Discard(pub_writer);
+        return NULL;
+    }
+    writer->overallocate = 1;
+
+    return pub_writer;
+}
+
+
+void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
+{
+    _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
+    PyMem_Free(writer);
+}
+
+
 // Initialize _PyUnicodeWriter with initial buffer
 static inline void
 _PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
@@ -13127,6 +13186,7 @@ _PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
     writer->min_length = writer->size;
 }
 
+
 int
 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                  Py_ssize_t length, Py_UCS4 maxchar)
@@ -13242,9 +13302,17 @@ _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
     return _PyUnicodeWriter_WriteCharInline(writer, ch);
 }
 
+int
+PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
+}
+
 int
 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
 {
+    assert(PyUnicode_Check(str));
+
     Py_UCS4 maxchar;
     Py_ssize_t len;
 
@@ -13270,6 +13338,34 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
     return 0;
 }
 
+int
+PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
+{
+    PyObject *str = PyObject_Str(obj);
+    if (str == NULL) {
+        return -1;
+    }
+
+    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
+    Py_DECREF(str);
+    return res;
+}
+
+
+int
+PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
+{
+    PyObject *repr = PyObject_Repr(obj);
+    if (repr == NULL) {
+        return -1;
+    }
+
+    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr);
+    Py_DECREF(repr);
+    return res;
+}
+
+
 int
 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
                                 Py_ssize_t start, Py_ssize_t end)
@@ -13302,6 +13398,29 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
     return 0;
 }
 
+
+int
+PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
+                               Py_ssize_t start, Py_ssize_t end)
+{
+    if (!PyUnicode_Check(str)) {
+        PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
+        return -1;
+    }
+    if (start < 0 || start > end) {
+        PyErr_Format(PyExc_ValueError, "invalid start argument");
+        return -1;
+    }
+    if (end > PyUnicode_GET_LENGTH(str)) {
+        PyErr_Format(PyExc_ValueError, "invalid end argument");
+        return -1;
+    }
+
+    return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
+                                           start, end);
+}
+
+
 int
 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
                                   const char *ascii, Py_ssize_t len)
@@ -13362,6 +13481,25 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
     return 0;
 }
 
+int
+PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
+                          const char *str,
+                          Py_ssize_t size)
+{
+    if (size < 0) {
+        size = strlen(str);
+    }
+
+    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
+    Py_ssize_t old_pos = _writer->pos;
+    int res = unicode_decode_utf8_writer(_writer, str, size,
+                                         _Py_ERROR_STRICT, NULL, NULL);
+    if (res < 0) {
+        _writer->pos = old_pos;
+    }
+    return res;
+}
+
 int
 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
                                    const char *str, Py_ssize_t len)
@@ -13408,6 +13546,17 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
     return unicode_result(str);
 }
 
+
+PyObject*
+PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
+{
+    PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
+    assert(((_PyUnicodeWriter*)writer)->buffer == NULL);
+    PyMem_Free(writer);
+    return str;
+}
+
+
 void
 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
 {

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

[Python-checkins] gh-119182: Add PyUnicodeWriter C API (#119184)

Reply via email to