https://github.com/python/cpython/commit/05adfbba2abafcdd271bf144a7b3f80bcd927288
commit: 05adfbba2abafcdd271bf144a7b3f80bcd927288
branch: main
author: Pieter Eendebak <pieter.eende...@gmail.com>
committer: serhiy-storchaka <storch...@gmail.com>
date: 2024-05-06T11:04:39+03:00
summary:

gh-95382: Improve performance of json encoder with indent (GH-118105)

files:
A Misc/NEWS.d/next/Core and Builtins/2024-05-03-18-01-26.gh-issue-95382.73FSEv.rst
M Lib/json/encoder.py
M Modules/_json.c

diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py
index 597849eca0524a..323332f064edf8 100644
--- a/Lib/json/encoder.py
+++ b/Lib/json/encoder.py
@@ -244,15 +244,18 @@ def floatstr(o, allow_nan=self.allow_nan,
             return text
 
 
-        if (_one_shot and c_make_encoder is not None
-                and self.indent is None):
+        if self.indent is None or isinstance(self.indent, str):
+            indent = self.indent
+        else:
+            indent = ' ' * self.indent
+        if _one_shot and c_make_encoder is not None:
             _iterencode = c_make_encoder(
-                markers, self.default, _encoder, self.indent,
+                markers, self.default, _encoder, indent,
                 self.key_separator, self.item_separator, self.sort_keys,
                 self.skipkeys, self.allow_nan)
         else:
             _iterencode = _make_iterencode(
-                markers, self.default, _encoder, self.indent, floatstr,
+                markers, self.default, _encoder, indent, floatstr,
                 self.key_separator, self.item_separator, self.sort_keys,
                 self.skipkeys, _one_shot)
         return _iterencode(o, 0)
@@ -272,9 +275,6 @@ def _make_iterencode(markers, _default, _encoder, _indent, 
_floatstr,
         _intstr=int.__repr__,
     ):
 
-    if _indent is not None and not isinstance(_indent, str):
-        _indent = ' ' * _indent
-
     def _iterencode_list(lst, _current_indent_level):
         if not lst:
             yield '[]'
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-03-18-01-26.gh-issue-95382.73FSEv.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-03-18-01-26.gh-issue-95382.73FSEv.rst
new file mode 100644
index 00000000000000..097a663e3f5e24
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-03-18-01-26.gh-issue-95382.73FSEv.rst
@@ -0,0 +1,2 @@
+Improve performance of :func:`json.dumps` and :func:`json.dump` when using the argument *indent*. Depending on the data the encoding using
+:func:`json.dumps` with *indent* can be up to 2 to 3 times faster.
diff --git a/Modules/_json.c b/Modules/_json.c
index fc39f624b723f5..e33ef1f5eea92f 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self);
 static int
 encoder_clear(PyEncoderObject *self);
 static int
-encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject 
*seq, Py_ssize_t indent_level);
+encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject 
*seq, PyObject *newline_indent);
 static int
-encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject 
*obj, Py_ssize_t indent_level);
+encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject 
*obj, PyObject *newline_indent);
 static int
-encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject 
*dct, Py_ssize_t indent_level);
+encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject 
*dct, PyObject *newline_indent);
 static PyObject *
 _encoded_const(PyObject *obj);
 static void
@@ -1251,6 +1251,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject 
*kwds)
     return (PyObject *)s;
 }
 
+static PyObject *
+_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
+{   /* Build '\n' followed by indent repeated indent_level times; NULL on failure. */
+    PyObject *newline_indent = PyUnicode_FromOrdinal('\n');
+    if (newline_indent != NULL && indent_level) {  /* level 0: the bare newline is the result */
+        PyUnicode_AppendAndDel(&newline_indent,  /* releases the repeated-indent temporary */
+                               PySequence_Repeat(indent, indent_level));
+    }
+    return newline_indent;  /* new reference owned by the caller (encoder_call DECREFs it) */
+}
+
 static PyObject *
 encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
 {
@@ -1267,10 +1278,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, 
PyObject *kwds)
     _PyUnicodeWriter_Init(&writer);
     writer.overallocate = 1;
 
-    if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
+    PyObject *newline_indent = NULL;
+    if (self->indent != Py_None) {
+        newline_indent = _create_newline_indent(self->indent, indent_level);
+        if (newline_indent == NULL) {
+            _PyUnicodeWriter_Dealloc(&writer);
+            return NULL;
+        }
+    }
+    if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
         _PyUnicodeWriter_Dealloc(&writer);
+        Py_XDECREF(newline_indent);
         return NULL;
     }
+    Py_XDECREF(newline_indent);
 
     result = PyTuple_New(1);
     if (result == NULL ||
@@ -1358,7 +1379,7 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject 
*stolen)
 
 static int
 encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
-                       PyObject *obj, Py_ssize_t indent_level)
+                       PyObject *obj, PyObject *newline_indent)
 {
     /* Encode Python object obj to a JSON term */
     PyObject *newobj;
@@ -1394,14 +1415,14 @@ encoder_listencode_obj(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
     else if (PyList_Check(obj) || PyTuple_Check(obj)) {
         if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
             return -1;
-        rv = encoder_listencode_list(s, writer, obj, indent_level);
+        rv = encoder_listencode_list(s, writer, obj, newline_indent);
         _Py_LeaveRecursiveCall();
         return rv;
     }
     else if (PyDict_Check(obj)) {
         if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
             return -1;
-        rv = encoder_listencode_dict(s, writer, obj, indent_level);
+        rv = encoder_listencode_dict(s, writer, obj, newline_indent);
         _Py_LeaveRecursiveCall();
         return rv;
     }
@@ -1435,7 +1456,7 @@ encoder_listencode_obj(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
             Py_XDECREF(ident);
             return -1;
         }
-        rv = encoder_listencode_obj(s, writer, newobj, indent_level);
+        rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
         _Py_LeaveRecursiveCall();
 
         Py_DECREF(newobj);
@@ -1456,7 +1477,9 @@ encoder_listencode_obj(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
 
 static int
 encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool 
*first,
-                         PyObject *key, PyObject *value, Py_ssize_t 
indent_level)
+                         PyObject *key, PyObject *value,
+                         PyObject *newline_indent,
+                         PyObject *item_separator)
 {
     PyObject *keystr = NULL;
     PyObject *encoded;
@@ -1493,7 +1516,7 @@ encoder_encode_key_value(PyEncoderObject *s, 
_PyUnicodeWriter *writer, bool *fir
         *first = false;
     }
     else {
-        if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
+        if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
             Py_DECREF(keystr);
             return -1;
         }
@@ -1511,7 +1534,7 @@ encoder_encode_key_value(PyEncoderObject *s, 
_PyUnicodeWriter *writer, bool *fir
     if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
         return -1;
     }
-    if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
+    if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
         return -1;
     }
     return 0;
@@ -1519,13 +1542,15 @@ encoder_encode_key_value(PyEncoderObject *s, 
_PyUnicodeWriter *writer, bool *fir
 
 static int
 encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
-                        PyObject *dct, Py_ssize_t indent_level)
+                        PyObject *dct, PyObject *newline_indent)
 {
     /* Encode Python dict dct a JSON term */
     PyObject *ident = NULL;
     PyObject *items = NULL;
     PyObject *key, *value;
     bool first = true;
+    PyObject *new_newline_indent = NULL;
+    PyObject *separator_indent = NULL;
 
     if (PyDict_GET_SIZE(dct) == 0)  /* Fast path */
         return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
@@ -1549,14 +1574,21 @@ encoder_listencode_dict(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
     if (_PyUnicodeWriter_WriteChar(writer, '{'))
         goto bail;
 
+    PyObject *current_item_separator = s->item_separator; // borrowed reference
     if (s->indent != Py_None) {
-        /* TODO: DOES NOT RUN */
-        indent_level += 1;
-        /*
-            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
-            separator = _item_separator + newline_indent
-            buf += newline_indent
-        */
+        new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
+        if (new_newline_indent == NULL) {
+            goto bail;
+        }
+        separator_indent = PyUnicode_Concat(current_item_separator, 
new_newline_indent);
+        if (separator_indent == NULL) {
+            goto bail;
+        }
+        // update item separator with a borrowed reference
+        current_item_separator = separator_indent;
+        if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
+            goto bail;
+        }
     }
 
     if (s->sort_keys || !PyDict_CheckExact(dct)) {
@@ -1574,7 +1606,9 @@ encoder_listencode_dict(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
 
             key = PyTuple_GET_ITEM(item, 0);
             value = PyTuple_GET_ITEM(item, 1);
-            if (encoder_encode_key_value(s, writer, &first, key, value, 
indent_level) < 0)
+            if (encoder_encode_key_value(s, writer, &first, key, value,
+                                         new_newline_indent,
+                                         current_item_separator) < 0)
                 goto bail;
         }
         Py_CLEAR(items);
@@ -1582,7 +1616,9 @@ encoder_listencode_dict(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
     } else {
         Py_ssize_t pos = 0;
         while (PyDict_Next(dct, &pos, &key, &value)) {
-            if (encoder_encode_key_value(s, writer, &first, key, value, 
indent_level) < 0)
+            if (encoder_encode_key_value(s, writer, &first, key, value,
+                                         new_newline_indent,
+                                         current_item_separator) < 0)
                 goto bail;
         }
     }
@@ -1592,12 +1628,15 @@ encoder_listencode_dict(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
             goto bail;
         Py_CLEAR(ident);
     }
-    /* TODO DOES NOT RUN; dead code
     if (s->indent != Py_None) {
-        indent_level -= 1;
+        Py_CLEAR(new_newline_indent);
+        Py_CLEAR(separator_indent);
+
+        if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
+            goto bail;
+        }
+    }
 
-        yield '\n' + (' ' * (_indent * _current_indent_level))
-    }*/
     if (_PyUnicodeWriter_WriteChar(writer, '}'))
         goto bail;
     return 0;
@@ -1605,16 +1644,20 @@ encoder_listencode_dict(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
 bail:
     Py_XDECREF(items);
     Py_XDECREF(ident);
+    Py_XDECREF(separator_indent);
+    Py_XDECREF(new_newline_indent);
     return -1;
 }
 
 static int
 encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
-                        PyObject *seq, Py_ssize_t indent_level)
+                        PyObject *seq, PyObject *newline_indent)
 {
     PyObject *ident = NULL;
     PyObject *s_fast = NULL;
     Py_ssize_t i;
+    PyObject *new_newline_indent = NULL;
+    PyObject *separator_indent = NULL;
 
     ident = NULL;
     s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
@@ -1643,22 +1686,31 @@ encoder_listencode_list(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
 
     if (_PyUnicodeWriter_WriteChar(writer, '['))
         goto bail;
+
+    PyObject *separator = s->item_separator; // borrowed reference
     if (s->indent != Py_None) {
-        /* TODO: DOES NOT RUN */
-        indent_level += 1;
-        /*
-            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
-            separator = _item_separator + newline_indent
-            buf += newline_indent
-        */
+        new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
+        if (new_newline_indent == NULL) {
+            goto bail;
+        }
+
+        if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
+            goto bail;
+        }
+
+        separator_indent = PyUnicode_Concat(separator, new_newline_indent);
+        if (separator_indent == NULL) {
+            goto bail;
+        }
+        separator = separator_indent; // assign separator with borrowed 
reference
     }
     for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
         if (i) {
-            if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
+            if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
                 goto bail;
         }
-        if (encoder_listencode_obj(s, writer, obj, indent_level))
+        if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
             goto bail;
     }
     if (ident != NULL) {
@@ -1667,12 +1719,14 @@ encoder_listencode_list(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
         Py_CLEAR(ident);
     }
 
-    /* TODO: DOES NOT RUN
     if (s->indent != Py_None) {
-        indent_level -= 1;
+        Py_CLEAR(new_newline_indent);
+        Py_CLEAR(separator_indent);
+        if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
+            goto bail;
+        }
+    }
 
-        yield '\n' + (' ' * (_indent * _current_indent_level))
-    }*/
     if (_PyUnicodeWriter_WriteChar(writer, ']'))
         goto bail;
     Py_DECREF(s_fast);
@@ -1681,6 +1735,8 @@ encoder_listencode_list(PyEncoderObject *s, 
_PyUnicodeWriter *writer,
 bail:
     Py_XDECREF(ident);
     Py_DECREF(s_fast);
+    Py_XDECREF(separator_indent);
+    Py_XDECREF(new_newline_indent);
     return -1;
 }
 
@@ -1721,7 +1777,7 @@ encoder_clear(PyEncoderObject *self)
     return 0;
 }
 
-PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> 
iterable");
+PyDoc_STRVAR(encoder_doc, "Encoder(markers, default, encoder, indent, 
key_separator, item_separator, sort_keys, skipkeys, allow_nan)");
 
 static PyType_Slot PyEncoderType_slots[] = {
     {Py_tp_doc, (void *)encoder_doc},

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to