Current string serialization code puts all characters one by one.
This is slow because dynamic string needs to perform length checks
on every ds_put_char() and it's also doesn't allow compiler to use
better memory copy operations, i.e. doesn't allow copying few bytes
at once.

Special symbols are rare in a typical database.  Quotes are frequent,
but not too frequent.  In databases created by ovn-kubernetes, for
example, usually there are at least 10 to 50 chars between quotes.
So, it's better to count characters that doesn't require escaping
and use fast data copy for the whole sequential block.

Testing with a synthetic benchmark (included) on my laptop shows
following performance improvement:

   Size      Q  S       Before       After       Diff
 -----------------------------------------------------
 100000      0  0 :    0.227 ms     0.142 ms   -37.4 %
 100000      2  1 :    0.277 ms     0.186 ms   -32.8 %
 100000      10 1 :    0.361 ms     0.309 ms   -14.4 %
 10000000    0  0 :   22.720 ms    12.160 ms   -46.4 %
 10000000    2  1 :   27.470 ms    19.300 ms   -29.7 %
 10000000    10 1 :   37.950 ms    31.250 ms   -17.6 %
 100000000   0  0 :  239.600 ms   126.700 ms   -47.1 %
 100000000   2  1 :  292.400 ms   188.600 ms   -35.4 %
 100000000   10 1 :  387.700 ms   321.200 ms   -17.1 %

Here Q - probability (%) for a character to be a '\"' and
S - probability (%) to be a special character ( < 32).

Testing with a closer to real world scenario shows overall decrease
of the time needed for database compaction by ~5-10 %.  And this
change also decreases CPU consumption in general, because string
serialization is used in many different places including ovsdb
monitors and raft.

Signed-off-by: Ilya Maximets <i.maxim...@ovn.org>
---
 lib/json.c        | 23 +++++++++++++---
 tests/test-json.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/lib/json.c b/lib/json.c
index 32d25003b..b2c845171 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -1696,14 +1696,31 @@ json_serialize_string(const char *string, struct ds *ds)
 {
     uint8_t c;
     uint8_t c2;
+    int count;
     const char *escape;
+    const char *start;
 
     ds_put_char(ds, '"');
+    count = 0;
+    start = string;
     while ((c = *string++) != '\0') {
-        escape = chars_escaping[c];
-        while ((c2 = *escape++) != '\0') {
-            ds_put_char(ds, c2);
+        if (c >= ' ' && c != '\"' && c != '\\') {
+            count++;
+            continue;
+        } else {
+            if (count) {
+                ds_put_buffer(ds, start, count);
+                count = 0;
+            }
+            start = string;
+            escape = chars_escaping[c];
+            while ((c2 = *escape++) != '\0') {
+                ds_put_char(ds, c2);
+            }
         }
     }
+    if (count) {
+        ds_put_buffer(ds, start, count);
+    }
     ds_put_char(ds, '"');
 }
diff --git a/tests/test-json.c b/tests/test-json.c
index a7ee595e0..60be82bc8 100644
--- a/tests/test-json.c
+++ b/tests/test-json.c
@@ -22,6 +22,8 @@
 #include <getopt.h>
 #include <stdio.h>
 #include "ovstest.h"
+#include "random.h"
+#include "timeval.h"
 #include "util.h"
 
 /* --pretty: If set, the JSON output is pretty-printed, instead of printed as
@@ -157,3 +159,69 @@ test_json_main(int argc, char *argv[])
 }
 
 OVSTEST_REGISTER("test-json", test_json_main);
+
+static void
+json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
+{
+    struct {
+        int n;
+        int quote_probablility;
+        int special_probability;
+        int iter;
+    } configs[] = {
+        { 100000,     0, 0, 1000, },
+        { 100000,     2, 1, 1000, },
+        { 100000,    10, 1, 1000, },
+        { 10000000,   0, 0, 100,  },
+        { 10000000,   2, 1, 100,  },
+        { 10000000,  10, 1, 100,  },
+        { 100000000,  0, 0, 10.   },
+        { 100000000,  2, 1, 10,   },
+        { 100000000, 10, 1, 10,   },
+    };
+
+    printf("  SIZE      Q  S            TIME\n");
+    printf("--------------------------------------\n");
+
+    for (int i = 0; i < ARRAY_SIZE(configs); i++) {
+        int iter = configs[i].iter;
+        int n = configs[i].n;
+        char *str = xzalloc(n);
+
+        for (int j = 0; j < n - 1; j++) {
+            int r = random_range(100);
+
+            if (r < configs[i].special_probability) {
+                str[j] = random_range(' ' - 1) + 1;
+            } else if (r < (configs[i].special_probability
+                            + configs[i].quote_probablility)) {
+                str[j] = '\"';
+            } else {
+                str[j] = random_range(256 - ' ') + ' ';
+            }
+        }
+
+        printf("%-11d %-2d %-2d: ", n, configs[i].quote_probablility,
+                                       configs[i].special_probability);
+        fflush(stdout);
+
+        struct json *json = json_string_create_nocopy(str);
+        uint64_t start = time_msec();
+
+        char **res = xzalloc(iter * sizeof *res);
+        for (int j = 0; j < iter; j++ ) {
+            res[j] = json_to_string(json, 0);
+        }
+
+        printf("%16.3lf ms\n", (double) (time_msec() - start) / iter);
+        json_destroy(json);
+        for (int j = 0; j < iter; j++ ) {
+            free(res[j]);
+        }
+        free(res);
+    }
+
+    exit(0);
+}
+
+OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main);
-- 
2.31.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to