Current string serialization code puts all characters one by one. This is slow because dynamic string needs to perform length checks on every ds_put_char() and it's also doesn't allow compiler to use better memory copy operations, i.e. doesn't allow copying few bytes at once.
Special symbols are rare in a typical database. Quotes are frequent, but not too frequent. In databases created by ovn-kubernetes, for example, usually there are at least 10 to 50 chars between quotes. So, it's better to count characters that doesn't require escaping and use fast data copy for the whole sequential block. Testing with a synthetic benchmark (included) on my laptop shows following performance improvement: Size Q S Before After Diff ----------------------------------------------------- 100000 0 0 : 0.227 ms 0.142 ms -37.4 % 100000 2 1 : 0.277 ms 0.186 ms -32.8 % 100000 10 1 : 0.361 ms 0.309 ms -14.4 % 10000000 0 0 : 22.720 ms 12.160 ms -46.4 % 10000000 2 1 : 27.470 ms 19.300 ms -29.7 % 10000000 10 1 : 37.950 ms 31.250 ms -17.6 % 100000000 0 0 : 239.600 ms 126.700 ms -47.1 % 100000000 2 1 : 292.400 ms 188.600 ms -35.4 % 100000000 10 1 : 387.700 ms 321.200 ms -17.1 % Here Q - probability (%) for a character to be a '\"' and S - probability (%) to be a special character ( < 32). Testing with a closer to real world scenario shows overall decrease of the time needed for database compaction by ~5-10 %. And this change also decreases CPU consumption in general, because string serialization is used in many different places including ovsdb monitors and raft. Signed-off-by: Ilya Maximets <i.maxim...@ovn.org> --- lib/json.c | 23 +++++++++++++--- tests/test-json.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/lib/json.c b/lib/json.c index 32d25003b..b2c845171 100644 --- a/lib/json.c +++ b/lib/json.c @@ -1696,14 +1696,31 @@ json_serialize_string(const char *string, struct ds *ds) { uint8_t c; uint8_t c2; + int count; const char *escape; + const char *start; ds_put_char(ds, '"'); + count = 0; + start = string; while ((c = *string++) != '\0') { - escape = chars_escaping[c]; - while ((c2 = *escape++) != '\0') { - ds_put_char(ds, c2); + if (c >= ' ' && c != '\"' && c != '\\') { + count++; + continue; + } else { + if (count) { + ds_put_buffer(ds, start, count); + count = 0; + } + start = string; + escape = chars_escaping[c]; + while ((c2 = *escape++) != '\0') { + ds_put_char(ds, c2); + } } } + if (count) { + ds_put_buffer(ds, start, count); + } ds_put_char(ds, '"'); } diff --git a/tests/test-json.c b/tests/test-json.c index a7ee595e0..60be82bc8 100644 --- a/tests/test-json.c +++ b/tests/test-json.c @@ -22,6 +22,8 @@ #include <getopt.h> #include <stdio.h> #include "ovstest.h" +#include "random.h" +#include "timeval.h" #include "util.h" /* --pretty: If set, the JSON output is pretty-printed, instead of printed as @@ -157,3 +159,69 @@ test_json_main(int argc, char *argv[]) } OVSTEST_REGISTER("test-json", test_json_main); + +static void +json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) +{ + struct { + int n; + int quote_probablility; + int special_probability; + int iter; + } configs[] = { + { 100000, 0, 0, 1000, }, + { 100000, 2, 1, 1000, }, + { 100000, 10, 1, 1000, }, + { 10000000, 0, 0, 100, }, + { 10000000, 2, 1, 100, }, + { 10000000, 10, 1, 100, }, + { 100000000, 0, 0, 10. }, + { 100000000, 2, 1, 10, }, + { 100000000, 10, 1, 10, }, + }; + + printf(" SIZE Q S TIME\n"); + printf("--------------------------------------\n"); + + for (int i = 0; i < ARRAY_SIZE(configs); i++) { + int iter = configs[i].iter; + int n = configs[i].n; + char *str = xzalloc(n); + + for (int j = 0; j < n - 1; j++) { + int r = random_range(100); + + if (r < configs[i].special_probability) { + str[j] = random_range(' ' - 1) + 1; + } else if (r < (configs[i].special_probability + + configs[i].quote_probablility)) { + str[j] = '\"'; + } else { + str[j] = random_range(256 - ' ') + ' '; + } + } + + printf("%-11d %-2d %-2d: ", n, configs[i].quote_probablility, + configs[i].special_probability); + fflush(stdout); + + struct json *json = json_string_create_nocopy(str); + uint64_t start = time_msec(); + + char **res = xzalloc(iter * sizeof *res); + for (int j = 0; j < iter; j++ ) { + res[j] = json_to_string(json, 0); + } + + printf("%16.3lf ms\n", (double) (time_msec() - start) / iter); + json_destroy(json); + for (int j = 0; j < iter; j++ ) { + free(res[j]); + } + free(res); + } + + exit(0); +} + +OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main); -- 2.31.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev