On Tue, Aug 24, 2021 at 5:07 PM Ilya Maximets <i.maxim...@ovn.org> wrote: > > Current string serialization code puts all characters one by one. > This is slow because dynamic string needs to perform length checks > on every ds_put_char() and it's also doesn't allow compiler to use > better memory copy operations, i.e. doesn't allow copying few bytes > at once. > > Special symbols are rare in a typical database. Quotes are frequent, > but not too frequent. In databases created by ovn-kubernetes, for > example, usually there are at least 10 to 50 chars between quotes. > So, it's better to count characters that doesn't require escaping > and use fast data copy for the whole sequential block. > > Testing with a synthetic benchmark (included) on my laptop shows > following performance improvement: > > Size Q S Before After Diff > ----------------------------------------------------- > 100000 0 0 : 0.227 ms 0.142 ms -37.4 % > 100000 2 1 : 0.277 ms 0.186 ms -32.8 % > 100000 10 1 : 0.361 ms 0.309 ms -14.4 % > 10000000 0 0 : 22.720 ms 12.160 ms -46.4 % > 10000000 2 1 : 27.470 ms 19.300 ms -29.7 % > 10000000 10 1 : 37.950 ms 31.250 ms -17.6 % > 100000000 0 0 : 239.600 ms 126.700 ms -47.1 % > 100000000 2 1 : 292.400 ms 188.600 ms -35.4 % > 100000000 10 1 : 387.700 ms 321.200 ms -17.1 % > > Here Q - probability (%) for a character to be a '\"' and > S - probability (%) to be a special character ( < 32). > > Testing with a closer to real world scenario shows overall decrease > of the time needed for database compaction by ~5-10 %. And this > change also decreases CPU consumption in general, because string > serialization is used in many different places including ovsdb > monitors and raft. > > Signed-off-by: Ilya Maximets <i.maxim...@ovn.org>
Hi Ilya, The patch LGTM. It makes sense to optimize this way. I've one comment below. You can consider my Ack Acked-by: Numan Siddique <num...@ovn.org> Thanks Numan > --- > lib/json.c | 23 +++++++++++++--- > tests/test-json.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 88 insertions(+), 3 deletions(-) > > diff --git a/lib/json.c b/lib/json.c > index 32d25003b..b2c845171 100644 > --- a/lib/json.c > +++ b/lib/json.c > @@ -1696,14 +1696,31 @@ json_serialize_string(const char *string, struct ds > *ds) > { > uint8_t c; > uint8_t c2; > + int count; > const char *escape; > + const char *start; > > ds_put_char(ds, '"'); > + count = 0; > + start = string; > while ((c = *string++) != '\0') { > - escape = chars_escaping[c]; > - while ((c2 = *escape++) != '\0') { > - ds_put_char(ds, c2); > + if (c >= ' ' && c != '\"' && c != '\\') { > + count++; > + continue; > + } else { > + if (count) { > + ds_put_buffer(ds, start, count); > + count = 0; > + } > + start = string; > + escape = chars_escaping[c]; > + while ((c2 = *escape++) != '\0') { > + ds_put_char(ds, c2); > + } Instead of the above while() {}, why can't we do - ds_put_buffer(ds, escape, strlen(escape)); Thanks Numan > } > } > + if (count) { > + ds_put_buffer(ds, start, count); > + } > ds_put_char(ds, '"'); > } > diff --git a/tests/test-json.c b/tests/test-json.c > index a7ee595e0..60be82bc8 100644 > --- a/tests/test-json.c > +++ b/tests/test-json.c > @@ -22,6 +22,8 @@ > #include <getopt.h> > #include <stdio.h> > #include "ovstest.h" > +#include "random.h" > +#include "timeval.h" > #include "util.h" > > /* --pretty: If set, the JSON output is pretty-printed, instead of printed as > @@ -157,3 +159,69 @@ test_json_main(int argc, char *argv[]) > } > > OVSTEST_REGISTER("test-json", test_json_main); > + > +static void > +json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) > +{ > + struct { > + int n; > + int quote_probablility; > + int special_probability; > + int iter; > + } configs[] = { > + { 100000, 0, 0, 1000, }, > + { 100000, 2, 1, 1000, }, > + { 100000, 10, 1, 1000, }, > + { 10000000, 0, 0, 100, }, > + { 10000000, 2, 1, 100, }, > + { 10000000, 10, 1, 100, }, > + { 100000000, 0, 0, 10. }, > + { 100000000, 2, 1, 10, }, > + { 100000000, 10, 1, 10, }, > + }; > + > + printf(" SIZE Q S TIME\n"); > + printf("--------------------------------------\n"); > + > + for (int i = 0; i < ARRAY_SIZE(configs); i++) { > + int iter = configs[i].iter; > + int n = configs[i].n; > + char *str = xzalloc(n); > + > + for (int j = 0; j < n - 1; j++) { > + int r = random_range(100); > + > + if (r < configs[i].special_probability) { > + str[j] = random_range(' ' - 1) + 1; > + } else if (r < (configs[i].special_probability > + + configs[i].quote_probablility)) { > + str[j] = '\"'; > + } else { > + str[j] = random_range(256 - ' ') + ' '; > + } > + } > + > + printf("%-11d %-2d %-2d: ", n, configs[i].quote_probablility, > + configs[i].special_probability); > + fflush(stdout); > + > + struct json *json = json_string_create_nocopy(str); > + uint64_t start = time_msec(); > + > + char **res = xzalloc(iter * sizeof *res); > + for (int j = 0; j < iter; j++ ) { > + res[j] = json_to_string(json, 0); > + } > + > + printf("%16.3lf ms\n", (double) (time_msec() - start) / iter); > + json_destroy(json); > + for (int j = 0; j < iter; j++ ) { > + free(res[j]); > + } > + free(res); > + } > + > + exit(0); > +} > + > +OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main); > -- > 2.31.1 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev