On 8/26/21 4:31 AM, Numan Siddique wrote: > On Tue, Aug 24, 2021 at 5:07 PM Ilya Maximets <[email protected]> wrote: >> >> Current string serialization code puts all characters one by one. >> This is slow because dynamic string needs to perform length checks >> on every ds_put_char() and it's also doesn't allow compiler to use >> better memory copy operations, i.e. doesn't allow copying few bytes >> at once. >> >> Special symbols are rare in a typical database. Quotes are frequent, >> but not too frequent. In databases created by ovn-kubernetes, for >> example, usually there are at least 10 to 50 chars between quotes. >> So, it's better to count characters that doesn't require escaping >> and use fast data copy for the whole sequential block. >> >> Testing with a synthetic benchmark (included) on my laptop shows >> following performance improvement: >> >> Size Q S Before After Diff >> ----------------------------------------------------- >> 100000 0 0 : 0.227 ms 0.142 ms -37.4 % >> 100000 2 1 : 0.277 ms 0.186 ms -32.8 % >> 100000 10 1 : 0.361 ms 0.309 ms -14.4 % >> 10000000 0 0 : 22.720 ms 12.160 ms -46.4 % >> 10000000 2 1 : 27.470 ms 19.300 ms -29.7 % >> 10000000 10 1 : 37.950 ms 31.250 ms -17.6 % >> 100000000 0 0 : 239.600 ms 126.700 ms -47.1 % >> 100000000 2 1 : 292.400 ms 188.600 ms -35.4 % >> 100000000 10 1 : 387.700 ms 321.200 ms -17.1 % >> >> Here Q - probability (%) for a character to be a '\"' and >> S - probability (%) to be a special character ( < 32). >> >> Testing with a closer to real world scenario shows overall decrease >> of the time needed for database compaction by ~5-10 %. And this >> change also decreases CPU consumption in general, because string >> serialization is used in many different places including ovsdb >> monitors and raft. >> >> Signed-off-by: Ilya Maximets <[email protected]> > > Hi Ilya, > > The patch LGTM. It makes sense to optimize this way. > > I've one comment below. > > You can consider my Ack > > Acked-by: Numan Siddique <[email protected]> > > Thanks > Numan >
I agree with Numan, the patch looks good, I do have some minor comments below though. Acked-by: Dumitru Ceara <[email protected]> >> --- >> lib/json.c | 23 +++++++++++++--- >> tests/test-json.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 88 insertions(+), 3 deletions(-) >> >> diff --git a/lib/json.c b/lib/json.c >> index 32d25003b..b2c845171 100644 >> --- a/lib/json.c >> +++ b/lib/json.c >> @@ -1696,14 +1696,31 @@ json_serialize_string(const char *string, struct ds >> *ds) >> { >> uint8_t c; >> uint8_t c2; >> + int count; Nit: s/int/size_t/ >> const char *escape; >> + const char *start; >> >> ds_put_char(ds, '"'); >> + count = 0; >> + start = string; >> while ((c = *string++) != '\0') { >> - escape = chars_escaping[c]; >> - while ((c2 = *escape++) != '\0') { >> - ds_put_char(ds, c2); >> + if (c >= ' ' && c != '\"' && c != '\\') { Nit: no need to escape '"'. >> + count++; >> + continue; >> + } else { >> + if (count) { >> + ds_put_buffer(ds, start, count); >> + count = 0; >> + } >> + start = string; >> + escape = chars_escaping[c]; >> + while ((c2 = *escape++) != '\0') { >> + ds_put_char(ds, c2); >> + } > > Instead of the above while() {}, why can't we do - ds_put_buffer(ds, > escape, strlen(escape)); I tried something like this (actually avoided strlen altogether) and I didn't see any performance benefit on my machine when using the benchmark Ilya added (likely because of low probabilities for special chars): diff --git a/lib/json.c b/lib/json.c index aad9254ab2..5f88f140c7 100644 --- a/lib/json.c +++ b/lib/json.c @@ -1725,11 +1725,45 @@ static const char *chars_escaping[256] = { "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff" }; +static size_t chars_escaping_len[256] = { + 6, 6, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 6, 2, 2, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, +}; + static void json_serialize_string(const char *string, struct ds *ds) { uint8_t c; - uint8_t c2; int count; const char *escape; const char *start; @@ -1748,9 +1782,7 @@ json_serialize_string(const char *string, struct ds *ds) } start = string; escape = chars_escaping[c]; - while ((c2 = *escape++) != '\0') { - ds_put_char(ds, c2); - } + ds_put_buffer(ds, escape, chars_escaping_len[c]); } } if (count) { --- > > Thanks > Numan > >> } >> } >> + if (count) { >> + ds_put_buffer(ds, start, count); >> + } >> ds_put_char(ds, '"'); >> } >> diff --git a/tests/test-json.c b/tests/test-json.c >> index a7ee595e0..60be82bc8 100644 >> --- a/tests/test-json.c >> +++ b/tests/test-json.c >> @@ -22,6 +22,8 @@ >> #include <getopt.h> >> #include <stdio.h> >> #include "ovstest.h" >> +#include "random.h" >> +#include "timeval.h" >> #include "util.h" >> >> /* --pretty: If set, the JSON output is pretty-printed, instead of printed >> as >> @@ -157,3 +159,69 @@ test_json_main(int argc, char *argv[]) >> } >> >> OVSTEST_REGISTER("test-json", test_json_main); >> + >> +static void >> +json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) >> +{ >> + struct { >> + int n; >> + int quote_probablility; >> + int special_probability; >> + int iter; >> + } configs[] = { >> + { 100000, 0, 0, 1000, }, >> + { 100000, 2, 1, 1000, }, >> + { 100000, 10, 1, 1000, }, >> + { 10000000, 0, 0, 100, }, >> + { 10000000, 2, 1, 100, }, >> + { 10000000, 10, 1, 100, }, >> + { 100000000, 0, 0, 10. }, >> + { 100000000, 2, 1, 10, }, >> + { 100000000, 10, 1, 10, }, >> + }; >> + >> + printf(" SIZE Q S TIME\n"); >> + printf("--------------------------------------\n"); >> + >> + for (int i = 0; i < ARRAY_SIZE(configs); i++) { >> + int iter = configs[i].iter; >> + int n = configs[i].n; >> + char *str = xzalloc(n); >> + >> + for (int j = 0; j < n - 1; j++) { >> + int r = random_range(100); >> + >> + if (r < configs[i].special_probability) { >> + str[j] = random_range(' ' - 1) + 1; >> + } else if (r < (configs[i].special_probability >> + + configs[i].quote_probablility)) { >> + str[j] = '\"'; Nit: no need to escape '"'. >> + } else { >> + str[j] = random_range(256 - ' ') + ' '; >> + } >> + } >> + >> + printf("%-11d %-2d %-2d: ", n, configs[i].quote_probablility, >> + configs[i].special_probability); >> + fflush(stdout); >> + >> + struct json *json = json_string_create_nocopy(str); >> + uint64_t start = time_msec(); >> + >> + char **res = xzalloc(iter * sizeof *res); >> + for (int j = 0; j < iter; j++ ) { >> + res[j] = json_to_string(json, 0); >> + } >> + >> + printf("%16.3lf ms\n", (double) (time_msec() - start) / iter); >> + json_destroy(json); >> + for (int j = 0; j < iter; j++ ) { >> + free(res[j]); >> + } >> + free(res); >> + } >> + >> + exit(0); >> +} >> + >> +OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main); >> -- >> 2.31.1 >> >> _______________________________________________ >> dev mailing list >> [email protected] >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev >> > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
