On Fri, Jun 6, 2025 at 7:23 AM Ilya Maximets <i.maxim...@ovn.org> wrote: > > The 'struct json' contains a union and the largest element of that > union is 'struct json_array', which takes 24 bytes. It means, that a > lot of space in this structure remains unused whenever the type is not > JSON_ARRAY. > > For example, the 'string' pointer used for JSON_STRING only takes 8 > bytes on a 64-bit system leaving 24 - 8 = 16 bytes unused. There is > also a 4-byte hole between the 'type' and the 'count'. > > A pretty common optimization technique for storing strings is to > store short ones in place of the pointer and only allocate dynamically > the larger strings that do not fit. In our case, we have even larger > space of 24 bytes to work with. So, we could use all 24 bytes to > store the strings (23 string bytes + '\0') and use the 4 byte unused > space outside the union to store the storage type. > > This approach should allow us to save on memory allocation for short > strings and also save on accesses to them, as the content will fit > into the same cache line as the 'struct json' itself. > > In practice, large OVN databases tend to operate with quite large > strings. For example, all the logical flow matches and actions in > OVN Southbound database would not fit. However, this approach still > allows to improve performance with large OVN databases. > > With 350MB OVN Northbound database with 12M atoms: > > Before After Improvement > ovsdb-client dump 18.6 sec 16.6 sec 10.7 % > Compaction 14.0 sec 13.4 sec 4.2 % > Memory usage (RSS) 2.28 GB 2.05 GB 10.0 % > > With 615MB OVN Southbound database with 23M atoms: > > Before After Improvement > ovsdb-client dump 46.1 sec 43.7 sec 5.2 % > Compaction 34.8 sec 32.5 sec 6.6 % > Memory usage (RSS) 5.29 GB 4.80 GB 9.3 % > > In the results above, 'ovsdb-client dump' is measuring how log it > takes for the server to prepare and send a reply, 'Memory usage (RSS)' > reflects the RSS of the ovsdb-server after loading the full database. > ovn-heater tests report similar reduction in CPU and memory usage > on heavy operations like compaction. > > Signed-off-by: Ilya Maximets <i.maxim...@ovn.org> > --- > include/openvswitch/json.h | 14 ++++++++++- > lib/json.c | 48 ++++++++++++++++++++++++++++---------- > python/ovs/db/data.py | 6 +++-- > tests/test-json.c | 6 ++++- > tests/test-ovsdb.c | 9 ++++--- > 5 files changed, 64 insertions(+), 19 deletions(-) > > diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h > index 097bd057d..6e747f5d9 100644 > --- a/include/openvswitch/json.h > +++ b/include/openvswitch/json.h > @@ -63,16 +63,28 @@ struct json_array { > struct json **elems; > }; > > +/* Maximum string length that can be stored inline ('\0' is not included). */ > +#define JSON_STRING_INLINE_LEN (sizeof(struct json_array) - 1) > + > +enum json_storage_type { > + JSON_STRING_DYNAMIC = 0, /* JSON_STRING is stored via 'str_ptr'. */ > + JSON_STRING_INLINE, /* JSON_STRING is stored in 'str' array. */ > +}; > + > /* A JSON value. */ > struct json { > enum json_type type; > + enum json_storage_type storage_type; > size_t count; > union { > struct shash *object; /* Contains "struct json *"s. */ > struct json_array array; > long long int integer; > double real; > - char *string; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */ > + union { > + char str[JSON_STRING_INLINE_LEN + 1]; > + char *str_ptr; > + }; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */ > }; > }; > > diff --git a/lib/json.c b/lib/json.c > index 20626c445..061fb24de 100644 > --- a/lib/json.c > +++ b/lib/json.c > @@ -179,14 +179,26 @@ struct json * > json_string_create_nocopy(char *s) > { > struct json *json = json_create(JSON_STRING); > - json->string = s; > + json->storage_type = JSON_STRING_DYNAMIC; > + json->str_ptr = s; > return json; > } > > struct json * > json_string_create(const char *s) > { > - return json_string_create_nocopy(xstrdup(s)); > + struct json *json = json_create(JSON_STRING); > + size_t length = strlen(s); > + > + if (length <= JSON_STRING_INLINE_LEN) { > + json->storage_type = JSON_STRING_INLINE; > + memcpy(json->str, s, length); > + json->str[length] = '\0'; > + } else { > + json->storage_type = JSON_STRING_DYNAMIC; > + json->str_ptr = xmemdup0(s, length); > + } > + return json; > } > > struct json *json_string_create_uuid(const struct uuid *uuid) > @@ -198,7 +210,7 @@ struct json * > json_serialized_object_create(const struct json *src) > { > struct json *json = json_create(JSON_SERIALIZED_OBJECT); > - json->string = json_to_string(src, JSSF_SORT); > + json->str_ptr = json_to_string(src, JSSF_SORT); > return json; > } > > @@ -206,7 +218,7 @@ struct json * > json_serialized_object_create_with_yield(const struct json *src) > { > struct json *json = json_create(JSON_SERIALIZED_OBJECT); > - json->string = json_to_string(src, JSSF_SORT | JSSF_YIELD); > + json->str_ptr = json_to_string(src, JSSF_SORT | JSSF_YIELD); > return json; > } > > @@ -357,14 +369,16 @@ const char * > json_string(const struct json *json) > { > ovs_assert(json->type == JSON_STRING); > - return json->string; > + return json->storage_type == JSON_STRING_DYNAMIC > + ? json->str_ptr : json->str; > } > > const char * > json_serialized_object(const struct json *json) > { > ovs_assert(json->type == JSON_SERIALIZED_OBJECT); > - return json->string; > + ovs_assert(json->storage_type == JSON_STRING_DYNAMIC);
Sorry, I missed this the first time, json_serialized_object_create/_with_yield() doesn't set storage_type, which it probably should. Or maybe json_create() should set this by default until json_string_create() sets it to INLINE? -M > + return json->str_ptr; > } > > struct json_array * > @@ -419,8 +433,13 @@ json_destroy__(struct json *json, bool yield) > break; > > case JSON_STRING: > + if (json->storage_type == JSON_STRING_DYNAMIC) { > + free(json->str_ptr); > + } > + break; > + > case JSON_SERIALIZED_OBJECT: > - free(json->string); > + free(json->str_ptr); > break; > > case JSON_NULL: > @@ -588,9 +607,11 @@ json_hash(const struct json *json, size_t basis) > return json_hash_array(&json->array, basis); > > case JSON_STRING: > - case JSON_SERIALIZED_OBJECT: > return hash_string(json_string(json), basis); > > + case JSON_SERIALIZED_OBJECT: > + return hash_string(json->str_ptr, basis); > + > case JSON_NULL: > case JSON_FALSE: > case JSON_TRUE: > @@ -664,9 +685,11 @@ json_equal(const struct json *a, const struct json *b) > return json_equal_array(&a->array, &b->array); > > case JSON_STRING: > - case JSON_SERIALIZED_OBJECT: > return !strcmp(json_string(a), json_string(b)); > > + case JSON_SERIALIZED_OBJECT: > + return !strcmp(a->str_ptr, b->str_ptr); > + > case JSON_NULL: > case JSON_FALSE: > case JSON_TRUE: > @@ -1000,7 +1023,8 @@ json_string_escape(const char *in, struct ds *out) > { > struct json json = { > .type = JSON_STRING, > - .string = CONST_CAST(char *, in), > + .storage_type = JSON_STRING_DYNAMIC, > + .str_ptr = CONST_CAST(char *, in), > }; > json_to_ds(&json, 0, out); > } > @@ -1064,7 +1088,7 @@ struct json * > json_from_serialized_object(const struct json *json) > { > ovs_assert(json->type == JSON_SERIALIZED_OBJECT); > - return json_from_string(json_string(json)); > + return json_from_string(json->str_ptr); > } > > /* Reads the file named 'file_name', parses its contents as a JSON object or > @@ -1660,7 +1684,7 @@ json_serialize(const struct json *json, struct > json_serializer *s) > break; > > case JSON_SERIALIZED_OBJECT: > - ds_put_cstr(ds, json_string(json)); > + ds_put_cstr(ds, json->str_ptr); > break; > > case JSON_N_TYPES: > diff --git a/python/ovs/db/data.py b/python/ovs/db/data.py > index 3e9c5049f..30a34a098 100644 > --- a/python/ovs/db/data.py > +++ b/python/ovs/db/data.py > @@ -573,7 +573,8 @@ class Datum(object): > % (name, n)] > for key in sorted(self.values): > s += [' { .type = JSON_STRING, ' > - '.string = "%s", .count = 2 },' > + '.storage_type = JSON_STRING_DYNAMIC, ' > + '.str_ptr = "%s", .count = 2 },' > % escapeCString(key.value)] > s += ["};"] > s += ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)] > @@ -592,7 +593,8 @@ class Datum(object): > % (name, n)] > for k, v in sorted(self.values): > s += [' { .type = JSON_STRING, ' > - '.string = "%s", .count = 2 },' > + '.storage_type = JSON_STRING_DYNAMIC, ' > + '.str_ptr = "%s", .count = 2 },' > % escapeCString(v.value)] > s += ["};"] > s += ["static union ovsdb_atom %s_values[%d] = {" % (name, > n)] > diff --git a/tests/test-json.c b/tests/test-json.c > index 2ba92d119..e7992e510 100644 > --- a/tests/test-json.c > +++ b/tests/test-json.c > @@ -100,11 +100,15 @@ test_json_equal(const struct json *a, const struct json > *b, > return; > > case JSON_STRING: > - case JSON_SERIALIZED_OBJECT: > ovs_assert(json_string(a) != json_string(b)); > ovs_assert(!strcmp(json_string(a), json_string(b))); > return; > > + case JSON_SERIALIZED_OBJECT: > + ovs_assert(a->str_ptr != b->str_ptr); > + ovs_assert(!strcmp(a->str_ptr, b->str_ptr)); > + return; > + > case JSON_NULL: > case JSON_FALSE: > case JSON_TRUE: > diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c > index 0511e4170..ad4ca3b42 100644 > --- a/tests/test-ovsdb.c > +++ b/tests/test-ovsdb.c > @@ -2413,10 +2413,13 @@ substitute_uuids(struct json *json, const struct > ovsdb_symbol_table *symtab) > if (json->type == JSON_STRING) { > const struct ovsdb_symbol *symbol; > > - symbol = ovsdb_symbol_table_get(symtab, json->string); > + symbol = ovsdb_symbol_table_get(symtab, json_string(json)); > if (symbol) { > - free(json->string); > - json->string = xasprintf(UUID_FMT, UUID_ARGS(&symbol->uuid)); > + if (json->storage_type == JSON_STRING_DYNAMIC) { > + free(json->str_ptr); > + } > + json->storage_type = JSON_STRING_DYNAMIC; > + json->str_ptr = xasprintf(UUID_FMT, UUID_ARGS(&symbol->uuid)); > } > } else if (json->type == JSON_ARRAY) { > size_t i; > -- > 2.49.0 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev