On Fri, Jun 6, 2025 at 7:23 AM Ilya Maximets <i.maxim...@ovn.org> wrote:
>
> The 'struct json' contains a union and the largest element of that
> union is 'struct json_array', which takes 24 bytes.  It means, that a
> lot of space in this structure remains unused whenever the type is not
> JSON_ARRAY.
>
> For example, the 'string' pointer used for JSON_STRING only takes 8
> bytes on a 64-bit system leaving 24 - 8 = 16 bytes unused.  There is
> also a 4-byte hole between the 'type' and the 'count'.
>
> A pretty common optimization technique for storing strings is to
> store short ones in place of the pointer and only allocate dynamically
> the larger strings that do not fit.  In our case, we have even larger
> space of 24 bytes to work with.  So, we could use all 24 bytes to
> store the strings (23 string bytes + '\0') and use the 4 byte unused
> space outside the union to store the storage type.
>
> This approach should allow us to save on memory allocation for short
> strings and also save on accesses to them, as the content will fit
> into the same cache line as the 'struct json' itself.
>
> In practice, large OVN databases tend to operate with quite large
> strings.  For example, all the logical flow matches and actions in
> OVN Southbound database would not fit.  However, this approach still
> allows to improve performance with large OVN databases.
>
> With 350MB OVN Northbound database with 12M atoms:
>
>                          Before        After       Improvement
>  ovsdb-client dump      18.6 sec      16.6 sec       10.7 %
>  Compaction             14.0 sec      13.4 sec        4.2 %
>  Memory usage (RSS)     2.28 GB       2.05 GB        10.0 %
>
> With 615MB OVN Southbound database with 23M atoms:
>
>                          Before        After       Improvement
>  ovsdb-client dump      46.1 sec      43.7 sec        5.2 %
>  Compaction             34.8 sec      32.5 sec        6.6 %
>  Memory usage (RSS)     5.29 GB       4.80 GB         9.3 %
>
> In the results above, 'ovsdb-client dump' is measuring how log it
> takes for the server to prepare and send a reply, 'Memory usage (RSS)'
> reflects the RSS of the ovsdb-server after loading the full database.
> ovn-heater tests report similar reduction in CPU and memory usage
> on heavy operations like compaction.
>
> Signed-off-by: Ilya Maximets <i.maxim...@ovn.org>
> ---
>  include/openvswitch/json.h | 14 ++++++++++-
>  lib/json.c                 | 48 ++++++++++++++++++++++++++++----------
>  python/ovs/db/data.py      |  6 +++--
>  tests/test-json.c          |  6 ++++-
>  tests/test-ovsdb.c         |  9 ++++---
>  5 files changed, 64 insertions(+), 19 deletions(-)
>
> diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h
> index 097bd057d..6e747f5d9 100644
> --- a/include/openvswitch/json.h
> +++ b/include/openvswitch/json.h
> @@ -63,16 +63,28 @@ struct json_array {
>      struct json **elems;
>  };
>
> +/* Maximum string length that can be stored inline ('\0' is not included). */
> +#define JSON_STRING_INLINE_LEN (sizeof(struct json_array) - 1)
> +
> +enum json_storage_type {
> +    JSON_STRING_DYNAMIC = 0, /* JSON_STRING is stored via 'str_ptr'. */
> +    JSON_STRING_INLINE,      /* JSON_STRING is stored in 'str' array. */
> +};
> +
>  /* A JSON value. */
>  struct json {
>      enum json_type type;
> +    enum json_storage_type storage_type;
>      size_t count;
>      union {
>          struct shash *object;   /* Contains "struct json *"s. */
>          struct json_array array;
>          long long int integer;
>          double real;
> -        char *string; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */
> +        union {
> +            char str[JSON_STRING_INLINE_LEN + 1];
> +            char *str_ptr;
> +        }; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */
>      };
>  };
>
> diff --git a/lib/json.c b/lib/json.c
> index 20626c445..061fb24de 100644
> --- a/lib/json.c
> +++ b/lib/json.c
> @@ -179,14 +179,26 @@ struct json *
>  json_string_create_nocopy(char *s)
>  {
>      struct json *json = json_create(JSON_STRING);
> -    json->string = s;
> +    json->storage_type = JSON_STRING_DYNAMIC;
> +    json->str_ptr = s;
>      return json;
>  }
>
>  struct json *
>  json_string_create(const char *s)
>  {
> -    return json_string_create_nocopy(xstrdup(s));
> +    struct json *json = json_create(JSON_STRING);
> +    size_t length = strlen(s);
> +
> +    if (length <= JSON_STRING_INLINE_LEN) {
> +        json->storage_type = JSON_STRING_INLINE;
> +        memcpy(json->str, s, length);
> +        json->str[length] = '\0';
> +    } else {
> +        json->storage_type = JSON_STRING_DYNAMIC;
> +        json->str_ptr = xmemdup0(s, length);
> +    }
> +    return json;
>  }
>
>  struct json *json_string_create_uuid(const struct uuid *uuid)
> @@ -198,7 +210,7 @@ struct json *
>  json_serialized_object_create(const struct json *src)
>  {
>      struct json *json = json_create(JSON_SERIALIZED_OBJECT);
> -    json->string = json_to_string(src, JSSF_SORT);
> +    json->str_ptr = json_to_string(src, JSSF_SORT);
>      return json;
>  }
>
> @@ -206,7 +218,7 @@ struct json *
>  json_serialized_object_create_with_yield(const struct json *src)
>  {
>      struct json *json = json_create(JSON_SERIALIZED_OBJECT);
> -    json->string = json_to_string(src, JSSF_SORT | JSSF_YIELD);
> +    json->str_ptr = json_to_string(src, JSSF_SORT | JSSF_YIELD);
>      return json;
>  }
>
> @@ -357,14 +369,16 @@ const char *
>  json_string(const struct json *json)
>  {
>      ovs_assert(json->type == JSON_STRING);
> -    return json->string;
> +    return json->storage_type == JSON_STRING_DYNAMIC
> +           ? json->str_ptr : json->str;
>  }
>
>  const char *
>  json_serialized_object(const struct json *json)
>  {
>      ovs_assert(json->type == JSON_SERIALIZED_OBJECT);
> -    return json->string;
> +    ovs_assert(json->storage_type == JSON_STRING_DYNAMIC);

Sorry, I missed this the first time,
json_serialized_object_create/_with_yield() doesn't set storage_type,
which it probably should. Or maybe json_create() should set this by
default until json_string_create() sets it to INLINE?

-M

> +    return json->str_ptr;
>  }
>
>  struct json_array *
> @@ -419,8 +433,13 @@ json_destroy__(struct json *json, bool yield)
>          break;
>
>      case JSON_STRING:
> +        if (json->storage_type == JSON_STRING_DYNAMIC) {
> +            free(json->str_ptr);
> +        }
> +        break;
> +
>      case JSON_SERIALIZED_OBJECT:
> -        free(json->string);
> +        free(json->str_ptr);
>          break;
>
>      case JSON_NULL:
> @@ -588,9 +607,11 @@ json_hash(const struct json *json, size_t basis)
>          return json_hash_array(&json->array, basis);
>
>      case JSON_STRING:
> -    case JSON_SERIALIZED_OBJECT:
>          return hash_string(json_string(json), basis);
>
> +    case JSON_SERIALIZED_OBJECT:
> +        return hash_string(json->str_ptr, basis);
> +
>      case JSON_NULL:
>      case JSON_FALSE:
>      case JSON_TRUE:
> @@ -664,9 +685,11 @@ json_equal(const struct json *a, const struct json *b)
>          return json_equal_array(&a->array, &b->array);
>
>      case JSON_STRING:
> -    case JSON_SERIALIZED_OBJECT:
>          return !strcmp(json_string(a), json_string(b));
>
> +    case JSON_SERIALIZED_OBJECT:
> +        return !strcmp(a->str_ptr, b->str_ptr);
> +
>      case JSON_NULL:
>      case JSON_FALSE:
>      case JSON_TRUE:
> @@ -1000,7 +1023,8 @@ json_string_escape(const char *in, struct ds *out)
>  {
>      struct json json = {
>          .type = JSON_STRING,
> -        .string = CONST_CAST(char *, in),
> +        .storage_type = JSON_STRING_DYNAMIC,
> +        .str_ptr = CONST_CAST(char *, in),
>      };
>      json_to_ds(&json, 0, out);
>  }
> @@ -1064,7 +1088,7 @@ struct json *
>  json_from_serialized_object(const struct json *json)
>  {
>      ovs_assert(json->type == JSON_SERIALIZED_OBJECT);
> -    return json_from_string(json_string(json));
> +    return json_from_string(json->str_ptr);
>  }
>
>  /* Reads the file named 'file_name', parses its contents as a JSON object or
> @@ -1660,7 +1684,7 @@ json_serialize(const struct json *json, struct 
> json_serializer *s)
>          break;
>
>      case JSON_SERIALIZED_OBJECT:
> -        ds_put_cstr(ds, json_string(json));
> +        ds_put_cstr(ds, json->str_ptr);
>          break;
>
>      case JSON_N_TYPES:
> diff --git a/python/ovs/db/data.py b/python/ovs/db/data.py
> index 3e9c5049f..30a34a098 100644
> --- a/python/ovs/db/data.py
> +++ b/python/ovs/db/data.py
> @@ -573,7 +573,8 @@ class Datum(object):
>                    % (name, n)]
>              for key in sorted(self.values):
>                  s += ['    { .type = JSON_STRING, '
> -                      '.string = "%s", .count = 2 },'
> +                      '.storage_type = JSON_STRING_DYNAMIC, '
> +                      '.str_ptr = "%s", .count = 2 },'
>                        % escapeCString(key.value)]
>              s += ["};"]
>              s += ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)]
> @@ -592,7 +593,8 @@ class Datum(object):
>                        % (name, n)]
>                  for k, v in sorted(self.values):
>                      s += ['    { .type = JSON_STRING, '
> -                          '.string = "%s", .count = 2 },'
> +                          '.storage_type = JSON_STRING_DYNAMIC, '
> +                          '.str_ptr = "%s", .count = 2 },'
>                            % escapeCString(v.value)]
>                  s += ["};"]
>                  s += ["static union ovsdb_atom %s_values[%d] = {" % (name, 
> n)]
> diff --git a/tests/test-json.c b/tests/test-json.c
> index 2ba92d119..e7992e510 100644
> --- a/tests/test-json.c
> +++ b/tests/test-json.c
> @@ -100,11 +100,15 @@ test_json_equal(const struct json *a, const struct json 
> *b,
>          return;
>
>      case JSON_STRING:
> -    case JSON_SERIALIZED_OBJECT:
>          ovs_assert(json_string(a) != json_string(b));
>          ovs_assert(!strcmp(json_string(a), json_string(b)));
>          return;
>
> +    case JSON_SERIALIZED_OBJECT:
> +        ovs_assert(a->str_ptr != b->str_ptr);
> +        ovs_assert(!strcmp(a->str_ptr, b->str_ptr));
> +        return;
> +
>      case JSON_NULL:
>      case JSON_FALSE:
>      case JSON_TRUE:
> diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
> index 0511e4170..ad4ca3b42 100644
> --- a/tests/test-ovsdb.c
> +++ b/tests/test-ovsdb.c
> @@ -2413,10 +2413,13 @@ substitute_uuids(struct json *json, const struct 
> ovsdb_symbol_table *symtab)
>      if (json->type == JSON_STRING) {
>          const struct ovsdb_symbol *symbol;
>
> -        symbol = ovsdb_symbol_table_get(symtab, json->string);
> +        symbol = ovsdb_symbol_table_get(symtab, json_string(json));
>          if (symbol) {
> -            free(json->string);
> -            json->string = xasprintf(UUID_FMT, UUID_ARGS(&symbol->uuid));
> +            if (json->storage_type == JSON_STRING_DYNAMIC) {
> +                free(json->str_ptr);
> +            }
> +            json->storage_type = JSON_STRING_DYNAMIC;
> +            json->str_ptr = xasprintf(UUID_FMT, UUID_ARGS(&symbol->uuid));
>          }
>      } else if (json->type == JSON_ARRAY) {
>          size_t i;
> --
> 2.49.0
>
> _______________________________________________
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to