On 23/09/2021 00:47, Ilya Maximets wrote:
> Current algorithm of ovsdb_datum_union looks like this:
>
> for-each atom in b:
> if not bin_search(a, atom):
> push(a, clone(atom))
> quicksort(a)
>
> So, the complexity looks like this:
>
> Nb * log2(Na) + Nb + (Na + Nb) * log2(Na + Nb)
> Comparisons clones Comparisons for quicksort
> for search
>
> ovsdb_datum_union() is heavily used in database transactions while
> new element is added to a set. For example, if new logical switch
> port is added to a logical switch in OVN. This is a very common
> use case where CMS adds one new port to an existing switch that
> already has, let's say, 100 ports. For this case ovsdb-server will
> have to perform:
>
> 1 * log2(100) + 1 clone + 101 * log2(101)
> Comparisons Comparisons for
> for search quicksort.
> ~7 1 ~707
> Roughly 714 comparisons of atoms and 1 clone.
>
> Since binary search can give us position, where new atom should go
> (it's the 'low' index after the search completion) for free, the
> logic can be re-worked like this:
>
> copied = 0
> for-each atom in b:
> desired_position = bin_search(a, atom)
> push(result, a[ copied : desired_position - 1 ])
> copied = desired_position
> push(result, clone(atom))
> push(result, a[ copied : Na ])
> swap(a, result)
>
> Complexity of this schema:
>
> Nb * log2(Na) + Nb + Na
> Comparisons clones memory copy on push
> for search
>
> 'swap' is just a swap of a few pointers. 'push' is not a 'clone',
> but a simple memory copy of 'union ovsdb_atom'.
>
> In general, this schema substitutes complexity of a quicksort
> with complexity of a memory copy of Na atom structures, where we're
> not even copying strings that these atoms are pointing to.
>
> Complexity in the example above goes down from 714 comparisons
> to 7 comparisons and memcpy of 100 * sizeof (union ovsdb_atom) bytes.
>
> General complexity of a memory copy should always be lower than
> complexity of a quicksort, especially because these copies usually
> performed in bulk, so this new schema should work faster for any input.
>
> All in all, this change allows to execute several times more
> transactions per second for transactions that adds new entries to sets.
>
> Alternatively, union can be implemented as a linear merge of two
> sorted arrays, but this will result in O(Na) comparisons, which
> is more than Nb * log2(Na) in common case, since Na is usually
> far bigger than Nb. Linear merge will also mean per-atom memory
> copies instead of copying in bulk.
>
> 'replace' functionality of ovsdb_datum_union() had no users, so it
> just removed. But it can easily be added back if needed in the future.
>
> Signed-off-by: Ilya Maximets <[email protected]>
> ---
> lib/db-ctl-base.c | 28 +++++++------
> lib/ovsdb-data.c | 103 ++++++++++++++++++++++++++++++----------------
> lib/ovsdb-data.h | 10 ++---
> lib/ovsdb-idl.c | 29 ++++++-------
> ovsdb/mutation.c | 2 +-
> vswitchd/bridge.c | 6 +--
> 6 files changed, 105 insertions(+), 73 deletions(-)
>
> diff --git a/lib/db-ctl-base.c b/lib/db-ctl-base.c
> index 77cc76a9f..3923683fd 100644
> --- a/lib/db-ctl-base.c
> +++ b/lib/db-ctl-base.c
> @@ -314,15 +314,18 @@ get_row_by_id(struct ctl_context *ctx,
> row, id->name_column, key, value);
>
> /* Extract the name from the column. */
> - const union ovsdb_atom *name;
> + const union ovsdb_atom *name = NULL;
> if (!id->key) {
> name = datum->n == 1 ? &datum->keys[0] : NULL;
> } else {
> const union ovsdb_atom key_atom
> = { .string = CONST_CAST(char *, id->key) };
> - unsigned int i = ovsdb_datum_find_key(datum, &key_atom,
> - OVSDB_TYPE_STRING);
> - name = i == UINT_MAX ? NULL : &datum->values[i];
> + unsigned int i;
> +
> + if (ovsdb_datum_find_key(datum, &key_atom,
> + OVSDB_TYPE_STRING, &i)) {
> + name = &datum->values[i];
> + }
> }
> if (!name) {
> continue;
> @@ -819,14 +822,14 @@ check_condition(const struct ovsdb_idl_table_class
> *table,
> goto out;
> }
>
> - idx = ovsdb_datum_find_key(have_datum,
> - &want_key, column->type.key.type);
> - if (idx == UINT_MAX && !is_set_operator(operator)) {
> + bool found = ovsdb_datum_find_key(have_datum, &want_key,
> + column->type.key.type, &idx);
> + if (!found && !is_set_operator(operator)) {
> retval = false;
> } else {
> struct ovsdb_datum a;
>
> - if (idx != UINT_MAX) {
> + if (found) {
> a.n = 1;
> a.keys = &have_datum->values[idx];
> a.values = NULL;
> @@ -992,9 +995,8 @@ cmd_get(struct ctl_context *ctx)
> return;
> }
>
> - idx = ovsdb_datum_find_key(datum, &key,
> - column->type.key.type);
> - if (idx == UINT_MAX) {
> + if (!ovsdb_datum_find_key(datum, &key,
> + column->type.key.type, &idx)) {
> if (must_exist) {
> ctl_error(
> ctx, "no key \"%s\" in %s record \"%s\" column %s",
> @@ -1375,7 +1377,7 @@ set_column(const struct ovsdb_idl_table_class *table,
> ovsdb_atom_destroy(&value, column->type.value.type);
>
> ovsdb_datum_union(&datum, ovsdb_idl_read(row, column),
> - &column->type, false);
> + &column->type);
> ovsdb_idl_txn_verify(row, column);
> ovsdb_idl_txn_write(row, column, &datum);
> } else {
> @@ -1514,7 +1516,7 @@ cmd_add(struct ctl_context *ctx)
> ovsdb_datum_destroy(&old, &column->type);
> return;
> }
> - ovsdb_datum_union(&old, &add, type, false);
> + ovsdb_datum_union(&old, &add, type);
> ovsdb_datum_destroy(&add, type);
> }
> if (old.n > type->n_max) {
> diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
> index 1f491a98b..7f9ff83f4 100644
> --- a/lib/ovsdb-data.c
> +++ b/lib/ovsdb-data.c
> @@ -799,7 +799,7 @@ ovsdb_atom_check_constraints(const union ovsdb_atom *atom,
> const struct ovsdb_base_type *base)
> {
> if (base->enum_
> - && ovsdb_datum_find_key(base->enum_, atom, base->type) == UINT_MAX) {
> + && !ovsdb_datum_find_key(base->enum_, atom, base->type, NULL)) {
> struct ovsdb_error *error;
> struct ds actual = DS_EMPTY_INITIALIZER;
> struct ds valid = DS_EMPTY_INITIALIZER;
> @@ -1784,14 +1784,16 @@ ovsdb_datum_compare_3way(const struct ovsdb_datum *a,
> a->n));
> }
>
> -/* If 'key' is one of the keys in 'datum', returns its index within 'datum',
> - * otherwise UINT_MAX. 'key.type' must be the type of the atoms stored in
> the
> - * 'keys' array in 'datum'.
> +/* If 'key' is one of the keys in 'datum', returns 'true' and sets '*pos' to
> + * its index within 'datum', otherwise returns 'false' and sets '*pos' to the
> + * index where 'key' should have been. 'key.type' must be the type of the
> + * atoms stored in the 'keys' array in 'datum'.
> */
> -unsigned int
> +bool
> ovsdb_datum_find_key(const struct ovsdb_datum *datum,
> const union ovsdb_atom *key,
> - enum ovsdb_atomic_type key_type)
> + enum ovsdb_atomic_type key_type,
> + unsigned int *pos)
> {
> unsigned int low = 0;
> unsigned int high = datum->n;
> @@ -1803,10 +1805,16 @@ ovsdb_datum_find_key(const struct ovsdb_datum *datum,
> } else if (cmp > 0) {
> low = idx + 1;
> } else {
> - return idx;
> + if (pos) {
> + *pos = idx;
> + }
> + return true;
> }
> }
> - return UINT_MAX;
> + if (pos) {
> + *pos = low;
> + }
> + return false;
> }
>
> /* If 'key' and 'value' is one of the key-value pairs in 'datum', returns its
> @@ -1821,10 +1829,11 @@ ovsdb_datum_find_key_value(const struct ovsdb_datum
> *datum,
> const union ovsdb_atom *value,
> enum ovsdb_atomic_type value_type)
> {
> - unsigned int idx = ovsdb_datum_find_key(datum, key, key_type);
> - if (idx != UINT_MAX
> - && value_type != OVSDB_TYPE_VOID
> - && !ovsdb_atom_equals(&datum->values[idx], value, value_type)) {
> + unsigned int idx;
> +
> + if (!ovsdb_datum_find_key(datum, key, key_type, &idx)
> + || (value_type != OVSDB_TYPE_VOID
> + && !ovsdb_atom_equals(&datum->values[idx], value, value_type))) {
> idx = UINT_MAX;
> }
> return idx;
> @@ -1948,38 +1957,62 @@ ovsdb_datum_add_unsafe(struct ovsdb_datum *datum,
> }
> }
>
> +/* Adds 'n' atoms starting from index 'start_idx' from 'src' to the end of
> + * 'dst'. 'dst' should have enough memory allocated to hold the additional
> + * 'n' atoms. Atoms are not cloned, i.e. 'dst' will reference the same data.
> + * Caller also should take care of the result being sorted. */
> +static void
> +ovsdb_datum_push_unsafe(struct ovsdb_datum *dst,
> + const struct ovsdb_datum *src,
> + unsigned int start_idx, unsigned int n,
> + const struct ovsdb_type *type)
> +{
> + memcpy(&dst->keys[dst->n], &src->keys[start_idx], n * sizeof
> src->keys[0]);
> + if (type->value.type != OVSDB_TYPE_VOID) {
> + memcpy(&dst->values[dst->n], &src->values[start_idx],
> + n * sizeof src->values[0]);
> + }
> + dst->n += n;
> +}
> +
> void
> ovsdb_datum_union(struct ovsdb_datum *a, const struct ovsdb_datum *b,
> - const struct ovsdb_type *type, bool replace)
> + const struct ovsdb_type *type)
> {
> - unsigned int n;
> - size_t bi;
> + struct ovsdb_datum result;
> + unsigned int copied, pos;
>
> - n = a->n;
> - for (bi = 0; bi < b->n; bi++) {
> - unsigned int ai;
> + ovsdb_datum_init_empty(&result);
> + ovsdb_datum_reallocate(&result, type, a->n + b->n);
>
> - ai = ovsdb_datum_find_key(a, &b->keys[bi], type->key.type);
> - if (ai == UINT_MAX) {
> - if (n == a->n) {
> - ovsdb_datum_reallocate(a, type, a->n + (b->n - bi));
> - }
> - ovsdb_atom_clone(&a->keys[n], &b->keys[bi], type->key.type);
> - if (type->value.type != OVSDB_TYPE_VOID) {
> - ovsdb_atom_clone(&a->values[n], &b->values[bi],
> - type->value.type);
> - }
> - n++;
> - } else if (replace && type->value.type != OVSDB_TYPE_VOID) {
> - ovsdb_atom_destroy(&a->values[ai], type->value.type);
> - ovsdb_atom_clone(&a->values[ai], &b->values[bi],
> + copied = 0;
> + for (size_t bi = 0; bi < b->n; bi++) {
> + if (ovsdb_datum_find_key(a, &b->keys[bi], type->key.type, &pos)) {
> + /* Atom with the same key already exists. */
> + continue;
> + }
> + if (pos > copied) {
> + /* Need to copy some atoms from 'a' first. */
> + ovsdb_datum_push_unsafe(&result, a, copied, pos - copied, type);
> + copied = pos;
> + }
> + /* Inserting new atom from 'b'. */
> + ovsdb_atom_clone(&result.keys[result.n], &b->keys[bi],
> type->key.type);
> + if (type->value.type != OVSDB_TYPE_VOID) {
> + ovsdb_atom_clone(&result.values[result.n], &b->values[bi],
> type->value.type);
> }
> + result.n++;
> }
In the 3rd patch in this series you ensure that
```if (new_size < type->n_min || new_size > type->n_max) {```
Do we need to do something like that here?
Otherwise,
Acked-by: Mark D. Gray <[email protected]>
> - if (n != a->n) {
> - a->n = n;
> - ovs_assert(!ovsdb_datum_sort(a, type->key.type));
> + if (a->n > copied) {
> + /* Copying remaining atoms. */
> + ovsdb_datum_push_unsafe(&result, a, copied, a->n - copied, type);
> }
> + /* All atoms are copied now. */
> + a->n = 0;
> +
> + ovsdb_datum_swap(&result, a);
> + ovsdb_datum_destroy(&result, type);
> }
>
> void
> diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h
> index aa035ebad..4581e792f 100644
> --- a/lib/ovsdb-data.h
> +++ b/lib/ovsdb-data.h
> @@ -209,9 +209,10 @@ bool ovsdb_datum_equals(const struct ovsdb_datum *,
> const struct ovsdb_type *);
>
> /* Search. */
> -unsigned int ovsdb_datum_find_key(const struct ovsdb_datum *,
> - const union ovsdb_atom *key,
> - enum ovsdb_atomic_type key_type);
> +bool ovsdb_datum_find_key(const struct ovsdb_datum *,
> + const union ovsdb_atom *key,
> + enum ovsdb_atomic_type key_type,
> + unsigned int *pos);
> unsigned int ovsdb_datum_find_key_value(const struct ovsdb_datum *,
> const union ovsdb_atom *key,
> enum ovsdb_atomic_type key_type,
> @@ -227,8 +228,7 @@ bool ovsdb_datum_excludes_all(const struct ovsdb_datum *,
> const struct ovsdb_type *);
> void ovsdb_datum_union(struct ovsdb_datum *,
> const struct ovsdb_datum *,
> - const struct ovsdb_type *,
> - bool replace);
> + const struct ovsdb_type *);
> void ovsdb_datum_subtract(struct ovsdb_datum *a,
> const struct ovsdb_type *a_type,
> const struct ovsdb_datum *b,
> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
> index d3caccec8..c9c567e2c 100644
> --- a/lib/ovsdb-idl.c
> +++ b/lib/ovsdb-idl.c
> @@ -2840,9 +2840,8 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row
> *row,
> struct ovsdb_datum *new_datum;
> unsigned int pos;
> new_datum = map_op_datum(map_op);
> - pos = ovsdb_datum_find_key(old_datum,
> - &new_datum->keys[0],
> - key_type);
> + ovsdb_datum_find_key(old_datum, &new_datum->keys[0],
> + key_type, &pos);
> if (ovsdb_atom_equals(&new_datum->values[0],
> &old_datum->values[pos],
> value_type)) {
> @@ -2851,11 +2850,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row
> *row,
> }
> } else if (map_op_type(map_op) == MAP_OP_DELETE){
> /* Verify that there is a key to delete. */
> - unsigned int pos;
> - pos = ovsdb_datum_find_key(old_datum,
> -
> &map_op_datum(map_op)->keys[0],
> - key_type);
> - if (pos == UINT_MAX) {
> + if (!ovsdb_datum_find_key(old_datum,
> + &map_op_datum(map_op)->keys[0],
> + key_type, NULL)) {
> /* No key to delete. Move on to next update. */
> VLOG_WARN("Trying to delete a key that doesn't "
> "exist in the map.");
> @@ -2950,11 +2947,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row
> *row,
> any_ins = true;
> } else { /* SETP_OP_DELETE */
> /* Verify that there is a key to delete. */
> - unsigned int pos;
> - pos = ovsdb_datum_find_key(old_datum,
> -
> &set_op_datum(set_op)->keys[0],
> - key_type);
> - if (pos == UINT_MAX) {
> + if (!ovsdb_datum_find_key(old_datum,
> + &set_op_datum(set_op)->keys[0],
> + key_type, NULL)) {
> /* No key to delete. Move on to next update. */
> VLOG_WARN("Trying to delete a key that doesn't "
> "exist in the set.");
> @@ -4119,7 +4114,6 @@ ovsdb_idl_txn_write_partial_map(const struct
> ovsdb_idl_row *row_,
> struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_);
> enum ovsdb_atomic_type key_type;
> enum map_op_type op_type;
> - unsigned int pos;
> const struct ovsdb_datum *old_datum;
>
> if (!is_valid_partial_update(row, column, datum)) {
> @@ -4131,8 +4125,11 @@ ovsdb_idl_txn_write_partial_map(const struct
> ovsdb_idl_row *row_,
> /* Find out if this is an insert or an update. */
> key_type = column->type.key.type;
> old_datum = ovsdb_idl_read(row, column);
> - pos = ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type);
> - op_type = pos == UINT_MAX ? MAP_OP_INSERT : MAP_OP_UPDATE;
> + if (ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type, NULL)) {
> + op_type = MAP_OP_UPDATE;
> + } else {
> + op_type = MAP_OP_INSERT;
> + }
>
> ovsdb_idl_txn_add_map_op(row, column, datum, op_type);
> }
> diff --git a/ovsdb/mutation.c b/ovsdb/mutation.c
> index 56edc5f00..03d1c3499 100644
> --- a/ovsdb/mutation.c
> +++ b/ovsdb/mutation.c
> @@ -383,7 +383,7 @@ ovsdb_mutation_set_execute(struct ovsdb_row *row,
> break;
>
> case OVSDB_M_INSERT:
> - ovsdb_datum_union(dst, arg, dst_type, false);
> + ovsdb_datum_union(dst, arg, dst_type);
> error = ovsdb_mutation_check_count(dst, dst_type);
> break;
>
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index cb7c5cb76..c790a56ad 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -4229,7 +4229,7 @@ bridge_configure_aa(struct bridge *br)
> union ovsdb_atom atom;
>
> atom.integer = m->isid;
> - if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER) == UINT_MAX)
> {
> + if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER, NULL)) {
> VLOG_INFO("Deleting isid=%"PRIu32", vlan=%"PRIu16,
> m->isid, m->vlan);
> bridge_aa_mapping_destroy(m);
> @@ -4826,7 +4826,7 @@ queue_ids_include(const struct ovsdb_datum *queues,
> int64_t target)
> union ovsdb_atom atom;
>
> atom.integer = target;
> - return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER) !=
> UINT_MAX;
> + return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER, NULL);
> }
>
> static void
> @@ -5020,7 +5020,7 @@ bridge_configure_mirrors(struct bridge *br)
> union ovsdb_atom atom;
>
> atom.uuid = m->uuid;
> - if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID) == UINT_MAX) {
> + if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID, NULL)) {
> mirror_destroy(m);
> }
> }
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev