On Thu, Oct 14, 2021 at 7:58 AM Ilya Maximets <[email protected]> wrote:
>
> If user frequently changes a lot of rows in a database, transaction
> history could grow way larger than the database itself.  This wastes
> a lot of memory and also makes monitor_cond_since slower than
> usual monotor_cond if the transaction id is old enough, because
> re-construction of the changes from a history is slower than just
> creation of initial database snapshot.  This is also the case if
> user deleted a lot of data, so transaction history still holds all of
> it while the database itself doesn't.
>
> In case of current lb-per-service model in ovn-kubernetes, each
> load-balancer is added to every logical switch/router.  Such a
> transaction touches more than a half of a OVN_Northbound database.
> And each of these transactions is added to the transaction history.
> Since transaction history depth is 100, in worst case scenario,
> it will hold 100 copies of a database increasing memory consumption
> dramatically.  In tests with 3000 LBs and 120 LSs, memory goes up
> to 3 GB, while holding at 30 MB if transaction history disabled in
> the code.
>
> Fixing that by keeping count of the number of ovsdb_atom's in the
> database and not allowing the total number of atoms in transaction
> history to grow larger than this value.  Counting atoms is fairly
> cheap because we don't need to iterate over them, so it doesn't have
> significant performance impact.  It would be ideal to measure the
> size of individual atoms, but that will hit the performance.
> Counting cells instead of atoms is not sufficient, because OVN
> users are adding hundreds or thousands of atoms to a single cell,
> so they are largely different in size.
>
> Signed-off-by: Ilya Maximets <[email protected]>
> ---
>  ovsdb/ovsdb.c       |  5 ++++
>  ovsdb/ovsdb.h       |  3 +++
>  ovsdb/transaction.c | 56 +++++++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 62 insertions(+), 2 deletions(-)
>
> diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
> index 126d16a2f..e6d866182 100644
> --- a/ovsdb/ovsdb.c
> +++ b/ovsdb/ovsdb.c
> @@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct
ovsdb_storage *storage)
>      ovs_list_init(&db->triggers);
>      db->run_triggers_now = db->run_triggers = false;
>
> +    db->n_atoms = 0;
> +
>      db->is_relay = false;
>      ovs_list_init(&db->txn_forward_new);
>      hmap_init(&db->txn_forward_sent);
> @@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct
simap *usage)
>      }
>
>      simap_increase(usage, "cells", cells);
> +    simap_increase(usage, "atoms", db->n_atoms);
> +    simap_increase(usage, "txn-history", db->n_txn_history);
> +    simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms);
>
>      if (db->storage) {
>          ovsdb_storage_get_memory_usage(db->storage, usage);
> diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
> index 4a7bd0f0e..ec2d235ec 100644
> --- a/ovsdb/ovsdb.h
> +++ b/ovsdb/ovsdb.h
> @@ -90,8 +90,11 @@ struct ovsdb {
>      /* History trasanctions for incremental monitor transfer. */
>      bool need_txn_history;     /* Need to maintain history of
transactions. */
>      unsigned int n_txn_history; /* Current number of history
transactions. */
> +    unsigned int n_txn_history_atoms; /* Total number of atoms in
history. */
>      struct ovs_list txn_history; /* Contains "struct
ovsdb_txn_history_node. */
>
> +    size_t n_atoms;  /* Total number of ovsdb atoms in the database. */
> +
>      /* Relay mode. */
>      bool is_relay;  /* True, if database is in relay mode. */
>      /* List that holds transactions waiting to be forwarded to the
server. */
> diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
> index dcccc61c0..a7ca09b58 100644
> --- a/ovsdb/transaction.c
> +++ b/ovsdb/transaction.c
> @@ -41,6 +41,9 @@ struct ovsdb_txn {
>      struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */
>      struct ds comment;
>      struct uuid txnid; /* For clustered mode only. It is the eid. */
> +    size_t n_atoms;    /* Number of atoms in all transaction rows. */
> +    ssize_t n_atoms_diff;  /* Difference between number of added and
> +                            * removed atoms. */
>  };
>
>  /* A table modified by a transaction. */
> @@ -842,6 +845,37 @@ check_index_uniqueness(struct ovsdb_txn *txn
OVS_UNUSED,
>      return NULL;
>  }
>
> +static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
> +count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
> +{
> +    struct ovsdb_table *table = txn_row->table;
> +    ssize_t n_atoms_old = 0, n_atoms_new = 0;
> +    struct shash_node *node;
> +
> +    SHASH_FOR_EACH (node, &table->schema->columns) {
> +        const struct ovsdb_column *column = node->data;
> +        const struct ovsdb_type *type = &column->type;
> +        unsigned int idx = column->index;
> +
> +        if (txn_row->old) {
> +            n_atoms_old += txn_row->old->fields[idx].n;
> +            if (type->value.type != OVSDB_TYPE_VOID) {
> +                n_atoms_old += txn_row->old->fields[idx].n;
> +            }
> +        }
> +        if (txn_row->new) {
> +            n_atoms_new += txn_row->new->fields[idx].n;
> +            if (type->value.type != OVSDB_TYPE_VOID) {
> +                n_atoms_new += txn_row->new->fields[idx].n;
> +            }
> +        }
> +    }
> +
> +    txn->n_atoms += n_atoms_old + n_atoms_new;
> +    txn->n_atoms_diff += n_atoms_new - n_atoms_old;
> +    return NULL;
> +}
> +
>  static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
>  update_version(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row
*txn_row)
>  {
> @@ -910,6 +944,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn)
>          return error;
>      }
>
> +    /* Count atoms. */
> +    error = for_each_txn_row(txn, count_atoms);
> +    if (error) {
> +        return error;
> +    }
> +
>      /* Update _version for rows that changed.  */
>      error = for_each_txn_row(txn, update_version);
>      if (error) {
> @@ -925,6 +965,8 @@ ovsdb_txn_clone(const struct ovsdb_txn *txn)
>      struct ovsdb_txn *txn_cloned = xzalloc(sizeof *txn_cloned);
>      ovs_list_init(&txn_cloned->txn_tables);
>      txn_cloned->txnid = txn->txnid;
> +    txn_cloned->n_atoms = txn->n_atoms;
> +    txn_cloned->n_atoms_diff = txn->n_atoms_diff;
>
>      struct ovsdb_txn_table *t;
>      LIST_FOR_EACH (t, node, &txn->txn_tables) {
> @@ -983,6 +1025,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn)
>          node->txn = ovsdb_txn_clone(txn);
>          ovs_list_push_back(&txn->db->txn_history, &node->node);
>          txn->db->n_txn_history++;
> +        txn->db->n_txn_history_atoms += txn->n_atoms;
>      }
>  }
>
> @@ -993,6 +1036,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn)
>      if (!ovsdb_txn_is_empty(txn)) {
>
>          txn->db->run_triggers_now = txn->db->run_triggers = true;
> +        txn->db->n_atoms += txn->n_atoms_diff;
>          ovsdb_monitors_commit(txn->db, txn);
>          ovsdb_error_assert(for_each_txn_row(txn,
ovsdb_txn_update_weak_refs));
>          ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit));
> @@ -1448,12 +1492,18 @@ ovsdb_txn_history_run(struct ovsdb *db)
>      if (!db->need_txn_history) {
>          return;
>      }
> -    /* Remove old histories to limit the size of the history */
> -    while (db->n_txn_history > 100) {
> +    /* Remove old histories to limit the size of the history.  Removing
until
> +     * the number of ovsdb atoms in history becomes less than the number
of
> +     * atoms in the database, because it will be faster to just get a
database
> +     * snapshot than re-constructing changes from the history that big.
*/
> +    while (db->n_txn_history &&
> +           (db->n_txn_history > 100 ||
> +            db->n_txn_history_atoms > db->n_atoms)) {
>          struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF(
>                  ovs_list_pop_front(&db->txn_history),
>                  struct ovsdb_txn_history_node, node);
>
> +        db->n_txn_history_atoms -= txn_h_node->txn->n_atoms;
>          ovsdb_txn_destroy_cloned(txn_h_node->txn);
>          free(txn_h_node);
>          db->n_txn_history--;
> @@ -1465,6 +1515,7 @@ ovsdb_txn_history_init(struct ovsdb *db, bool
need_txn_history)
>  {
>      db->need_txn_history = need_txn_history;
>      db->n_txn_history = 0;
> +    db->n_txn_history_atoms = 0;
>      ovs_list_init(&db->txn_history);
>  }
>
> @@ -1483,4 +1534,5 @@ ovsdb_txn_history_destroy(struct ovsdb *db)
>          free(txn_h_node);
>      }
>      db->n_txn_history = 0;
> +    db->n_txn_history_atoms = 0;
>  }
> --
> 2.31.1
>

Thanks Ilya. This looks good to me. Although the atoms number is not equal
to the size of the data, it should be sufficient for the purpose of
controlling history size.
It would be good to have a test case to ensure this behavior, and also
ensure the atoms count is as expected, just to avoid messing up the
counters by mistake in the future. (if that happens it is not easily
noticed because there is no functional difference but possible performance
impact of fast-resync)

Acked-by: Han Zhou <[email protected]>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to