At a first glance, change tracking should never be allowed for
write-only columns. However, some clients (e.g., ovn-northd) that are
mostly exclusive writers of a database, use change tracking to avoid
duplicating the IDL row records into a local cache when implementing
incremental processing.
The default behavior of the IDL is to automatically turn a write-only
column into a read-write column whenever the client enables change
tracking for that column.
For the afore mentioned clients, this becomes a performance issue.
Commit 1cc618c32524 ("ovsdb-idl: Fix atomicity of writes that don't
change a column's value.") explains why writes that don't change a
column's value cannot be optimized out early if the column is
read/write.
Furthermore, if there is at least one record in any table that
changed during a transaction, then *all* records that have been
written are added to the transaction, even if their values didn't
change. If there are many such rows (e.g., like in ovn-northd's
case) this incurs a significant overhead because:
a. the client has to build this large transaction
b. the transaction has to be sent over the network
c. the server needs to parse this (mostly) no-op update
We now introduce new IDL APIs allowing users to explicitly request
monitoring mode combinations for different columns in the IDL.
These accept the combination 'OVSDB_IDL_MONITOR | OVSDB_IDL_TRACK'
as a valid input allowing users to change track write-only
tables.
We benchmarked ovn-northd performance when using this new mode
against NB and SB databases taken from ovn-kubernetes scale tests.
We noticed that when a minor change is performed to the Northbound
database (e.g., NB_Global.nb_cfg is incremented) the time it takes to
build the Southbound transaction becomes negligible (vs ~1.5 seconds
before this change).
End-to-end ovn-kubernetes scale tests on 120-node clusters also show
significant reduction of latency to bring up pods; both average and P99
latency decreased by ~30%.
Signed-off-by: Dumitru Ceara <[email protected]>
---
V2:
- Addressed Numan's comments:
- Added APIs to allow per column configuration of modes.
- Fixed unit tests to actually enable change tracking for write-only
columns.
- Fixed ovsdb_idl_row_change() to correctly update row/table/idl change
seqnos if change tracking is enabled (even for write-only rows).
Note: The OVN counter part change is:
https://github.com/dceara/ovn/commit/0d0c689e6469ae2f8e195c423e9a91fbc514bd60
---
NEWS | 4 +++
lib/ovsdb-idl.c | 85 +++++++++++++++++++++++++++++++++-------------
lib/ovsdb-idl.h | 8 ++++-
tests/ovsdb-idl.at | 20 +++++++++--
tests/test-ovsdb.c | 25 ++++++++++----
5 files changed, 108 insertions(+), 34 deletions(-)
diff --git a/NEWS b/NEWS
index 5074b97aa51c..c1c41da94e9b 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,10 @@ Post-v2.17.0
- OVSDB:
* 'relay' service model now supports transaction history, i.e. honors the
'last-txn-id' field in 'monitor_cond_since' requests from clients.
+ - OVSDB-IDL:
+ * New interfaces for explcitly setting column modes. The combination
+ 'OVSDB_IDL_MONITOR | OVSDB_IDL_TRACK' is now considered valid and
+ enables change tracking of write-only columns.
v2.17.0 - 17 Feb 2022
diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 882ede75598d..5f62c267d496 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -859,20 +859,6 @@ ovsdb_idl_get_mode(struct ovsdb_idl *idl,
return &table->modes[column - table->class_->columns];
}
-static void
-ovsdb_idl_set_mode(struct ovsdb_idl *idl,
- const struct ovsdb_idl_column *column,
- unsigned char mode)
-{
- const struct ovsdb_idl_table *table = ovsdb_idl_table_from_column(idl,
- column);
- size_t column_idx = column - table->class_->columns;
-
- if (table->modes[column_idx] != mode) {
- *ovsdb_idl_get_mode(idl, column) = mode;
- }
-}
-
static void
add_ref_table(struct ovsdb_idl *idl, const struct ovsdb_base_type *base)
{
@@ -902,7 +888,8 @@ void
ovsdb_idl_add_column(struct ovsdb_idl *idl,
const struct ovsdb_idl_column *column)
{
- ovsdb_idl_set_mode(idl, column, OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT);
+ ovsdb_idl_set_column_mode(idl, column,
+ OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT);
add_ref_table(idl, &column->type.key);
add_ref_table(idl, &column->type.value);
}
@@ -1186,6 +1173,51 @@ ovsdb_idl_omit_alert(struct ovsdb_idl *idl,
*ovsdb_idl_get_mode(idl, column) &= ~(OVSDB_IDL_ALERT | OVSDB_IDL_TRACK);
}
+/* Sets the 'column' mode to 'mode' which must be a valid combination of
+ * OVSDB_IDL_MONITOR, OVSDB_IDL_ALERT and OVSDB_IDL_TRACK.
+ */
+void
+ovsdb_idl_set_column_mode(struct ovsdb_idl *idl,
+ const struct ovsdb_idl_column *column,
+ unsigned char mode)
+{
+ if (mode & ~(OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT | OVSDB_IDL_TRACK)) {
+ VLOG_ABORT("%s(): Column '%s' unknown mode %02x.",
+ __func__, column->name, mode);
+ }
+
+ if ((mode & (OVSDB_IDL_ALERT | OVSDB_IDL_TRACK))
+ && !(mode & OVSDB_IDL_MONITOR)) {
+ VLOG_ABORT("%s(): Column '%s' mode must include OVSDB_IDL_MONITOR.",
+ __func__, column->name);
+ }
+
+ const struct ovsdb_idl_table *table = ovsdb_idl_table_from_column(idl,
+ column);
+ size_t column_idx = column - table->class_->columns;
+
+ if (table->modes[column_idx] != mode) {
+ *ovsdb_idl_get_mode(idl, column) = mode;
+ }
+}
+
+/* Walks all columns of tables in 'idl' and sets their mode to 'mode'
+ * which must be a valid combination of OVSDB_IDL_MONITOR, OVSDB_IDL_ALERT
+ * and OVSDB_IDL_TRACK.
+ */
+void
+ovsdb_idl_set_column_mode_all(struct ovsdb_idl *idl, unsigned char mode)
+{
+ for (size_t i = 0; i < idl->class_->n_tables; i++) {
+ const struct ovsdb_idl_table_class *tc = &idl->class_->tables[i];
+
+ for (size_t j = 0; j < tc->n_columns; j++) {
+ const struct ovsdb_idl_column *column = &tc->columns[j];
+ ovsdb_idl_set_column_mode(idl, column, mode);
+ }
+ }
+}
+
/* Sets the mode for 'column' in 'idl' to 0. See the big comment above
* OVSDB_IDL_MONITOR for details.
*
@@ -1236,8 +1268,8 @@ ovsdb_idl_row_get_seqno(const struct ovsdb_idl_row *row,
* functions.
*
* This function should be called between ovsdb_idl_create() and
- * the first call to ovsdb_idl_run(). The column to be tracked
- * should have OVSDB_IDL_ALERT turned on.
+ * the first call to ovsdb_idl_run(). The function will also ensure
+ * that the column to be tracked has OVSDB_IDL_ALERT turned on.
*/
void
ovsdb_idl_track_add_column(struct ovsdb_idl *idl,
@@ -1246,7 +1278,9 @@ ovsdb_idl_track_add_column(struct ovsdb_idl *idl,
if (!(*ovsdb_idl_get_mode(idl, column) & OVSDB_IDL_ALERT)) {
ovsdb_idl_add_column(idl, column);
}
- *ovsdb_idl_get_mode(idl, column) |= OVSDB_IDL_TRACK;
+
+ unsigned char mode = OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT | OVSDB_IDL_TRACK;
+ ovsdb_idl_set_column_mode(idl, column, mode);
}
void
@@ -1694,11 +1728,14 @@ ovsdb_idl_row_change(struct ovsdb_idl_row *row, const
struct shash *values,
continue;
}
- if (datum_changed && table->modes[column_idx] & OVSDB_IDL_ALERT) {
- changed = true;
- row->change_seqno[change]
- = row->table->change_seqno[change]
- = row->table->idl->change_seqno + 1;
+ if (datum_changed) {
+ unsigned char column_mode = table->modes[column_idx];
+ if (column_mode & (OVSDB_IDL_ALERT | OVSDB_IDL_TRACK)) {
+ changed = true;
+ row->change_seqno[change]
+ = row->table->change_seqno[change]
+ = row->table->idl->change_seqno + 1;
+ }
if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
if (ovs_list_is_empty(&row->track_node) &&
@@ -3501,7 +3538,7 @@ ovsdb_idl_txn_write__(const struct ovsdb_idl_row *row_,
class = row->table->class_;
column_idx = column - class->columns;
- write_only = row->table->modes[column_idx] == OVSDB_IDL_MONITOR;
+ write_only = !(row->table->modes[column_idx] & OVSDB_IDL_ALERT);
ovs_assert(row->new_datum != NULL);
ovs_assert(column_idx < class->n_columns);
diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h
index d00599616ef9..2d74ed0727a7 100644
--- a/lib/ovsdb-idl.h
+++ b/lib/ovsdb-idl.h
@@ -180,7 +180,8 @@ bool ovsdb_idl_server_has_column(const struct ovsdb_idl *,
*
* - (OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT | OVSDB_IDL_TRACK), for a column
* that a client wants to track using the change tracking
- * ovsdb_idl_track_get_*() functions.
+ * ovsdb_idl_track_get_*() functions. OVSDB_IDL_ALERT can be omitted for
+ * write-only columns.
*/
#define OVSDB_IDL_MONITOR (1 << 0) /* Replicate this column? */
#define OVSDB_IDL_ALERT (1 << 1) /* Alert client when column changes? */
@@ -193,6 +194,11 @@ void ovsdb_idl_add_table(struct ovsdb_idl *,
void ovsdb_idl_omit(struct ovsdb_idl *, const struct ovsdb_idl_column *);
void ovsdb_idl_omit_alert(struct ovsdb_idl *, const struct ovsdb_idl_column *);
+void ovsdb_idl_set_column_mode(struct ovsdb_idl *,
+ const struct ovsdb_idl_column *,
+ unsigned char mode);
+void ovsdb_idl_set_column_mode_all(struct ovsdb_idl *, unsigned char mode);
+
/* Change tracking.
*
* In OVSDB, change tracking is applied at each client in the IDL layer. This
diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at
index 62e2b638320c..0436e0f77b0f 100644
--- a/tests/ovsdb-idl.at
+++ b/tests/ovsdb-idl.at
@@ -1154,12 +1154,25 @@ OVSDB_CHECK_IDL_WO_MONITOR_COND([simple idl disable
monitor-cond],
]])
m4_define([OVSDB_CHECK_IDL_TRACK_C],
- [AT_SETUP([$1 - C])
+ [AT_SETUP([$1 - alert - C])
+ AT_KEYWORDS([ovsdb server idl tracking positive $5])
+ AT_CHECK([ovsdb_start_idltest])
+ m4_if([$2], [], [],
+ [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore],
[ignore])])
+ AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10
--change-track=alert idl unix:socket $3],
+ [0], [stdout], [ignore])
+ AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]),
+ [0], [$4])
+ OVSDB_SERVER_SHUTDOWN
+ AT_CLEANUP])
+
+m4_define([OVSDB_CHECK_IDL_TRACK_NOALERT_C],
+ [AT_SETUP([$1 - noalert - C])
AT_KEYWORDS([ovsdb server idl tracking positive $5])
AT_CHECK([ovsdb_start_idltest])
m4_if([$2], [], [],
[AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore],
[ignore])])
- AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 -c
idl unix:socket $3],
+ AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10
--change-track=noalert idl unix:socket $3],
[0], [stdout], [ignore])
AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]),
[0], [$4])
@@ -1167,7 +1180,8 @@ m4_define([OVSDB_CHECK_IDL_TRACK_C],
AT_CLEANUP])
m4_define([OVSDB_CHECK_IDL_TRACK],
- [OVSDB_CHECK_IDL_TRACK_C($@)])
+ [OVSDB_CHECK_IDL_TRACK_C($@)
+ OVSDB_CHECK_IDL_TRACK_NOALERT_C($@)])
OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated],
[['["idltest",
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index ca4e87b8115c..abd30cf27d18 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -56,6 +56,7 @@
VLOG_DEFINE_THIS_MODULE(test_ovsdb);
struct test_ovsdb_pvt_context {
+ bool track_alert;
bool track;
};
@@ -122,6 +123,15 @@ parse_options(int argc, char *argv[], struct
test_ovsdb_pvt_context *pvt)
break;
case 'c':
+ if (optarg) {
+ if (!strcmp(optarg, "noalert")) {
+ pvt->track_alert = false;
+ } else if (!strcmp(optarg, "alert")) {
+ pvt->track_alert = true;
+ } else {
+ ovs_fatal(0, "value %s is invalid", optarg);
+ }
+ }
pvt->track = true;
break;
@@ -2610,6 +2620,7 @@ update_conditions(struct ovsdb_idl *idl, char *commands)
static void
do_idl(struct ovs_cmdl_context *ctx)
{
+ struct test_ovsdb_pvt_context *pvt = ctx->pvt;
struct jsonrpc *rpc;
struct ovsdb_idl *idl;
unsigned int seqno = 0;
@@ -2618,9 +2629,6 @@ do_idl(struct ovs_cmdl_context *ctx)
int step = 0;
int error;
int i;
- bool track;
-
- track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track;
idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true, true);
ovsdb_idl_set_leader_only(idl, false);
@@ -2637,8 +2645,13 @@ do_idl(struct ovs_cmdl_context *ctx)
rpc = NULL;
}
- if (track) {
- ovsdb_idl_track_add_all(idl);
+ if (pvt->track) {
+ unsigned char mode = OVSDB_IDL_MONITOR | OVSDB_IDL_TRACK;
+
+ if (pvt->track_alert) {
+ mode |= OVSDB_IDL_ALERT;
+ }
+ ovsdb_idl_set_column_mode_all(idl, mode);
}
setvbuf(stdout, NULL, _IONBF, 0);
@@ -2683,7 +2696,7 @@ do_idl(struct ovs_cmdl_context *ctx)
}
/* Print update. */
- if (track) {
+ if (pvt->track) {
print_idl_track(idl, step++, terse);
ovsdb_idl_track_clear(idl);
} else {
--
2.27.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev