When starting the ovn-controller without monitor_all we need a few iterations of the engine node and updating monitoring conditions until we have loaded all data. Previously this was handled by the number of main loop iterations of ovn-controller and an arbitrary timeout. However this is was always best effort and there was no guarantee that we actually had loaded all data.
To solve this we now track the monitoring requests we sent out to southbound and when that data becomes available. For monitor_all we just need to have the initial monitoring conditions fullfilled and are afterwards sure that we have all required data. For non monitor_all we need at least one more iteration of the engine node and updating sb monitoring conditions. We limit this to 20 rounds for now, which should be sufficient and is at least the value of what the previous implementation provided. However if we notice that we do not update monitoring condition anymore after a engine run we can also assume that we have finished loading all data. Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud> --- v7->v8: introduced controller/ovn-controller.c | 27 ++++++++++++++++++++++++++- lib/ovn-util.c | 19 ++++++++++++++----- tests/ovn-controller.at | 1 - tests/ovn-performance.at | 3 --- tests/ovn.at | 10 ---------- 5 files changed, 40 insertions(+), 20 deletions(-) diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 440c76ca1..4dfb426f9 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -5996,6 +5996,7 @@ main(int argc, char *argv[]) unsigned int ovs_cond_seqno = UINT_MAX; unsigned int ovnsb_cond_seqno = UINT_MAX; unsigned int ovnsb_expected_cond_seqno = UINT_MAX; + bool has_computed_once = false; struct controller_engine_ctx ctrl_engine_ctx = { .lflow_cache = lflow_cache_create(), @@ -6070,6 +6071,20 @@ main(int argc, char *argv[]) ovnsb_cond_seqno = new_ovnsb_cond_seqno; } + /* Check if we have received all initial dumps of the southbound + * based on the monitor condtions we set. + * If we have sb_monitor_all that means we have all data that we would + * ever need. + * If we monitor cases individually we need at least one engine run. */ + if (ovnsb_cond_seqno == ovnsb_expected_cond_seqno && + ovnsb_expected_cond_seqno != UINT_MAX) { + if (sb_monitor_all) { + daemon_started_recently_ignore(); + } else if (has_computed_once) { + daemon_started_recently_countdown(); + } + } + struct engine_context eng_ctx = { .ovs_idl_txn = ovs_idl_txn, .ovnsb_idl_txn = ovnsb_idl_txn, @@ -6227,8 +6242,9 @@ main(int argc, char *argv[]) stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME, time_msec()); + if (engine_has_updated()) { - daemon_started_recently_countdown(); + has_computed_once = true; } ct_zones_data = engine_get_data(&en_ct_zones); @@ -6346,6 +6362,15 @@ main(int argc, char *argv[]) &runtime_data->local_datapaths, sb_monitor_all); } + /* If there is no new expected seqno we have + * finished loading all needed data from + * southbound. We then need to run one more time since + * we might behave differently. */ + if (daemon_started_recently() && + ovnsb_cond_seqno == ovnsb_expected_cond_seqno) { + daemon_started_recently_ignore(); + poll_immediate_wake(); + } if (ovs_idl_txn) { update_qos(sbrec_port_binding_by_name, ovs_idl_txn, ovsrec_port_by_qos, diff --git a/lib/ovn-util.c b/lib/ovn-util.c index f406114db..f89523502 100644 --- a/lib/ovn-util.c +++ b/lib/ovn-util.c @@ -933,13 +933,24 @@ get_bridge(const struct ovsrec_bridge_table *bridge_table, const char *br_name) return NULL; } +/* This counts the amount of iterations of monitoring condition updates and + * their respective update towards us. + * This only needs to handle the case of non monitor_all since that will use + * the ignore feature below. + * In this case we need at least 2 iterations: + * 1. the initial iteration where we pull the sb content based on the few + * conditions we now have. + * 2. after the first engine run we have enough information to update the + * monitoring conditions to their final values. + * However based on the structure of the datapaths we might need more. The + * value below is just a hard limit of iterations. We detect if we are done + * earlier and then skip further iterations. */ #define DAEMON_STARTUP_DELAY_SEED 20 -#define DAEMON_STARTUP_DELAY_MS 10000 static int64_t startup_ts; static int startup_delay = DAEMON_STARTUP_DELAY_SEED; -/* Used by debug command only, for tests. */ +/* Used if we do not need the startup delay (e.g. when using monitor_all). */ static bool ignore_startup_delay = false; OVS_CONSTRUCTOR(startup_ts_initializer) { @@ -980,9 +991,7 @@ daemon_started_recently(void) if (startup_delay) { return true; } - - /* Ensure that at least an amount of time has passed. */ - return time_wall_msec() - startup_ts <= DAEMON_STARTUP_DELAY_MS; + return false; } /* Builds a unique address set compatible name ([a-zA-Z_.][a-zA-Z_.0-9]*) diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at index fdd574c20..59df1979d 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at @@ -161,7 +161,6 @@ check_patches \ 'br-eth0 patch-localnet2-to-br-int patch-br-int-to-localnet2' # Delete the mapping and the ovn-bridge-mapping patch ports should go away. -check ovn-appctl -t ovn-controller debug/ignore-startup-delay AT_CHECK([ovs-vsctl remove Open_vSwitch . external-ids ovn-bridge-mappings]) check_bridge_mappings check_patches diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at index 44bd4f7e4..4ddb1bd81 100644 --- a/tests/ovn-performance.at +++ b/tests/ovn-performance.at @@ -238,9 +238,6 @@ for i in `seq 1 5`; do as hv$i ovs-vsctl add-br br-phys ovn_attach n1 br-phys 192.168.0.$i - # Do not postpone patch ports related changes, as they may occur more or less anywere within the test, - # hence causing unexpected recomputes. - check ovn-appctl -t ovn-controller debug/ignore-startup-delay if [[ $i -ge 3 ]] ; then ovs-vsctl add-br br-ex ovs-vsctl set open . external_ids:ovn-bridge-mappings="public:br-ex" diff --git a/tests/ovn.at b/tests/ovn.at index 613a4a2db..cf8cdce3a 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -34956,7 +34956,6 @@ OVS_WAIT_UNTIL([ test x430 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request) ]) -as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay # Check that pointing requested-chassis somewhere else will unplug the port check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \ options:requested-chassis=non-existent-chassis @@ -34964,7 +34963,6 @@ OVS_WAIT_UNTIL([ ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid ]) -as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay # Check that removing an lport will unplug it AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface ${iface2_uuid} _uuid)], [0], []) check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid} @@ -37716,9 +37714,6 @@ ovs-vsctl -- add-port br-phys-2 fake-phys-2-to-int-2 -- \ ovn_attach n1 br-phys 192.168.1.1 24 -# allow controller to immediately clean patch ports up -check ovn-appctl -t ovn-controller debug/ignore-startup-delay - # check that patch port on br-int is cleaned up (and also its peer) OVS_WAIT_UNTIL([ test 0 = $(ovs-vsctl --columns _uuid --bare find Port name=fake-int-to-phys | wc -l) @@ -37784,9 +37779,6 @@ check ovn-nbctl lsp-set-options ln_port network_name=phys-1 wait_for_ports_up check ovn-nbctl --wait=hv sync -# Allow controller to immediately clean patch ports up if any. -check ovn-appctl -t ovn-controller debug/ignore-startup-delay - # check that patch port created on br-int and br-phys-1. OVS_WAIT_UNTIL([ test 1 = $(ovs-vsctl --columns _uuid --bare find Port name=patch-br-int-to-ln_port | wc -l) @@ -41050,8 +41042,6 @@ check ovn-nbctl lsp-add sw0 sw0p1 check ovn-nbctl --wait=hv sync -check ovn-appctl -t ovn-controller debug/ignore-startup-delay - # Waits until the OVS database contains exactly the specified patch ports. # Each argument should be of the form BRIDGE PORT PEER. check_patch_ports () { -- 2.43.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev