> When starting the ovn-controller without monitor_all we need a few
> iterations of the engine node and updating monitoring conditions until
> we have loaded all data.
> Previously this was handled by the number of main loop iterations of
> ovn-controller and an arbitrary timeout.
> However this is was always best effort and there was no guarantee that
> we actually had loaded all data.
> 
> To solve this we now track the monitoring requests we sent out to
> southbound and when that data becomes available.
> 
> For monitor_all we just need to have the initial monitoring conditions
> fullfilled and are afterwards sure that we have all required data.
> 
> For non monitor_all we need at least one more iteration of the engine
> node and updating sb monitoring conditions. We limit this to 20 rounds
> for now, which should be sufficient and is at least the value of what
> the previous implementation provided.
> However if we notice that we do not update monitoring condition anymore
> after a engine run we can also assume that we have finished loading all
> data.
> 
> Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud>

Hi Felix,

the patch seems fine to me, just one question inline.

Regards,
Lorenzo

> ---
> v7->v8: introduced
> 
>  controller/ovn-controller.c | 27 ++++++++++++++++++++++++++-
>  lib/ovn-util.c              | 19 ++++++++++++++-----
>  tests/ovn-controller.at     |  1 -
>  tests/ovn-performance.at    |  3 ---
>  tests/ovn.at                | 10 ----------
>  5 files changed, 40 insertions(+), 20 deletions(-)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 440c76ca1..4dfb426f9 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -5996,6 +5996,7 @@ main(int argc, char *argv[])
>      unsigned int ovs_cond_seqno = UINT_MAX;
>      unsigned int ovnsb_cond_seqno = UINT_MAX;
>      unsigned int ovnsb_expected_cond_seqno = UINT_MAX;
> +    bool has_computed_once = false;
>  
>      struct controller_engine_ctx ctrl_engine_ctx = {
>          .lflow_cache = lflow_cache_create(),
> @@ -6070,6 +6071,20 @@ main(int argc, char *argv[])
>              ovnsb_cond_seqno = new_ovnsb_cond_seqno;
>          }
>  
> +        /* Check if we have received all initial dumps of the southbound
> +         * based on the monitor condtions we set.
> +         * If we have sb_monitor_all that means we have all data that we 
> would
> +         * ever need.
> +         * If we monitor cases individually we need at least one engine run. 
> */
> +        if (ovnsb_cond_seqno == ovnsb_expected_cond_seqno &&
> +                ovnsb_expected_cond_seqno != UINT_MAX) {
> +            if (sb_monitor_all) {
> +                daemon_started_recently_ignore();
> +            } else if (has_computed_once) {
> +                daemon_started_recently_countdown();
> +            }
> +        }
> +
>          struct engine_context eng_ctx = {
>              .ovs_idl_txn = ovs_idl_txn,
>              .ovnsb_idl_txn = ovnsb_idl_txn,
> @@ -6227,8 +6242,9 @@ main(int argc, char *argv[])
>  
>                      stopwatch_stop(CONTROLLER_LOOP_STOPWATCH_NAME,
>                                     time_msec());
> +
>                      if (engine_has_updated()) {
> -                        daemon_started_recently_countdown();
> +                        has_computed_once = true;
>                      }
>  
>                      ct_zones_data = engine_get_data(&en_ct_zones);
> @@ -6346,6 +6362,15 @@ main(int argc, char *argv[])
>                                      &runtime_data->local_datapaths,
>                                      sb_monitor_all);
>                          }
> +                        /* If there is no new expected seqno we have
> +                         * finished loading all needed data from
> +                         * southbound. We then need to run one more time 
> since
> +                         * we might behave differently. */
> +                        if (daemon_started_recently() &&
> +                              ovnsb_cond_seqno == ovnsb_expected_cond_seqno) 
> {
> +                            daemon_started_recently_ignore();
> +                            poll_immediate_wake();
> +                        }
>                          if (ovs_idl_txn) {
>                              update_qos(sbrec_port_binding_by_name, 
> ovs_idl_txn,
>                                         ovsrec_port_by_qos,
> diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> index f406114db..f89523502 100644
> --- a/lib/ovn-util.c
> +++ b/lib/ovn-util.c
> @@ -933,13 +933,24 @@ get_bridge(const struct ovsrec_bridge_table 
> *bridge_table, const char *br_name)
>      return NULL;
>  }
>  
> +/* This counts the amount of iterations of monitoring condition updates and
> + * their respective update towards us.
> + * This only needs to handle the case of non monitor_all since that will use
> + * the ignore feature below.
> + * In this case we need at least 2 iterations:
> + * 1. the initial iteration where we pull the sb content based on the few
> + *    conditions we now have.
> + * 2. after the first engine run we have enough information to update the
> + *    monitoring conditions to their final values.
> + * However based on the structure of the datapaths we might need more. The
> + * value below is just a hard limit of iterations. We detect if we are done
> + * earlier and then skip further iterations. */
>  #define DAEMON_STARTUP_DELAY_SEED 20
> -#define DAEMON_STARTUP_DELAY_MS   10000
>  
>  static int64_t startup_ts;
>  static int startup_delay = DAEMON_STARTUP_DELAY_SEED;
>  
> -/* Used by debug command only, for tests. */
> +/* Used if we do not need the startup delay (e.g. when using monitor_all). */
>  static bool ignore_startup_delay = false;
>  
>  OVS_CONSTRUCTOR(startup_ts_initializer) {
> @@ -980,9 +991,7 @@ daemon_started_recently(void)
>      if (startup_delay) {
>          return true;
>      }
> -
> -    /* Ensure that at least an amount of time has passed. */
> -    return time_wall_msec() - startup_ts <= DAEMON_STARTUP_DELAY_MS;

IIUC this condition will be evaluated just when sb_monitor_all is false, right?
Do you really need to remove this check? IIUC it will be always false when 
startup_delay
reaches 0, right? Am I missing something?

Regards,
Lorenzo

> +    return false;
>  }
>  
>  /* Builds a unique address set compatible name ([a-zA-Z_.][a-zA-Z_.0-9]*)
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index fdd574c20..59df1979d 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -161,7 +161,6 @@ check_patches \
>      'br-eth0 patch-localnet2-to-br-int patch-br-int-to-localnet2'
>  
>  # Delete the mapping and the ovn-bridge-mapping patch ports should go away.
> -check ovn-appctl -t ovn-controller debug/ignore-startup-delay
>  AT_CHECK([ovs-vsctl remove Open_vSwitch . external-ids ovn-bridge-mappings])
>  check_bridge_mappings
>  check_patches
> diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
> index 44bd4f7e4..4ddb1bd81 100644
> --- a/tests/ovn-performance.at
> +++ b/tests/ovn-performance.at
> @@ -238,9 +238,6 @@ for i in `seq 1 5`; do
>      as hv$i
>      ovs-vsctl add-br br-phys
>      ovn_attach n1 br-phys 192.168.0.$i
> -    # Do not postpone patch ports related changes, as they may occur more or 
> less anywere within the test,
> -    # hence causing unexpected recomputes.
> -    check ovn-appctl -t ovn-controller debug/ignore-startup-delay
>      if [[ $i -ge 3 ]] ; then
>          ovs-vsctl add-br br-ex
>          ovs-vsctl set open . external_ids:ovn-bridge-mappings="public:br-ex"
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 613a4a2db..cf8cdce3a 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -34956,7 +34956,6 @@ OVS_WAIT_UNTIL([
>      test x430 = x$(as hv1 ovs-vsctl get Interface ${iface1_uuid} mtu_request)
>  ])
>  
> -as hv1 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
>  # Check that pointing requested-chassis somewhere else will unplug the port
>  check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 \
>      options:requested-chassis=non-existent-chassis
> @@ -34964,7 +34963,6 @@ OVS_WAIT_UNTIL([
>      ! as hv1 ovs-vsctl get Interface ${iface1_uuid} _uuid
>  ])
>  
> -as hv2 check ovn-appctl -t ovn-controller debug/ignore-startup-delay
>  # Check that removing an lport will unplug it
>  AT_CHECK([test x${iface2_uuid} = x$(as hv2 ovs-vsctl get Interface 
> ${iface2_uuid} _uuid)], [0], [])
>  check ovn-nbctl --wait=hv lsp-del ${lsp2_uuid}
> @@ -37716,9 +37714,6 @@ ovs-vsctl -- add-port br-phys-2 fake-phys-2-to-int-2 
> -- \
>  
>  ovn_attach n1 br-phys 192.168.1.1 24
>  
> -# allow controller to immediately clean patch ports up
> -check ovn-appctl -t ovn-controller debug/ignore-startup-delay
> -
>  # check that patch port on br-int is cleaned up (and also its peer)
>  OVS_WAIT_UNTIL([
>      test 0 = $(ovs-vsctl --columns _uuid --bare find Port 
> name=fake-int-to-phys | wc -l)
> @@ -37784,9 +37779,6 @@ check ovn-nbctl lsp-set-options ln_port 
> network_name=phys-1
>  wait_for_ports_up
>  check ovn-nbctl --wait=hv sync
>  
> -# Allow controller to immediately clean patch ports up if any.
> -check ovn-appctl -t ovn-controller debug/ignore-startup-delay
> -
>  # check that patch port created on br-int and br-phys-1.
>  OVS_WAIT_UNTIL([
>      test 1 = $(ovs-vsctl --columns _uuid --bare find Port 
> name=patch-br-int-to-ln_port | wc -l)
> @@ -41050,8 +41042,6 @@ check ovn-nbctl lsp-add sw0 sw0p1
>  
>  check ovn-nbctl --wait=hv sync
>  
> -check ovn-appctl -t ovn-controller debug/ignore-startup-delay
> -
>  # Waits until the OVS database contains exactly the specified patch ports.
>  # Each argument should be of the form BRIDGE PORT PEER.
>  check_patch_ports () {
> -- 
> 2.43.0
> 
> 
> _______________________________________________
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to