On Wed, Jun 22, 2022 at 3:23 AM Xavier Simonart <[email protected]> wrote:
>
> When VIF ports are claimed on a chassis, SBDB Port_Binding table is
updated.
> If the SBDB IDL is still is read-only ("in transaction") when such a
update
> is required, the update is not possible and recompute is triggered through
> I+P failure.
>
> This situation can happen:
> - after updating Port_Binding->chassis to SBDB for one port, in a
following
>   iteration, ovn-controller handles Interface:external_ids:ovn-installed
>   (for the same port) while SBDB is still read-only.
> - after updating Port_Binding->chassis to SBDB for one port, in a
following
>   iteration, ovn-controller updates Port_Binding->chassis for another
port,
>   while SBDB is still read-only.
>
> This patch prevent the recompute, by having the if-status module
> updating the Port_Binding chassis (if needed) when possible.
> This does not delay Port_Binding chassis update compared to before this
patch.
> - With the patch, Port_Binding chassis will be updated as soon as SBDB is
> again writable, without recompute.
> - Without the patch, Port_Binding chassis was updated as soon as SBDB was
> again writable, through a recompute.
>
> As part of this patch, ovn-installed will not be updated for additional
chassis;
> it will only be updated when the migration is completed.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2059253
> Signed-off-by: Xavier Simonart <[email protected]>
>
> ---
> v2:  - handled Dumitru's comments.
>      - handled Han's comments, mainly ensure we moved out of CLAIMED state
>        only after updating pb->chassis to guarentee physical flows are
installed
>        when ovn-installed is updated in OVS.
>      - slighly reorganize the code to isolate 'notify_up = false' cases in
>        claim_port (i.e. ports such as virtual ports), in the idea of
making
>        future patch preventing recomputes when virtual ports are claimed.
>      - updated test case to cause more race conditions.
>      - rebased on origin/main
>      - note that "additional chassis" as now supported by
>        "Support LSP:options:requested-chassis as a list" might still cause
>        recomputes.
>      - fixed missing flows when Port_Binding chassis was updated by
mgr_update
>        w/o any lflow recalculation.
> v3:  - handled Dumitru's comments on v2, mainly have runtime_data handler
>        handling pb_claims when sb becomes writable (instead of a lflow
handler).
>      - fixed test as it was not checking recomputes on all hv, as well as
a flaky
>        behavior.
>      - rebased on origin/main.
> ---
>  controller/binding.c        | 154 +++++++++++++++++++++----------
>  controller/binding.h        |  15 +++-
>  controller/if-status.c      | 174 ++++++++++++++++++++++++++++++++----
>  controller/if-status.h      |  16 +++-
>  controller/ovn-controller.c |  72 ++++++++++++++-
>  tests/ovn-macros.at         |  12 +++
>  tests/ovn.at                | 147 +++++++++++++++++++++++++++++-
>  tests/perf-northd.at        |  17 ----
>  8 files changed, 519 insertions(+), 88 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index 2279570f9..b21577f71 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -644,11 +644,17 @@ local_binding_get_lport_ofport(const struct shash
*local_bindings,
>  }
>
>  bool
> -local_binding_is_up(struct shash *local_bindings, const char *pb_name)
> +local_binding_is_up(struct shash *local_bindings, const char *pb_name,
> +                    const struct sbrec_chassis *chassis_rec)
>  {
>      struct local_binding *lbinding =
>          local_binding_find(local_bindings, pb_name);
>      struct binding_lport *b_lport =
local_binding_get_primary_lport(lbinding);
> +
> +    if (b_lport && b_lport->pb->chassis != chassis_rec) {
> +        return false;
> +    }
> +
>      if (lbinding && b_lport && lbinding->iface) {
>          if (b_lport->pb->n_up && !b_lport->pb->up[0]) {
>              return false;
> @@ -660,13 +666,23 @@ local_binding_is_up(struct shash *local_bindings,
const char *pb_name)
>  }
>
>  bool
> -local_binding_is_down(struct shash *local_bindings, const char *pb_name)
> +local_binding_is_down(struct shash *local_bindings, const char *pb_name,
> +                      const struct sbrec_chassis *chassis_rec)
>  {
>      struct local_binding *lbinding =
>          local_binding_find(local_bindings, pb_name);
>
>      struct binding_lport *b_lport =
local_binding_get_primary_lport(lbinding);
>
> +    if (b_lport) {
> +        if (b_lport->pb->chassis == chassis_rec) {
> +            return false;
> +        } else if (b_lport->pb->chassis) {
> +            VLOG_DBG("lport %s already claimed by other chassis",
> +                     b_lport->pb->logical_port);
> +        }
> +    }
> +
>      if (!lbinding) {
>          return true;
>      }
> @@ -884,37 +900,80 @@ get_lport_type_str(enum en_lport_type lport_type)
>      OVS_NOT_REACHED();
>  }
>
> -/* For newly claimed ports, if 'notify_up' is 'false':
> +void
> +set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,
> +                        const struct sbrec_chassis *chassis_rec,
> +                        bool is_set)
> +{
> +    if (pb->chassis != chassis_rec) {
> +         if (is_set) {
> +            if (pb->chassis) {
> +                VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> +                          pb->logical_port, pb->chassis->name,
> +                          chassis_rec->name);
> +            } else {
> +                VLOG_INFO("Claiming lport %s for this chassis.",
> +                          pb->logical_port);
> +            }
> +            for (int i = 0; i < pb->n_mac; i++) {
> +                VLOG_INFO("%s: Claiming %s", pb->logical_port,
pb->mac[i]);
> +            }
> +            sbrec_port_binding_set_chassis(pb, chassis_rec);
> +        }
> +    } else if (!is_set) {
> +        sbrec_port_binding_set_chassis(pb, NULL);
> +    }
> +}
> +
> +void
> +local_binding_set_pb(struct shash *local_bindings, const char *pb_name,
> +                     const struct sbrec_chassis *chassis_rec,
> +                     struct hmap *tracked_datapaths, bool is_set)
> +{
> +    struct local_binding *lbinding =
> +        local_binding_find(local_bindings, pb_name);
> +    struct binding_lport *b_lport =
local_binding_get_primary_lport(lbinding);
> +
> +    if (b_lport) {
> +        set_pb_chassis_in_sbrec(b_lport->pb, chassis_rec, is_set);
> +        if (tracked_datapaths) {
> +            update_lport_tracking(b_lport->pb, tracked_datapaths, true);
> +        }
> +    }
> +}
> +
> +/* For newly claimed ports:
>   * - set the 'pb.up' field to true if 'pb' has no 'parent_pb'.
>   * - set the 'pb.up' field to true if 'parent_pb.up' is 'true' (e.g., for
>   *   container and virtual ports).
> - * Otherwise request a notification to be sent when the OVS flows
> - * corresponding to 'pb' have been installed.
> + *
> + * Returns false if lport is not claimed due to 'sb_readonly'.
> + * Returns true otherwise.
>   *
>   * Note:
> - *   Updates (directly or through a notification) the 'pb->up' field
only if
> - *   it's explicitly set to 'false'.
> + *   Updates the 'pb->up' field only if it's explicitly set to 'false'.
>   *   This is to ensure compatibility with older versions of ovn-northd.
>   */
> -static void
> +static bool
>  claimed_lport_set_up(const struct sbrec_port_binding *pb,
>                       const struct sbrec_port_binding *parent_pb,
> -                     const struct sbrec_chassis *chassis_rec,
> -                     bool notify_up, struct if_status_mgr *if_mgr)
> +                     bool sb_readonly)
>  {
> -    if (!notify_up) {
> -        bool up = true;
> -        if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
> +    /* When notify_up is false in claim_port(), no state is created
> +     * by if_status_mgr. In such cases, return false (i.e. trigger
recompute)
> +     * if we can't update sb (because it is readonly).
> +     */
> +    bool up = true;
> +    if (!parent_pb || (parent_pb->n_up && parent_pb->up[0])) {
> +        if (!sb_readonly) {
>              if (pb->n_up) {
>                  sbrec_port_binding_set_up(pb, &up, 1);
>              }
> +        } else if (pb->n_up && !pb->up[0]) {
> +            return false;
>          }
> -        return;
> -    }
> -
> -    if (pb->chassis != chassis_rec || (pb->n_up && !pb->up[0])) {
> -        if_status_mgr_claim_iface(if_mgr, pb->logical_port);
>      }
> +    return true;
>  }
>
>  typedef void (*set_func)(const struct sbrec_port_binding *pb,
> @@ -1057,37 +1116,35 @@ claim_lport(const struct sbrec_port_binding *pb,
>              struct hmap *tracked_datapaths,
>              struct if_status_mgr *if_mgr)
>  {
> -    if (!sb_readonly) {
> -        claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up,
if_mgr);
> -    }
> -
>      enum can_bind can_bind = lport_can_bind_on_this_chassis(chassis_rec,
pb);
>      bool update_tracked = false;
>
>      if (can_bind == CAN_BIND_AS_MAIN) {
>          if (pb->chassis != chassis_rec) {
> -            if (sb_readonly) {
> -                return false;
> -            }
> -
> -            if (pb->chassis) {
> -                VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> -                        pb->logical_port, pb->chassis->name,
> -                        chassis_rec->name);
> -            } else {
> -                VLOG_INFO("Claiming lport %s for this chassis.",
> -                          pb->logical_port);
> -            }
> -            for (size_t i = 0; i < pb->n_mac; i++) {
> -                VLOG_INFO("%s: Claiming %s", pb->logical_port,
pb->mac[i]);
> -            }
> -
> -            sbrec_port_binding_set_chassis(pb, chassis_rec);
>              if (is_additional_chassis(pb, chassis_rec)) {
> +                if (sb_readonly) {
> +                    return false;
> +                }
>                  remove_additional_chassis(pb, chassis_rec);
>              }
>              update_tracked = true;
>          }
> +        if (!notify_up) {
> +            if (!claimed_lport_set_up(pb, parent_pb, sb_readonly)) {
> +                return false;
> +            }
> +            if (pb->chassis != chassis_rec) {
> +                if (sb_readonly) {
> +                    return false;
> +                }
> +                set_pb_chassis_in_sbrec(pb, chassis_rec, true);
> +            }
> +        } else {
> +            if ((pb->chassis != chassis_rec) || (pb->n_up &&
!pb->up[0])) {
> +                if_status_mgr_claim_iface(if_mgr, pb, chassis_rec,
> +                                          sb_readonly);
> +            }
> +        }
>      } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
>          if (!is_additional_chassis(pb, chassis_rec)) {
>              if (sb_readonly) {
> @@ -1132,7 +1189,8 @@ claim_lport(const struct sbrec_port_binding *pb,
>   */
>  static bool
>  release_lport_main_chassis(const struct sbrec_port_binding *pb,
> -                           bool sb_readonly)
> +                           bool sb_readonly,
> +                           struct if_status_mgr *if_mgr)
>  {
>      if (pb->encap) {
>          if (sb_readonly) {
> @@ -1141,11 +1199,13 @@ release_lport_main_chassis(const struct
sbrec_port_binding *pb,
>          sbrec_port_binding_set_encap(pb, NULL);
>      }
>
> +    /* If sb readonly, pb->chassis unset through if-status if present. */
>      if (pb->chassis) {
> -        if (sb_readonly) {
> +        if (!sb_readonly) {
> +            sbrec_port_binding_set_chassis(pb, NULL);
> +        } else if (!if_status_mgr_iface_is_present(if_mgr,
pb->logical_port)) {
>              return false;
>          }
> -        sbrec_port_binding_set_chassis(pb, NULL);
>      }
>
>      if (pb->virtual_parent) {
> @@ -1155,7 +1215,8 @@ release_lport_main_chassis(const struct
sbrec_port_binding *pb,
>          sbrec_port_binding_set_virtual_parent(pb, NULL);
>      }
>
> -    VLOG_INFO("Releasing lport %s from this chassis.", pb->logical_port);
> +    VLOG_INFO("Releasing lport %s from this chassis (sb_readonly=%d)",
> +              pb->logical_port, sb_readonly);
>      return true;
>  }
>
> @@ -1189,7 +1250,7 @@ release_lport(const struct sbrec_port_binding *pb,
>                struct hmap *tracked_datapaths, struct if_status_mgr
*if_mgr)
>  {
>      if (pb->chassis == chassis_rec) {
> -        if (!release_lport_main_chassis(pb, sb_readonly)) {
> +        if (!release_lport_main_chassis(pb, sb_readonly, if_mgr)) {
>              return false;
>          }
>      } else if (is_additional_chassis(pb, chassis_rec)) {
> @@ -1271,7 +1332,7 @@ consider_vif_lport_(const struct sbrec_port_binding
*pb,
>                               b_lport->lbinding->iface,
>                               !b_ctx_in->ovnsb_idl_txn,
>                               !parent_pb, b_ctx_out->tracked_dp_bindings,
> -                             b_ctx_out->if_mgr)){
> +                             b_ctx_out->if_mgr)) {
>                  return false;
>              }
>
> @@ -1527,7 +1588,8 @@ consider_localport(const struct sbrec_port_binding
*pb,
>      enum can_bind can_bind = lport_can_bind_on_this_chassis(
>          b_ctx_in->chassis_rec, pb);
>      if (can_bind == CAN_BIND_AS_MAIN) {
> -        if (!release_lport_main_chassis(pb, !b_ctx_in->ovnsb_idl_txn)) {
> +        if (!release_lport_main_chassis(pb, !b_ctx_in->ovnsb_idl_txn,
> +            b_ctx_out->if_mgr)) {
>              return false;
>          }
>      } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
> diff --git a/controller/binding.h b/controller/binding.h
> index 1fed06674..d20659b0b 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -151,8 +151,10 @@ const struct sbrec_port_binding
*local_binding_get_primary_pb(
>  ofp_port_t local_binding_get_lport_ofport(const struct shash
*local_bindings,
>                                            const char *pb_name);
>
> -bool local_binding_is_up(struct shash *local_bindings, const char
*pb_name);
> -bool local_binding_is_down(struct shash *local_bindings, const char
*pb_name);
> +bool local_binding_is_up(struct shash *local_bindings, const char
*pb_name,
> +                         const struct sbrec_chassis *);
> +bool local_binding_is_down(struct shash *local_bindings, const char
*pb_name,
> +                           const struct sbrec_chassis *);
>  void local_binding_set_up(struct shash *local_bindings, const char
*pb_name,
>                            const struct sbrec_chassis *chassis_rec,
>                            const char *ts_now_str, bool sb_readonly,
> @@ -160,7 +162,10 @@ void local_binding_set_up(struct shash
*local_bindings, const char *pb_name,
>  void local_binding_set_down(struct shash *local_bindings, const char
*pb_name,
>                              const struct sbrec_chassis *chassis_rec,
>                              bool sb_readonly, bool ovs_readonly);
> -
> +void local_binding_set_pb(struct shash *local_bindings, const char
*pb_name,
> +                          const struct sbrec_chassis *chassis_rec,
> +                          struct hmap *tracked_datapaths,
> +                          bool is_set);
>  void binding_register_ovs_idl(struct ovsdb_idl *);
>  void binding_run(struct binding_ctx_in *, struct binding_ctx_out *);
>  bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
> @@ -178,6 +183,10 @@ void binding_dump_local_bindings(struct
local_binding_data *, struct ds *);
>  bool is_additional_chassis(const struct sbrec_port_binding *pb,
>                             const struct sbrec_chassis *chassis_rec);
>
> +void set_pb_chassis_in_sbrec(const struct sbrec_port_binding *pb,
> +                             const struct sbrec_chassis *chassis_rec,
> +                             bool is_set);
> +
>  /* Corresponds to each Port_Binding.type. */
>  enum en_lport_type {
>      LP_UNKNOWN,
> diff --git a/controller/if-status.c b/controller/if-status.c
> index ad61844d8..7693c289b 100644
> --- a/controller/if-status.c
> +++ b/controller/if-status.c
> @@ -24,6 +24,7 @@
>  #include "lib/util.h"
>  #include "timeval.h"
>  #include "openvswitch/vlog.h"
> +#include "lib/ovn-sb-idl.h"
>
>  VLOG_DEFINE_THIS_MODULE(if_status);
>
> @@ -53,9 +54,11 @@ VLOG_DEFINE_THIS_MODULE(if_status);
>   */
>
>  enum if_state {
> -    OIF_CLAIMED,       /* Newly claimed interface. */
> -    OIF_INSTALL_FLOWS, /* Already claimed interface for which flows are
still
> -                        * being installed.
> +    OIF_CLAIMED,       /* Newly claimed interface. pb->chassis not yet
updated.
> +                        */
> +    OIF_INSTALL_FLOWS, /* Claimed interface with pb->chassis successfully
> +                        * updated in SB and for which flows are still
being
> +                        * installed.
>                          */
>      OIF_MARK_UP,       /* Interface with flows successfully installed in
OVS
>                          * but not yet marked "up" in the binding module
(in
> @@ -78,6 +81,55 @@ static const char *if_state_names[] = {
>      [OIF_INSTALLED]     = "INSTALLED",
>  };
>
> +/*
> + *       +----------------------+
> + * +---> |                      |
> + * | +-> |         NULL         |
<--------------------------------------+++-+
> + * | |   +----------------------+
     |
> + * | |     ^ release_iface   | claim_iface
    |
> + * | |     |                 V - sbrec_update_chassis(if sb is rw)
    |
> + * | |   +----------------------+
     |
> + * | |   |                      |
<----------------------------------------+ |
> + * | |   |       CLAIMED        |
<--------------------------------------+ | |
> + * | |   +----------------------+
 | | |
> + * | |                  | mgr_update(when sb is rw)
 | | |
> + * | | release_iface    |  - sbrec_update_chassis
 | | |
> + * | |                  |  - request seqno
| | |
> + * | |                  V
 | | |
> + * | |   +----------------------+
 | | |
> + * | +-- |                      |  mgr_run(seqno not rcvd)
| | |
> + * |     |    INSTALL_FLOWS     |   - set port down in sb
 | | |
> + * |     |                      |  mgr_update()
 | | |
> + * |     +----------------------+   - sbrec_update_chassis if needed
| | |
> + * |                    |
 | | |
> + * |                    |  mgr_run(seqno rcvd)
| | |
> + * |                    |  - set port up in sb
| | |
> + * | release_iface      |  - set ovn-installed in ovs
 | | |
> + * |                    V
 | | |
> + * |   +----------------------+
 | | |
> + * |   |                      |  mgr_run()
| | |
> + * +-- |       MARK_UP        |  - set port up in sb
| | |
> + *     |                      |  - set ovn-installed in ovs
 | | |
> + *     |                      |  mgr_update()
 | | |
> + *     +----------------------+  - sbrec_update_chassis if needed
 | | |
> + *              |
 | | |
> + *              | mgr_update(rcvd port up / ovn_installed & chassis set)
| | |
> + *              V
 | | |
> + *     +----------------------+
 | | |
> + *     |      INSTALLED       | ------------> claim_iface
---------------+ | |
> + *     +----------------------+
   | |
> + *              |
   | |
> + *              | release_iface
   | |
> + *              V
   | |
> + *     +----------------------+
   | |
> + *     |                      | ------------> claim_iface
-----------------+ |
> + *     |      MARK_DOWN       | ------> mgr_update(rcvd port down)
----------+
> + *     |                      | mgr_run()
> + *     |                      | - set port down in sb
> + *     |                      | mgr_update()
> + *     +----------------------+ - sbrec_update_chassis(NULL)
> + */
> +
>  struct ovs_iface {
>      char *id;               /* Extracted from OVS external_ids.iface_id.
*/
>      enum if_state state;    /* State of the interface in the state
machine. */
> @@ -85,6 +137,7 @@ struct ovs_iface {
>                               * be fully programmed in OVS.  Only used in
state
>                               * OIF_INSTALL_FLOWS.
>                               */
> +    bool chassis_update_required;  /* If true, pb->chassis must be
updated. */
>  };
>
>  static uint64_t ifaces_usage;
> @@ -158,14 +211,23 @@ if_status_mgr_destroy(struct if_status_mgr *mgr)
>  }
>
>  void
> -if_status_mgr_claim_iface(struct if_status_mgr *mgr, const char
*iface_id)
> +if_status_mgr_claim_iface(struct if_status_mgr *mgr,
> +                          const struct sbrec_port_binding *pb,
> +                          const struct sbrec_chassis *chassis_rec,
> +                          bool sb_readonly)
>  {
> +    const char *iface_id = pb->logical_port;
>      struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
>
>      if (!iface) {
>          iface = ovs_iface_create(mgr, iface_id, OIF_CLAIMED);
>      }
> -
> +    if (!sb_readonly) {
> +        set_pb_chassis_in_sbrec(pb, chassis_rec, true);
> +        iface->chassis_update_required = false;
> +    } else {
> +        iface->chassis_update_required = true;
> +    }
>      switch (iface->state) {
>      case OIF_CLAIMED:
>      case OIF_INSTALL_FLOWS:
> @@ -182,6 +244,12 @@ if_status_mgr_claim_iface(struct if_status_mgr *mgr,
const char *iface_id)
>      }
>  }
>
> +bool
> +if_status_mgr_iface_is_present(struct if_status_mgr *mgr, const char
*iface_id)
> +{
> +    return !!shash_find_data(&mgr->ifaces, iface_id);
> +}
> +
>  void
>  if_status_mgr_release_iface(struct if_status_mgr *mgr, const char
*iface_id)
>  {
> @@ -246,9 +314,39 @@ if_status_mgr_delete_iface(struct if_status_mgr
*mgr, const char *iface_id)
>      }
>  }
>
> +bool
> +if_status_handle_claims(struct if_status_mgr *mgr,
> +                        struct local_binding_data *binding_data,
> +                        const struct sbrec_chassis *chassis_rec,
> +                        struct hmap *tracked_datapath,
> +                        bool sb_readonly)
> +{
> +    if (!binding_data || sb_readonly) {
> +        return false;
> +    }
> +
> +    struct shash *bindings = &binding_data->bindings;
> +    struct hmapx_node *node;
> +
> +    bool rc = false;
> +    HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_CLAIMED]) {
> +        struct ovs_iface *iface = node->data;
> +        if (iface->chassis_update_required) {

Thanks Xavier for the revision. The state machine looks more clear now, but
I have a major concern for the use of chassis_update_required. This bool
flag is used to decide if an update to SB is needed, and once a SB update
is requested, it is set to false, and assumes the SB update will succeed
immediately. However, the assumption may be wrong. There can be different
kinds of reasons that the subsequent SB update fails, or delayed, so this
flag is not reliable. Instead, in CLAIMED state, the responsibility to make
sure the SB update is completed. If the transaction is in-progress, the
sb_readonly is true. So if sb_readonly is false, it means nothing is
in-progress, so we can always check if (!sb_readonly && <SB chassis is not
updated for the port-binding>) we should just send the update, regardless
of whether we have requested it before. Please also see another comment
below for the state transition.

> +            VLOG_INFO("if_status_handle_claims for %s", iface->id);
> +            local_binding_set_pb(bindings, iface->id, chassis_rec,
> +                                 tracked_datapath, true);
> +            rc = true;
> +        }
> +        iface->chassis_update_required = false;
> +    }
> +    return rc;
> +}
> +
>  void
>  if_status_mgr_update(struct if_status_mgr *mgr,
> -                     struct local_binding_data *binding_data)
> +                     struct local_binding_data *binding_data,
> +                     const struct sbrec_chassis *chassis_rec,
> +                     bool sb_readonly)
>  {
>      if (!binding_data) {
>          return;
> @@ -257,13 +355,25 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>      struct shash *bindings = &binding_data->bindings;
>      struct hmapx_node *node;
>
> +    /* Interfaces in OIF_MARK_UP state have already set their
pb->chassis.
> +     * However, it might have been reset by another hv.
> +     */
>      /* Move all interfaces that have been confirmed "up" by the binding
module,
>       * from OIF_MARK_UP to OIF_INSTALLED.
>       */
>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_UP]) {
>          struct ovs_iface *iface = node->data;
>
> -        if (local_binding_is_up(bindings, iface->id)) {
> +        if (iface->chassis_update_required) {
> +            if (!sb_readonly) {
> +                iface->chassis_update_required = false;
> +                local_binding_set_pb(bindings, iface->id, chassis_rec,
> +                                     NULL, true);
> +            } else {
> +                continue;
> +            }
> +        }
> +        if (local_binding_is_up(bindings, iface->id, chassis_rec)) {
>              ovs_iface_set_state(mgr, iface, OIF_INSTALLED);
>          }
>      }
> @@ -274,23 +384,53 @@ if_status_mgr_update(struct if_status_mgr *mgr,
>      HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_MARK_DOWN]) {
>          struct ovs_iface *iface = node->data;
>
> -        if (local_binding_is_down(bindings, iface->id)) {
> +        if (!sb_readonly) {
> +            local_binding_set_pb(bindings, iface->id, chassis_rec,
> +                                 NULL, false);
> +        }
> +        if (local_binding_is_down(bindings, iface->id, chassis_rec)) {
>              ovs_iface_destroy(mgr, iface);
>          }
>      }
>
> -    /* Register for a notification about flows being installed in OVS
for all
> -     * newly claimed interfaces.
> +    if (!sb_readonly) {
> +        HMAPX_FOR_EACH_SAFE (node,
&mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
> +            struct ovs_iface *iface = node->data;
> +
> +            if (iface->chassis_update_required) {
> +                iface->chassis_update_required = false;
> +                local_binding_set_pb(bindings, iface->id, chassis_rec,
> +                                     NULL, true);
> +            }
> +        }
> +    }
> +
> +    /* Update Port_Binding->chassis for newly claimed interfaces
> +     * Register for a notification about flows being installed in OVS
for all
> +     * newly claimed interfaces for which we could update pb->chassis.
>       *
>       * Move them from OIF_CLAIMED to OIF_INSTALL_FLOWS.
>       */
> -    bool new_ifaces = false;
> -    HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_CLAIMED]) {
> -        struct ovs_iface *iface = node->data;
>
> -        ovs_iface_set_state(mgr, iface, OIF_INSTALL_FLOWS);
> -        iface->install_seqno = mgr->iface_seqno + 1;
> -        new_ifaces = true;
> +    bool new_ifaces = false;
> +    if (!sb_readonly) {
> +        HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_CLAIMED]) {
> +            struct ovs_iface *iface = node->data;
> +            /* No need to check for chassis_update_required as already
done
> +             * in if_status_handle_claims or if_status_mgr_claim_iface
> +             */
> +            ovs_iface_set_state(mgr, iface, OIF_INSTALL_FLOWS);

We need to make sure the chassis in SB port-binding is up-to-date (i.e. the
update notification from SB DB has been received) before moving to
INSTALL_FLOWS. Otherwise, it is still possible that the state is moved too
early and end up with incomplete flow installation for the lport when the
state is finally moved to INSTALLED.

Thanks,
Han

> +            iface->install_seqno = mgr->iface_seqno + 1;
> +            new_ifaces = true;
> +        }
> +    } else {
> +        HMAPX_FOR_EACH_SAFE (node, &mgr->ifaces_per_state[OIF_CLAIMED]) {
> +            struct ovs_iface *iface = node->data;
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5,
1);
> +            VLOG_INFO_RL(&rl,
> +                         "Not updating pb chassis for %s now as "
> +                         "sb is readonly", iface->id);
> +        }
>      }
>
>      /* Request a seqno update when the flows for new interfaces have been
> @@ -403,7 +543,7 @@ if_status_mgr_update_bindings(struct if_status_mgr
*mgr,
>      struct hmapx_node *node;
>
>      /* Notify the binding module to set "down" all bindings that are
still
> -     * in the process of being installed in OVS, i.e., are not yet
instsalled.
> +     * in the process of being installed in OVS, i.e., are not yet
installed.
>       */
>      HMAPX_FOR_EACH (node, &mgr->ifaces_per_state[OIF_INSTALL_FLOWS]) {
>          struct ovs_iface *iface = node->data;
> diff --git a/controller/if-status.h b/controller/if-status.h
> index bb8a3950d..f9b05d30d 100644
> --- a/controller/if-status.h
> +++ b/controller/if-status.h
> @@ -27,15 +27,27 @@ struct if_status_mgr *if_status_mgr_create(void);
>  void if_status_mgr_clear(struct if_status_mgr *);
>  void if_status_mgr_destroy(struct if_status_mgr *);
>
> -void if_status_mgr_claim_iface(struct if_status_mgr *, const char
*iface_id);
> +void if_status_mgr_claim_iface(struct if_status_mgr *,
> +                               const struct sbrec_port_binding *pb,
> +                               const struct sbrec_chassis *chassis_rec,
> +                               bool sb_readonly);
>  void if_status_mgr_release_iface(struct if_status_mgr *, const char
*iface_id);
>  void if_status_mgr_delete_iface(struct if_status_mgr *, const char
*iface_id);
>
> -void if_status_mgr_update(struct if_status_mgr *, struct
local_binding_data *);
> +void if_status_mgr_update(struct if_status_mgr *, struct
local_binding_data *,
> +                          const struct sbrec_chassis *chassis,
> +                          bool sb_readonly);
>  void if_status_mgr_run(struct if_status_mgr *mgr, struct
local_binding_data *,
>                         const struct sbrec_chassis *,
>                         bool sb_readonly, bool ovs_readonly);
>  void if_status_mgr_get_memory_usage(struct if_status_mgr *mgr,
>                                      struct simap *usage);
> +bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
> +                                    const char *iface_id);
> +bool if_status_handle_claims(struct if_status_mgr *mgr,
> +                             struct local_binding_data *binding_data,
> +                             const struct sbrec_chassis *chassis_rec,
> +                             struct hmap *tracked_datapath,
> +                             bool sb_readonly);
>
>  # endif /* controller/if-status.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 69615308e..3947baf03 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1464,6 +1464,73 @@ en_runtime_data_run(struct engine_node *node, void
*data)
>      engine_set_node_state(node, EN_UPDATED);
>  }
>
> +struct ed_type_sb_ro {
> +    bool sb_readonly;
> +};
> +
> +static void *
> +en_sb_ro_init(struct engine_node *node OVS_UNUSED,
> +              struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct ed_type_sb_ro *data = xzalloc(sizeof *data);
> +    return data;
> +}
> +
> +static void
> +en_sb_ro_run(struct engine_node *node, void *data)
> +{
> +    struct ed_type_sb_ro *sb_ro_data = data;
> +    bool sb_readonly = !engine_get_context()->ovnsb_idl_txn;
> +    if (sb_ro_data->sb_readonly != sb_readonly) {
> +        sb_ro_data->sb_readonly = sb_readonly;
> +        if (!sb_ro_data->sb_readonly) {
> +            engine_set_node_state(node, EN_UPDATED);
> +        }
> +    }
> +}
> +
> +static void
> +en_sb_ro_cleanup(void *data OVS_UNUSED)
> +{
> +}
> +
> +static bool
> +runtime_data_sb_ro_handler(struct engine_node *node, void *data)
> +{
> +    const struct sbrec_chassis *chassis = NULL;
> +
> +    struct ovsrec_open_vswitch_table *ovs_table =
> +        (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
> +            engine_get_input("OVS_open_vswitch", node));
> +
> +    const char *chassis_id = get_ovs_chassis_id(ovs_table);
> +
> +    struct ovsdb_idl_index *sbrec_chassis_by_name =
> +        engine_ovsdb_node_get_index(
> +                engine_get_input("SB_chassis", node),
> +                "name");
> +
> +    if (chassis_id) {
> +        chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
chassis_id);
> +    }
> +    if (chassis) {
> +        struct ed_type_runtime_data *rt_data = data;
> +        bool sb_readonly = !engine_get_context()->ovnsb_idl_txn;
> +        struct controller_engine_ctx *ctrl_ctx =
> +            engine_get_context()->client_ctx;
> +
> +        if (if_status_handle_claims(ctrl_ctx->if_mgr,
> +                                    &rt_data->lbinding_data,
> +                                    chassis,
> +                                    &rt_data->tracked_dp_bindings,
> +                                    sb_readonly)) {
> +            engine_set_node_state(node, EN_UPDATED);
> +            rt_data->tracked = true;
> +        }
> +    }
> +    return true;
> +}
> +
>  static bool
>  runtime_data_ovs_interface_shadow_handler(struct engine_node *node, void
*data)
>  {
> @@ -3528,6 +3595,7 @@ main(int argc, char *argv[])
>      stopwatch_create(VIF_PLUG_RUN_STOPWATCH_NAME, SW_MS);
>
>      /* Define inc-proc-engine nodes. */
> +    ENGINE_NODE(sb_ro, "sb_ro");
>      ENGINE_NODE_WITH_CLEAR_TRACK_DATA_IS_VALID(ct_zones, "ct_zones");
>      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(ovs_interface_shadow,
>                                        "ovs_interface_shadow");
> @@ -3664,6 +3732,7 @@ main(int argc, char *argv[])
>      engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
>                       ovs_interface_shadow_ovs_interface_handler);
>
> +    engine_add_input(&en_runtime_data, &en_sb_ro,
runtime_data_sb_ro_handler);
>      engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL);
>
>      engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL);
> @@ -4098,7 +4167,8 @@ main(int argc, char *argv[])
>                          runtime_data ? &runtime_data->lbinding_data :
NULL;
>                      stopwatch_start(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>                                      time_msec());
> -                    if_status_mgr_update(if_mgr, binding_data);
> +                    if_status_mgr_update(if_mgr, binding_data, chassis,
> +                                         !ovnsb_idl_txn);
>                      stopwatch_stop(IF_STATUS_MGR_UPDATE_STOPWATCH_NAME,
>                                     time_msec());
>
> diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
> index 335f9158c..8fd6ae6f7 100644
> --- a/tests/ovn-macros.at
> +++ b/tests/ovn-macros.at
> @@ -759,3 +759,15 @@ m4_define([OVN_FOR_EACH_NORTHD_WITHOUT_DP_GROUPS],
>       [m4_foreach([NORTHD_USE_DP_GROUPS], [no],
>         [m4_foreach([NORTHD_USE_PARALLELIZATION], [yes, no], [$1
>  ])])])])
> +
> +# OVN_NBCTL(NBCTL_COMMAND) adds NBCTL_COMMAND to list of commands to be
run by RUN_OVN_NBCTL().
> +m4_define([OVN_NBCTL], [
> +    command="${command} -- $1"
> +])
> +
> +# RUN_OVN_NBCTL() executes list of commands built by the OVN_NBCTL()
macro.
> +m4_define([RUN_OVN_NBCTL], [
> +    check ovn-nbctl ${command}
> +    unset command
> +])
> +
> diff --git a/tests/ovn.at b/tests/ovn.at
> index bfaa41962..94d16bac9 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -102,6 +102,18 @@ m4_divert_text([PREPARE_TESTS],
>           test 1 -le $(as $hv1 ovs-ofctl dump-flows br-int | grep -c
"output:$ofport")
>       ])
>     }
> +
> +   ovn_wait_remote_input_flows () {
> +     hv1=$1
> +     hv2=$2
> +     echo "$3: waiting for flows for remote input on $hv1"
> +     # Wait for a flow outputing  to remote input
> +     OVS_WAIT_UNTIL([
> +         ofport=$(as $hv1 ovs-vsctl --bare --columns ofport find
Interface name=ovn-${hv2}-0)
> +         echo "tunnel port=$ofport"
> +         test 1 -le $(as $hv1 ovs-ofctl dump-flows br-int | grep -c
"in_port=$ofport")
> +     ])
> +   }
>  ])
>
>  m4_define([OVN_CHECK_PACKETS],
> @@ -127,6 +139,8 @@ m4_define([OVN_WAIT_PATCH_PORT_FLOWS],
>  m4_define([OVN_WAIT_REMOTE_OUTPUT_FLOWS],
>    [ovn_wait_remote_output_flows "$1" "$2" "__file__:__line__"])
>
> +m4_define([OVN_WAIT_REMOTE_INPUT_FLOWS],
> +  [ovn_wait_remote_input_flows "$1" "$2" "__file__:__line__"])
>
>  AT_BANNER([OVN components])
>
> @@ -14056,6 +14070,11 @@ wait_column "$hv1_uuid" Port_Binding
requested_chassis logical_port=lsp0
>  wait_column "$hv2_uuid" Port_Binding additional_chassis logical_port=lsp0
>  wait_column "$hv2_uuid" Port_Binding requested_additional_chassis
logical_port=lsp0
>
> +# Check ovn-installed updated for main chassis
> +wait_for_ports_up
> +OVS_WAIT_UNTIL([test `as hv1 ovs-vsctl get Interface lsp0
external_ids:ovn-installed` = '"true"'])
> +OVS_WAIT_UNTIL([test x`as hv2 ovs-vsctl get Interface lsp0
external_ids:ovn-installed` = x])
> +
>  # Check that setting iface:encap-ip populates
Port_Binding:additional_encap
>  wait_row_count Encap 2 chassis_name=hv1
>  wait_row_count Encap 2 chassis_name=hv2
> @@ -14081,6 +14100,11 @@ wait_column "$hv2_uuid" Port_Binding
requested_chassis logical_port=lsp0
>  wait_column "" Port_Binding additional_chassis logical_port=lsp0
>  wait_column "" Port_Binding requested_additional_chassis
logical_port=lsp0
>
> +# Check ovn-installed updated for main chassis and not for other chassis
> +wait_for_ports_up
> +OVS_WAIT_UNTIL([test `as hv2 ovs-vsctl get Interface lsp0
external_ids:ovn-installed` = '"true"'])
> +OVS_WAIT_UNTIL([test x`as hv1 ovs-vsctl get Interface lsp0
external_ids:ovn-installed` = x])
> +
>  # Check that additional_encap is cleared
>  wait_column "" Port_Binding additional_encap logical_port=lsp0
>
> @@ -15370,7 +15394,10 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
table=65 | grep actions=output:1],
>  echo "verifying that lsp0 binding moves when requested-chassis is
changed"
>
>  ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
> +
> +# We might see multiple "Releasing lport ...", when sb is read only
> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
> +
>  wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
>
>  # (6) Chassis hv2 should add flows and hv1 should not.
> @@ -15416,7 +15443,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
table=0 | grep in_port=1], [0], [ig
>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep
actions=output:1], [0], [ignore])
>
>  check ovn-nbctl --wait=hv lsp-set-options lsp0
requested-chassis=non-existant-chassis
> -OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
> +OVS_WAIT_UNTIL([test 1 -le $(grep -c "Releasing lport lsp0 from this
chassis" hv1/ovn-controller.log)])
>  check ovn-nbctl --wait=hv sync
>  wait_column '' Port_Binding chasssi logical_port=lsp0
>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1],
[1], [])
> @@ -32418,3 +32445,119 @@ AT_CHECK([test $(ovn-sbctl list fdb | grep -c
"00:00:00:00:10:30") = 0])
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([recomputes])
> +ovn_start
> +
> +n_hv=4
> +
> +# Add chassis
> +net_add n1
> +for i in $(seq 1 $n_hv); do
> +    sim_add hv$i
> +    as hv$i
> +    check ovs-vsctl add-br br-phys
> +    ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +    ovn_attach n1 br-phys 192.168.0.$i 24 geneve
> +done
> +
> +add_switch_ports() {
> +    start_port=$1
> +    end_port=$2
> +    nb_hv=$3
> +    bulk_size=$4
> +    for ((i=start_port; i<end_port; )) do
> +        start_bulk=$i
> +        for hv in $(seq 1 $nb_hv); do
> +            end_bulk=$((start_bulk+bulk_size-1))
> +            for port in $(seq $start_bulk $end_bulk); do
> +                logical_switch_port=lsp${port}
> +                OVN_NBCTL(lsp-add ls1 $logical_switch_port)
> +                OVN_NBCTL(lsp-set-addresses $logical_switch_port dynamic)
> +            done
> +            start_bulk=$((end_bulk+1))
> +        done
> +        RUN_OVN_NBCTL()
> +
> +        start_bulk=$i
> +        for hv in $(seq 1 $nb_hv); do
> +            end_bulk=$((start_bulk+bulk_size-1))
> +            for port in $(seq $start_bulk $end_bulk); do
> +                logical_switch_port=lsp${port}
> +                as hv$hv ovs-vsctl \
> +                    --no-wait -- add-port br-int vif${port} \
> +                    -- set Interface vif${port}
external_ids:iface-id=$logical_switch_port
> +            done
> +            start_bulk=$((end_bulk+1))
> +        done
> +        i=$((end_bulk+1))
> +    done
> +}
> +check ovn-nbctl ls-add ls1
> +check ovn-nbctl set Logical_Switch ls1 other_config:subnet=10.1.0.0/16
> +check ovn-nbctl set Logical_Switch ls1
other_config:exclude_ips=10.1.255.254
> +
> +check ovn-nbctl lr-add lr1
> +check ovn-nbctl lsp-add ls1 lsp0 -- set Logical_Switch_Port lsp0
type=router options:router-port=lrp0 addresses=dynamic
> +check ovn-nbctl lrp-add lr1 lrp0 "f0:00:00:01:00:01" 10.1.255.254/16
> +check ovn-nbctl lr-nat-add lr1 snat 10.2.0.1 10.1.0.0/16
> +
> +lflow_run=0
> +check ovn-nbctl --wait=hv sync
> +
> +# Tunnel ports might not be added (yet) at this point on slow system.
> +# Wait for flows related to such ports to ensure those ports have been
added
> +# before we measure recomputes. Otherwise, ovs_interface handler might
be run
> +# afterwards for tunnel ports, causing recomputes.
> +for i in $(seq 1 $n_hv); do
> +    for j in $(seq 1 $n_hv); do
> +        if test $i != $j; then
> +            OVN_WAIT_REMOTE_INPUT_FLOWS(["hv$i"],["hv$j"])
> +        fi
> +    done
> +done
> +
> +for i in $(seq 1 $n_hv); do
> +    as hv$i
> +    lflow_run1=$(ovn-appctl -t ovn-controller coverage/read-counter
lflow_run)
> +    lflow_run=`expr $lflow_run1 + $lflow_run`
> +done
> +
> +add_switch_ports 1 1000 $n_hv 5
> +
> +wait_for_ports_up
> +check ovn-nbctl --wait=hv sync
> +
> +for i in $(seq 1 $n_hv); do
> +    pid=$(cat hv${i}/ovn-controller.pid)
> +    u=$((u+$(cat "/proc/$pid/stat" | awk '{print $14}')))
> +    s=$((s+$(cat "/proc/$pid/stat" | awk '{print $15}')))
> +done
> +
> +n_pid=$(cat northd/ovn-northd.pid)
> +n_u=$(cat "/proc/$pid/stat" | awk '{print $14}')
> +n_s=$(cat "/proc/$pid/stat" | awk '{print $15}')
> +
> +echo "Total Northd User Time: $n_u"
> +echo "Total Northd System Time: $n_s"
> +echo "Total Controller User Time: $u"
> +echo "Total Controller System Time: $s"
> +
> +lflow_run_end=0
> +for i in $(seq 1 $n_hv); do
> +    as hv$i
> +    lflow_run1=$(ovn-appctl -t ovn-controller coverage/read-counter
lflow_run)
> +    lflow_run_end=`expr $lflow_run1 + $lflow_run_end`
> +done
> +n_recomputes=`expr $lflow_run_end - $lflow_run`
> +echo "$n_recomputes recomputes"
> +
> +AT_CHECK([test $lflow_run_end == $lflow_run])
> +
> +for i in $(seq 2 $n_hv); do
> +    OVN_CLEANUP_SBOX([hv$i])
> +done
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])
> diff --git a/tests/perf-northd.at b/tests/perf-northd.at
> index 74b69e9d4..6ec196b36 100644
> --- a/tests/perf-northd.at
> +++ b/tests/perf-northd.at
> @@ -76,23 +76,6 @@ m4_define([PERF_RECORD_STOP], [
>      PERF_RECORD_STOPWATCH(ovn-northd-loop, ["Short term average"],
[Average (northd-loop in msec)])
>  ])
>
> -# OVN_NBCTL([NBCTL_COMMAND])
> -#
> -# Add NBCTL_COMMAND to list of commands to be run by RUN_OVN_NBCTL().
> -#
> -m4_define([OVN_NBCTL], [
> -    command="${command} -- $1"
> -])
> -
> -# RUN_OVN_NBCTL()
> -#
> -# Execute list of commands built by the OVN_NBCTL() macro.
> -#
> -m4_define([RUN_OVN_NBCTL], [
> -    check ovn-nbctl ${command}
> -    unset command
> -])
> -
>  OVS_START_SHELL_HELPERS
>  generate_subnet () {
>      local a=$(printf %d $(expr $1 / 256 + 10))
> --
> 2.31.1
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to