On Wed, Aug 10, 2022 at 4:25 AM Ihar Hrachyshka <[email protected]> wrote:
>
> When multiple chassis are fighting for the same port (requested-chassis
> is not set, e.g. for gateway ports), they may produce an unreasonable
> number of chassis field updates in a very short time frame (hundreds of
> updates in several seconds). This puts unnecessary load on OVN as well
> as any db notification consumers trying to keep up with the barrage.
>
> This patch throttles port claim attempts so that they don't happen more
> frequently than once per 0.5 seconds.
>
> Reported: https://bugzilla.redhat.com/show_bug.cgi?id=1974898
> Signed-off-by: Ihar Hrachyshka <[email protected]>
> Acked-by: Mark Michelson <[email protected]>

Thanks for the v6.  I applied both the patches to the main.

Numan

> ---
> v1: initial version
> v2: don't postpone claim when port is unclaimed (chassis == nil)
> v2: don't postpone claim as an additional chassis for a multichassis
> port
> v2: fixed memory corruption when modifying sset while iterating over
> it
> v3: rebased to resolve a git conflict
> v4: added opportunistic cleanup for claimed_ports shash
> v4: made a debug message in the new test case more intelligible
> v5: fixed a memleak in cleanup_claimed_port_timestamps (node->data not
>     freed)
> v6: rebased, added Mark's ack.
> v6: removed poll_wait calls from engine handler, moved them to
>     binding_wait.
> ---
>  controller/binding.c        | 127 ++++++++++++++++++++++++++++++++++--
>  controller/binding.h        |  10 +++
>  controller/ovn-controller.c |  49 ++++++++++++++
>  tests/ovn.at                |  41 ++++++++++++
>  4 files changed, 222 insertions(+), 5 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index 96a158225..9f5393a92 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -48,6 +48,67 @@ VLOG_DEFINE_THIS_MODULE(binding);
>
>  #define OVN_QOS_TYPE "linux-htb"
>
> +#define CLAIM_TIME_THRESHOLD_MS 500
> +
> +struct claimed_port {
> +    long long int last_claimed;
> +};
> +
> +static struct shash _claimed_ports = SHASH_INITIALIZER(&_claimed_ports);
> +static struct sset _postponed_ports = SSET_INITIALIZER(&_postponed_ports);
> +
> +struct sset *
> +get_postponed_ports(void)
> +{
> +    return &_postponed_ports;
> +}
> +
> +static long long int
> +get_claim_timestamp(const char *port_name)
> +{
> +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> +    return cp ? cp->last_claimed : 0;
> +}
> +
> +static void
> +register_claim_timestamp(const char *port_name, long long int t)
> +{
> +    struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> +    if (!cp) {
> +        cp = xzalloc(sizeof *cp);
> +        shash_add(&_claimed_ports, port_name, cp);
> +    }
> +    cp->last_claimed = t;
> +}
> +
> +static void
> +cleanup_claimed_port_timestamps(void)
> +{
> +    long long int now = time_msec();
> +    struct shash_node *node;
> +    SHASH_FOR_EACH_SAFE (node, &_claimed_ports) {
> +        struct claimed_port *cp = (struct claimed_port *) node->data;
> +        if (now - cp->last_claimed >= 5 * CLAIM_TIME_THRESHOLD_MS) {
> +            free(cp);
> +            shash_delete(&_claimed_ports, node);
> +        }
> +    }
> +}
> +
> +/* Schedule any pending binding work. Runs with in the main ovn-controller
> + * thread context.*/
> +void
> +binding_wait(void)
> +{
> +    const char *port_name;
> +    SSET_FOR_EACH (port_name, &_postponed_ports) {
> +        long long int t = get_claim_timestamp(port_name);
> +        if (t) {
> +            poll_timer_wait_until(t + CLAIM_TIME_THRESHOLD_MS);
> +        }
> +    }
> +}
> +
>  struct qos_queue {
>      struct hmap_node node;
>      uint32_t queue_id;
> @@ -996,6 +1057,21 @@ remove_additional_chassis(const struct 
> sbrec_port_binding *pb,
>      remove_additional_encap_for_chassis(pb, chassis_rec);
>  }
>
> +static bool
> +lport_maybe_postpone(const char *port_name, long long int now,
> +                     struct sset *postponed_ports)
> +{
> +    long long int last_claimed = get_claim_timestamp(port_name);
> +    if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
> +        return false;
> +    }
> +
> +    sset_add(postponed_ports, port_name);
> +    VLOG_DBG("Postponed claim on logical port %s.", port_name);
> +
> +    return true;
> +}
> +
>  /* Returns false if lport is not claimed due to 'sb_readonly'.
>   * Returns true otherwise.
>   */
> @@ -1006,7 +1082,8 @@ claim_lport(const struct sbrec_port_binding *pb,
>              const struct ovsrec_interface *iface_rec,
>              bool sb_readonly, bool notify_up,
>              struct hmap *tracked_datapaths,
> -            struct if_status_mgr *if_mgr)
> +            struct if_status_mgr *if_mgr,
> +            struct sset *postponed_ports)
>  {
>      if (!sb_readonly) {
>          claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr);
> @@ -1021,7 +1098,12 @@ claim_lport(const struct sbrec_port_binding *pb,
>                  return false;
>              }
>
> +            long long int now = time_msec();
>              if (pb->chassis) {
> +                if (lport_maybe_postpone(pb->logical_port, now,
> +                                         postponed_ports)) {
> +                    return true;
> +                }
>                  VLOG_INFO("Changing chassis for lport %s from %s to %s.",
>                          pb->logical_port, pb->chassis->name,
>                          chassis_rec->name);
> @@ -1038,6 +1120,9 @@ claim_lport(const struct sbrec_port_binding *pb,
>                  remove_additional_chassis(pb, chassis_rec);
>              }
>              update_tracked = true;
> +
> +            register_claim_timestamp(pb->logical_port, now);
> +            sset_find_and_delete(postponed_ports, pb->logical_port);
>          }
>      } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
>          if (!is_additional_chassis(pb, chassis_rec)) {
> @@ -1060,8 +1145,10 @@ claim_lport(const struct sbrec_port_binding *pb,
>          }
>      }
>
> -    if (update_tracked && tracked_datapaths) {
> -        update_lport_tracking(pb, tracked_datapaths, true);
> +    if (update_tracked) {
> +        if (tracked_datapaths) {
> +            update_lport_tracking(pb, tracked_datapaths, true);
> +        }
>      }
>
>      /* Check if the port encap binding, if any, has changed */
> @@ -1223,7 +1310,8 @@ consider_vif_lport_(const struct sbrec_port_binding *pb,
>                               b_lport->lbinding->iface,
>                               !b_ctx_in->ovnsb_idl_txn,
>                               !parent_pb, b_ctx_out->tracked_dp_bindings,
> -                             b_ctx_out->if_mgr)){
> +                             b_ctx_out->if_mgr,
> +                             b_ctx_out->postponed_ports)) {
>                  return false;
>              }
>
> @@ -1519,7 +1607,8 @@ consider_nonvif_lport_(const struct sbrec_port_binding 
> *pb,
>          return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL,
>                             !b_ctx_in->ovnsb_idl_txn, false,
>                             b_ctx_out->tracked_dp_bindings,
> -                           b_ctx_out->if_mgr);
> +                           b_ctx_out->if_mgr,
> +                           b_ctx_out->postponed_ports);
>      }
>
>      if (pb->chassis == b_ctx_in->chassis_rec ||
> @@ -1843,6 +1932,8 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct 
> binding_ctx_out *b_ctx_out)
>      }
>
>      destroy_qos_map(&qos_map);
> +
> +    cleanup_claimed_port_timestamps();
>  }
>
>  /* Returns true if the database is all cleaned up, false if more work is
> @@ -2740,6 +2831,25 @@ delete_done:
>          }
>      }
>
> +    /* Also handle any postponed (throttled) ports. */
> +    const char *port_name;
> +    struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports);
> +    sset_clone(&postponed_ports, b_ctx_out->postponed_ports);
> +    SSET_FOR_EACH (port_name, &postponed_ports) {
> +        pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name,
> +                                  port_name);
> +        if (!pb) {
> +            sset_find_and_delete(b_ctx_out->postponed_ports, port_name);
> +            continue;
> +        }
> +        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr);
> +        if (!handled) {
> +            break;
> +        }
> +    }
> +    sset_destroy(&postponed_ports);
> +    cleanup_claimed_port_timestamps();
> +
>      if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn,
>                                                 b_ctx_in->port_table,
>                                                 b_ctx_in->qos_table,
> @@ -3182,3 +3292,10 @@ ovs_iface_matches_lport_iface_id_ver(const struct 
> ovsrec_interface *iface,
>
>      return true;
>  }
> +
> +void
> +binding_destroy(void)
> +{
> +    shash_destroy_free_data(&_claimed_ports);
> +    sset_clear(&_postponed_ports);
> +}
> diff --git a/controller/binding.h b/controller/binding.h
> index 1fed06674..b2360bac2 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -103,6 +103,8 @@ struct binding_ctx_out {
>      struct hmap *tracked_dp_bindings;
>
>      struct if_status_mgr *if_mgr;
> +
> +    struct sset *postponed_ports;
>  };
>
>  /* Local bindings. binding.c module binds the logical port (represented by
> @@ -219,4 +221,12 @@ struct binding_lport {
>      size_t n_port_security;
>  };
>
> +struct sset *get_postponed_ports(void);
> +
> +/* Schedule any pending binding work. */
> +void binding_wait(void);
> +
> +/* Clean up module state. */
> +void binding_destroy(void);
> +
>  #endif /* controller/binding.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 5449743e8..8268726e6 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1175,6 +1175,41 @@ en_activated_ports_run(struct engine_node *node, void 
> *data_)
>      engine_set_node_state(node, state);
>  }
>
> +struct ed_type_postponed_ports {
> +    struct sset *postponed_ports;
> +};
> +
> +static void *
> +en_postponed_ports_init(struct engine_node *node OVS_UNUSED,
> +                        struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct ed_type_postponed_ports *data = xzalloc(sizeof *data);
> +    data->postponed_ports = get_postponed_ports();
> +    return data;
> +}
> +
> +static void
> +en_postponed_ports_cleanup(void *data_)
> +{
> +    struct ed_type_postponed_ports *data = data_;
> +    if (!data->postponed_ports) {
> +        return;
> +    }
> +    data->postponed_ports = NULL;
> +}
> +
> +static void
> +en_postponed_ports_run(struct engine_node *node, void *data_)
> +{
> +    struct ed_type_postponed_ports *data = data_;
> +    enum engine_node_state state = EN_UNCHANGED;
> +    data->postponed_ports = get_postponed_ports();
> +    if (!sset_is_empty(data->postponed_ports)) {
> +        state = EN_UPDATED;
> +    }
> +    engine_set_node_state(node, state);
> +}
> +
>  struct ed_type_runtime_data {
>      /* Contains "struct local_datapath" nodes. */
>      struct hmap local_datapaths;
> @@ -1205,6 +1240,8 @@ struct ed_type_runtime_data {
>
>      struct shash local_active_ports_ipv6_pd;
>      struct shash local_active_ports_ras;
> +
> +    struct sset *postponed_ports;
>  };
>
>  /* struct ed_type_runtime_data has the below members for tracking the
> @@ -1405,6 +1442,7 @@ init_binding_ctx(struct engine_node *node,
>      b_ctx_out->egress_ifaces = &rt_data->egress_ifaces;
>      b_ctx_out->lbinding_data = &rt_data->lbinding_data;
>      b_ctx_out->local_iface_ids = &rt_data->local_iface_ids;
> +    b_ctx_out->postponed_ports = rt_data->postponed_ports;
>      b_ctx_out->tracked_dp_bindings = NULL;
>      b_ctx_out->if_mgr = ctrl_ctx->if_mgr;
>  }
> @@ -1442,6 +1480,10 @@ en_runtime_data_run(struct engine_node *node, void 
> *data)
>          local_binding_data_init(&rt_data->lbinding_data);
>      }
>
> +    struct ed_type_postponed_ports *pp_data =
> +        engine_get_input_data("postponed_ports", node);
> +    rt_data->postponed_ports = pp_data->postponed_ports;
> +
>      struct binding_ctx_in b_ctx_in;
>      struct binding_ctx_out b_ctx_out;
>      init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out);
> @@ -3542,6 +3584,7 @@ main(int argc, char *argv[])
>      ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
>      ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
>      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports, "activated_ports");
> +    ENGINE_NODE(postponed_ports, "postponed_ports");
>      ENGINE_NODE(pflow_output, "physical_flow_output");
>      ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output, "logical_flow_output");
>      ENGINE_NODE(flow_output, "flow_output");
> @@ -3681,6 +3724,9 @@ main(int argc, char *argv[])
>                       runtime_data_sb_datapath_binding_handler);
>      engine_add_input(&en_runtime_data, &en_sb_port_binding,
>                       runtime_data_sb_port_binding_handler);
> +    /* Reuse the same handler for any previously postponed ports. */
> +    engine_add_input(&en_runtime_data, &en_postponed_ports,
> +                     runtime_data_sb_port_binding_handler);
>
>      /* The OVS interface handler for runtime_data changes MUST be executed
>       * after the sb_port_binding_handler as port_binding deletes must be
> @@ -4191,6 +4237,8 @@ main(int argc, char *argv[])
>                  ofctrl_wait();
>                  pinctrl_wait(ovnsb_idl_txn);
>              }
> +
> +            binding_wait();
>          }
>
>          if (!northd_version_match && br_int) {
> @@ -4318,6 +4366,7 @@ loop_done:
>      lflow_destroy();
>      ofctrl_destroy();
>      pinctrl_destroy();
> +    binding_destroy();
>      patch_destroy();
>      if_status_mgr_destroy(if_mgr);
>      shash_destroy(&vif_plug_deleted_iface_ids);
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 23b205791..c8cc8cde4 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -15274,6 +15274,47 @@ OVN_CLEANUP([hv1],[hv2])
>  AT_CLEANUP
>  ])
>
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([tug-of-war between two chassis for the same port])
> +ovn_start
> +
> +ovn-nbctl ls-add ls0
> +ovn-nbctl lsp-add ls0 lsp0
> +
> +net_add n1
> +for i in 1 2; do
> +    sim_add hv$i
> +    as hv$i
> +    ovs-vsctl add-br br-phys
> +    ovn_attach n1 br-phys 192.168.0.$i
> +done
> +
> +for i in 1 2; do
> +    as hv$i
> +    ovs-vsctl -- add-port br-int vif \
> +              -- set Interface vif external-ids:iface-id=lsp0
> +done
> +
> +# give controllers some time to fight for the port binding
> +sleep 3
> +
> +# calculate the number of port claims registered by each fighting chassis
> +hv1_claims=$(as hv1 grep -c 'Claiming\|Changing chassis' 
> hv1/ovn-controller.log)
> +hv2_claims=$(as hv2 grep -c 'Claiming\|Changing chassis' 
> hv2/ovn-controller.log)
> +
> +echo "hv1 claimed ${hv1_claims} times"
> +echo "hv2 claimed ${hv2_claims} times"
> +
> +# check that neither registered an outrageous number of port claims
> +max_claims=10
> +AT_CHECK([test "${hv1_claims}" -le "${max_claims}"], [0], [])
> +AT_CHECK([test "${hv2_claims}" -le "${max_claims}"], [0], [])
> +
> +OVN_CLEANUP([hv1],[hv2])
> +
> +AT_CLEANUP
> +])
> +
>  OVN_FOR_EACH_NORTHD([
>  AT_SETUP([options:requested-chassis with hostname])
>
> --
> 2.34.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to