On Wed, Aug 10, 2022 at 4:25 AM Ihar Hrachyshka <[email protected]> wrote:
>
> When multiple chassis are fighting for the same port (requested-chassis
> is not set, e.g. for gateway ports), they may produce an unreasonable
> number of chassis field updates in a very short time frame (hundreds of
> updates in several seconds). This puts unnecessary load on OVN as well
> as any db notification consumers trying to keep up with the barrage.
>
> This patch throttles port claim attempts so that they don't happen more
> frequently than once per 0.5 seconds.
>
> Reported: https://bugzilla.redhat.com/show_bug.cgi?id=1974898
> Signed-off-by: Ihar Hrachyshka <[email protected]>
> Acked-by: Mark Michelson <[email protected]>
Thanks for the v6. I applied both the patches to the main.
Numan
> ---
> v1: initial version
> v2: don't postpone claim when port is unclaimed (chassis == nil)
> v2: don't postpone claim as an additional chassis for a multichassis
> port
> v2: fixed memory corruption when modifying sset while iterating over
> it
> v3: rebased to resolve a git conflict
> v4: added opportunistic cleanup for claimed_ports shash
> v4: made a debug message in the new test case more intelligible
> v5: fixed a memleak in cleanup_claimed_port_timestamps (node->data not
> freed)
> v6: rebased, added Mark's ack.
> v6: removed poll_wait calls from engine handler, moved them to
> binding_wait.
> ---
> controller/binding.c | 127 ++++++++++++++++++++++++++++++++++--
> controller/binding.h | 10 +++
> controller/ovn-controller.c | 49 ++++++++++++++
> tests/ovn.at | 41 ++++++++++++
> 4 files changed, 222 insertions(+), 5 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index 96a158225..9f5393a92 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -48,6 +48,67 @@ VLOG_DEFINE_THIS_MODULE(binding);
>
> #define OVN_QOS_TYPE "linux-htb"
>
> +#define CLAIM_TIME_THRESHOLD_MS 500
> +
> +struct claimed_port {
> + long long int last_claimed;
> +};
> +
> +static struct shash _claimed_ports = SHASH_INITIALIZER(&_claimed_ports);
> +static struct sset _postponed_ports = SSET_INITIALIZER(&_postponed_ports);
> +
> +struct sset *
> +get_postponed_ports(void)
> +{
> + return &_postponed_ports;
> +}
> +
> +static long long int
> +get_claim_timestamp(const char *port_name)
> +{
> + struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> + return cp ? cp->last_claimed : 0;
> +}
> +
> +static void
> +register_claim_timestamp(const char *port_name, long long int t)
> +{
> + struct claimed_port *cp = shash_find_data(&_claimed_ports, port_name);
> + if (!cp) {
> + cp = xzalloc(sizeof *cp);
> + shash_add(&_claimed_ports, port_name, cp);
> + }
> + cp->last_claimed = t;
> +}
> +
> +static void
> +cleanup_claimed_port_timestamps(void)
> +{
> + long long int now = time_msec();
> + struct shash_node *node;
> + SHASH_FOR_EACH_SAFE (node, &_claimed_ports) {
> + struct claimed_port *cp = (struct claimed_port *) node->data;
> + if (now - cp->last_claimed >= 5 * CLAIM_TIME_THRESHOLD_MS) {
> + free(cp);
> + shash_delete(&_claimed_ports, node);
> + }
> + }
> +}
> +
> +/* Schedule any pending binding work. Runs with in the main ovn-controller
> + * thread context.*/
> +void
> +binding_wait(void)
> +{
> + const char *port_name;
> + SSET_FOR_EACH (port_name, &_postponed_ports) {
> + long long int t = get_claim_timestamp(port_name);
> + if (t) {
> + poll_timer_wait_until(t + CLAIM_TIME_THRESHOLD_MS);
> + }
> + }
> +}
> +
> struct qos_queue {
> struct hmap_node node;
> uint32_t queue_id;
> @@ -996,6 +1057,21 @@ remove_additional_chassis(const struct
> sbrec_port_binding *pb,
> remove_additional_encap_for_chassis(pb, chassis_rec);
> }
>
> +static bool
> +lport_maybe_postpone(const char *port_name, long long int now,
> + struct sset *postponed_ports)
> +{
> + long long int last_claimed = get_claim_timestamp(port_name);
> + if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
> + return false;
> + }
> +
> + sset_add(postponed_ports, port_name);
> + VLOG_DBG("Postponed claim on logical port %s.", port_name);
> +
> + return true;
> +}
> +
> /* Returns false if lport is not claimed due to 'sb_readonly'.
> * Returns true otherwise.
> */
> @@ -1006,7 +1082,8 @@ claim_lport(const struct sbrec_port_binding *pb,
> const struct ovsrec_interface *iface_rec,
> bool sb_readonly, bool notify_up,
> struct hmap *tracked_datapaths,
> - struct if_status_mgr *if_mgr)
> + struct if_status_mgr *if_mgr,
> + struct sset *postponed_ports)
> {
> if (!sb_readonly) {
> claimed_lport_set_up(pb, parent_pb, chassis_rec, notify_up, if_mgr);
> @@ -1021,7 +1098,12 @@ claim_lport(const struct sbrec_port_binding *pb,
> return false;
> }
>
> + long long int now = time_msec();
> if (pb->chassis) {
> + if (lport_maybe_postpone(pb->logical_port, now,
> + postponed_ports)) {
> + return true;
> + }
> VLOG_INFO("Changing chassis for lport %s from %s to %s.",
> pb->logical_port, pb->chassis->name,
> chassis_rec->name);
> @@ -1038,6 +1120,9 @@ claim_lport(const struct sbrec_port_binding *pb,
> remove_additional_chassis(pb, chassis_rec);
> }
> update_tracked = true;
> +
> + register_claim_timestamp(pb->logical_port, now);
> + sset_find_and_delete(postponed_ports, pb->logical_port);
> }
> } else if (can_bind == CAN_BIND_AS_ADDITIONAL) {
> if (!is_additional_chassis(pb, chassis_rec)) {
> @@ -1060,8 +1145,10 @@ claim_lport(const struct sbrec_port_binding *pb,
> }
> }
>
> - if (update_tracked && tracked_datapaths) {
> - update_lport_tracking(pb, tracked_datapaths, true);
> + if (update_tracked) {
> + if (tracked_datapaths) {
> + update_lport_tracking(pb, tracked_datapaths, true);
> + }
> }
>
> /* Check if the port encap binding, if any, has changed */
> @@ -1223,7 +1310,8 @@ consider_vif_lport_(const struct sbrec_port_binding *pb,
> b_lport->lbinding->iface,
> !b_ctx_in->ovnsb_idl_txn,
> !parent_pb, b_ctx_out->tracked_dp_bindings,
> - b_ctx_out->if_mgr)){
> + b_ctx_out->if_mgr,
> + b_ctx_out->postponed_ports)) {
> return false;
> }
>
> @@ -1519,7 +1607,8 @@ consider_nonvif_lport_(const struct sbrec_port_binding
> *pb,
> return claim_lport(pb, NULL, b_ctx_in->chassis_rec, NULL,
> !b_ctx_in->ovnsb_idl_txn, false,
> b_ctx_out->tracked_dp_bindings,
> - b_ctx_out->if_mgr);
> + b_ctx_out->if_mgr,
> + b_ctx_out->postponed_ports);
> }
>
> if (pb->chassis == b_ctx_in->chassis_rec ||
> @@ -1843,6 +1932,8 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct
> binding_ctx_out *b_ctx_out)
> }
>
> destroy_qos_map(&qos_map);
> +
> + cleanup_claimed_port_timestamps();
> }
>
> /* Returns true if the database is all cleaned up, false if more work is
> @@ -2740,6 +2831,25 @@ delete_done:
> }
> }
>
> + /* Also handle any postponed (throttled) ports. */
> + const char *port_name;
> + struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports);
> + sset_clone(&postponed_ports, b_ctx_out->postponed_ports);
> + SSET_FOR_EACH (port_name, &postponed_ports) {
> + pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name,
> + port_name);
> + if (!pb) {
> + sset_find_and_delete(b_ctx_out->postponed_ports, port_name);
> + continue;
> + }
> + handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr);
> + if (!handled) {
> + break;
> + }
> + }
> + sset_destroy(&postponed_ports);
> + cleanup_claimed_port_timestamps();
> +
> if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn,
> b_ctx_in->port_table,
> b_ctx_in->qos_table,
> @@ -3182,3 +3292,10 @@ ovs_iface_matches_lport_iface_id_ver(const struct
> ovsrec_interface *iface,
>
> return true;
> }
> +
> +void
> +binding_destroy(void)
> +{
> + shash_destroy_free_data(&_claimed_ports);
> + sset_clear(&_postponed_ports);
> +}
> diff --git a/controller/binding.h b/controller/binding.h
> index 1fed06674..b2360bac2 100644
> --- a/controller/binding.h
> +++ b/controller/binding.h
> @@ -103,6 +103,8 @@ struct binding_ctx_out {
> struct hmap *tracked_dp_bindings;
>
> struct if_status_mgr *if_mgr;
> +
> + struct sset *postponed_ports;
> };
>
> /* Local bindings. binding.c module binds the logical port (represented by
> @@ -219,4 +221,12 @@ struct binding_lport {
> size_t n_port_security;
> };
>
> +struct sset *get_postponed_ports(void);
> +
> +/* Schedule any pending binding work. */
> +void binding_wait(void);
> +
> +/* Clean up module state. */
> +void binding_destroy(void);
> +
> #endif /* controller/binding.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 5449743e8..8268726e6 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1175,6 +1175,41 @@ en_activated_ports_run(struct engine_node *node, void
> *data_)
> engine_set_node_state(node, state);
> }
>
> +struct ed_type_postponed_ports {
> + struct sset *postponed_ports;
> +};
> +
> +static void *
> +en_postponed_ports_init(struct engine_node *node OVS_UNUSED,
> + struct engine_arg *arg OVS_UNUSED)
> +{
> + struct ed_type_postponed_ports *data = xzalloc(sizeof *data);
> + data->postponed_ports = get_postponed_ports();
> + return data;
> +}
> +
> +static void
> +en_postponed_ports_cleanup(void *data_)
> +{
> + struct ed_type_postponed_ports *data = data_;
> + if (!data->postponed_ports) {
> + return;
> + }
> + data->postponed_ports = NULL;
> +}
> +
> +static void
> +en_postponed_ports_run(struct engine_node *node, void *data_)
> +{
> + struct ed_type_postponed_ports *data = data_;
> + enum engine_node_state state = EN_UNCHANGED;
> + data->postponed_ports = get_postponed_ports();
> + if (!sset_is_empty(data->postponed_ports)) {
> + state = EN_UPDATED;
> + }
> + engine_set_node_state(node, state);
> +}
> +
> struct ed_type_runtime_data {
> /* Contains "struct local_datapath" nodes. */
> struct hmap local_datapaths;
> @@ -1205,6 +1240,8 @@ struct ed_type_runtime_data {
>
> struct shash local_active_ports_ipv6_pd;
> struct shash local_active_ports_ras;
> +
> + struct sset *postponed_ports;
> };
>
> /* struct ed_type_runtime_data has the below members for tracking the
> @@ -1405,6 +1442,7 @@ init_binding_ctx(struct engine_node *node,
> b_ctx_out->egress_ifaces = &rt_data->egress_ifaces;
> b_ctx_out->lbinding_data = &rt_data->lbinding_data;
> b_ctx_out->local_iface_ids = &rt_data->local_iface_ids;
> + b_ctx_out->postponed_ports = rt_data->postponed_ports;
> b_ctx_out->tracked_dp_bindings = NULL;
> b_ctx_out->if_mgr = ctrl_ctx->if_mgr;
> }
> @@ -1442,6 +1480,10 @@ en_runtime_data_run(struct engine_node *node, void
> *data)
> local_binding_data_init(&rt_data->lbinding_data);
> }
>
> + struct ed_type_postponed_ports *pp_data =
> + engine_get_input_data("postponed_ports", node);
> + rt_data->postponed_ports = pp_data->postponed_ports;
> +
> struct binding_ctx_in b_ctx_in;
> struct binding_ctx_out b_ctx_out;
> init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out);
> @@ -3542,6 +3584,7 @@ main(int argc, char *argv[])
> ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports, "activated_ports");
> + ENGINE_NODE(postponed_ports, "postponed_ports");
> ENGINE_NODE(pflow_output, "physical_flow_output");
> ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output, "logical_flow_output");
> ENGINE_NODE(flow_output, "flow_output");
> @@ -3681,6 +3724,9 @@ main(int argc, char *argv[])
> runtime_data_sb_datapath_binding_handler);
> engine_add_input(&en_runtime_data, &en_sb_port_binding,
> runtime_data_sb_port_binding_handler);
> + /* Reuse the same handler for any previously postponed ports. */
> + engine_add_input(&en_runtime_data, &en_postponed_ports,
> + runtime_data_sb_port_binding_handler);
>
> /* The OVS interface handler for runtime_data changes MUST be executed
> * after the sb_port_binding_handler as port_binding deletes must be
> @@ -4191,6 +4237,8 @@ main(int argc, char *argv[])
> ofctrl_wait();
> pinctrl_wait(ovnsb_idl_txn);
> }
> +
> + binding_wait();
> }
>
> if (!northd_version_match && br_int) {
> @@ -4318,6 +4366,7 @@ loop_done:
> lflow_destroy();
> ofctrl_destroy();
> pinctrl_destroy();
> + binding_destroy();
> patch_destroy();
> if_status_mgr_destroy(if_mgr);
> shash_destroy(&vif_plug_deleted_iface_ids);
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 23b205791..c8cc8cde4 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -15274,6 +15274,47 @@ OVN_CLEANUP([hv1],[hv2])
> AT_CLEANUP
> ])
>
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([tug-of-war between two chassis for the same port])
> +ovn_start
> +
> +ovn-nbctl ls-add ls0
> +ovn-nbctl lsp-add ls0 lsp0
> +
> +net_add n1
> +for i in 1 2; do
> + sim_add hv$i
> + as hv$i
> + ovs-vsctl add-br br-phys
> + ovn_attach n1 br-phys 192.168.0.$i
> +done
> +
> +for i in 1 2; do
> + as hv$i
> + ovs-vsctl -- add-port br-int vif \
> + -- set Interface vif external-ids:iface-id=lsp0
> +done
> +
> +# give controllers some time to fight for the port binding
> +sleep 3
> +
> +# calculate the number of port claims registered by each fighting chassis
> +hv1_claims=$(as hv1 grep -c 'Claiming\|Changing chassis'
> hv1/ovn-controller.log)
> +hv2_claims=$(as hv2 grep -c 'Claiming\|Changing chassis'
> hv2/ovn-controller.log)
> +
> +echo "hv1 claimed ${hv1_claims} times"
> +echo "hv2 claimed ${hv2_claims} times"
> +
> +# check that neither registered an outrageous number of port claims
> +max_claims=10
> +AT_CHECK([test "${hv1_claims}" -le "${max_claims}"], [0], [])
> +AT_CHECK([test "${hv2_claims}" -le "${max_claims}"], [0], [])
> +
> +OVN_CLEANUP([hv1],[hv2])
> +
> +AT_CLEANUP
> +])
> +
> OVN_FOR_EACH_NORTHD([
> AT_SETUP([options:requested-chassis with hostname])
>
> --
> 2.34.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev