On Fri, Jun 3, 2022 at 1:20 PM Ihar Hrachyshka <[email protected]> wrote:
>
> When options:activation-strategy is set to "rarp" for LSP, when used in
> combination with multiple chassis names listed in
> options:requested-chassis, additional chassis will install special flows
> that would block all ingress and egress traffic for the port until a
> special activation event happens.
>
> For "rarp" strategy, an observation of a RARP packet sent from the port
> on the additional chassis is such an event. When it occurs, a special
> flow passes control to a controller() action handler that eventually
> removes the installed blocking flows and also marks the port as
> options:additional-chassis-activated in southbound db.
>
> This feature is useful in live migration scenarios where it's not
> advisable to unlock the destination port location prematurely to avoid
> duplicate packets originating from the port.
>
> Signed-off-by: Ihar Hrachyshka <[email protected]>
> ---
> v13: use resubmit() action to reinject RARP into pipeline.
> v13: lock / unlock pinctrl_mutex in functions invoked from main thread.
> v13: db_is_port_activated->lport_is_activated_by_activation_strategy.
Thanks Ihar for the revision. Sorry that I didn't follow up in time after
v6. Please see below some quick comments on this version regarding I-P
(incremental processing):
> ---
> NEWS | 2 +
> controller/lport.c | 22 +++
> controller/lport.h | 3 +
> controller/ovn-controller.c | 87 +++++++++
> controller/physical.c | 94 ++++++++++
> controller/pinctrl.c | 145 +++++++++++++-
> controller/pinctrl.h | 13 ++
> include/ovn/actions.h | 3 +
> northd/northd.c | 10 +
> northd/ovn-northd.c | 5 +-
> ovn-nb.xml | 11 ++
> ovn-sb.xml | 15 ++
> tests/ovn.at | 365 ++++++++++++++++++++++++++++++++++++
> 13 files changed, 772 insertions(+), 3 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 2ee283a56..7c54670ed 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -29,6 +29,8 @@ OVN v22.06.0 - XX XXX XXXX
> - Added support for setting the Next server IP in the DHCP header
> using the private DHCP option - 253 in native OVN DHCPv4 responder.
> - Support list of chassis for Logical_Switch_Port:options:requested-chassis.
> + - Support Logical_Switch_Port:options:activation-strategy for live migration
> + scenarios.
>
> OVN v22.03.0 - 11 Mar 2022
> --------------------------
> diff --git a/controller/lport.c b/controller/lport.c
> index bf55d83f2..add7e91aa 100644
> --- a/controller/lport.c
> +++ b/controller/lport.c
> @@ -197,3 +197,25 @@ get_peer_lport(const struct sbrec_port_binding *pb,
> peer_name);
> return (peer && peer->datapath) ? peer : NULL;
> }
> +
> +bool
> +lport_is_activated_by_activation_strategy(const struct
sbrec_port_binding *pb,
> + const struct sbrec_chassis
*chassis)
> +{
> + const char *activated_chassis = smap_get(&pb->options,
> +
"additional-chassis-activated");
> + if (activated_chassis) {
> + char *save_ptr;
> + char *tokstr = xstrdup(activated_chassis);
> + for (const char *chassis_name = strtok_r(tokstr, ",", &save_ptr);
> + chassis_name != NULL;
> + chassis_name = strtok_r(NULL, ",", &save_ptr)) {
> + if (!strcmp(chassis_name, chassis->name)) {
> + free(tokstr);
> + return true;
> + }
> + }
> + free(tokstr);
> + }
> + return false;
> +}
> diff --git a/controller/lport.h b/controller/lport.h
> index 115881655..644c67255 100644
> --- a/controller/lport.h
> +++ b/controller/lport.h
> @@ -70,4 +70,7 @@ const struct sbrec_port_binding *lport_get_peer(
> const struct sbrec_port_binding *lport_get_l3gw_peer(
> const struct sbrec_port_binding *,
> struct ovsdb_idl_index *sbrec_port_binding_by_name);
> +bool
> +lport_is_activated_by_activation_strategy(const struct
sbrec_port_binding *pb,
> + const struct sbrec_chassis
*chassis);
> #endif /* controller/lport.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index b597c0e37..a37dfcb78 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1047,6 +1047,50 @@ en_ofctrl_is_connected_run(struct engine_node
*node, void *data)
> engine_set_node_state(node, EN_UNCHANGED);
> }
>
> +struct ed_type_activated_ports {
> + struct ovs_list *activated_ports;
> +};
> +
> +static void *
> +en_activated_ports_init(struct engine_node *node OVS_UNUSED,
> + struct engine_arg *arg OVS_UNUSED)
> +{
> + struct ed_type_activated_ports *data = xzalloc(sizeof *data);
> + data->activated_ports = NULL;
> + return data;
> +}
> +
> +static void
> +en_activated_ports_cleanup(void *data_)
> +{
> + struct ed_type_activated_ports *data = data_;
> +
> + struct activated_port *pp;
> + if (!data->activated_ports) {
> + return;
> + }
> +
> + LIST_FOR_EACH_POP (pp, list, data->activated_ports) {
> + free(pp);
> + }
> + free(data->activated_ports);
> + data->activated_ports = NULL;
> +}
> +
> +static void
> +en_activated_ports_run(struct engine_node *node, void *data_)
> +{
> + struct ed_type_activated_ports *data = data_;
> +
> + en_activated_ports_cleanup(data);
> + data->activated_ports = get_activated_ports();
> + if (data->activated_ports) {
> + engine_set_node_state(node, EN_UNCHANGED);
Sorry, I don't understand this part. If there are activated_ports, why is
the node state UNCHANGED? Shouldn't it be UPDATED so that runtime_data is
triggered to handle them?
> + } else {
> + engine_set_node_state(node, EN_UPDATED);
> + }
> +}
> +
> /* This engine node is to wrap the OVS_interface input and maintain a
copy of
> * the old version of data for the column external_ids.
> *
> @@ -1421,6 +1465,44 @@ en_runtime_data_run(struct engine_node *node, void
*data)
> engine_set_node_state(node, EN_UPDATED);
> }
>
> +static bool
> +runtime_data_activated_ports_handler(struct engine_node *node, void
*data)
> +{
> + struct ed_type_runtime_data *rt_data = data;
> +
> + struct ed_type_activated_ports *ap =
> + engine_get_input_data("activated_ports", node);
> +
> + if (!ap->activated_ports) {
> + return true;
> + }
> +
> + struct activated_port *pp;
> + LIST_FOR_EACH_POP (pp, list, ap->activated_ports) {
Here the runtime_data node not only reads the activated_ports data but also
writes to (clears) it. This is not expected of a change handler in I-P. A
change handler is not guaranteed to be executed in an engine run: it is
possible that, in the same run, some other change triggers an I-P engine
recompute, which calls only the xxx_run() functions and not the handlers.
In that case, wouldn't the activated_ports be left uncleared?
In this case, activated_ports is not like the usual engine data. It is in
fact tracked changes, and there is no real persistent data for this engine
node. So it seems the right place to clear it is the clear_tracked_data()
member function.
There are other reasons why a change handler is not the right place to
clear its input changes. For example, there can be other nodes that depend
on the same input (although that is not the case here).
In addition, if the change needs to be handled in the change handler, it is
reasonable to expect it to be handled in runtime_data_run() as well. If
there is a special reason why it is not needed there, it deserves a comment
to help readers understand.
> + struct ovsdb_idl_index *sbrec_datapath_binding_by_key =
> + engine_ovsdb_node_get_index(
> + engine_get_input("SB_datapath_binding", node),
> + "key");
> + struct ovsdb_idl_index *sbrec_port_binding_by_key =
> + engine_ovsdb_node_get_index(
> + engine_get_input("SB_port_binding", node),
> + "key");
> + const struct sbrec_port_binding *pb = lport_lookup_by_key(
> + sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
> + pp->dp_key, pp->port_key);
> + if (pb) {
> + rt_data->tracked = true;
> + tracked_datapath_lport_add(pb, TRACKED_RESOURCE_UPDATED,
> + &rt_data->tracked_dp_bindings);
> + engine_set_node_state(node, EN_UPDATED);
> + }
> + free(pp);
> + }
> + free(ap->activated_ports);
> + ap->activated_ports = NULL;
> + return true;
> +}
> +
> static bool
> runtime_data_ovs_interface_shadow_handler(struct engine_node *node, void
*data)
> {
> @@ -3453,6 +3535,7 @@ main(int argc, char *argv[])
> ENGINE_NODE(non_vif_data, "non_vif_data");
> ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> + ENGINE_NODE(activated_ports, "activated_ports");
> ENGINE_NODE(pflow_output, "physical_flow_output");
> ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output,
"logical_flow_output");
> ENGINE_NODE(flow_output, "flow_output");
> @@ -3500,6 +3583,8 @@ main(int argc, char *argv[])
> engine_add_input(&en_pflow_output, &en_sb_multicast_group,
> pflow_output_sb_multicast_group_handler);
>
> + engine_add_input(&en_pflow_output, &en_sb_datapath_binding,
> + engine_noop_handler);
Usually, if a node depends on some input, it should handle changes to that
input. Could you add a comment here explaining why sb_datapath_binding is
required by pflow_output but its changes don't need to be handled?
Thanks,
Han
> engine_add_input(&en_pflow_output, &en_runtime_data,
> pflow_output_runtime_data_handler);
> engine_add_input(&en_pflow_output, &en_sb_encap, NULL);
> @@ -3584,6 +3669,8 @@ main(int argc, char *argv[])
> runtime_data_sb_datapath_binding_handler);
> engine_add_input(&en_runtime_data, &en_sb_port_binding,
> runtime_data_sb_port_binding_handler);
> + engine_add_input(&en_runtime_data, &en_activated_ports,
> + runtime_data_activated_ports_handler);
>
> /* The OVS interface handler for runtime_data changes MUST be
executed
> * after the sb_port_binding_handler as port_binding deletes must be
> diff --git a/controller/physical.c b/controller/physical.c
> index 24de86f24..fc8280a99 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -40,7 +40,9 @@
> #include "lib/mcast-group-index.h"
> #include "lib/ovn-sb-idl.h"
> #include "lib/ovn-util.h"
> +#include "ovn/actions.h"
> #include "physical.h"
> +#include "pinctrl.h"
> #include "openvswitch/shash.h"
> #include "simap.h"
> #include "smap.h"
> @@ -984,6 +986,94 @@ enum access_type {
> PORT_HA_REMOTE,
> };
>
> +static void
> +setup_rarp_activation_strategy(const struct sbrec_port_binding *binding,
> + ofp_port_t ofport, struct zone_ids
*zone_ids,
> + struct ovn_desired_flow_table *flow_table,
> + struct ofpbuf *ofpacts_p)
> +{
> + struct match match = MATCH_CATCHALL_INITIALIZER;
> +
> + /* Unblock the port on ingress RARP. */
> + match_set_dl_type(&match, htons(ETH_TYPE_RARP));
> + match_set_in_port(&match, ofport);
> + ofpbuf_clear(ofpacts_p);
> +
> + load_logical_ingress_metadata(binding, zone_ids, ofpacts_p);
> +
> + size_t ofs = ofpacts_p->size;
> + struct ofpact_controller *oc = ofpact_put_CONTROLLER(ofpacts_p);
> + oc->max_len = UINT16_MAX;
> + oc->reason = OFPR_ACTION;
> +
> + struct action_header ah = {
> + .opcode = htonl(ACTION_OPCODE_ACTIVATION_STRATEGY_RARP)
> + };
> + ofpbuf_put(ofpacts_p, &ah, sizeof ah);
> +
> + ofpacts_p->header = oc;
> + oc->userdata_len = ofpacts_p->size - (ofs + sizeof *oc);
> + ofpact_finish_CONTROLLER(ofpacts_p, &oc);
> + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
> +
> + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1010,
> + binding->header_.uuid.parts[0],
> + &match, ofpacts_p, &binding->header_.uuid);
> + ofpbuf_clear(ofpacts_p);
> +
> + /* Block all non-RARP traffic for the port, both directions. */
> + match_init_catchall(&match);
> + match_set_in_port(&match, ofport);
> +
> + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1000,
> + binding->header_.uuid.parts[0],
> + &match, ofpacts_p, &binding->header_.uuid);
> +
> + match_init_catchall(&match);
> + uint32_t dp_key = binding->datapath->tunnel_key;
> + uint32_t port_key = binding->tunnel_key;
> + match_set_metadata(&match, htonll(dp_key));
> + match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
> +
> + ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 1000,
> + binding->header_.uuid.parts[0],
> + &match, ofpacts_p, &binding->header_.uuid);
> +}
> +
> +static void
> +setup_activation_strategy(const struct sbrec_port_binding *binding,
> + const struct sbrec_chassis *chassis,
> + uint32_t dp_key, uint32_t port_key,
> + ofp_port_t ofport, struct zone_ids *zone_ids,
> + struct ovn_desired_flow_table *flow_table,
> + struct ofpbuf *ofpacts_p)
> +{
> + for (size_t i = 0; i < binding->n_additional_chassis; i++) {
> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> + if (binding->additional_chassis[i] == chassis) {
> + const char *strategy = smap_get(&binding->options,
> + "activation-strategy");
> + if (strategy
> + &&
!lport_is_activated_by_activation_strategy(binding,
> +
chassis)
> + && !pinctrl_is_port_activated(dp_key, port_key)) {
> + if (!strcmp(strategy, "rarp")) {
> + setup_rarp_activation_strategy(binding, ofport,
> + zone_ids, flow_table,
> + ofpacts_p);
> + } else {
> + VLOG_WARN_RL(&rl,
> + "Unknown activation strategy defined
for "
> + "port %s: %s",
> + binding->logical_port, strategy);
> + return;
> + }
> + }
> + return;
> + }
> + }
> +}
> +
> static void
> consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> enum mf_field_id mff_ovn_geneve,
> @@ -1239,6 +1329,10 @@ consider_port_binding(struct ovsdb_idl_index
*sbrec_port_binding_by_name,
> }
> }
>
> + setup_activation_strategy(binding, chassis, dp_key, port_key,
> + ofport, &zone_ids, flow_table,
> + ofpacts_p);
> +
> /* Remember the size with just strip vlan added so far,
> * as we're going to remove this with ofpbuf_pull() later. */
> uint32_t ofpacts_orig_size = ofpacts_p->size;
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 428863293..3d9f2c195 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -29,10 +29,12 @@
> #include "lport.h"
> #include "mac-learn.h"
> #include "nx-match.h"
> +#include "ofctrl.h"
> #include "latch.h"
> #include "lib/packets.h"
> #include "lib/sset.h"
> #include "openvswitch/ofp-actions.h"
> +#include "openvswitch/ofp-flow.h"
> #include "openvswitch/ofp-msgs.h"
> #include "openvswitch/ofp-packet.h"
> #include "openvswitch/ofp-print.h"
> @@ -152,8 +154,8 @@ VLOG_DEFINE_THIS_MODULE(pinctrl);
> * and pinctrl_run().
> * 'pinctrl_handler_seq' is used by pinctrl_run() to
> * wake up pinctrl_handler thread from poll_block() if any changes
happened
> - * in 'send_garp_rarp_data', 'ipv6_ras' and 'buffered_mac_bindings'
> - * structures.
> + * in 'send_garp_rarp_data', 'ipv6_ras', 'activated_ports' and
> + * 'buffered_mac_bindings' structures.
> *
> * 'pinctrl_main_seq' is used by pinctrl_handler() thread to wake up
> * the main thread from poll_block() when mac bindings/igmp groups need
to
> @@ -198,6 +200,17 @@ static void wait_put_mac_bindings(struct
ovsdb_idl_txn *ovnsb_idl_txn);
> static void send_mac_binding_buffered_pkts(struct rconn *swconn)
> OVS_REQUIRES(pinctrl_mutex);
>
> +static void pinctrl_rarp_activation_strategy_handler(const struct match
*md);
> +
> +static void init_activated_ports(void);
> +static void destroy_activated_ports(void);
> +static void wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn);
> +static void run_activated_ports(
> + struct ovsdb_idl_txn *ovnsb_idl_txn,
> + struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
> + struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis);
> +
> static void init_send_garps_rarps(void);
> static void destroy_send_garps_rarps(void);
> static void send_garp_rarp_wait(long long int send_garp_rarp_time);
> @@ -522,6 +535,7 @@ pinctrl_init(void)
> init_ipv6_ras();
> init_ipv6_prefixd();
> init_buffered_packets_map();
> + init_activated_ports();
> init_event_table();
> ip_mcast_snoop_init();
> init_put_vport_bindings();
> @@ -3269,6 +3283,12 @@ process_packet_in(struct rconn *swconn, const
struct ofp_header *msg)
> ovs_mutex_unlock(&pinctrl_mutex);
> break;
>
> + case ACTION_OPCODE_ACTIVATION_STRATEGY_RARP:
> + ovs_mutex_lock(&pinctrl_mutex);
> + pinctrl_rarp_activation_strategy_handler(&pin.flow_metadata);
> + ovs_mutex_unlock(&pinctrl_mutex);
> + break;
> +
> default:
> VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
> ntohl(ah->opcode));
> @@ -3533,6 +3553,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name,
> chassis, active_tunnels);
> run_put_fdbs(ovnsb_idl_txn, sbrec_fdb_by_dp_key_mac);
> + run_activated_ports(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
> + sbrec_port_binding_by_key, chassis);
> ovs_mutex_unlock(&pinctrl_mutex);
> }
>
> @@ -4036,6 +4058,7 @@ pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn)
> int64_t new_seq = seq_read(pinctrl_main_seq);
> seq_wait(pinctrl_main_seq, new_seq);
> wait_put_fdbs(ovnsb_idl_txn);
> + wait_activated_ports(ovnsb_idl_txn);
> }
>
> /* Called by ovn-controller. */
> @@ -4050,6 +4073,7 @@ pinctrl_destroy(void)
> destroy_ipv6_ras();
> destroy_ipv6_prefixd();
> destroy_buffered_packets_map();
> + destroy_activated_ports();
> event_table_destroy();
> destroy_put_mac_bindings();
> destroy_put_vport_bindings();
> @@ -7727,6 +7751,123 @@ pinctrl_handle_svc_check(struct rconn *swconn,
const struct flow *ip_flow,
> }
> }
>
> +static struct ovs_list activated_ports = OVS_LIST_INITIALIZER(
> + &activated_ports);
> +
> +struct ovs_list *
> +get_activated_ports(void)
> +{
> + ovs_mutex_lock(&pinctrl_mutex);
> + if (ovs_list_is_empty(&activated_ports)) {
> + ovs_mutex_unlock(&pinctrl_mutex);
> + return NULL;
> + }
> +
> + struct activated_port *pp;
> + struct ovs_list *res = xmalloc(sizeof *res);
> + ovs_list_init(res);
> +
> + LIST_FOR_EACH (pp, list, &activated_ports) {
> + struct activated_port *pp_copy = xmalloc(sizeof *pp_copy);
> + pp_copy->port_key = pp->port_key;
> + pp_copy->dp_key = pp->dp_key;
> + ovs_list_push_front(res, &pp_copy->list);
> + }
> + ovs_mutex_unlock(&pinctrl_mutex);
> + return res;
> +}
> +
> +static void
> +init_activated_ports(void)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + ovs_list_init(&activated_ports);
> +}
> +
> +static void
> +destroy_activated_ports(void)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + struct activated_port *pp;
> + LIST_FOR_EACH_POP (pp, list, &activated_ports) {
> + free(pp);
> + }
> +}
> +
> +static void
> +wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + if (ovnsb_idl_txn && !ovs_list_is_empty(&activated_ports)) {
> + poll_immediate_wake();
> + }
> +}
> +
> +bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key)
> +{
> + const struct activated_port *pp;
> + ovs_mutex_lock(&pinctrl_mutex);
> + LIST_FOR_EACH (pp, list, &activated_ports) {
> + if (pp->dp_key == dp_key && pp->port_key == port_key) {
> + ovs_mutex_unlock(&pinctrl_mutex);
> + return true;
> + }
> + }
> + ovs_mutex_unlock(&pinctrl_mutex);
> + return false;
> +}
> +
> +static void
> +run_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn,
> + struct ovsdb_idl_index
*sbrec_datapath_binding_by_key,
> + struct ovsdb_idl_index *sbrec_port_binding_by_key,
> + const struct sbrec_chassis *chassis)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + if (!ovnsb_idl_txn) {
> + return;
> + }
> +
> + struct activated_port *pp;
> + LIST_FOR_EACH_SAFE (pp, list, &activated_ports) {
> + const struct sbrec_port_binding *pb = lport_lookup_by_key(
> + sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
> + pp->dp_key, pp->port_key);
> + if (!pb || lport_is_activated_by_activation_strategy(pb,
chassis)) {
> + ovs_list_remove(&pp->list);
> + free(pp);
> + continue;
> + }
> + const char *activated_chassis = smap_get(
> + &pb->options, "additional-chassis-activated");
> + char *activated_str;
> + if (activated_chassis) {
> + activated_str = xasprintf(
> + "%s,%s", activated_chassis, chassis->name);
> + sbrec_port_binding_update_options_setkey(
> + pb, "additional-chassis-activated", activated_str);
> + free(activated_str);
> + } else {
> + sbrec_port_binding_update_options_setkey(
> + pb, "additional-chassis-activated", chassis->name);
> + }
> + }
> +}
> +
> +static void
> +pinctrl_rarp_activation_strategy_handler(const struct match *md)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + /* Tag the port as activated in-memory. */
> + struct activated_port *pp = xmalloc(sizeof *pp);
> + pp->port_key = md->flow.regs[MFF_LOG_INPORT - MFF_REG0];
> + pp->dp_key = ntohll(md->flow.metadata);
> + ovs_list_push_front(&activated_ports, &pp->list);
> +
> + /* Notify main thread on pending additional-chassis-activated
updates. */
> + notify_pinctrl_main();
> +}
> +
> static struct hmap put_fdbs;
>
> /* MAC learning (fdb) related functions. Runs within the main
> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> index 88f18e983..3a29d1896 100644
> --- a/controller/pinctrl.h
> +++ b/controller/pinctrl.h
> @@ -20,6 +20,7 @@
> #include <stdint.h>
>
> #include "lib/sset.h"
> +#include "openvswitch/list.h"
> #include "openvswitch/meta-flow.h"
>
> struct hmap;
> @@ -33,6 +34,7 @@ struct sbrec_dns_table;
> struct sbrec_controller_event_table;
> struct sbrec_service_monitor_table;
> struct sbrec_bfd_table;
> +struct sbrec_port_binding;
>
> void pinctrl_init(void);
> void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> @@ -56,4 +58,15 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> void pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn);
> void pinctrl_destroy(void);
> void pinctrl_set_br_int_name(char *br_int_name);
> +
> +struct activated_port {
> + uint32_t dp_key;
> + uint32_t port_key;
> + struct ovs_list list;
> +};
> +
> +struct ovs_list *get_activated_ports(void);
> +bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key);
> +bool db_is_port_activated(const struct sbrec_port_binding *pb,
> + const struct sbrec_chassis *chassis);
> #endif /* controller/pinctrl.h */
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 1ae496960..33c319f1c 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -683,6 +683,9 @@ enum action_opcode {
> /* put_fdb(inport, eth.src).
> */
> ACTION_OPCODE_PUT_FDB,
> +
> + /* activation_strategy_rarp() */
> + ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
> };
>
> /* Header. */
> diff --git a/northd/northd.c b/northd/northd.c
> index 450e05ad6..988b57acc 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -3469,6 +3469,16 @@ ovn_port_update_sbrec(struct northd_input
*input_data,
> smap_add(&options, "vlan-passthru", "true");
> }
>
> + /* Retain activated chassis flags. */
> + if (op->sb->requested_additional_chassis) {
> + const char *activated_str = smap_get(
> + &op->sb->options, "additional-chassis-activated");
> + if (activated_str) {
> + smap_add(&options, "additional-chassis-activated",
> + activated_str);
> + }
> + }
> +
> sbrec_port_binding_set_options(op->sb, &options);
> smap_destroy(&options);
> if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index e4e980720..ab28756af 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -107,7 +107,10 @@ static const char *rbac_port_binding_auth[] =
> static const char *rbac_port_binding_update[] =
> {"chassis", "additional_chassis",
> "encap", "additional_encap",
> - "up", "virtual_parent"};
> + "up", "virtual_parent",
> + /* NOTE: we only need to update the additional-chassis-activated
key,
> + * but RBAC_Role doesn't support mutate operation for subkeys. */
> + "options"};
>
> static const char *rbac_mac_binding_auth[] =
> {""};
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index c197f431f..e700b2e88 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -1045,6 +1045,17 @@
> </p>
> </column>
>
> + <column name="options" key="activation-strategy">
> + If used with multiple chassis set in
> + <ref column="requested-chassis"/>, specifies an activation
strategy
> + for all additional chassis. By default, no activation strategy
is
> + used, meaning additional port locations are immediately
available for
> + use. When set to "rarp", the port is blocked for ingress and
egress
> + communication until a RARP packet is sent from a new location.
The
> + "rarp" strategy is useful in live migration scenarios for
virtual
> + machines.
> + </column>
> +
> <column name="options" key="iface-id-ver">
> If set, this port will be bound by <code>ovn-controller</code>
> only if this same key and value is configured in the
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index 2dc0d5bea..9d37dd3cf 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -3354,6 +3354,21 @@ tcp.flags = RST;
> </p>
> </column>
>
> + <column name="options" key="activation-strategy">
> + If used with multiple chassis set in <ref
column="requested-chassis"/>,
> + specifies an activation strategy for all additional chassis. By
> + default, no activation strategy is used, meaning additional port
> + locations are immediately available for use. When set to "rarp",
the
> + port is blocked for ingress and egress communication until a RARP
> + packet is sent from a new location. The "rarp" strategy is useful
> + in live migration scenarios for virtual machines.
> + </column>
> +
> + <column name="options" key="additional-chassis-activated">
> + When <ref column="activation-strategy"/> is set, this option
indicates
> + that the port was activated using the strategy specified.
> + </column>
> +
> <column name="options" key="iface-id-ver">
> If set, this port will be bound by <code>ovn-controller</code>
> only if this same key and value is configured in the
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 3c079e0fb..b210d4a28 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -14924,6 +14924,371 @@ OVN_CLEANUP([hv1],[hv2],[hv3])
> AT_CLEANUP
> ])
>
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([options:activation-strategy for logical port])
> +ovn_start
> +
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +
> +sim_add hv2
> +as hv2
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.12
> +
> +sim_add hv3
> +as hv3
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.13
> +
> +# Disable local ARP responder to pass ARP requests through tunnels
> +check ovn-nbctl ls-add ls0 -- add Logical_Switch ls0 other_config
vlan-passthru=true
> +
> +check ovn-nbctl lsp-add ls0 migrator
> +check ovn-nbctl lsp-set-options migrator requested-chassis=hv1,hv2 \
> + activation-strategy=rarp
> +
> +check ovn-nbctl lsp-add ls0 first
> +check ovn-nbctl lsp-set-options first requested-chassis=hv1
> +check ovn-nbctl lsp-add ls0 second
> +check ovn-nbctl lsp-set-options second requested-chassis=hv2
> +check ovn-nbctl lsp-add ls0 outside
> +check ovn-nbctl lsp-set-options outside requested-chassis=hv3
> +
> +check ovn-nbctl lsp-set-addresses migrator "00:00:00:00:00:10 10.0.0.10"
> +check ovn-nbctl lsp-set-addresses first "00:00:00:00:00:01 10.0.0.1"
> +check ovn-nbctl lsp-set-addresses second "00:00:00:00:00:02 10.0.0.2"
> +check ovn-nbctl lsp-set-addresses outside "00:00:00:00:00:03 10.0.0.3"
> +
> +for hv in hv1 hv2; do
> + as $hv check ovs-vsctl -- add-port br-int migrator -- \
> + set Interface migrator external-ids:iface-id=migrator \
> + options:tx_pcap=$hv/migrator-tx.pcap \
> + options:rxq_pcap=$hv/migrator-rx.pcap
> +done
> +
> +as hv1 check ovs-vsctl -- add-port br-int first -- \
> + set Interface first external-ids:iface-id=first
> +as hv2 check ovs-vsctl -- add-port br-int second -- \
> + set Interface second external-ids:iface-id=second
> +as hv3 check ovs-vsctl -- add-port br-int outside -- \
> + set Interface outside external-ids:iface-id=outside
> +
> +for hv in hv1 hv2 hv3; do
> + wait_row_count Chassis 1 name=$hv
> +done
> +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> +hv3_uuid=$(fetch_column Chassis _uuid name=hv3)
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=migrator
> +wait_column "$hv1_uuid" Port_Binding requested_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding additional_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding requested_additional_chassis
logical_port=migrator
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=first
> +wait_column "$hv2_uuid" Port_Binding chassis logical_port=second
> +wait_column "$hv3_uuid" Port_Binding chassis logical_port=outside
> +
> +OVN_POPULATE_ARP
> +
> +send_arp() {
> + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> + local
request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa}
> + as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> + echo "${request}"
> +}
> +
> +send_rarp() {
> + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> + local
request=${eth_dst}${eth_src}80350001080006040001${eth_src}${spa}${eth_dst}${tpa}
> + as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> + echo "${request}"
> +}
> +
> +reset_pcap_file() {
> + local hv=$1
> + local iface=$2
> + local pcap_file=$3
> + as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=dummy-tx.pcap \
> +
options:rxq_pcap=dummy-rx.pcap
> + check rm -f ${pcap_file}*.pcap
> + as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=${pcap_file}-tx.pcap \
> +
options:rxq_pcap=${pcap_file}-rx.pcap
> +}
> +
> +reset_env() {
> + reset_pcap_file hv1 migrator hv1/migrator
> + reset_pcap_file hv2 migrator hv2/migrator
> + reset_pcap_file hv1 first hv1/first
> + reset_pcap_file hv2 second hv2/second
> + reset_pcap_file hv3 outside hv3/outside
> +
> + for port in hv1/migrator hv2/migrator hv1/first hv2/second
hv3/outside; do
> + : > $port.expected
> + done
> +}
> +
> +check_packets() {
> + OVN_CHECK_PACKETS([hv1/migrator-tx.pcap], [hv1/migrator.expected])
> + OVN_CHECK_PACKETS([hv2/migrator-tx.pcap], [hv2/migrator.expected])
> + OVN_CHECK_PACKETS([hv3/outside-tx.pcap], [hv3/outside.expected])
> + OVN_CHECK_PACKETS([hv1/first-tx.pcap], [hv1/first.expected])
> + OVN_CHECK_PACKETS([hv2/second-tx.pcap], [hv2/second.expected])
> +}
> +
> +migrator_spa=$(ip_to_hex 10 0 0 10)
> +first_spa=$(ip_to_hex 10 0 0 1)
> +second_spa=$(ip_to_hex 10 0 0 2)
> +outside_spa=$(ip_to_hex 10 0 0 3)
> +
> +reset_env
> +
> +# Packet from hv3:Outside arrives to hv1:Migrator
> +# hv3:Outside cannot reach hv2:Migrator because it is blocked by RARP
strategy
> +request=$(send_arp hv3 outside 000000000003 000000000010 $outside_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +
> +# Packet from hv1:First arrives to hv1:Migrator
> +# hv1:First cannot reach hv2:Migrator because it is blocked by RARP
strategy
> +request=$(send_arp hv1 first 000000000001 000000000010 $first_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +
> +# Packet from hv2:Second arrives to hv1:Migrator
> +# hv2:Second cannot reach hv2:Migrator because it is blocked by RARP
strategy
> +request=$(send_arp hv2 second 000000000002 000000000010 $second_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +
> +check_packets
> +reset_env
> +
> +# Packet from hv1:Migrator arrives to hv3:Outside
> +request=$(send_arp hv1 migrator 000000000010 000000000003 $migrator_spa
$outside_spa)
> +echo $request >> hv3/outside.expected
> +
> +# Packet from hv1:Migrator arrives to hv1:First
> +request=$(send_arp hv1 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +echo $request >> hv1/first.expected
> +
> +# Packet from hv1:Migrator arrives to hv2:Second
> +request=$(send_arp hv1 migrator 000000000010 000000000002 $migrator_spa
$second_spa)
> +echo $request >> hv2/second.expected
> +
> +check_packets
> +reset_env
> +
> +# hv2:Migrator cannot reach to hv3:Outside because it is blocked by RARP
strategy
> +request=$(send_arp hv2 migrator 000000000010 000000000003 $migrator_spa
$outside_spa)
> +
> +check_packets
> +reset_env
> +
> +AT_CHECK([ovn-sbctl find port_binding logical_port=migrator | grep -q
additional-chassis-activated], [1])
> +
> +# Now activate hv2:Migrator location
> +request=$(send_rarp hv2 migrator 000000000010 ffffffffffff $migrator_spa
$migrator_spa)
> +
> +# RARP was reinjected into the pipeline
> +echo $request >> hv3/outside.expected
> +echo $request >> hv1/first.expected
> +echo $request >> hv2/second.expected
> +
> +check_packets
> +reset_env
> +
> +pb_uuid=$(ovn-sbctl --bare --columns _uuid find Port_Binding
logical_port=migrator)
> +OVS_WAIT_UNTIL([test xhv2 = x$(ovn-sbctl get Port_Binding $pb_uuid
options:additional-chassis-activated | tr -d '""')])
> +
> +# Now packet arrives to both locations
> +request=$(send_arp hv3 outside 000000000003 000000000010 $outside_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +echo $request >> hv2/migrator.expected
> +
> +check_packets
> +reset_env
> +
> +# Packet from hv1:Migrator still arrives to hv3:Outside
> +request=$(send_arp hv1 migrator 000000000010 000000000003 $migrator_spa
$outside_spa)
> +echo $request >> hv3/outside.expected
> +
> +check_packets
> +reset_env
> +
> +# hv2:Migrator can now reach hv3:Outside because RARP strategy activated it
> +request=$(send_arp hv2 migrator 000000000010 000000000003 $migrator_spa
$outside_spa)
> +echo $request >> hv3/outside.expected
> +
> +check_packets
> +
> +# complete port migration and check that -activated flag is reset
> +check ovn-nbctl lsp-set-options migrator requested-chassis=hv2
> +OVS_WAIT_UNTIL([test x = x$(ovn-sbctl get Port_Binding $pb_uuid
options:additional-chassis-activated)])
> +
> +OVN_CLEANUP([hv1],[hv2],[hv3])
> +
> +AT_CLEANUP
> +])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([options:activation-strategy=rarp is not waiting for southbound
db])
> +# TODO: remove it when we find a way to make vswitchd forward packets to
> +# controller() handler when ovsdb-server is down
> +AT_SKIP_IF([true])
> +ovn_start
> +
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +
> +sim_add hv2
> +as hv2
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.12
> +
> +# Disable local ARP responder to pass ARP requests through tunnels
> +check ovn-nbctl ls-add ls0 -- add Logical_Switch ls0 other_config
vlan-passthru=true
> +
> +check ovn-nbctl lsp-add ls0 migrator
> +check ovn-nbctl lsp-set-options migrator requested-chassis=hv1,hv2 \
> + activation-strategy=rarp
> +
> +check ovn-nbctl lsp-add ls0 first
> +check ovn-nbctl lsp-set-options first requested-chassis=hv1
> +
> +check ovn-nbctl lsp-set-addresses migrator "00:00:00:00:00:10 10.0.0.10"
> +check ovn-nbctl lsp-set-addresses first "00:00:00:00:00:01 10.0.0.1"
> +
> +for hv in hv1 hv2; do
> + as $hv check ovs-vsctl -- add-port br-int migrator -- \
> + set Interface migrator external-ids:iface-id=migrator \
> + options:tx_pcap=$hv/migrator-tx.pcap \
> + options:rxq_pcap=$hv/migrator-rx.pcap
> +done
> +
> +as hv1 check ovs-vsctl -- add-port br-int first -- \
> + set Interface first external-ids:iface-id=first
> +
> +for hv in hv1 hv2; do
> + wait_row_count Chassis 1 name=$hv
> +done
> +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=migrator
> +wait_column "$hv1_uuid" Port_Binding requested_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding additional_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding requested_additional_chassis
logical_port=migrator
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=first
> +
> +OVN_POPULATE_ARP
> +
> +send_arp() {
> + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> + local
request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa}
> + as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> + echo "${request}"
> +}
> +
> +send_rarp() {
> + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> + local
request=${eth_dst}${eth_src}80350001080006040001${eth_src}${spa}${eth_dst}${tpa}
> + as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> + echo "${request}"
> +}
> +
> +reset_pcap_file() {
> + local hv=$1
> + local iface=$2
> + local pcap_file=$3
> + as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=dummy-tx.pcap \
> +
options:rxq_pcap=dummy-rx.pcap
> + check rm -f ${pcap_file}*.pcap
> + as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=${pcap_file}-tx.pcap \
> +
options:rxq_pcap=${pcap_file}-rx.pcap
> +}
> +
> +reset_env() {
> + reset_pcap_file hv1 migrator hv1/migrator
> + reset_pcap_file hv2 migrator hv2/migrator
> + reset_pcap_file hv1 first hv1/first
> +
> + for port in hv1/migrator hv2/migrator hv1/first; do
> + : > $port.expected
> + done
> +}
> +
> +check_packets() {
> + OVN_CHECK_PACKETS([hv1/migrator-tx.pcap], [hv1/migrator.expected])
> + OVN_CHECK_PACKETS([hv2/migrator-tx.pcap], [hv2/migrator.expected])
> + OVN_CHECK_PACKETS([hv1/first-tx.pcap], [hv1/first.expected])
> +}
> +
> +migrator_spa=$(ip_to_hex 10 0 0 10)
> +first_spa=$(ip_to_hex 10 0 0 1)
> +
> +reset_env
> +
> +# Packet from hv1:First arrives to hv1:Migrator
> +# hv1:First cannot reach hv2:Migrator because it is blocked by RARP
strategy
> +request=$(send_arp hv1 first 000000000001 000000000010 $first_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +
> +check_packets
> +reset_env
> +
> +# Packet from hv1:Migrator arrives to hv1:First
> +request=$(send_arp hv1 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +echo $request >> hv1/first.expected
> +
> +check_packets
> +reset_env
> +
> +# hv2:Migrator cannot reach hv1:First because it is blocked by RARP strategy
> +request=$(send_arp hv2 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +
> +check_packets
> +reset_env
> +
> +# Before proceeding, stop ovsdb-server to make sure we test in an environment
> +# that can't remove flows triggered by updates to the database
> +as hv2
> +SVCPID=$(cat $OVS_RUNDIR/ovsdb-server.pid)
> +kill -9 $SVCPID
> +
> +# Now activate hv2:Migrator location
> +request=$(send_rarp hv2 migrator 000000000010 ffffffffffff $migrator_spa
$migrator_spa)
> +
> +# RARP was reinjected into the pipeline
> +echo $request >> hv1/first.expected
> +
> +# Now packet from hv1:First arrives to both locations
> +request=$(send_arp hv1 first 000000000001 000000000010 $first_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +echo $request >> hv2/migrator.expected
> +
> +# Packet from hv1:Migrator still arrives to hv1:First
> +request=$(send_arp hv1 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +echo $request >> hv1/first.expected
> +
> +# hv2:Migrator can now reach hv1:First because RARP strategy activated it
> +request=$(send_arp hv2 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +echo $request >> hv1/first.expected
> +
> +check_packets
> +
> +OVN_CLEANUP([hv1],[hv2])
> +
> +AT_CLEANUP
> +])
> +
> OVN_FOR_EACH_NORTHD([
> AT_SETUP([options:requested-chassis for logical port])
> ovn_start
> --
> 2.34.1
>
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev