On Mon, Jun 13, 2022 at 11:41 AM Ihar Hrachyshka <[email protected]>
wrote:
>
> When options:activation-strategy is set to "rarp" for LSP, when used in
> combination with multiple chassis names listed in
> options:requested-chassis, additional chassis will install special flows
> that would block all ingress and egress traffic for the port until a
> special activation event happens.
>
> For "rarp" strategy, an observation of a RARP packet sent from the port
> on the additional chassis is such an event. When it occurs, a special
> flow passes control to a controller() action handler that eventually
> removes the installed blocking flows and also marks the port as
> options:additional-chassis-activated in southbound db.
>
> This feature is useful in live migration scenarios where it's not
> advisable to unlock the destination port location prematurely to avoid
> duplicate packets originating from the port.
>
> Signed-off-by: Ihar Hrachyshka <[email protected]>
> ---
> v15: remove redundant branch from en_activated_ports_run.
> v15: split activated_ports into two lists: one for db updates, another
> for engine consumption.
> ---
> NEWS | 2 +
> controller/lport.c | 22 +++
> controller/lport.h | 3 +
> controller/ovn-controller.c | 86 +++++++++
> controller/physical.c | 94 ++++++++++
> controller/pinctrl.c | 160 +++++++++++++++-
> controller/pinctrl.h | 11 ++
> include/ovn/actions.h | 3 +
> northd/northd.c | 10 +
> northd/ovn-northd.c | 5 +-
> ovn-nb.xml | 11 ++
> ovn-sb.xml | 15 ++
> tests/ovn.at | 365 ++++++++++++++++++++++++++++++++++++
> 13 files changed, 784 insertions(+), 3 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index e015ae8e7..97a0688ff 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -31,6 +31,8 @@ OVN v22.06.0 - XX XXX XXXX
> - Added support for setting the Next server IP in the DHCP header
> using the private DHCP option - 253 in native OVN DHCPv4 responder.
> - Support list of chassis for
Logical_Switch_Port:options:requested-chassis.
> + - Support Logical_Switch_Port:options:activation-strategy for live
migration
> + scenarios.
>
> OVN v22.03.0 - 11 Mar 2022
> --------------------------
> diff --git a/controller/lport.c b/controller/lport.c
> index bf55d83f2..add7e91aa 100644
> --- a/controller/lport.c
> +++ b/controller/lport.c
> @@ -197,3 +197,25 @@ get_peer_lport(const struct sbrec_port_binding *pb,
> peer_name);
> return (peer && peer->datapath) ? peer : NULL;
> }
> +
> +bool
> +lport_is_activated_by_activation_strategy(const struct
sbrec_port_binding *pb,
> + const struct sbrec_chassis
*chassis)
> +{
> + const char *activated_chassis = smap_get(&pb->options,
> +
"additional-chassis-activated");
> + if (activated_chassis) {
> + char *save_ptr;
> + char *tokstr = xstrdup(activated_chassis);
> + for (const char *chassis_name = strtok_r(tokstr, ",", &save_ptr);
> + chassis_name != NULL;
> + chassis_name = strtok_r(NULL, ",", &save_ptr)) {
> + if (!strcmp(chassis_name, chassis->name)) {
> + free(tokstr);
> + return true;
> + }
> + }
> + free(tokstr);
> + }
> + return false;
> +}
> diff --git a/controller/lport.h b/controller/lport.h
> index 115881655..644c67255 100644
> --- a/controller/lport.h
> +++ b/controller/lport.h
> @@ -70,4 +70,7 @@ const struct sbrec_port_binding *lport_get_peer(
> const struct sbrec_port_binding *lport_get_l3gw_peer(
> const struct sbrec_port_binding *,
> struct ovsdb_idl_index *sbrec_port_binding_by_name);
> +bool
> +lport_is_activated_by_activation_strategy(const struct
sbrec_port_binding *pb,
> + const struct sbrec_chassis
*chassis);
> #endif /* controller/lport.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 2793c8687..f6614295c 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1417,6 +1417,89 @@ en_runtime_data_run(struct engine_node *node, void
*data)
> engine_set_node_state(node, EN_UPDATED);
> }
>
> +struct ed_type_activated_ports {
> + struct ovs_list *activated_ports;
> +};
> +
> +static void *
> +en_activated_ports_init(struct engine_node *node OVS_UNUSED,
> + struct engine_arg *arg OVS_UNUSED)
> +{
> + struct ed_type_activated_ports *data = xzalloc(sizeof *data);
> + data->activated_ports = NULL;
> + return data;
> +}
> +
> +static void
> +en_activated_ports_cleanup(void *data_)
> +{
> + struct ed_type_activated_ports *data = data_;
> +
> + struct activated_port *pp;
> + if (!data->activated_ports) {
> + return;
> + }
> +
> + LIST_FOR_EACH_POP (pp, list, data->activated_ports) {
> + free(pp);
> + }
> + free(data->activated_ports);
> + data->activated_ports = NULL;
> +}
> +
> +static void
> +en_activated_ports_clear_tracked_data(void *data)
> +{
> + en_activated_ports_cleanup(data);
> +}
> +
> +static void
> +en_activated_ports_run(struct engine_node *node, void *data_)
> +{
> + struct ed_type_activated_ports *data = data_;
> + enum engine_node_state state = EN_UNCHANGED;
> + data->activated_ports = get_ports_to_activate_in_engine();
> + if (data->activated_ports) {
> + state = EN_UPDATED;
> + }
> + engine_set_node_state(node, state);
> +}
> +
> +static bool
> +runtime_data_activated_ports_handler(struct engine_node *node, void
*data)
> +{
> + struct ed_type_runtime_data *rt_data = data;
> +
> + struct ed_type_activated_ports *ap =
> + engine_get_input_data("activated_ports", node);
> +
> + if (!ap->activated_ports) {
> + return true;
> + }
> +
> + struct activated_port *pp;
> + LIST_FOR_EACH (pp, list, ap->activated_ports) {
> + struct ovsdb_idl_index *sbrec_datapath_binding_by_key =
> + engine_ovsdb_node_get_index(
> + engine_get_input("SB_datapath_binding", node),
> + "key");
> + struct ovsdb_idl_index *sbrec_port_binding_by_key =
> + engine_ovsdb_node_get_index(
> + engine_get_input("SB_port_binding", node),
> + "key");
> + const struct sbrec_port_binding *pb = lport_lookup_by_key(
> + sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
> + pp->dp_key, pp->port_key);
> + if (pb) {
> + rt_data->tracked = true;
> + tracked_datapath_lport_add(pb, TRACKED_RESOURCE_UPDATED,
> + &rt_data->tracked_dp_bindings);
> + engine_set_node_state(node, EN_UPDATED);
> + }
> + }
> + return true;
> +}
> +
> static bool
> runtime_data_ovs_interface_shadow_handler(struct engine_node *node, void
*data)
> {
> @@ -3445,6 +3528,7 @@ main(int argc, char *argv[])
> ENGINE_NODE(non_vif_data, "non_vif_data");
> ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> ENGINE_NODE(ofctrl_is_connected, "ofctrl_is_connected");
> + ENGINE_NODE_WITH_CLEAR_TRACK_DATA(activated_ports,
"activated_ports");
> ENGINE_NODE(pflow_output, "physical_flow_output");
> ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lflow_output,
"logical_flow_output");
> ENGINE_NODE(flow_output, "flow_output");
> @@ -3576,6 +3660,8 @@ main(int argc, char *argv[])
> runtime_data_sb_datapath_binding_handler);
> engine_add_input(&en_runtime_data, &en_sb_port_binding,
> runtime_data_sb_port_binding_handler);
> + engine_add_input(&en_runtime_data, &en_activated_ports,
> + runtime_data_activated_ports_handler);
>
> /* The OVS interface handler for runtime_data changes MUST be
executed
> * after the sb_port_binding_handler as port_binding deletes must be
> diff --git a/controller/physical.c b/controller/physical.c
> index 24de86f24..fc8280a99 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -40,7 +40,9 @@
> #include "lib/mcast-group-index.h"
> #include "lib/ovn-sb-idl.h"
> #include "lib/ovn-util.h"
> +#include "ovn/actions.h"
> #include "physical.h"
> +#include "pinctrl.h"
> #include "openvswitch/shash.h"
> #include "simap.h"
> #include "smap.h"
> @@ -984,6 +986,94 @@ enum access_type {
> PORT_HA_REMOTE,
> };
>
> +static void
> +setup_rarp_activation_strategy(const struct sbrec_port_binding *binding,
> + ofp_port_t ofport, struct zone_ids
*zone_ids,
> + struct ovn_desired_flow_table *flow_table,
> + struct ofpbuf *ofpacts_p)
> +{
> + struct match match = MATCH_CATCHALL_INITIALIZER;
> +
> + /* Unblock the port on ingress RARP. */
> + match_set_dl_type(&match, htons(ETH_TYPE_RARP));
> + match_set_in_port(&match, ofport);
> + ofpbuf_clear(ofpacts_p);
> +
> + load_logical_ingress_metadata(binding, zone_ids, ofpacts_p);
> +
> + size_t ofs = ofpacts_p->size;
> + struct ofpact_controller *oc = ofpact_put_CONTROLLER(ofpacts_p);
> + oc->max_len = UINT16_MAX;
> + oc->reason = OFPR_ACTION;
> +
> + struct action_header ah = {
> + .opcode = htonl(ACTION_OPCODE_ACTIVATION_STRATEGY_RARP)
> + };
> + ofpbuf_put(ofpacts_p, &ah, sizeof ah);
> +
> + ofpacts_p->header = oc;
> + oc->userdata_len = ofpacts_p->size - (ofs + sizeof *oc);
> + ofpact_finish_CONTROLLER(ofpacts_p, &oc);
> + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
> +
> + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1010,
> + binding->header_.uuid.parts[0],
> + &match, ofpacts_p, &binding->header_.uuid);
> + ofpbuf_clear(ofpacts_p);
> +
> + /* Block all non-RARP traffic for the port, both directions. */
> + match_init_catchall(&match);
> + match_set_in_port(&match, ofport);
> +
> + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 1000,
> + binding->header_.uuid.parts[0],
> + &match, ofpacts_p, &binding->header_.uuid);
> +
> + match_init_catchall(&match);
> + uint32_t dp_key = binding->datapath->tunnel_key;
> + uint32_t port_key = binding->tunnel_key;
> + match_set_metadata(&match, htonll(dp_key));
> + match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
> +
> + ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 1000,
> + binding->header_.uuid.parts[0],
> + &match, ofpacts_p, &binding->header_.uuid);
> +}
> +
> +static void
> +setup_activation_strategy(const struct sbrec_port_binding *binding,
> + const struct sbrec_chassis *chassis,
> + uint32_t dp_key, uint32_t port_key,
> + ofp_port_t ofport, struct zone_ids *zone_ids,
> + struct ovn_desired_flow_table *flow_table,
> + struct ofpbuf *ofpacts_p)
> +{
> + for (size_t i = 0; i < binding->n_additional_chassis; i++) {
> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> + if (binding->additional_chassis[i] == chassis) {
> + const char *strategy = smap_get(&binding->options,
> + "activation-strategy");
> + if (strategy
> + &&
!lport_is_activated_by_activation_strategy(binding,
> +
chassis)
> + && !pinctrl_is_port_activated(dp_key, port_key)) {
> + if (!strcmp(strategy, "rarp")) {
> + setup_rarp_activation_strategy(binding, ofport,
> + zone_ids, flow_table,
> + ofpacts_p);
> + } else {
> + VLOG_WARN_RL(&rl,
> + "Unknown activation strategy defined
for "
> + "port %s: %s",
> + binding->logical_port, strategy);
> + return;
> + }
> + }
> + return;
> + }
> + }
> +}
> +
> static void
> consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
> enum mf_field_id mff_ovn_geneve,
> @@ -1239,6 +1329,10 @@ consider_port_binding(struct ovsdb_idl_index
*sbrec_port_binding_by_name,
> }
> }
>
> + setup_activation_strategy(binding, chassis, dp_key, port_key,
> + ofport, &zone_ids, flow_table,
> + ofpacts_p);
> +
> /* Remember the size with just strip vlan added so far,
> * as we're going to remove this with ofpbuf_pull() later. */
> uint32_t ofpacts_orig_size = ofpacts_p->size;
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 9a1a0faa1..4fecb6fb6 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -29,10 +29,12 @@
> #include "lport.h"
> #include "mac-learn.h"
> #include "nx-match.h"
> +#include "ofctrl.h"
> #include "latch.h"
> #include "lib/packets.h"
> #include "lib/sset.h"
> #include "openvswitch/ofp-actions.h"
> +#include "openvswitch/ofp-flow.h"
> #include "openvswitch/ofp-msgs.h"
> #include "openvswitch/ofp-packet.h"
> #include "openvswitch/ofp-print.h"
> @@ -152,8 +154,8 @@ VLOG_DEFINE_THIS_MODULE(pinctrl);
> * and pinctrl_run().
> * 'pinctrl_handler_seq' is used by pinctrl_run() to
> * wake up pinctrl_handler thread from poll_block() if any changes
happened
> - * in 'send_garp_rarp_data', 'ipv6_ras' and 'buffered_mac_bindings'
> - * structures.
> + * in 'send_garp_rarp_data', 'ipv6_ras', 'ports_to_activate_in_db' and
> + * 'buffered_mac_bindings' structures.
> *
> * 'pinctrl_main_seq' is used by pinctrl_handler() thread to wake up
> * the main thread from poll_block() when mac bindings/igmp groups need
to
> @@ -198,6 +200,17 @@ static void wait_put_mac_bindings(struct
ovsdb_idl_txn *ovnsb_idl_txn);
> static void send_mac_binding_buffered_pkts(struct rconn *swconn)
> OVS_REQUIRES(pinctrl_mutex);
>
> +static void pinctrl_rarp_activation_strategy_handler(const struct match
*md);
> +
> +static void init_activated_ports(void);
> +static void destroy_activated_ports(void);
> +static void wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn);
> +static void run_activated_ports(
> + struct ovsdb_idl_txn *ovnsb_idl_txn,
> + struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
> + struct ovsdb_idl_index *sbrec_port_binding_by_name,
> + const struct sbrec_chassis *chassis);
> +
> static void init_send_garps_rarps(void);
> static void destroy_send_garps_rarps(void);
> static void send_garp_rarp_wait(long long int send_garp_rarp_time);
> @@ -522,6 +535,7 @@ pinctrl_init(void)
> init_ipv6_ras();
> init_ipv6_prefixd();
> init_buffered_packets_map();
> + init_activated_ports();
> init_event_table();
> ip_mcast_snoop_init();
> init_put_vport_bindings();
> @@ -3269,6 +3283,12 @@ process_packet_in(struct rconn *swconn, const
struct ofp_header *msg)
> ovs_mutex_unlock(&pinctrl_mutex);
> break;
>
> + case ACTION_OPCODE_ACTIVATION_STRATEGY_RARP:
> + ovs_mutex_lock(&pinctrl_mutex);
> + pinctrl_rarp_activation_strategy_handler(&pin.flow_metadata);
> + ovs_mutex_unlock(&pinctrl_mutex);
> + break;
> +
> default:
> VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
> ntohl(ah->opcode));
> @@ -3533,6 +3553,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name,
> chassis, active_tunnels);
> run_put_fdbs(ovnsb_idl_txn, sbrec_fdb_by_dp_key_mac);
> + run_activated_ports(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
> + sbrec_port_binding_by_key, chassis);
> ovs_mutex_unlock(&pinctrl_mutex);
> }
>
> @@ -4037,6 +4059,7 @@ pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn)
> int64_t new_seq = seq_read(pinctrl_main_seq);
> seq_wait(pinctrl_main_seq, new_seq);
> wait_put_fdbs(ovnsb_idl_txn);
> + wait_activated_ports(ovnsb_idl_txn);
> ovs_mutex_unlock(&pinctrl_mutex);
> }
>
> @@ -4052,6 +4075,7 @@ pinctrl_destroy(void)
> destroy_ipv6_ras();
> destroy_ipv6_prefixd();
> destroy_buffered_packets_map();
> + destroy_activated_ports();
> event_table_destroy();
> destroy_put_mac_bindings();
> destroy_put_vport_bindings();
> @@ -7729,6 +7753,138 @@ pinctrl_handle_svc_check(struct rconn *swconn,
const struct flow *ip_flow,
> }
> }
>
> +static struct ovs_list ports_to_activate_in_db = OVS_LIST_INITIALIZER(
> + &ports_to_activate_in_db);
> +static struct ovs_list *ports_to_activate_in_engine = NULL;
> +
> +struct ovs_list *
> +get_ports_to_activate_in_engine(void)
> +{
> + ovs_mutex_lock(&pinctrl_mutex);
> + if (!ports_to_activate_in_engine ||
> + ovs_list_is_empty(ports_to_activate_in_engine)) {
> + ovs_mutex_unlock(&pinctrl_mutex);
> + return NULL;
> + }
> +
> + struct ovs_list *ap = ports_to_activate_in_engine;
> + ports_to_activate_in_engine = xmalloc(sizeof
*ports_to_activate_in_engine);
> + ovs_list_init(ports_to_activate_in_engine);
> + ovs_mutex_unlock(&pinctrl_mutex);
> + return ap;
> +}
> +
> +static void
> +init_activated_ports(void)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + ovs_list_init(&ports_to_activate_in_db);
> + ports_to_activate_in_engine = xmalloc(sizeof
*ports_to_activate_in_engine);
> + ovs_list_init(ports_to_activate_in_engine);
> +}
> +
> +static void
> +destroy_activated_ports(void)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + struct activated_port *pp;
> + LIST_FOR_EACH_POP (pp, list, &ports_to_activate_in_db) {
> + free(pp);
> + }
> + LIST_FOR_EACH_POP (pp, list, ports_to_activate_in_engine) {
> + free(pp);
> + }
> + free(ports_to_activate_in_engine);
> + ports_to_activate_in_engine = NULL;
> +}
> +
> +static void
> +wait_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + if (ovnsb_idl_txn &&
> + (!ovs_list_is_empty(&ports_to_activate_in_db) ||
> + !ovs_list_is_empty(ports_to_activate_in_engine))) {
If I understand correctly, the major benefit of this feature is to activate
the port immediately when ready, without waiting for the control plane to
converge through SB DB. So I think here it shouldn't check ovnsb_idl_txn for
the list "ports_to_activate_in_engine". It can be:
if ((ovnsb_idl_txn && !ovs_list_is_empty(&ports_to_activate_in_db)) ||
!ovs_list_is_empty(ports_to_activate_in_engine)) {
so that whenever ports_to_activate_in_engine is not empty the I-P engine
run can be triggered and the blocking flows are removed ASAP.
In addition, I wonder if the check for ports_to_activate_in_db is really
necessary.
- If the ovnsb_idl_txn was non-null in this iteration, the earlier call to
run_activated_ports() would have sent the updates to SB DB, so there is no
need to call poll_immediate_wake() because the main loop will wake up when
the response comes back.
- If the ovnsb_idl_txn was null in this iteration, it means some
transaction is in progress, so no need to call poll_immediate_wake() either.
So I think it can simply be:
if (!ovs_list_is_empty(ports_to_activate_in_engine)) {
> + poll_immediate_wake();
> + }
> +}
> +
> +bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key)
> +{
> + const struct activated_port *pp;
> + ovs_mutex_lock(&pinctrl_mutex);
> + LIST_FOR_EACH (pp, list, &ports_to_activate_in_db) {
> + if (pp->dp_key == dp_key && pp->port_key == port_key) {
> + ovs_mutex_unlock(&pinctrl_mutex);
> + return true;
> + }
> + }
> + LIST_FOR_EACH (pp, list, ports_to_activate_in_engine) {
> + if (pp->dp_key == dp_key && pp->port_key == port_key) {
> + ovs_mutex_unlock(&pinctrl_mutex);
> + return true;
> + }
> + }
> + ovs_mutex_unlock(&pinctrl_mutex);
> + return false;
> +}
> +
> +static void
> +run_activated_ports(struct ovsdb_idl_txn *ovnsb_idl_txn,
> + struct ovsdb_idl_index
*sbrec_datapath_binding_by_key,
> + struct ovsdb_idl_index *sbrec_port_binding_by_key,
> + const struct sbrec_chassis *chassis)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + if (!ovnsb_idl_txn) {
> + return;
> + }
> +
> + struct activated_port *pp;
> + LIST_FOR_EACH_SAFE (pp, list, &ports_to_activate_in_db) {
> + const struct sbrec_port_binding *pb = lport_lookup_by_key(
> + sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
> + pp->dp_key, pp->port_key);
> + if (!pb || lport_is_activated_by_activation_strategy(pb,
chassis)) {
> + ovs_list_remove(&pp->list);
> + free(pp);
> + continue;
> + }
> + const char *activated_chassis = smap_get(
> + &pb->options, "additional-chassis-activated");
> + char *activated_str;
> + if (activated_chassis) {
> + activated_str = xasprintf(
> + "%s,%s", activated_chassis, chassis->name);
> + sbrec_port_binding_update_options_setkey(
> + pb, "additional-chassis-activated", activated_str);
> + free(activated_str);
> + } else {
> + sbrec_port_binding_update_options_setkey(
> + pb, "additional-chassis-activated", chassis->name);
> + }
I have a concern here, but I think it is OK to address it as a TODO for the
future:
If ovn-controller is restarted after the RARP but before the change is sent
to SB DB, would ovn-controller still *think* the port is not activated and
still block it?
Thanks again for the revisions. With the comment in wait_activated_ports()
addressed:
Acked-by: Han Zhou <[email protected]>
Regards,
Han
> + }
> +}
> +
> +static void
> +pinctrl_rarp_activation_strategy_handler(const struct match *md)
> + OVS_REQUIRES(pinctrl_mutex)
> +{
> + /* Tag the port as activated in-memory. */
> + struct activated_port *pp = xmalloc(sizeof *pp);
> + pp->port_key = md->flow.regs[MFF_LOG_INPORT - MFF_REG0];
> + pp->dp_key = ntohll(md->flow.metadata);
> + ovs_list_push_front(&ports_to_activate_in_db, &pp->list);
> +
> + pp = xmalloc(sizeof *pp);
> + pp->port_key = md->flow.regs[MFF_LOG_INPORT - MFF_REG0];
> + pp->dp_key = ntohll(md->flow.metadata);
> + ovs_list_push_front(ports_to_activate_in_engine, &pp->list);
> +
> + /* Notify main thread on pending additional-chassis-activated
updates. */
> + notify_pinctrl_main();
> +}
> +
> static struct hmap put_fdbs;
>
> /* MAC learning (fdb) related functions. Runs within the main
> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> index 88f18e983..0b6523baa 100644
> --- a/controller/pinctrl.h
> +++ b/controller/pinctrl.h
> @@ -20,6 +20,7 @@
> #include <stdint.h>
>
> #include "lib/sset.h"
> +#include "openvswitch/list.h"
> #include "openvswitch/meta-flow.h"
>
> struct hmap;
> @@ -33,6 +34,7 @@ struct sbrec_dns_table;
> struct sbrec_controller_event_table;
> struct sbrec_service_monitor_table;
> struct sbrec_bfd_table;
> +struct sbrec_port_binding;
>
> void pinctrl_init(void);
> void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> @@ -56,4 +58,13 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> void pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn);
> void pinctrl_destroy(void);
> void pinctrl_set_br_int_name(char *br_int_name);
> +
> +struct activated_port {
> + uint32_t dp_key;
> + uint32_t port_key;
> + struct ovs_list list;
> +};
> +
> +struct ovs_list *get_ports_to_activate_in_engine(void);
> +bool pinctrl_is_port_activated(int64_t dp_key, int64_t port_key);
> #endif /* controller/pinctrl.h */
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 1ae496960..33c319f1c 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -683,6 +683,9 @@ enum action_opcode {
> /* put_fdb(inport, eth.src).
> */
> ACTION_OPCODE_PUT_FDB,
> +
> + /* activation_strategy_rarp() */
> + ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
> };
>
> /* Header. */
> diff --git a/northd/northd.c b/northd/northd.c
> index 0d6ebccde..4d6193589 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -3499,6 +3499,16 @@ ovn_port_update_sbrec(struct northd_input
*input_data,
> smap_add(&options, "vlan-passthru", "true");
> }
>
> + /* Retain activated chassis flags. */
> + if (op->sb->requested_additional_chassis) {
> + const char *activated_str = smap_get(
> + &op->sb->options, "additional-chassis-activated");
> + if (activated_str) {
> + smap_add(&options, "additional-chassis-activated",
> + activated_str);
> + }
> + }
> +
> sbrec_port_binding_set_options(op->sb, &options);
> smap_destroy(&options);
> if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index e4e980720..ab28756af 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -107,7 +107,10 @@ static const char *rbac_port_binding_auth[] =
> static const char *rbac_port_binding_update[] =
> {"chassis", "additional_chassis",
> "encap", "additional_encap",
> - "up", "virtual_parent"};
> + "up", "virtual_parent",
> + /* NOTE: we only need to update the additional-chassis-activated
key,
> + * but RBAC_Role doesn't support mutate operation for subkeys. */
> + "options"};
>
> static const char *rbac_mac_binding_auth[] =
> {""};
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 14a624c16..9c09de8d8 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -1052,6 +1052,17 @@
> </p>
> </column>
>
> + <column name="options" key="activation-strategy">
> + If used with multiple chassis set in
> + <ref column="requested-chassis"/>, specifies an activation
strategy
> + for all additional chassis. By default, no activation strategy
is
> + used, meaning additional port locations are immediately
available for
> + use. When set to "rarp", the port is blocked for ingress and
egress
> + communication until a RARP packet is sent from a new location.
The
> + "rarp" strategy is useful in live migration scenarios for
virtual
> + machines.
> + </column>
> +
> <column name="options" key="iface-id-ver">
> If set, this port will be bound by <code>ovn-controller</code>
> only if this same key and value is configured in the
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index 898f3676a..59ad3aa2d 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -3374,6 +3374,21 @@ tcp.flags = RST;
> </p>
> </column>
>
> + <column name="options" key="activation-strategy">
> + If used with multiple chassis set in <ref
column="requested-chassis"/>,
> + specifies an activation strategy for all additional chassis. By
> + default, no activation strategy is used, meaning additional port
> + locations are immediately available for use. When set to "rarp",
the
> + port is blocked for ingress and egress communication until a RARP
> + packet is sent from a new location. The "rarp" strategy is useful
> + in live migration scenarios for virtual machines.
> + </column>
> +
> + <column name="options" key="additional-chassis-activated">
> + When <ref column="activation-strategy"/> is set, this option
indicates
> + that the port was activated using the strategy specified.
> + </column>
> +
> <column name="options" key="iface-id-ver">
> If set, this port will be bound by <code>ovn-controller</code>
> only if this same key and value is configured in the
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 59d51f3e0..3215e9dc2 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -14924,6 +14924,371 @@ OVN_CLEANUP([hv1],[hv2],[hv3])
> AT_CLEANUP
> ])
>
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([options:activation-strategy for logical port])
> +ovn_start
> +
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +
> +sim_add hv2
> +as hv2
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.12
> +
> +sim_add hv3
> +as hv3
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.13
> +
> +# Disable local ARP responder to pass ARP requests through tunnels
> +check ovn-nbctl ls-add ls0 -- add Logical_Switch ls0 other_config
vlan-passthru=true
> +
> +check ovn-nbctl lsp-add ls0 migrator
> +check ovn-nbctl lsp-set-options migrator requested-chassis=hv1,hv2 \
> + activation-strategy=rarp
> +
> +check ovn-nbctl lsp-add ls0 first
> +check ovn-nbctl lsp-set-options first requested-chassis=hv1
> +check ovn-nbctl lsp-add ls0 second
> +check ovn-nbctl lsp-set-options second requested-chassis=hv2
> +check ovn-nbctl lsp-add ls0 outside
> +check ovn-nbctl lsp-set-options outside requested-chassis=hv3
> +
> +check ovn-nbctl lsp-set-addresses migrator "00:00:00:00:00:10 10.0.0.10"
> +check ovn-nbctl lsp-set-addresses first "00:00:00:00:00:01 10.0.0.1"
> +check ovn-nbctl lsp-set-addresses second "00:00:00:00:00:02 10.0.0.2"
> +check ovn-nbctl lsp-set-addresses outside "00:00:00:00:00:03 10.0.0.3"
> +
> +for hv in hv1 hv2; do
> + as $hv check ovs-vsctl -- add-port br-int migrator -- \
> + set Interface migrator external-ids:iface-id=migrator \
> + options:tx_pcap=$hv/migrator-tx.pcap \
> + options:rxq_pcap=$hv/migrator-rx.pcap
> +done
> +
> +as hv1 check ovs-vsctl -- add-port br-int first -- \
> + set Interface first external-ids:iface-id=first
> +as hv2 check ovs-vsctl -- add-port br-int second -- \
> + set Interface second external-ids:iface-id=second
> +as hv3 check ovs-vsctl -- add-port br-int outside -- \
> + set Interface outside external-ids:iface-id=outside
> +
> +for hv in hv1 hv2 hv3; do
> + wait_row_count Chassis 1 name=$hv
> +done
> +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> +hv3_uuid=$(fetch_column Chassis _uuid name=hv3)
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=migrator
> +wait_column "$hv1_uuid" Port_Binding requested_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding additional_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding requested_additional_chassis
logical_port=migrator
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=first
> +wait_column "$hv2_uuid" Port_Binding chassis logical_port=second
> +wait_column "$hv3_uuid" Port_Binding chassis logical_port=outside
> +
> +OVN_POPULATE_ARP
> +
> +send_arp() {
> + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> + local
request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa}
> + as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> + echo "${request}"
> +}
> +
> +send_rarp() {
> + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> + local
request=${eth_dst}${eth_src}80350001080006040001${eth_src}${spa}${eth_dst}${tpa}
> + as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> + echo "${request}"
> +}
> +
> +reset_pcap_file() {
> + local hv=$1
> + local iface=$2
> + local pcap_file=$3
> + as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=dummy-tx.pcap \
> +
options:rxq_pcap=dummy-rx.pcap
> + check rm -f ${pcap_file}*.pcap
> + as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=${pcap_file}-tx.pcap \
> +
options:rxq_pcap=${pcap_file}-rx.pcap
> +}
> +
> +reset_env() {
> + reset_pcap_file hv1 migrator hv1/migrator
> + reset_pcap_file hv2 migrator hv2/migrator
> + reset_pcap_file hv1 first hv1/first
> + reset_pcap_file hv2 second hv2/second
> + reset_pcap_file hv3 outside hv3/outside
> +
> + for port in hv1/migrator hv2/migrator hv1/first hv2/second
hv3/outside; do
> + : > $port.expected
> + done
> +}
> +
> +check_packets() {
> + OVN_CHECK_PACKETS([hv1/migrator-tx.pcap], [hv1/migrator.expected])
> + OVN_CHECK_PACKETS([hv2/migrator-tx.pcap], [hv2/migrator.expected])
> + OVN_CHECK_PACKETS([hv3/outside-tx.pcap], [hv3/outside.expected])
> + OVN_CHECK_PACKETS([hv1/first-tx.pcap], [hv1/first.expected])
> + OVN_CHECK_PACKETS([hv2/second-tx.pcap], [hv2/second.expected])
> +}
> +
> +migrator_spa=$(ip_to_hex 10 0 0 10)
> +first_spa=$(ip_to_hex 10 0 0 1)
> +second_spa=$(ip_to_hex 10 0 0 2)
> +outside_spa=$(ip_to_hex 10 0 0 3)
> +
> +reset_env
> +
> +# Packet from hv3:Outside arrives to hv1:Migrator
> +# hv3:Outside cannot reach hv2:Migrator because it is blocked by RARP
strategy
> +request=$(send_arp hv3 outside 000000000003 000000000010 $outside_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +
> +# Packet from hv1:First arrives to hv1:Migrator
> +# hv1:First cannot reach hv2:Migrator because it is blocked by RARP
strategy
> +request=$(send_arp hv1 first 000000000001 000000000010 $first_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +
> +# Packet from hv2:Second arrives to hv1:Migrator
> +# hv2:Second cannot reach hv2:Migrator because it is blocked by RARP
strategy
> +request=$(send_arp hv2 second 000000000002 000000000010 $second_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +
> +check_packets
> +reset_env
> +
> +# Packet from hv1:Migrator arrives to hv3:Outside
> +request=$(send_arp hv1 migrator 000000000010 000000000003 $migrator_spa
$outside_spa)
> +echo $request >> hv3/outside.expected
> +
> +# Packet from hv1:Migrator arrives to hv1:First
> +request=$(send_arp hv1 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +echo $request >> hv1/first.expected
> +
> +# Packet from hv1:Migrator arrives to hv2:Second
> +request=$(send_arp hv1 migrator 000000000010 000000000002 $migrator_spa
$second_spa)
> +echo $request >> hv2/second.expected
> +
> +check_packets
> +reset_env
> +
> +# hv2:Migrator cannot reach to hv3:Outside because it is blocked by RARP
strategy
> +request=$(send_arp hv2 migrator 000000000010 000000000003 $migrator_spa
$outside_spa)
> +
> +check_packets
> +reset_env
> +
> +AT_CHECK([ovn-sbctl find port_binding logical_port=migrator | grep -q
additional-chassis-activated], [1])
> +
> +# Now activate hv2:Migrator location
> +request=$(send_rarp hv2 migrator 000000000010 ffffffffffff $migrator_spa
$migrator_spa)
> +
> +# RARP was reinjected into the pipeline
> +echo $request >> hv3/outside.expected
> +echo $request >> hv1/first.expected
> +echo $request >> hv2/second.expected
> +
> +check_packets
> +reset_env
> +
> +pb_uuid=$(ovn-sbctl --bare --columns _uuid find Port_Binding
logical_port=migrator)
> +OVS_WAIT_UNTIL([test xhv2 = x$(ovn-sbctl get Port_Binding $pb_uuid
options:additional-chassis-activated | tr -d '""')])
> +
> +# Now packet arrives to both locations
> +request=$(send_arp hv3 outside 000000000003 000000000010 $outside_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +echo $request >> hv2/migrator.expected
> +
> +check_packets
> +reset_env
> +
> +# Packet from hv1:Migrator still arrives to hv3:Outside
> +request=$(send_arp hv1 migrator 000000000010 000000000003 $migrator_spa
$outside_spa)
> +echo $request >> hv3/outside.expected
> +
> +check_packets
> +reset_env
> +
> +# hv2:Migrator can now reach to hv3:Outside because RARP strategy activated it
> +request=$(send_arp hv2 migrator 000000000010 000000000003 $migrator_spa
$outside_spa)
> +echo $request >> hv3/outside.expected
> +
> +check_packets
> +
> +# complete port migration and check that -activated flag is reset
> +check ovn-nbctl lsp-set-options migrator requested-chassis=hv2
> +OVS_WAIT_UNTIL([test x = x$(ovn-sbctl get Port_Binding $pb_uuid
options:additional-chassis-activated)])
> +
> +OVN_CLEANUP([hv1],[hv2],[hv3])
> +
> +AT_CLEANUP
> +])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([options:activation-strategy=rarp is not waiting for southbound
db])
> +# TODO: remove it when we find a way to make vswitchd forward packets to
> +# controller() handler when ovsdb-server is down
> +AT_SKIP_IF([true])
> +ovn_start
> +
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +
> +sim_add hv2
> +as hv2
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.12
> +
> +# Disable local ARP responder to pass ARP requests through tunnels
> +check ovn-nbctl ls-add ls0 -- add Logical_Switch ls0 other_config
vlan-passthru=true
> +
> +check ovn-nbctl lsp-add ls0 migrator
> +check ovn-nbctl lsp-set-options migrator requested-chassis=hv1,hv2 \
> + activation-strategy=rarp
> +
> +check ovn-nbctl lsp-add ls0 first
> +check ovn-nbctl lsp-set-options first requested-chassis=hv1
> +
> +check ovn-nbctl lsp-set-addresses migrator "00:00:00:00:00:10 10.0.0.10"
> +check ovn-nbctl lsp-set-addresses first "00:00:00:00:00:01 10.0.0.1"
> +
> +for hv in hv1 hv2; do
> + as $hv check ovs-vsctl -- add-port br-int migrator -- \
> + set Interface migrator external-ids:iface-id=migrator \
> + options:tx_pcap=$hv/migrator-tx.pcap \
> + options:rxq_pcap=$hv/migrator-rx.pcap
> +done
> +
> +as hv1 check ovs-vsctl -- add-port br-int first -- \
> + set Interface first external-ids:iface-id=first
> +
> +for hv in hv1 hv2; do
> + wait_row_count Chassis 1 name=$hv
> +done
> +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=migrator
> +wait_column "$hv1_uuid" Port_Binding requested_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding additional_chassis
logical_port=migrator
> +wait_column "$hv2_uuid" Port_Binding requested_additional_chassis
logical_port=migrator
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=first
> +
> +OVN_POPULATE_ARP
> +
> +send_arp() {
> + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> + local
request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa}
> + as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> + echo "${request}"
> +}
> +
> +send_rarp() {
> + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6
> + local
request=${eth_dst}${eth_src}80350001080006040001${eth_src}${spa}${eth_dst}${tpa}
> + as ${hv} ovs-appctl netdev-dummy/receive $inport $request
> + echo "${request}"
> +}
> +
> +reset_pcap_file() {
> + local hv=$1
> + local iface=$2
> + local pcap_file=$3
> + as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=dummy-tx.pcap \
> +
options:rxq_pcap=dummy-rx.pcap
> + check rm -f ${pcap_file}*.pcap
> + as $hv check ovs-vsctl -- set Interface $iface
options:tx_pcap=${pcap_file}-tx.pcap \
> +
options:rxq_pcap=${pcap_file}-rx.pcap
> +}
> +
> +reset_env() {
> + reset_pcap_file hv1 migrator hv1/migrator
> + reset_pcap_file hv2 migrator hv2/migrator
> + reset_pcap_file hv1 first hv1/first
> +
> + for port in hv1/migrator hv2/migrator hv1/first; do
> + : > $port.expected
> + done
> +}
> +
> +check_packets() {
> + OVN_CHECK_PACKETS([hv1/migrator-tx.pcap], [hv1/migrator.expected])
> + OVN_CHECK_PACKETS([hv2/migrator-tx.pcap], [hv2/migrator.expected])
> + OVN_CHECK_PACKETS([hv1/first-tx.pcap], [hv1/first.expected])
> +}
> +
> +migrator_spa=$(ip_to_hex 10 0 0 10)
> +first_spa=$(ip_to_hex 10 0 0 1)
> +
> +reset_env
> +
> +# Packet from hv1:First arrives to hv1:Migrator
> +# hv1:First cannot reach hv2:Migrator because it is blocked by RARP strategy
> +request=$(send_arp hv1 first 000000000001 000000000010 $first_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +
> +check_packets
> +reset_env
> +
> +# Packet from hv1:Migrator arrives to hv1:First
> +request=$(send_arp hv1 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +echo $request >> hv1/first.expected
> +
> +check_packets
> +reset_env
> +
> +# hv2:Migrator cannot reach to hv1:First because it is blocked by RARP strategy
> +request=$(send_arp hv2 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +
> +check_packets
> +reset_env
> +
> +# Before proceeding, stop ovsdb-server to make sure we test in the environment
> +# that can't remove flows triggered by updates to database
> +as hv2
> +SVCPID=$(cat $OVS_RUNDIR/ovsdb-server.pid)
> +kill -9 $SVCPID
> +
> +# Now activate hv2:Migrator location
> +request=$(send_rarp hv2 migrator 000000000010 ffffffffffff $migrator_spa
$migrator_spa)
> +
> +# RARP was reinjected into the pipeline
> +echo $request >> hv1/first.expected
> +
> +# Now packet from hv1:First arrives to both locations
> +request=$(send_arp hv1 first 000000000001 000000000010 $first_spa
$migrator_spa)
> +echo $request >> hv1/migrator.expected
> +echo $request >> hv2/migrator.expected
> +
> +# Packet from hv1:Migrator still arrives to hv1:First
> +request=$(send_arp hv1 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +echo $request >> hv1/first.expected
> +
> +# hv2:Migrator can now reach to hv1:First because RARP strategy activated it
> +request=$(send_arp hv2 migrator 000000000010 000000000001 $migrator_spa
$first_spa)
> +echo $request >> hv1/first.expected
> +
> +check_packets
> +
> +OVN_CLEANUP([hv1],[hv2])
> +
> +AT_CLEANUP
> +])
> +
> OVN_FOR_EACH_NORTHD([
> AT_SETUP([options:requested-chassis for logical port])
> ovn_start
> --
> 2.34.1
>
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev