> When packets go between AZs through a transit router for the first > time, there isn't any MAC binding for the remote port equivalent. The > TR will properly generate an ARP/ND NS packet that will arrive at the > remote AZ; however, the response would never leave the remote AZ. As a > consequence, the local AZ would never learn this MAC binding. > > To prevent the described behavior, add a new table that will contain > all remote chassis and corresponding encapsulations that allow us > to just flood all chassis with any packet that will be sent to this > table. At the same time, add a new action that sends the packet to this > table. > > In order to properly generate the MAC binding, we need to redirect the ARP > into ingress instead of egress, which is the usual path for packets > received from tunnels. > Add flows that will match on ARP and ND NA in combination with an > outport of 0, which should indicate that this is the remote flood flow. > The only exception is VXLAN, which doesn't have enough space for outport > encoding; in that case we need to send the packet to both ingress > and egress, as we cannot determine whether it was part of the remote flood > or a regular packet that arrived from another chassis in the same AZ. > > Signed-off-by: Ales Musil <amu...@redhat.com>
Acked-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com> > --- > v3: Rebase on top of latest main. > Fix the ARP loop. > v2: Rebase on top of latest main. > Slightly adjust IP of GW in the multinode test. > --- > controller/lflow.c | 1 + > controller/lflow.h | 4 + > controller/physical.c | 198 +++++++++++++++++++++++++++++++++---- > include/ovn/actions.h | 3 + > lib/actions.c | 17 ++++ > northd/northd.c | 12 ++- > tests/multinode-macros.at | 48 +++++++++ > tests/multinode.at | 201 ++++++++++++++++++++++++++++++++++++++ > tests/ovn-controller.at | 65 ++++++++++++ > tests/ovn-macros.at | 1 + > tests/ovn.at | 10 +- > tests/test-ovn.c | 1 + > tutorial/ovn-sandbox | 24 ++--- > utilities/ovn-trace.c | 3 + > 14 files changed, 552 insertions(+), 36 deletions(-) > > diff --git a/controller/lflow.c b/controller/lflow.c > index 856261f40..5314449a0 100644 > --- a/controller/lflow.c > +++ b/controller/lflow.c > @@ -896,6 +896,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow > *lflow, > .ct_nw_dst_load_table = OFTABLE_CT_ORIG_NW_DST_LOAD, > .ct_ip6_dst_load_table = OFTABLE_CT_ORIG_IP6_DST_LOAD, > .ct_tp_dst_load_table = OFTABLE_CT_ORIG_TP_DST_LOAD, > + .flood_remote_table = OFTABLE_FLOOD_REMOTE_CHASSIS, > .ctrl_meter_id = ctrl_meter_id, > .common_nat_ct_zone = get_common_nat_zone(ldp), > }; > diff --git a/controller/lflow.h b/controller/lflow.h > index 206328f9e..b27721baa 100644 > --- a/controller/lflow.h > +++ b/controller/lflow.h > @@ -98,6 +98,10 @@ struct uuid; > #define OFTABLE_CT_ORIG_NW_DST_LOAD 81 > #define OFTABLE_CT_ORIG_IP6_DST_LOAD 82 > #define OFTABLE_CT_ORIG_TP_DST_LOAD 83 > +#define OFTABLE_FLOOD_REMOTE_CHASSIS 84 > + > +/* Common defines shared between some controller components. 
*/ > +#define CHASSIS_FLOOD_INDEX_START 0x8000 > > > struct lflow_ctx_in { > diff --git a/controller/physical.c b/controller/physical.c > index bbc97ee30..5d088302a 100644 > --- a/controller/physical.c > +++ b/controller/physical.c > @@ -185,6 +185,84 @@ put_encapsulation(enum mf_field_id mff_ovn_geneve, > } > } > > +static void > +put_decapsulation(enum mf_field_id mff_ovn_geneve, > + const struct chassis_tunnel *tun, > + struct ofpbuf *ofpacts) > +{ > + if (tun->type == GENEVE) { > + put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, ofpacts); > + put_move(mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15, ofpacts); > + put_move(mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16, ofpacts); > + } else if (tun->type == STT) { > + put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, ofpacts); > + put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, ofpacts); > + put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, ofpacts); > + } else if (tun->type == VXLAN) { > + /* Add flows for non-VTEP tunnels. Split VNI into two 12-bit > + * sections and use them for datapath and outport IDs. 
*/ > + put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 12, ofpacts); > + put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, ofpacts); > + } else { > + OVS_NOT_REACHED(); > + } > +} > + > + > +static void > +put_remote_chassis_flood_encap(struct ofpbuf *ofpacts, > + enum chassis_tunnel_type type, > + enum mf_field_id mff_ovn_geneve) > +{ > + if (type == GENEVE) { > + put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 24, ofpacts); > + put_load(0, mff_ovn_geneve, 0, 32, ofpacts); > + put_move(MFF_LOG_INPORT, 0, mff_ovn_geneve, 16, 15, ofpacts); > + } else if (type == STT) { > + put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 40, 15, ofpacts); > + put_load(0, MFF_TUN_ID, 24, 16, ofpacts); > + put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 24, ofpacts); > + } else if (type == VXLAN) { > + put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 12, 12, ofpacts); > + put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 12, ofpacts); > + } else { > + OVS_NOT_REACHED(); > + } > +} > + > +static void > +match_set_chassis_flood_outport(struct match *match, > + enum chassis_tunnel_type type, > + enum mf_field_id mff_ovn_geneve) > +{ > + if (type == GENEVE) { > + /* Outport occupies the lower half of tunnel metadata (0-15). */ > + union mf_value value, mask; > + memset(&value, 0, sizeof value); > + memset(&mask, 0, sizeof mask); > + > + const struct mf_field *mf_ovn_geneve = mf_from_id(mff_ovn_geneve); > + memset(&mask.tun_metadata[mf_ovn_geneve->n_bytes - 2], 0xff, 2); > + > + tun_metadata_set_match(mf_ovn_geneve, &value, &mask, match, NULL); > + } else if (type == STT) { > + /* Outport occupies bits 24-39. */ > + match_set_tun_id_masked(match, 0, htonll(UINT64_C(0xffff) << 24)); > + } > +} > + > +static void > +match_set_chassis_flood_remote(struct match *match, uint32_t index) > +{ > + match_init_catchall(match); > + match_set_reg(match, MFF_REG6 - MFF_REG0, index); > + /* Match if the packet wasn't already received from tunnel. > + * This prevents from looping it back to the tunnel again. 
*/ > + match_set_reg_masked(match, MFF_LOG_FLAGS - MFF_REG0, 0, > + MLF_RX_FROM_TUNNEL); > +} > + > + > static void > put_stack(enum mf_field_id field, struct ofpact_stack *stack) > { > @@ -2367,6 +2445,105 @@ consider_mc_group(const struct physical_ctx *ctx, > sset_destroy(&vtep_chassis); > } > > +#define CHASSIS_FLOOD_MAX_MSG_SIZE MC_OFPACTS_MAX_MSG_SIZE > + > +static void > +physical_eval_remote_chassis_flows(const struct physical_ctx *ctx, > + struct ofpbuf *egress_ofpacts, > + struct ovn_desired_flow_table *flow_table) > +{ > + struct match match = MATCH_CATCHALL_INITIALIZER; > + uint32_t index = CHASSIS_FLOOD_INDEX_START; > + struct chassis_tunnel *prev = NULL; > + > + uint8_t actions_stub[256]; > + struct ofpbuf ingress_ofpacts; > + ofpbuf_use_stub(&ingress_ofpacts, actions_stub, sizeof(actions_stub)); > + > + ofpbuf_clear(egress_ofpacts); > + > + const struct sbrec_chassis *chassis; > + SBREC_CHASSIS_TABLE_FOR_EACH (chassis, ctx->chassis_table) { > + if (!smap_get_bool(&chassis->other_config, "is-remote", false)) { > + continue; > + } > + > + struct chassis_tunnel *tun = > + chassis_tunnel_find(ctx->chassis_tunnels, chassis->name, > + NULL, NULL); > + if (!tun) { > + continue; > + } > + > + if (!(prev && prev->type == tun->type)) { > + put_remote_chassis_flood_encap(egress_ofpacts, tun->type, > + ctx->mff_ovn_geneve); > + } > + > + ofpact_put_OUTPUT(egress_ofpacts)->port = tun->ofport; > + prev = tun; > + > + if (egress_ofpacts->size > CHASSIS_FLOOD_MAX_MSG_SIZE) { > + match_set_chassis_flood_remote(&match, index++); > + put_split_buf_function(index, 0, OFTABLE_FLOOD_REMOTE_CHASSIS, > + egress_ofpacts); > + > + ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0, > + &match, egress_ofpacts, hc_uuid); > + > + ofpbuf_clear(egress_ofpacts); > + prev = NULL; > + } > + > + > + ofpbuf_clear(&ingress_ofpacts); > + put_load(1, MFF_LOG_FLAGS, MLF_RX_FROM_TUNNEL_BIT, 1, > + &ingress_ofpacts); > + put_decapsulation(ctx->mff_ovn_geneve, tun, 
&ingress_ofpacts); > + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ingress_ofpacts); > + if (tun->type == VXLAN) { > + /* VXLAN doesn't carry the inport information, we cannot set > + * the outport to 0 then and match on it. */ > + put_resubmit(OFTABLE_LOCAL_OUTPUT, &ingress_ofpacts); > + } > + > + /* Add match on ARP response coming from remote chassis. */ > + match_init_catchall(&match); > + match_set_in_port(&match, tun->ofport); > + match_set_dl_type(&match, htons(ETH_TYPE_ARP)); > + match_set_arp_opcode_masked(&match, 2, UINT8_MAX); > + match_set_chassis_flood_outport(&match, tun->type, > + ctx->mff_ovn_geneve); > + > + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, > + chassis->header_.uuid.parts[0], > + &match, &ingress_ofpacts, hc_uuid); > + > + /* Add match on ND NA coming from remote chassis. */ > + match_init_catchall(&match); > + match_set_in_port(&match, tun->ofport); > + match_set_dl_type(&match, htons(ETH_TYPE_IPV6)); > + match_set_nw_proto(&match, IPPROTO_ICMPV6); > + match_set_icmp_type(&match, 136); > + match_set_icmp_code(&match, 0); > + match_set_chassis_flood_outport(&match, tun->type, > + ctx->mff_ovn_geneve); > + > + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, > + chassis->header_.uuid.parts[0], > + &match, &ingress_ofpacts, hc_uuid); > + } > + > + if (egress_ofpacts->size > 0) { > + match_set_chassis_flood_remote(&match, index++); > + > + ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0, > + &match, egress_ofpacts, hc_uuid); > + } > + > + ofpbuf_uninit(&ingress_ofpacts); > +} > + > static void > physical_eval_port_binding(struct physical_ctx *p_ctx, > const struct sbrec_port_binding *pb, > @@ -2531,24 +2708,7 @@ physical_run(struct physical_ctx *p_ctx, > match_set_in_port(&match, tun->ofport); > > ofpbuf_clear(&ofpacts); > - if (tun->type == GENEVE) { > - put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts); > - put_move(p_ctx->mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15, > - &ofpacts); > - 
put_move(p_ctx->mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16, > - &ofpacts); > - } else if (tun->type == STT) { > - put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, &ofpacts); > - put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, &ofpacts); > - put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts); > - } else if (tun->type == VXLAN) { > - /* Add flows for non-VTEP tunnels. Split VNI into two 12-bit > - * sections and use them for datapath and outport IDs. */ > - put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 12, &ofpacts); > - put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, &ofpacts); > - } else { > - OVS_NOT_REACHED(); > - } > + put_decapsulation(p_ctx->mff_ovn_geneve, tun, &ofpacts); > > put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts); > ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match, > @@ -2800,5 +2960,7 @@ physical_run(struct physical_ctx *p_ctx, > ofctrl_add_flow(flow_table, OFTABLE_CT_ORIG_IP6_DST_LOAD, 100, 0, &match, > &ofpacts, hc_uuid); > > + physical_eval_remote_chassis_flows(p_ctx, &ofpacts, flow_table); > + > ofpbuf_uninit(&ofpacts); > } > diff --git a/include/ovn/actions.h b/include/ovn/actions.h > index 7e0670a11..73beeeee9 100644 > --- a/include/ovn/actions.h > +++ b/include/ovn/actions.h > @@ -134,6 +134,7 @@ struct collector_set_ids; > OVNACT(CT_ORIG_NW_DST, ovnact_result) \ > OVNACT(CT_ORIG_IP6_DST, ovnact_result) \ > OVNACT(CT_ORIG_TP_DST, ovnact_result) \ > + OVNACT(FLOOD_REMOTE, ovnact_null) \ > > /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */ > enum OVS_PACKED_ENUM ovnact_type { > @@ -945,6 +946,8 @@ struct ovnact_encode_params { > * to resubmit. */ > uint32_t ct_tp_dst_load_table; /* OpenFlow table for 'ct_tp_dst' > * to resubmit. */ > + uint32_t flood_remote_table; /* OpenFlow table for 'chassis_flood' > + * to resubmit. 
*/ > }; > > void ovnacts_encode(const struct ovnact[], size_t ovnacts_len, > diff --git a/lib/actions.c b/lib/actions.c > index 388846eff..14e86478f 100644 > --- a/lib/actions.c > +++ b/lib/actions.c > @@ -5527,6 +5527,21 @@ format_CT_ORIG_TP_DST(const struct ovnact_result *res, > struct ds *s) > ds_put_cstr(s, " = ct_tp_dst();"); > } > > +static void > +format_FLOOD_REMOTE(const struct ovnact_null *null OVS_UNUSED, struct ds *s) > +{ > + ds_put_cstr(s, "flood_remote;"); > +} > + > +static void > +encode_FLOOD_REMOTE(const struct ovnact_null *null OVS_UNUSED, > + const struct ovnact_encode_params *ep, > + struct ofpbuf *ofpacts) > +{ > + put_load(CHASSIS_FLOOD_INDEX_START, MFF_REG6, 0, 32, ofpacts); > + emit_resubmit(ofpacts, ep->flood_remote_table); > +} > + > /* Parses an assignment or exchange or put_dhcp_opts action. */ > static void > parse_set_action(struct action_context *ctx) > @@ -5754,6 +5769,8 @@ parse_action(struct action_context *ctx) > parse_sample(ctx); > } else if (lexer_match_id(ctx->lexer, "mac_cache_use")) { > ovnact_put_MAC_CACHE_USE(ctx->ovnacts); > + } else if (lexer_match_id(ctx->lexer, "flood_remote")) { > + ovnact_put_FLOOD_REMOTE(ctx->ovnacts); > } else { > lexer_syntax_error(ctx->lexer, "expecting action"); > } > diff --git a/northd/northd.c b/northd/northd.c > index c4368646d..751e89863 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -13155,21 +13155,22 @@ build_neigh_learning_flows_for_lrouter( > * */ > > /* Flows for LOOKUP_NEIGHBOR. */ > + const char *flood = od->is_transit_router ? "flood_remote; " : ""; > bool learn_from_arp_request = smap_get_bool(&od->nbr->options, > "always_learn_from_arp_request", true); > ds_clear(actions); > ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT > - " = lookup_arp(inport, arp.spa, arp.sha); %snext;", > + " = lookup_arp(inport, arp.spa, arp.sha); %s%snext;", > learn_from_arp_request ? 
"" : > - REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; "); > + REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ", flood); > ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, > "arp.op == 2", ds_cstr(actions), lflow_ref); > > ds_clear(actions); > ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT > - " = lookup_nd(inport, nd.target, nd.tll); %snext;", > + " = lookup_nd(inport, nd.target, nd.tll); %s%snext;", > learn_from_arp_request ? "" : > - REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; "); > + REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ", flood); > ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na", > ds_cstr(actions), lflow_ref); > > @@ -13185,7 +13186,8 @@ build_neigh_learning_flows_for_lrouter( > ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT > " = lookup_nd(inport, nd.target, nd.tll); " > REGBIT_LOOKUP_NEIGHBOR_IP_RESULT > - " = lookup_nd_ip(inport, nd.target); next;"); > + " = lookup_nd_ip(inport, nd.target); %snext;", > + flood); > ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 110, > "nd_na && ip6.src == fe80::/10 && ip6.dst == ff00::/8", > ds_cstr(actions), lflow_ref); > diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at > index 698d2c625..29f0711e6 100644 > --- a/tests/multinode-macros.at > +++ b/tests/multinode-macros.at > @@ -112,6 +112,54 @@ cleanup_multinode_resources_by_nodes() { > done > } > > +# multinode_cleanup_northd NODE > +# > +# Removes previously set nothd on specified node > +multinode_cleanup_northd() { > + c=$1 > + # Cleanup existing one > + m_as $c /usr/share/ovn/scripts/ovn-ctl stop_northd > + m_as $c rm -f /etc/ovn/*.db > +} > + > +# multinode_setup_northd NODE > +# > +# Sets up northd on specified node. 
> +multinode_setup_northd() { > + c=$1 > + > + multinode_cleanup_northd $c > + > + m_as $c /usr/share/ovn/scripts/ovn-ctl start_northd > + m_as $c ovn-nbctl set-connection ptcp:6641 > + m_as $c ovn-sbctl set-connection ptcp:6642 > +} > + > +# multinode_setup_controller NODE ENCAP_IP REMOTE_IP [ENCAP_TYPE] > +# > +# Sets up controller on specified node. > +multinode_setup_controller() { > + c=$1 > + encap_ip=$3 > + remote_ip=$4 > + encap_type=${5:-"geneve"} > + > + # Cleanup existing one > + m_as $c /usr/share/openvswitch/scripts/ovs-ctl stop > + m_as $c /usr/share/ovn/scripts/ovn-ctl stop_controller > + m_as $c rm -f /etc/openvswitch/*.db > + > + m_as $c /usr/share/openvswitch/scripts/ovs-ctl start --system-id=$c > + m_as $c /usr/share/ovn/scripts/ovn-ctl start_controller > + > + m_as $c ovs-vsctl set open . external_ids:ovn-encap-ip=$encap_ip > + m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=$encap_type > + m_as $c ovs-vsctl set open . external-ids:ovn-remote=tcp:$remote_ip:6642 > + m_as $c ovs-vsctl set open . external-ids:ovn-openflow-probe-interval=60 > + m_as $c ovs-vsctl set open . > external-ids:ovn-remote-probe-interval=180000 > + m_as $c ovs-vsctl set open . external-ids:ovn-bridge-datapath-type=system > +} > + > # m_count_rows TABLE [CONDITION...] > # > # Prints the number of rows in TABLE (that satisfy CONDITION). > diff --git a/tests/multinode.at b/tests/multinode.at > index 9602358aa..c1bd3123a 100644 > --- a/tests/multinode.at > +++ b/tests/multinode.at > @@ -2575,3 +2575,204 @@ Connected to 10.0.2.4 (10.0.2.4) port 8080 > fi > > AT_CLEANUP > + > +AT_SETUP([ovn multinode - Transit Router basic functionality]) > + > +# Check that ovn-fake-multinode setup is up and running > +check_fake_multinode_setup > + > +# Delete the multinode NB and OVS resources before starting the test. 
> +cleanup_multinode_resources > + > +# Network topology > +# ┌─────────────────────────────────┐ > ┌────────────────────────────────┐ > +# │ │ │ > │ > +# │ ┌───────────────────┐ AZ1 │ │ AZ2 ┌───────────────────┐ > │ > +# │ │ external │ │ │ │ │ > │ > +# │ │ │ │ │ │ │ > │ > +# │ │ 192.168.100.10/24 │ │ │ │ ................. │ > │ > +# │ │ 1000::10/64 │ │ │ │ │ > │ > +# │ └─────────┬─────────┘ │ │ └─────────┬─────────┘ > │ > +# │ │ │ │ │ > │ > +# │ │ │ │ │ > │ > +# │ ┌─────────┴─────────┐ │ │ ┌─────────┴─────────┐ > │ > +# │ │ 192.168.100.1/24 │ │ │ │ 192.168.100.2/24 │ > │ > +# │ │ 1000::1/64 │ │ │ │ 1000::2/64 │ > │ > +# │ │ │ │ │ │ │ > │ > +# │ │ GW │ │ │ │ GW │ > │ > +# │ │ │ │ │ │ │ > │ > +# │ │ 100.65.0.1/30 │ │ │ │ 100.65.0.5/30 │ > │ > +# │ │ 100:65::1/126 │ │ │ │ 100:65::5/126 │ > │ > +# │ └─────────┬─────────┘ │ │ └───────────────────┘ > │ > +# │ │ │ │ │ > │ > +# │ │ Peer ports │ │ │ Peer > ports │ > +# │ │ │ │ │ > │ > +# │ ┌─────────┴──────────────────│─────│──────────────────┴─────────┐ > │ > +# │ │ 100.65.0.2/30 │ │ 100.65.0.6/30 │ > │ > +# │ │ 100:65::2/126 │ │ 100:65::6/126 │ > │ > +# │ │ │ │ │ > │ > +# │ │ │ TR │ │ > │ > +# │ │ │ │ │ > │ > +# │ │ 10.100.200.1/24 │ │ 10.100.200.1/24 │ > │ > +# │ │ 10:200::1/64 │ │ 10:200::1/64 │ > │ > +# │ └─────────┬──────────────────│─────│────────────────────────────┘ > │ > +# │ │ │ │ │ > │ > +# │ │ │ │ │ > │ > +# │ │ │ │ │ > │ > +# │ ┌─────────┴──────────────────│─────│────────────────────────────┐ > │ > +# │ │ │ TS │ │ > │ > +# │ └─────────┬──────────────────│─────│────────────────────────────┘ > │ > +# │ │ │ │ │ > │ > +# │ │ │ │ │ > │ > +# │ │ │ │ │ > │ > +# │ ┌─────────┴─────────┐ │ │ ┌─────────┴─────────┐ > │ > +# │ │ pod10 │ │ │ │ pod20 │ > │ > +# │ │ │ │ │ │ │ > │ > +# │ │ 10.100.200.10/24 │ │ │ │ 10.100.200.20/24 │ > │ > +# │ │ 10:200::10/64 │ │ │ │ 10:200::20/64 │ > │ > +# │ └───────────────────┘ │ │ └───────────────────┘ > │ > +# └─────────────────────────────────┘ > └────────────────────────────────┘ > + 
> +for i in 1 2; do > + chassis="ovn-chassis-$i" > + ip=$(m_as $chassis ip -4 addr show eth1 | grep inet | awk '{print $2}' | > cut -d'/' -f1) > + > + multinode_setup_northd $chassis > + multinode_setup_controller $chassis $chassis $ip $ip > + > + check m_as $chassis ovs-vsctl set open . > external_ids:ovn-monitor-all=true > + check m_as $chassis ovs-vsctl set open . > external_ids:ovn-is-interconn=true > + > + check m_as $chassis ovn-nbctl ls-add public > + > + check m_as $chassis ovn-nbctl lsp-add public public-gw > + check m_as $chassis ovn-nbctl lsp-set-type public-gw router > + check m_as $chassis ovn-nbctl lsp-set-addresses public-gw router > + check m_as $chassis ovn-nbctl lsp-set-options public-gw > router-port=gw-public > + > + check m_as $chassis ovn-nbctl lr-add gw > + check m_as $chassis ovn-nbctl lrp-add gw gw-public 00:00:00:00:20:00 > 192.168.100.$i/24 1000::$i/64 > + > + check m_as $chassis ovn-nbctl set logical_router gw > options:chassis=$chassis > + > + # Add TR and set the same tunnel key for both chassis > + check m_as $chassis ovn-nbctl ls-add ts > + check m_as $chassis ovn-nbctl set logical_switch ts > other_config:requested-tnl-key=10 > + > + check m_as $chassis ovn-nbctl lsp-add ts ts-tr > + check m_as $chassis ovn-nbctl lsp-set-type ts-tr router > + check m_as $chassis ovn-nbctl lsp-set-addresses ts-tr router > + check m_as $chassis ovn-nbctl lsp-set-options ts-tr router-port=tr-ts > + > + check m_as $chassis ovn-nbctl lr-add tr > + check m_as $chassis ovn-nbctl lrp-add tr tr-ts 00:00:00:00:10:00 > 10.100.200.1/24 10:200::1/64 > + check m_as $chassis ovn-nbctl set logical_router tr > options:requested-tnl-key=20 > + > + # Add TS pods, with the same tunnel keys on both sides > + check m_as $chassis ovn-nbctl lsp-add ts pod10 > + check m_as $chassis ovn-nbctl lsp-set-addresses pod10 "00:00:00:00:10:10 > 10.100.200.10 10:200::10" > + check m_as $chassis ovn-nbctl set logical_switch_port pod10 > options:requested-tnl-key=10 > + > + check m_as 
$chassis ovn-nbctl lsp-add ts pod20 > + check m_as $chassis ovn-nbctl lsp-set-addresses pod20 "00:00:00:00:10:20 > 10.100.200.20 10:200::20" > + check m_as $chassis ovn-nbctl set logical_switch_port pod20 > options:requested-tnl-key=20 > +done > + > +# Add SNAT for the GW router that corresponds to "gw-tr" LRP IP > +check m_as ovn-chassis-1 ovn-nbctl lr-nat-add gw snat 100.65.0.1 > 192.168.100.0/24 > +check m_as ovn-chassis-1 ovn-nbctl lr-nat-add gw snat 100:65::1 1000::/64 > +check m_as ovn-chassis-2 ovn-nbctl lr-nat-add gw snat 100.65.0.5 > 192.168.100.0/24 > +check m_as ovn-chassis-2 ovn-nbctl lr-nat-add gw snat 100:65::5 1000::/64 > + > +# Add peer ports between GW and TR > +check m_as ovn-chassis-1 ovn-nbctl lrp-add gw gw-tr 00:00:00:00:30:01 > 100.65.0.1/30 100:65::1/126 peer=tr-gw > +check m_as ovn-chassis-1 ovn-nbctl lrp-add tr tr-gw 00:00:00:00:30:02 > 100.65.0.2/30 100:65::2/126 peer=gw-tr > + > +check m_as ovn-chassis-2 ovn-nbctl lrp-add gw gw-tr 00:00:00:00:30:05 > 100.65.0.5/30 100:65::5/126 peer=tr-gw > +check m_as ovn-chassis-2 ovn-nbctl lrp-add tr tr-gw 00:00:00:00:30:06 > 100.65.0.6/30 100:65::6/126 peer=gw-tr > + > +# Add routes for the TS subnet > +check m_as ovn-chassis-1 ovn-nbctl lr-route-add gw 10.100.200.0/24 100.65.0.2 > +check m_as ovn-chassis-1 ovn-nbctl lr-route-add gw 10:200::/64 100:65::2 > +check m_as ovn-chassis-2 ovn-nbctl lr-route-add gw 10.100.200.0/24 100.65.0.6 > +check m_as ovn-chassis-2 ovn-nbctl lr-route-add gw 10:200::/64 100:65::6 > + > +# Add mutual remote ports > +check m_as ovn-chassis-1 ovn-nbctl lrp-add tr tr-az2 00:00:00:00:30:06 > 100.65.0.6/30 100:65::6/126 > +check m_as ovn-chassis-1 ovn-nbctl set logical_router_port tr-az2 > options:requested-chassis=ovn-chassis-2 > + > +check m_as ovn-chassis-2 ovn-nbctl lrp-add tr tr-az1 00:00:00:00:30:02 > 100.65.0.2/30 100:65::2/126 > +check m_as ovn-chassis-2 ovn-nbctl set logical_router_port tr-az1 > options:requested-chassis=ovn-chassis-1 > + > +# Important set the proper 
tunnel keys > +check m_as ovn-chassis-1 ovn-nbctl set logical_router_port tr-gw > options:requested-tnl-key=10 > +check m_as ovn-chassis-1 ovn-nbctl set logical_router_port tr-az2 > options:requested-tnl-key=20 > + > +check m_as ovn-chassis-2 ovn-nbctl set logical_router_port tr-gw > options:requested-tnl-key=20 > +check m_as ovn-chassis-2 ovn-nbctl set logical_router_port tr-az1 > options:requested-tnl-key=10 > + > +check m_as ovn-chassis-1 ovn-nbctl lsp-add public external > +check m_as ovn-chassis-1 ovn-nbctl lsp-set-addresses external > "00:00:00:00:20:10 192.168.100.10 1000::10" > + > +# Add mutual chassis > +check m_as ovn-chassis-1 ovn-sbctl chassis-add ovn-chassis-2 geneve $(m_as > ovn-chassis-2 ip -4 addr show eth1 | grep inet | awk '{print $2}' | cut -d'/' > -f1) > +check m_as ovn-chassis-1 ovn-sbctl set chassis ovn-chassis-2 > other_config:is-remote=true > + > +check m_as ovn-chassis-2 ovn-sbctl chassis-add ovn-chassis-1 geneve $(m_as > ovn-chassis-1 ip -4 addr show eth1 | grep inet | awk '{print $2}' | cut -d'/' > -f1) > +check m_as ovn-chassis-2 ovn-sbctl set chassis ovn-chassis-1 > other_config:is-remote=true > + > +# Configure ports on the transit switch as remotes > +check m_as ovn-chassis-1 ovn-nbctl lsp-set-type pod20 remote > +check m_as ovn-chassis-1 ovn-nbctl lsp-set-options pod10 > requested-chassis=ovn-chassis-1 > +check m_as ovn-chassis-1 ovn-nbctl lsp-set-options pod20 > requested-chassis=ovn-chassis-2 > + > +check m_as ovn-chassis-2 ovn-nbctl lsp-set-type pod10 remote > +check m_as ovn-chassis-2 ovn-nbctl lsp-set-options pod10 > requested-chassis=ovn-chassis-1 > +check m_as ovn-chassis-2 ovn-nbctl lsp-set-options pod20 > requested-chassis=ovn-chassis-2 > + > +m_as ovn-chassis-1 /data/create_fake_vm.sh external external > 00:00:00:00:20:10 1500 192.168.100.10 24 192.168.100.1 1000::10/64 1000::1 > +m_as ovn-chassis-1 /data/create_fake_vm.sh pod10 pod10 00:00:00:00:10:10 > 1500 10.100.200.10 24 10.100.200.1 10:200::10/64 10:200::1 > +m_as 
ovn-chassis-2 /data/create_fake_vm.sh pod20 pod20 00:00:00:00:10:20 > 1500 10.100.200.20 24 10.100.200.1 10:200::20/64 10:200::1 > + > +# We cannot use any of the helpers as they assume that there is only single > ovn-northd instance running > +check m_as ovn-chassis-1 ovn-nbctl --wait=hv sync > +OVS_WAIT_UNTIL([test -n "$(m_as ovn-chassis-1 ovn-sbctl --bare --columns > _uuid find Port_Binding logical_port=external up=true)"]) > +OVS_WAIT_UNTIL([test -n "$(m_as ovn-chassis-1 ovn-sbctl --bare --columns > _uuid find Port_Binding logical_port=pod10 up=true)"]) > +check m_as ovn-chassis-2 ovn-nbctl --wait=hv sync > +OVS_WAIT_UNTIL([test -n "$(m_as ovn-chassis-2 ovn-sbctl --bare --columns > _uuid find Port_Binding logical_port=pod20 up=true)"]) > + > +M_NS_CHECK_EXEC([ovn-chassis-1], [external], [ping -q -c 5 -i 0.3 -w 2 > 10.100.200.20 | FORMAT_PING], \ > +[0], [dnl > +5 packets transmitted, 5 received, 0% packet loss, time 0ms > +]) > + > +M_NS_CHECK_EXEC([ovn-chassis-1], [external], [ping -q -c 5 -i 0.3 -w 2 > 10:200::20 | FORMAT_PING], \ > +[0], [dnl > +5 packets transmitted, 5 received, 0% packet loss, time 0ms > +]) > + > +check test $(m_as ovn-chassis-1 grep -c "skipping output to input port" \ > + /var/log/openvswitch/ovs-vswitchd.log) -eq 0 > +check test $(m_as ovn-chassis-2 grep -c "skipping output to input port" \ > + /var/log/openvswitch/ovs-vswitchd.log) -eq 0 > + > +echo "Chassis1" > +m_as ovn-chassis-1 ovn-sbctl show > +m_as ovn-chassis-1 ovn-nbctl show > +m_as ovn-chassis-1 ovs-vsctl show > + > +echo "Chassis2" > +m_as ovn-chassis-2 ovn-sbctl show > +m_as ovn-chassis-2 ovn-nbctl show > +m_as ovn-chassis-2 ovs-vsctl show > + > +# Connect the chassis back to the original northd and remove northd per > chassis. 
> +for i in 1 2; do > + chassis="ovn-chassis-$i" > + ip=$(m_as $chassis ip -4 addr show eth1 | grep inet | awk '{print $2}' | > cut -d'/' -f1) > + > + multinode_setup_controller $chassis $chassis $ip "170.168.0.2" > + multinode_cleanup_northd $chassis > +done > + > +AT_CLEANUP > diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at > index b1c57fc21..e208723bc 100644 > --- a/tests/ovn-controller.at > +++ b/tests/ovn-controller.at > @@ -3646,3 +3646,68 @@ AT_CHECK([grep -c "cookie=$lr1_peer_cookie," > log_to_phy_flows], [0], [dnl > > OVN_CLEANUP([hv1]) > AT_CLEANUP > + > +AT_SETUP([Remote chassis flood flows]) > +ovn_start > + > +net_add n1 > +sim_add hv1 > +as hv1 > +check ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.11 24 geneve,vxlan > + > +check ovs-vsctl set open . external_ids:ovn-is-interconn=true > + > +check ovn-sbctl chassis-add hv2 geneve 192.168.0.12 \ > + -- set chassis hv2 other_config:is-remote=true > + > +check ovn-sbctl chassis-add hv3 vxlan 192.168.0.14 \ > + -- set chassis hv3 other_config:is-remote=true > + > +check ovn-nbctl --wait=hv sync > + > +chassis_cookie() { > + name=$1 > + fetch_column chassis _uuid name=$name |\ > + cut -d '-' -f 1 | tr -d '\n' | sed 's/^0\{0,8\}//' > +} > + > +ovs-ofctl dump-flows --names --no-stats br-int table=OFTABLE_PHY_TO_LOG > > phy_to_log_flows > +ovs-ofctl dump-flows --names --no-stats br-int > table=OFTABLE_FLOOD_REMOTE_CHASSIS > flood_flows > + > +# Check that we have all encap + output actions one by one because the order > can change > +# Geneve > +AT_CHECK([grep -c > 'move:OXM_OF_METADATA\[[0..23\]]->NXM_NX_TUN_ID\[[0..23\]],set_field:0->tun_metadata0,move:NXM_NX_REG14\[[0..14\]]->NXM_NX_TUN_METADATA0\[[16..30\]],output:"ovn-hv2-0"' > flood_flows], [0], [dnl > +1 > +]) > + > +# VXLAN > +AT_CHECK([grep -c > 'move:NXM_NX_REG14\[[0..11\]]->NXM_NX_TUN_ID\[[12..23\]],move:OXM_OF_METADATA\[[0..11\]]->NXM_NX_TUN_ID\[[0..11\]],output:"ovn-hv3-0"' > flood_flows], [0], [dnl > +1 > +]) > + > 
+AT_CHECK([grep -c "reg6=0x8000" flood_flows], [0], [dnl > +1 > +]) > + > +AT_CHECK([grep -c "reg10=0/0x10000" flood_flows], [0], [dnl > +1 > +]) > + > +# Check ingress flows for ARP and ND NA > +# Geneve > +hv2_cookie="0x$(chassis_cookie hv2)" > +AT_CHECK_UNQUOTED([grep "cookie=$hv2_cookie," phy_to_log_flows], [0], [dnl > + cookie=$hv2_cookie, > priority=120,arp,tun_metadata0=0,in_port="ovn-hv2-0",arp_op=2 > actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) > + cookie=$hv2_cookie, > priority=120,icmp6,tun_metadata0=0,in_port="ovn-hv2-0",icmp_type=136,icmp_code=0 > > actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) > +]) > + > +# VXLAN > +hv3_cookie="0x$(chassis_cookie hv3)" > +AT_CHECK_UNQUOTED([grep "cookie=$hv3_cookie," phy_to_log_flows], [0], [dnl > + cookie=$hv3_cookie, > priority=120,icmp6,in_port="ovn-hv3-0",icmp_type=136,icmp_code=0 > actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[12..23]]->NXM_NX_REG15[[0..11]],move:NXM_NX_TUN_ID[[0..11]]->OXM_OF_METADATA[[0..11]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE),resubmit(,OFTABLE_LOCAL_OUTPUT) > + cookie=$hv3_cookie, priority=120,arp,in_port="ovn-hv3-0",arp_op=2 > actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[12..23]]->NXM_NX_REG15[[0..11]],move:NXM_NX_TUN_ID[[0..11]]->OXM_OF_METADATA[[0..11]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE),resubmit(,OFTABLE_LOCAL_OUTPUT) > +]) > + > +OVN_CLEANUP([hv1]) > +AT_CLEANUP > diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at > index 7cbd01489..e5a132684 100644 > --- a/tests/ovn-macros.at > +++ b/tests/ovn-macros.at > @@ -1434,5 +1434,6 @@ 
m4_define([OFTABLE_CT_ZONE_LOOKUP], [80]) > m4_define([OFTABLE_CT_ORIG_NW_DST_LOAD], [81]) > m4_define([OFTABLE_CT_ORIG_IP6_DST_LOAD], [82]) > m4_define([OFTABLE_CT_ORIG_TP_DST_LOAD], [83]) > +m4_define([OFTABLE_FLOOD_REMOTE_CHASSIS], [84]) > > m4_define([OFTABLE_SAVE_INPORT_HEX], [m4_eval(OFTABLE_SAVE_INPORT, 16)]) > diff --git a/tests/ovn.at b/tests/ovn.at > index 8ecf1f6bf..8ad2ae596 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -2279,6 +2279,12 @@ ct_tp_dst; > ct_tp_dst(); > Syntax error at `ct_tp_dst' expecting action. > > +flood_remote; > + encodes as set_field:0x8000->reg6,resubmit(,OFTABLE_FLOOD_REMOTE_CHASSIS) > + > +flood_remote(); > + Syntax error at `(' expecting `;'. > + > # Miscellaneous negative tests. > ; > Syntax error at `;'. > @@ -35676,7 +35682,9 @@ check_default_flows() { > # respectively and it's OK if they don't have a default action. > # Tables 81, 82 and 83 are part of ct_nw_dst(), ct_ip6_dst() and > ct_tp_dst() > # actions respectively and its OK for them to not have default flows. > - if test ${table} -eq 68 -o ${table} -eq 70 -o ${table} -eq 81 -o > ${table} -eq 82 -o ${table} -eq 83; then > + # Table 84 is part of flood_remote; action and its OK for > + # it to not have default flows. > + if test ${table} -eq 68 -o ${table} -eq 70 -o ${table} -eq 81 -o > ${table} -eq 82 -o ${table} -eq 83 -o ${table} -eq 84; then > continue; > fi > AT_CHECK([grep -qe "table=$table.* priority=0\(,metadata=0x\w*\)\? 
> actions" oflows], [0], [ignore], [ignore], [echo "Table $table does not > contain a default action"]) > diff --git a/tests/test-ovn.c b/tests/test-ovn.c > index b097ec084..0fec97e19 100644 > --- a/tests/test-ovn.c > +++ b/tests/test-ovn.c > @@ -1385,6 +1385,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx > OVS_UNUSED) > .ct_nw_dst_load_table = OFTABLE_CT_ORIG_NW_DST_LOAD, > .ct_ip6_dst_load_table = OFTABLE_CT_ORIG_IP6_DST_LOAD, > .ct_tp_dst_load_table = OFTABLE_CT_ORIG_TP_DST_LOAD, > + .flood_remote_table = OFTABLE_FLOOD_REMOTE_CHASSIS, > .lflow_uuid.parts = > { 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}, > .dp_key = 0xabcdef, > diff --git a/tutorial/ovn-sandbox b/tutorial/ovn-sandbox > index ed334d1c3..f3d11fd9b 100755 > --- a/tutorial/ovn-sandbox > +++ b/tutorial/ovn-sandbox > @@ -613,8 +613,8 @@ The backup database file is sandbox/${db}2.db > backup_note= > ovn_start_db nb "$nbdb_model" "$nbdb_servers" "$ovnnb_source" > ovn_start_db sb "$sbdb_model" "$sbdb_servers" "$ovnsb_source" > -ovn_start_db ic_nb "$ic_nb_model" "$ic_nb_servers" "$ic_nb_schema" > -ovn_start_db ic_sb "$ic_sb_model" "$ic_sb_servers" "$ic_sb_schema" > +#ovn_start_db ic_nb "$ic_nb_model" "$ic_nb_servers" "$ic_nb_schema" > +#ovn_start_db ic_sb "$ic_sb_model" "$ic_sb_servers" "$ic_sb_schema" > > #Add a small delay to allow ovsdb-server to launch. > sleep 0.1 > @@ -637,8 +637,8 @@ rungdb $gdb_vswitchd $gdb_vswitchd_ex ovs-vswitchd > --detach --no-chdir --pidfile > > run ovn-nbctl init > run ovn-sbctl init > -run ovn-ic-nbctl init > -run ovn-ic-sbctl init > +#run ovn-ic-nbctl init > +#run ovn-ic-sbctl init > run ovn-nbctl set NB_Global . name=az-1 > > run ovs-vsctl set open . external-ids:system-id=chassis-1 > @@ -661,14 +661,14 @@ else > run ovs-vsctl set open . 
external-ids:ovn-remote=$OVN_SB_DB > OVN_CTRLR_PKI="" > fi > -for i in $(seq $n_ics); do > - if [ $i -eq 1 ]; then inst=""; else inst=$i; fi > - rungdb $gdb_ovn_ic $gdb_ovn_ic_ex ovn-ic --detach \ > - --no-chdir --pidfile=ovn-ic${inst}.pid -vconsole:off \ > - --log-file=ovn-ic${inst}.log -vsyslog:off \ > - --ovnsb-db="$OVN_SB_DB" --ovnnb-db="$OVN_NB_DB" \ > - --ic-sb-db="$OVN_IC_SB_DB" --ic-nb-db="$OVN_IC_NB_DB" > -done > +#for i in $(seq $n_ics); do > +# if [ $i -eq 1 ]; then inst=""; else inst=$i; fi > +# rungdb $gdb_ovn_ic $gdb_ovn_ic_ex ovn-ic --detach \ > +# --no-chdir --pidfile=ovn-ic${inst}.pid -vconsole:off \ > +# --log-file=ovn-ic${inst}.log -vsyslog:off \ > +# --ovnsb-db="$OVN_SB_DB" --ovnnb-db="$OVN_NB_DB" \ > +# --ic-sb-db="$OVN_IC_SB_DB" --ic-nb-db="$OVN_IC_NB_DB" > +#done > > northd_args= > OVN_NORTHD=ovn-northd > diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c > index bd31cdbf5..d25c612c7 100644 > --- a/utilities/ovn-trace.c > +++ b/utilities/ovn-trace.c > @@ -3460,6 +3460,9 @@ trace_actions(const struct ovnact *ovnacts, size_t > ovnacts_len, > break; > case OVNACT_CT_ORIG_TP_DST: > break; > + case OVNACT_FLOOD_REMOTE: > + ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT, > + "/* Flood to all remote chassis */"); > } > } > ofpbuf_uninit(&stack); > -- > 2.48.1 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev