Improve buffered packet management in ovn-controller: avoid a useless loop in the run_buffered_binding() routine by keying the buffered_packets_map hashmap on the datapath key and the output port key. Add a new selftest for buffered packets.
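In detail, buffered packets are now keyed on the (datapath key, output
port key) pair instead of the resolved IP address, so that
run_buffered_binding() can look up the queue for a given port binding
directly rather than walking every buffered queue. Condensed from the
hunks below, the lookup path is:

    /* Key and hash used for buffered_packets_map lookups. */
    uint64_t dp_key = ntohll(md->flow.metadata);
    uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0];
    uint32_t hash = hash_add(hash_bytes(&oport_key, sizeof oport_key, 0),
                             dp_key);
    struct buffered_packets *bp
        = pinctrl_find_buffered_packets(dp_key, oport_key, hash);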
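For a distributed gateway router port, packets are buffered against the
chassisredirect port ("cr-<port>") rather than the gateway port itself,
so the per-port lookup falls back to the chassisredirect binding on a
miss (again condensed from the patch below):

    struct buffered_packets *cur_qp
        = pinctrl_get_buffered_packets(ld->datapath->tunnel_key,
                                       pb->tunnel_key);
    if (!cur_qp) {
        /* Check the chassisredirect port for the distributed
         * gw router port use-case. */
        char *redirect_name = xasprintf("cr-%s", pb->logical_port);
        const struct sbrec_port_binding *cr_pb
            = lport_lookup_by_name(sbrec_port_binding_by_name,
                                   redirect_name);
        free(redirect_name);
        if (cr_pb) {
            cur_qp = pinctrl_get_buffered_packets(ld->datapath->tunnel_key,
                                                  cr_pb->tunnel_key);
        }
    }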
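The new selftest lives in tests/system-ovn.at; assuming a tree built
with the system testsuite enabled, it should be runnable on its own
with something like:

    make check-kernel TESTSUITEFLAGS='-k ip-buffering'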
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
Changes since v1:
- improve code readability
---
 controller/pinctrl.c | 113 +++++++++++++++++++++++++++------------
 tests/system-ovn.at  | 124 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 203 insertions(+), 34 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 8859cb080..77df20038 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -182,6 +182,7 @@ static void destroy_buffered_packets_map(void);
 static void
 run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
                      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
+                     struct ovsdb_idl_index *sbrec_port_binding_by_name,
                      const struct hmap *local_datapaths)
     OVS_REQUIRES(pinctrl_mutex);
 
@@ -1430,6 +1431,9 @@ struct buffered_packets {
     struct in6_addr ip;
     struct eth_addr ea;
 
+    uint64_t dp_key;
+    uint64_t port_key;
+
     long long int timestamp;
 
     struct buffer_info data[BUFFER_QUEUE_DEPTH];
@@ -1556,13 +1560,12 @@ buffered_packets_map_gc(void)
 }
 
 static struct buffered_packets *
-pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash)
+pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key,
+                              uint32_t hash)
 {
     struct buffered_packets *qp;
-
-    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
-                             &buffered_packets_map) {
-        if (IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
+    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, &buffered_packets_map) {
+        if (qp->dp_key == dp_key && qp->port_key == port_key) {
             return qp;
         }
     }
@@ -1575,19 +1578,13 @@ pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
                                 const struct match *md, bool is_arp)
     OVS_REQUIRES(pinctrl_mutex)
 {
-    struct buffered_packets *bp;
-    struct dp_packet *clone;
-    struct in6_addr addr;
-
-    if (is_arp) {
-        addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
-    } else {
-        ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0));
-        memcpy(&addr, &ip6, sizeof addr);
-    }
-
-    uint32_t hash = hash_bytes(&addr, sizeof addr, 0);
-    bp = pinctrl_find_buffered_packets(&addr, hash);
+    uint64_t dp_key = ntohll(md->flow.metadata);
+    uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0];
+    uint32_t hash = hash_add(
+            hash_bytes(&oport_key, sizeof oport_key, 0),
+            dp_key);
+    struct buffered_packets *bp
+        = pinctrl_find_buffered_packets(dp_key, oport_key, hash);
     if (!bp) {
         if (hmap_count(&buffered_packets_map) >= 1000) {
             COVERAGE_INC(pinctrl_drop_buffered_packets_map);
@@ -1597,12 +1594,20 @@ pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
         bp = xmalloc(sizeof *bp);
         hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
         bp->head = bp->tail = 0;
-        bp->ip = addr;
+        if (is_arp) {
+            bp->ip = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
+        } else {
+            ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0));
+            memcpy(&bp->ip, &ip6, sizeof bp->ip);
+        }
+        bp->dp_key = dp_key;
+        bp->port_key = oport_key;
     }
     bp->timestamp = time_msec();
     /* clone the packet to send it later with correct L2 address */
-    clone = dp_packet_clone_data(dp_packet_data(pkt_in),
-                                 dp_packet_size(pkt_in));
+    struct dp_packet *clone
+        = dp_packet_clone_data(dp_packet_data(pkt_in),
+                               dp_packet_size(pkt_in));
     buffered_push_packet(bp, clone, md);
 
     return 0;
@@ -3586,6 +3591,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                          sbrec_ip_multicast_opts);
     run_buffered_binding(sbrec_mac_binding_by_lport_ip,
                          sbrec_port_binding_by_datapath,
+                         sbrec_port_binding_by_name,
                          local_datapaths);
     sync_svc_monitors(ovnsb_idl_txn, svc_mon_table,
                       sbrec_port_binding_by_name, chassis);
@@ -4354,15 +4360,30 @@ run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn,
     }
 }
 
+static struct buffered_packets *
+pinctrl_get_buffered_packets(uint64_t dp_key, uint64_t port_key)
+{
+    uint32_t hash = hash_add(
+            hash_bytes(&port_key, sizeof port_key, 0),
+            dp_key);
+    return pinctrl_find_buffered_packets(dp_key, port_key, hash);
+}
+
 static void
 run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
                      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
+                     struct ovsdb_idl_index *sbrec_port_binding_by_name,
                      const struct hmap *local_datapaths)
     OVS_REQUIRES(pinctrl_mutex)
 {
     const struct local_datapath *ld;
     bool notify = false;
 
+    if (!hmap_count(&buffered_packets_map)) {
+        /* No work to do. */
+        return;
+    }
+
     HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
         /* MAC_Binding.logical_port will always belong to a
          * a router datapath. Hence we can skip logical switch
@@ -4381,20 +4402,44 @@ run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
             if (strcmp(pb->type, "patch") && strcmp(pb->type, "l3gateway")) {
                 continue;
             }
-            struct buffered_packets *cur_qp;
-            HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) {
-                struct ds ip_s = DS_EMPTY_INITIALIZER;
-                ipv6_format_mapped(&cur_qp->ip, &ip_s);
-                const struct sbrec_mac_binding *b = mac_binding_lookup(
-                        sbrec_mac_binding_by_lport_ip, pb->logical_port,
-                        ds_cstr(&ip_s));
-                if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT,
-                                  ETH_ADDR_SCAN_ARGS(cur_qp->ea))) {
-                    hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
-                    ovs_list_push_back(&buffered_mac_bindings, &cur_qp->list);
-                    notify = true;
+
+            struct buffered_packets *cur_qp
+                = pinctrl_get_buffered_packets(ld->datapath->tunnel_key,
+                                               pb->tunnel_key);
+            if (!cur_qp) {
+                /* If primary lookup fails, check chassisredirect port
+                 * for distributed gw router port use-case. */
+                char *redirect_name = xasprintf("cr-%s", pb->logical_port);
+                const struct sbrec_port_binding *cr_pb
+                    = lport_lookup_by_name(sbrec_port_binding_by_name,
+                                           redirect_name);
+                free(redirect_name);
+                if (cr_pb) {
+                    cur_qp = pinctrl_get_buffered_packets(
+                            ld->datapath->tunnel_key, cr_pb->tunnel_key);
                 }
-                ds_destroy(&ip_s);
+            }
+
+            if (!cur_qp) {
+                continue;
+            }
+
+            struct ds ip_s = DS_EMPTY_INITIALIZER;
+            ipv6_format_mapped(&cur_qp->ip, &ip_s);
+            const struct sbrec_mac_binding *b = mac_binding_lookup(
+                    sbrec_mac_binding_by_lport_ip, pb->logical_port,
+                    ds_cstr(&ip_s));
+            if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT,
+                              ETH_ADDR_SCAN_ARGS(cur_qp->ea))) {
+                hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
+                ovs_list_push_back(&buffered_mac_bindings, &cur_qp->list);
+                notify = true;
+            }
+            ds_destroy(&ip_s);
+
+            if (!hmap_count(&buffered_packets_map)) {
+                /* No more work to do. */
+                break;
             }
         }
         sbrec_port_binding_index_destroy_row(target);
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 20c058415..a186a6e63 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -8597,3 +8597,127 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
 /connection dropped.*/d"])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([IP buffering])
+AT_KEYWORDS([ip-buffering])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_BR([br-int])
+ADD_BR([br-ext])
+
+ovs-ofctl add-flow br-ext action=normal
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+    -- set Open_vSwitch . external-ids:system-id=hv1 \
+    -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+    -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+    -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+    -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+ADD_NAMESPACES(sw01)
+ADD_VETH(sw01, sw01, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+         "192.168.1.1")
+ADD_NAMESPACES(sw11)
+ADD_VETH(sw11, sw11, br-int, "192.168.2.2/24", "f0:00:00:02:02:03", \
+         "192.168.2.1")
+ADD_NAMESPACES(remote)
+ADD_VETH(remote, remote, br-ext, "172.16.1.2/24", "f0:00:00:01:02:05", \
+         "172.16.1.1")
+ADD_NAMESPACES(remote1)
+ADD_VETH(remote1, remote1, br-ext, "172.16.1.4/24", "f0:00:00:01:02:06", \
+         "172.16.1.1")
+
+NS_CHECK_EXEC([remote], [tcpdump -c 3 -nneei remote -Q in src 192.168.1.2 and dst 172.16.1.2 and icmp > icmp.pcap &])
+NS_CHECK_EXEC([remote], [tcpdump -c 1 -nneei remote -Q in arp and arp[[24:4]]==0xac100102 > arp.pcap &])
+NS_CHECK_EXEC([remote1], [tcpdump -c 3 -nneei remote1 -Q in src 172.16.1.3 and dst 172.16.1.4 and icmp > icmp1.pcap 2>/dev/null &])
+NS_CHECK_EXEC([remote1], [tcpdump -c 1 -nneei remote1 -Q in arp and arp[[24:4]]==0xac100104 > arp1.pcap 2>/dev/null &])
+
+ovn-nbctl create Logical_Router name=R1 options:chassis=hv1
+ovn-nbctl ls-add sw0
+ovn-nbctl ls-add sw1
+ovn-nbctl ls-add public
+
+ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lrp-add R1 rp-sw1 00:00:03:01:02:03 192.168.2.1/24
+ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24
+
+ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
+    type=router options:router-port=rp-sw0 \
+    -- lsp-set-addresses sw0-rp router
+ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
+    type=router options:router-port=rp-sw1 \
+    -- lsp-set-addresses sw1-rp router
+
+ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \
+    type=router options:router-port=rp-public \
+    -- lsp-set-addresses public-rp router
+
+ovn-nbctl lsp-add sw0 sw01 \
+    -- lsp-set-addresses sw01 "f0:00:00:01:02:03 192.168.1.2"
+
+ovn-nbctl lsp-add sw1 sw11 \
+    -- lsp-set-addresses sw11 "f0:00:00:02:02:03 192.168.2.2"
+
+ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.2.2 sw11 00:00:02:02:03:10
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext])
+ovn-nbctl lsp-add public public1 \
+    -- lsp-set-addresses public1 unknown \
+    -- lsp-set-type public1 localnet \
+    -- lsp-set-options public1 network_name=phynet
+
+OVN_POPULATE_ARP
+
+NS_CHECK_EXEC([sw01], [ping -q -c 3 -i 0.3 -w 2 172.16.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_WAIT_UNTIL([
+    total_arp_pkts=$(cat arp.pcap | wc -l)
+    test "${total_arp_pkts}" = "1"
+])
+
+OVS_WAIT_UNTIL([
+    total_icmp_pkts=$(cat icmp.pcap | wc -l)
+    test "${total_icmp_pkts}" = "3"
+])
+
+NS_CHECK_EXEC([sw11], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_WAIT_UNTIL([
+    total_arp1_pkts=$(cat arp1.pcap | wc -l)
+    test "${total_arp1_pkts}" = "1"
+])
+
+OVS_WAIT_UNTIL([
+    total_icmp1_pkts=$(cat icmp1.pcap | wc -l)
+    test "${total_icmp1_pkts}" = "3"
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+])
--
2.37.3
