Improve buffered packet management in ovn-controller using even
datapath and port keys as buffered_packets_map hashmap hash.
Improve run_buffered_binding avoiding unnecessary loops over
datapaths and port_bindings.
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
Changes since v4:
- improve buffered_packets_map lookup looping over tracked mac_binding table
Changes since v3:
- fix ip stored in buffered_packets structure (must be unique for each
buffered_packets_map map entry)
Changes since v2:
- improve hash function
Changes since v1:
- improve code readability
---
controller/ovn-controller.c | 2 +
controller/pinctrl.c | 130 +++++++++++++++++++++++-------------
controller/pinctrl.h | 2 +
tests/system-ovn.at | 127 +++++++++++++++++++++++++++++++++++
4 files changed, 216 insertions(+), 45 deletions(-)
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 0752a71ad..2e0a8952e 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -4304,6 +4304,8 @@ main(int argc, char *argv[])
ovnsb_idl_loop.idl),
sbrec_service_monitor_table_get(
ovnsb_idl_loop.idl),
+ sbrec_mac_binding_table_get(
+ ovnsb_idl_loop.idl),
sbrec_bfd_table_get(ovnsb_idl_loop.idl),
br_int, chassis,
&runtime_data->local_datapaths,
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index f44775c7e..e1a5adb1d 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -180,9 +180,8 @@ static struct pinctrl pinctrl;
static void init_buffered_packets_map(void);
static void destroy_buffered_packets_map(void);
static void
-run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
- struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
- const struct hmap *local_datapaths)
+run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
+ const struct sbrec_mac_binding_table *mac_binding_table)
OVS_REQUIRES(pinctrl_mutex);
static void pinctrl_handle_put_mac_binding(const struct flow *md,
@@ -1430,6 +1429,9 @@ struct buffered_packets {
struct in6_addr ip;
struct eth_addr ea;
+ uint64_t dp_key;
+ uint64_t port_key;
+
long long int timestamp;
struct buffer_info data[BUFFER_QUEUE_DEPTH];
@@ -1555,20 +1557,40 @@ buffered_packets_map_gc(void)
}
}
+static uint32_t
+pinctrl_buffer_packet_hash(uint64_t dp_key, uint64_t port_key,
+ const struct in6_addr *addr)
+{
+ uint32_t hash = 0;
+ hash = hash_add64(hash, port_key);
+ hash = hash_add64(hash, dp_key);
+ hash = hash_bytes(addr, sizeof addr, hash);
+ return hash_finish(hash, 16);
+}
+
static struct buffered_packets *
-pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash)
+pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key,
+ const struct in6_addr *ip, uint32_t hash)
{
struct buffered_packets *qp;
-
- HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
- &buffered_packets_map) {
- if (IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
+ HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, &buffered_packets_map) {
+ if (qp->dp_key == dp_key && qp->port_key == port_key &&
+ IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
return qp;
}
}
return NULL;
}
+static struct buffered_packets *
+pinctrl_find_buffered_packets_with_hash(uint64_t dp_key, uint64_t port_key,
+ const struct in6_addr *ip)
+{
+ uint32_t hash = pinctrl_buffer_packet_hash(dp_key, port_key, ip);
+
+ return pinctrl_find_buffered_packets(dp_key, port_key, ip, hash);
+}
+
/* Called with in the pinctrl_handler thread context. */
static int
pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
@@ -1578,6 +1600,8 @@ pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
struct buffered_packets *bp;
struct dp_packet *clone;
struct in6_addr addr;
+ uint64_t dp_key = ntohll(md->flow.metadata);
+ uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0];
if (is_arp) {
addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
@@ -1586,8 +1610,8 @@ pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
memcpy(&addr, &ip6, sizeof addr);
}
- uint32_t hash = hash_bytes(&addr, sizeof addr, 0);
- bp = pinctrl_find_buffered_packets(&addr, hash);
+ uint32_t hash = pinctrl_buffer_packet_hash(dp_key, oport_key, &addr);
+ bp = pinctrl_find_buffered_packets(dp_key, oport_key, &addr, hash);
if (!bp) {
if (hmap_count(&buffered_packets_map) >= 1000) {
COVERAGE_INC(pinctrl_drop_buffered_packets_map);
@@ -1598,6 +1622,8 @@ pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
bp->head = bp->tail = 0;
bp->ip = addr;
+ bp->dp_key = dp_key;
+ bp->port_key = oport_key;
}
bp->timestamp = time_msec();
/* clone the packet to send it later with correct L2 address */
@@ -3554,6 +3580,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
const struct sbrec_dns_table *dns_table,
const struct sbrec_controller_event_table *ce_table,
const struct sbrec_service_monitor_table *svc_mon_table,
+ const struct sbrec_mac_binding_table *mac_binding_table,
const struct sbrec_bfd_table *bfd_table,
const struct ovsrec_bridge *br_int,
const struct sbrec_chassis *chassis,
@@ -3584,9 +3611,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
sbrec_port_binding_by_key,
sbrec_igmp_groups,
sbrec_ip_multicast_opts);
- run_buffered_binding(sbrec_mac_binding_by_lport_ip,
- sbrec_port_binding_by_datapath,
- local_datapaths);
+ run_buffered_binding(sbrec_port_binding_by_name, mac_binding_table);
sync_svc_monitors(ovnsb_idl_txn, svc_mon_table,
sbrec_port_binding_by_name,
chassis);
bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name,
@@ -4355,49 +4380,64 @@ run_put_mac_bindings(struct ovsdb_idl_txn
*ovnsb_idl_txn,
}
static void
-run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
- struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
- const struct hmap *local_datapaths)
+run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
+ const struct sbrec_mac_binding_table *mac_binding_table)
OVS_REQUIRES(pinctrl_mutex)
{
- const struct local_datapath *ld;
bool notify = false;
- HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
- /* MAC_Binding.logical_port will always belong to a
- * a router datapath. Hence we can skip logical switch
- * datapaths.
- * */
- if (ld->is_switch) {
+ if (!hmap_count(&buffered_packets_map)) {
+ return;
+ }
+
+ const struct sbrec_mac_binding *mb;
+ SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (mb, mac_binding_table) {
+ const struct sbrec_port_binding *pb = lport_lookup_by_name(
+ sbrec_port_binding_by_name, mb->logical_port);
+ if (!pb || !pb->datapath) {
continue;
}
- struct sbrec_port_binding *target =
- sbrec_port_binding_index_init_row(sbrec_port_binding_by_datapath);
- sbrec_port_binding_index_set_datapath(target, ld->datapath);
- const struct sbrec_port_binding *pb;
- SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
- sbrec_port_binding_by_datapath) {
- if (strcmp(pb->type, "patch") && strcmp(pb->type, "l3gateway")) {
+ struct in6_addr ip;
+ ovs_be32 ip4;
+ if (ip_parse(mb->ip, &ip4)) {
+ ip = in6_addr_mapped_ipv4(ip4);
+ } else if (!ipv6_parse(mb->ip, &ip)) {
+ continue;
+ }
+
+ struct buffered_packets *bp = pinctrl_find_buffered_packets_with_hash(
+ pb->datapath->tunnel_key, pb->tunnel_key, &ip);
+ if (!bp) {
+ if (!smap_get(&pb->options, "chassis-redirect-port")) {
continue;
}
- struct buffered_packets *cur_qp;
- HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) {
- struct ds ip_s = DS_EMPTY_INITIALIZER;
- ipv6_format_mapped(&cur_qp->ip, &ip_s);
- const struct sbrec_mac_binding *b = mac_binding_lookup(
- sbrec_mac_binding_by_lport_ip, pb->logical_port,
- ds_cstr(&ip_s));
- if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT,
- ETH_ADDR_SCAN_ARGS(cur_qp->ea))) {
- hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
- ovs_list_push_back(&buffered_mac_bindings, &cur_qp->list);
- notify = true;
- }
- ds_destroy(&ip_s);
+
+ char *redirect_name = xasprintf("cr-%s", pb->logical_port);
+ pb = lport_lookup_by_name(sbrec_port_binding_by_name,
+ redirect_name);
+ free(redirect_name);
+
+ if (!pb || !pb->datapath || strcmp(pb->type, "chassisredirect")) {
+ continue;
}
+
+ bp = pinctrl_find_buffered_packets_with_hash(
+ pb->datapath->tunnel_key, pb->tunnel_key, &ip);
+ }
+
+ if (!bp) {
+ continue;
+ }
+
+ if (!ovs_scan(mb->mac, ETH_ADDR_SCAN_FMT,
+ ETH_ADDR_SCAN_ARGS(bp->ea))) {
+ continue;
}
- sbrec_port_binding_index_destroy_row(target);
+
+ hmap_remove(&buffered_packets_map, &bp->hmap_node);
+ ovs_list_push_back(&buffered_mac_bindings, &bp->list);
+ notify = true;
}
buffered_packets_map_gc();
diff --git a/controller/pinctrl.h b/controller/pinctrl.h
index d4f52e94d..90414041b 100644
--- a/controller/pinctrl.h
+++ b/controller/pinctrl.h
@@ -35,6 +35,7 @@ struct sbrec_controller_event_table;
struct sbrec_service_monitor_table;
struct sbrec_bfd_table;
struct sbrec_port_binding;
+struct sbrec_mac_binding_table;
void pinctrl_init(void);
void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
@@ -49,6 +50,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
const struct sbrec_dns_table *,
const struct sbrec_controller_event_table *,
const struct sbrec_service_monitor_table *,
+ const struct sbrec_mac_binding_table *,
const struct sbrec_bfd_table *,
const struct ovsrec_bridge *, const struct sbrec_chassis *,
const struct hmap *local_datapaths,
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index cb3412717..9e6fffe2b 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -9138,3 +9138,130 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port
patch-.*/d
/connection dropped.*/d"])
AT_CLEANUP
])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([IP buffering])
+AT_KEYWORDS([ip-buffering])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_BR([br-int])
+ADD_BR([br-ext])
+
+ovs-ofctl add-flow br-ext action=normal
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+ -- set Open_vSwitch . external-ids:system-id=hv1 \
+ -- set Open_vSwitch .
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+ADD_NAMESPACES(sw01)
+ADD_VETH(sw01, sw01, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+ "192.168.1.1")
+ADD_NAMESPACES(sw11)
+ADD_VETH(sw11, sw11, br-int, "192.168.2.2/24", "f0:00:00:02:02:03", \
+ "192.168.2.1")
+ADD_NAMESPACES(remote)
+ADD_VETH(remote, remote, br-ext, "172.16.1.2/24", "f0:00:00:01:02:05", \
+ "172.16.1.1")
+ADD_NAMESPACES(remote1)
+ADD_VETH(remote1, remote1, br-ext, "172.16.1.4/24", "f0:00:00:01:02:06", \
+ "172.16.1.1")
+
+NETNS_DAEMONIZE([remote], [tcpdump -c 3 -nneei remote -Q in src 192.168.1.2 and
dst 172.16.1.2 and icmp > icmp.pcap], [tcpdump0.pid])
+NETNS_DAEMONIZE([remote], [tcpdump -c 1 -nneei remote -Q in arp and
arp[[24:4]]==0xac100102 > arp.pcap], [tcpdump1.pid])
+NETNS_DAEMONIZE([remote1], [tcpdump -c 3 -nneei remote1 -Q in src 172.16.1.3 and dst
172.16.1.4 and icmp > icmp1.pcap 2>/dev/null], [tcpdump2.pid])
+NETNS_DAEMONIZE([remote1], [tcpdump -c 1 -nneei remote1 -Q in arp and
arp[[24:4]]==0xac100104 > arp1.pcap 2>/dev/null], [tcpdump3.pid])
+
+check ovn-nbctl lr-add R1 -- set Logical_Router R1 options:chassis=hv1
+check ovn-nbctl ls-add sw0
+check ovn-nbctl ls-add sw1
+check ovn-nbctl ls-add public
+
+check ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
+check ovn-nbctl lrp-add R1 rp-sw1 00:00:03:01:02:03 192.168.2.1/24
+check ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24
+
+check ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
+ type=router options:router-port=rp-sw0 \
+ -- lsp-set-addresses sw0-rp router
+check ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
+ type=router options:router-port=rp-sw1 \
+ -- lsp-set-addresses sw1-rp router
+
+check ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \
+ type=router options:router-port=rp-public \
+ -- lsp-set-addresses public-rp router
+
+check ovn-nbctl lsp-add sw0 sw01 \
+ -- lsp-set-addresses sw01 "f0:00:00:01:02:03 192.168.1.2"
+
+check ovn-nbctl lsp-add sw1 sw11 \
+ -- lsp-set-addresses sw11 "f0:00:00:02:02:03 192.168.2.2"
+
+check ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.2.2 sw11
00:00:02:02:03:10
+
+AT_CHECK([ovs-vsctl set Open_vSwitch .
external-ids:ovn-bridge-mappings=phynet:br-ext])
+check ovn-nbctl lsp-add public public1 \
+ -- lsp-set-addresses public1 unknown \
+ -- lsp-set-type public1 localnet \
+ -- lsp-set-options public1 network_name=phynet
+
+OVN_POPULATE_ARP
+
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+NS_CHECK_EXEC([sw01], [ping -q -c 3 -i 0.3 -w 2 172.16.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_WAIT_UNTIL([
+ total_arp_pkts=$(cat arp.pcap | wc -l)
+ test "${total_arp_pkts}" = "1"
+])
+
+OVS_WAIT_UNTIL([
+ total_icmp_pkts=$(cat icmp.pcap | wc -l)
+ test "${total_icmp_pkts}" = "3"
+])
+
+NS_CHECK_EXEC([sw11], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_WAIT_UNTIL([
+ total_arp1_pkts=$(cat arp1.pcap | wc -l)
+ test "${total_arp1_pkts}" = "1"
+])
+
+OVS_WAIT_UNTIL([
+ total_icmp1_pkts=$(cat icmp1.pcap | wc -l)
+ test "${total_icmp1_pkts}" = "3"
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+])