> Hi Lorenzo, I only have one comment down below:
>
> On 10/7/22 07:08, Lorenzo Bianconi wrote:
> > Improve buffered packet management in ovn-controller avoid useless loop
> > in run_buffered_binding routine and using datapath key and output port
> > key as buffered_packets_map hashmap hash. Add new selftest for buffered
> > packets.
> >
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> > controller/pinctrl.c | 119 ++++++++++++++++++++++++++++-------------
> > tests/system-ovn.at | 124 +++++++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 206 insertions(+), 37 deletions(-)
> >
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index 8859cb080..e5fe44daa 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -182,6 +182,7 @@ static void destroy_buffered_packets_map(void);
> > static void
> > run_buffered_binding(struct ovsdb_idl_index
> > *sbrec_mac_binding_by_lport_ip,
> > struct ovsdb_idl_index
> > *sbrec_port_binding_by_datapath,
> > + struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > const struct hmap *local_datapaths)
> > OVS_REQUIRES(pinctrl_mutex);
> > @@ -1430,6 +1431,9 @@ struct buffered_packets {
> > struct in6_addr ip;
> > struct eth_addr ea;
> > + uint64_t dp_key;
> > + uint64_t port_key;
> > +
> > long long int timestamp;
> > struct buffer_info data[BUFFER_QUEUE_DEPTH];
> > @@ -1556,13 +1560,12 @@ buffered_packets_map_gc(void)
> > }
> > static struct buffered_packets *
> > -pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash)
> > +pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key,
> > + uint32_t hash)
> > {
> > struct buffered_packets *qp;
> > -
> > - HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
> > - &buffered_packets_map) {
> > - if (IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
> > + HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, &buffered_packets_map) {
> > + if (qp->dp_key == dp_key && qp->port_key == port_key) {
> > return qp;
> > }
> > }
> > @@ -1575,19 +1578,13 @@ pinctrl_handle_buffered_packets(struct dp_packet
> > *pkt_in,
> > const struct match *md, bool is_arp)
> > OVS_REQUIRES(pinctrl_mutex)
> > {
> > - struct buffered_packets *bp;
> > - struct dp_packet *clone;
> > - struct in6_addr addr;
> > -
> > - if (is_arp) {
> > - addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
> > - } else {
> > - ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0));
> > - memcpy(&addr, &ip6, sizeof addr);
> > - }
> > -
> > - uint32_t hash = hash_bytes(&addr, sizeof addr, 0);
> > - bp = pinctrl_find_buffered_packets(&addr, hash);
> > + uint64_t dp_key = ntohll(md->flow.metadata);
> > + uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0];
> > + uint32_t hash = hash_add(
> > + hash_bytes(&oport_key, sizeof oport_key, 0),
> > + dp_key);
> > + struct buffered_packets *bp
> > + = pinctrl_find_buffered_packets(dp_key, oport_key, hash);
> > if (!bp) {
> > if (hmap_count(&buffered_packets_map) >= 1000) {
> > COVERAGE_INC(pinctrl_drop_buffered_packets_map);
> > @@ -1597,12 +1594,20 @@ pinctrl_handle_buffered_packets(struct dp_packet
> > *pkt_in,
> > bp = xmalloc(sizeof *bp);
> > hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
> > bp->head = bp->tail = 0;
> > - bp->ip = addr;
> > + if (is_arp) {
> > + bp->ip = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
> > + } else {
> > + ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0));
> > + memcpy(&bp->ip, &ip6, sizeof bp->ip);
> > + }
> > + bp->dp_key = dp_key;
> > + bp->port_key = oport_key;
> > }
> > bp->timestamp = time_msec();
> > /* clone the packet to send it later with correct L2 address */
> > - clone = dp_packet_clone_data(dp_packet_data(pkt_in),
> > - dp_packet_size(pkt_in));
> > + struct dp_packet *clone
> > + = dp_packet_clone_data(dp_packet_data(pkt_in),
> > + dp_packet_size(pkt_in));
> > buffered_push_packet(bp, clone, md);
> > return 0;
> > @@ -3586,6 +3591,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> > sbrec_ip_multicast_opts);
> > run_buffered_binding(sbrec_mac_binding_by_lport_ip,
> > sbrec_port_binding_by_datapath,
> > + sbrec_port_binding_by_name,
> > local_datapaths);
> > sync_svc_monitors(ovnsb_idl_txn, svc_mon_table,
> > sbrec_port_binding_by_name,
> > chassis);
> > @@ -4354,20 +4360,34 @@ run_put_mac_bindings(struct ovsdb_idl_txn
> > *ovnsb_idl_txn,
> > }
> > }
> > +static struct buffered_packets *
> > +pinctrl_get_buffered_packets(uint64_t dp_key, uint64_t port_key)
> > +{
> > + uint32_t hash = hash_add(
> > + hash_bytes(&port_key, sizeof port_key, 0),
> > + dp_key);
> > + return pinctrl_find_buffered_packets(dp_key, port_key, hash);
> > +}
> > +
> > static void
> > run_buffered_binding(struct ovsdb_idl_index
> > *sbrec_mac_binding_by_lport_ip,
> > struct ovsdb_idl_index
> > *sbrec_port_binding_by_datapath,
> > + struct ovsdb_idl_index *sbrec_port_binding_by_name,
> > const struct hmap *local_datapaths)
> > OVS_REQUIRES(pinctrl_mutex)
> > {
> > const struct local_datapath *ld;
> > bool notify = false;
> > + if (!hmap_count(&buffered_packets_map)) {
> > + /* No work to do. */
> > + return;
> > + }
> > +
> > HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> > /* MAC_Binding.logical_port will always belong to a
> > * a router datapath. Hence we can skip logical switch
> > - * datapaths.
> > - * */
> > + * datapaths. */
> > if (ld->is_switch) {
> > continue;
> > }
> > @@ -4381,26 +4401,51 @@ run_buffered_binding(struct ovsdb_idl_index
> > *sbrec_mac_binding_by_lport_ip,
> > if (strcmp(pb->type, "patch") && strcmp(pb->type,
> > "l3gateway")) {
> > continue;
> > }
> > - struct buffered_packets *cur_qp;
> > - HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) {
> > - struct ds ip_s = DS_EMPTY_INITIALIZER;
> > - ipv6_format_mapped(&cur_qp->ip, &ip_s);
> > - const struct sbrec_mac_binding *b = mac_binding_lookup(
> > - sbrec_mac_binding_by_lport_ip, pb->logical_port,
> > - ds_cstr(&ip_s));
> > - if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT,
> > - ETH_ADDR_SCAN_ARGS(cur_qp->ea))) {
> > - hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
> > - ovs_list_push_back(&buffered_mac_bindings,
> > &cur_qp->list);
> > - notify = true;
> > +
> > + struct buffered_packets *cur_qp
> > + = pinctrl_get_buffered_packets(ld->datapath->tunnel_key,
> > + pb->tunnel_key);
> > + if (!cur_qp) {
> > + /* If primary lookup fails, check chassisredirect port
> > + * for distributed gw router port use-case. */
> > + char *redirect_name = xasprintf("cr-%s", pb->logical_port);
> > + const struct sbrec_port_binding *cr_pb
> > + = lport_lookup_by_name(sbrec_port_binding_by_name,
> > + redirect_name);
> > + free(redirect_name);
> > + if (cr_pb) {
> > + cur_qp = pinctrl_get_buffered_packets(
> > + ld->datapath->tunnel_key, cr_pb->tunnel_key);
> > }
> > - ds_destroy(&ip_s);
> > + }
> > +
> > + if (!cur_qp) {
> > + continue;
> > + }
> > +
> > + struct ds ip_s = DS_EMPTY_INITIALIZER;
> > + ipv6_format_mapped(&cur_qp->ip, &ip_s);
> > + const struct sbrec_mac_binding *b = mac_binding_lookup(
> > + sbrec_mac_binding_by_lport_ip, pb->logical_port,
> > + ds_cstr(&ip_s));
> > + if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT,
> > + ETH_ADDR_SCAN_ARGS(cur_qp->ea))) {
> > + hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
> > + ovs_list_push_back(&buffered_mac_bindings, &cur_qp->list);
> > + notify = true;
> > + }
> > + ds_destroy(&ip_s);
> > +
> > + if (!hmap_count(&buffered_packets_map)) {
> > + /* No more work to do. */
> > + sbrec_port_binding_index_destroy_row(target);
> > + goto out;
>
> If you change the body of this if statement to "break;" then
> sbrec_port_binding_index_destroy_row(target) will be called anyway. The only
> thing the "goto" changes is that buffered_packets_map_gc() will not be
> called. But since buffered_packets_map is empty, there is no harm in calling
> buffered_packets_map_gc() since it will just be a no-op.
>
> The benefit, in my eyes, is that it makes it easier to understand the
> lifetime of "target".
ack, I agree. This was just a micro-optimization, but I agree it does not worth
to do so. I will fix it in v2.
Regards,
Lorenzo
>
> > }
> > }
> > sbrec_port_binding_index_destroy_row(target);
> > }
> > buffered_packets_map_gc();
> > -
> > +out:
> > if (notify) {
> > notify_pinctrl_handler();
> > }
> > diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> > index 20c058415..a186a6e63 100644
> > --- a/tests/system-ovn.at
> > +++ b/tests/system-ovn.at
> > @@ -8597,3 +8597,127 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port
> > patch-.*/d
> > /connection dropped.*/d"])
> > AT_CLEANUP
> > ])
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([IP buffering])
> > +AT_KEYWORDS([ip-buffering])
> > +AT_SKIP_IF([test $HAVE_TCPDUMP = no])
> > +
> > +ovn_start
> > +OVS_TRAFFIC_VSWITCHD_START()
> > +
> > +ADD_BR([br-int])
> > +ADD_BR([br-ext])
> > +
> > +ovs-ofctl add-flow br-ext action=normal
> > +# Set external-ids in br-int needed for ovn-controller
> > +ovs-vsctl \
> > + -- set Open_vSwitch . external-ids:system-id=hv1 \
> > + -- set Open_vSwitch .
> > external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> > + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> > + -- set bridge br-int fail-mode=secure
> > other-config:disable-in-band=true
> > +
> > +# Start ovn-controller
> > +start_daemon ovn-controller
> > +
> > +ADD_NAMESPACES(sw01)
> > +ADD_VETH(sw01, sw01, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
> > + "192.168.1.1")
> > +ADD_NAMESPACES(sw11)
> > +ADD_VETH(sw11, sw11, br-int, "192.168.2.2/24", "f0:00:00:02:02:03", \
> > + "192.168.2.1")
> > +ADD_NAMESPACES(remote)
> > +ADD_VETH(remote, remote, br-ext, "172.16.1.2/24", "f0:00:00:01:02:05", \
> > + "172.16.1.1")
> > +ADD_NAMESPACES(remote1)
> > +ADD_VETH(remote1, remote1, br-ext, "172.16.1.4/24", "f0:00:00:01:02:06", \
> > + "172.16.1.1")
> > +
> > +NS_CHECK_EXEC([remote], [tcpdump -c 3 -nneei remote -Q in src 192.168.1.2
> > and dst 172.16.1.2 and icmp > icmp.pcap &])
> > +NS_CHECK_EXEC([remote], [tcpdump -c 1 -nneei remote -Q in arp and
> > arp[[24:4]]==0xac100102 > arp.pcap &])
> > +NS_CHECK_EXEC([remote1], [tcpdump -c 3 -nneei remote1 -Q in src 172.16.1.3
> > and dst 172.16.1.4 and icmp > icmp1.pcap 2>/dev/null &])
> > +NS_CHECK_EXEC([remote1], [tcpdump -c 1 -nneei remote1 -Q in arp and
> > arp[[24:4]]==0xac100104 > arp1.pcap 2>/dev/null &])
> > +
> > +ovn-nbctl create Logical_Router name=R1 options:chassis=hv1
> > +ovn-nbctl ls-add sw0
> > +ovn-nbctl ls-add sw1
> > +ovn-nbctl ls-add public
> > +
> > +ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
> > +ovn-nbctl lrp-add R1 rp-sw1 00:00:03:01:02:03 192.168.2.1/24
> > +ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24
> > +
> > +ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
> > + type=router options:router-port=rp-sw0 \
> > + -- lsp-set-addresses sw0-rp router
> > +ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
> > + type=router options:router-port=rp-sw1 \
> > + -- lsp-set-addresses sw1-rp router
> > +
> > +ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \
> > + type=router options:router-port=rp-public \
> > + -- lsp-set-addresses public-rp router
> > +
> > +ovn-nbctl lsp-add sw0 sw01 \
> > + -- lsp-set-addresses sw01 "f0:00:00:01:02:03 192.168.1.2"
> > +
> > +ovn-nbctl lsp-add sw1 sw11 \
> > + -- lsp-set-addresses sw11 "f0:00:00:02:02:03 192.168.2.2"
> > +
> > +ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.2.2 sw11
> > 00:00:02:02:03:10
> > +
> > +AT_CHECK([ovs-vsctl set Open_vSwitch .
> > external-ids:ovn-bridge-mappings=phynet:br-ext])
> > +ovn-nbctl lsp-add public public1 \
> > + -- lsp-set-addresses public1 unknown \
> > + -- lsp-set-type public1 localnet \
> > + -- lsp-set-options public1 network_name=phynet
> > +
> > +OVN_POPULATE_ARP
> > +
> > +NS_CHECK_EXEC([sw01], [ping -q -c 3 -i 0.3 -w 2 172.16.1.2 | FORMAT_PING],
> > \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +OVS_WAIT_UNTIL([
> > + total_arp_pkts=$(cat arp.pcap | wc -l)
> > + test "${total_arp_pkts}" = "1"
> > +])
> > +
> > +OVS_WAIT_UNTIL([
> > + total_icmp_pkts=$(cat icmp.pcap | wc -l)
> > + test "${total_icmp_pkts}" = "3"
> > +])
> > +
> > +NS_CHECK_EXEC([sw11], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING],
> > \
> > +[0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +OVS_WAIT_UNTIL([
> > + total_arp1_pkts=$(cat arp1.pcap | wc -l)
> > + test "${total_arp1_pkts}" = "1"
> > +])
> > +
> > +OVS_WAIT_UNTIL([
> > + total_icmp1_pkts=$(cat icmp1.pcap | wc -l)
> > + test "${total_icmp1_pkts}" = "3"
> > +])
> > +
> > +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> > +
> > +as ovn-sb
> > +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> > +
> > +as ovn-nb
> > +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> > +
> > +as northd
> > +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
> > +
> > +as
> > +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
> > +/connection dropped.*/d"])
> > +AT_CLEANUP
> > +])
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev