On 11/24/22 12:45, Lorenzo Bianconi wrote:
> Improve buffered packet management in ovn-controller using even
> datapath and port keys as buffered_packets_map hashmap hash.
> Improve run_buffered_binding avoiding unnecessary loops over
> datapaths and port_bindings.
> 
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---

Hi Lorenzo,

> Changes since v3:
> - fix ip stored in buffered_packets structure (must be unique for each
>   buffered_packets_map map entry)
> Changes since v2:
> - improve hash function
> Changes since v1:
> - improve code readability
> ---
>  controller/pinctrl.c | 112 +++++++++++++++++++++++++++-----------
>  tests/system-ovn.at  | 124 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 204 insertions(+), 32 deletions(-)
> 
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index f44775c7e..e310314db 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -182,6 +182,7 @@ static void destroy_buffered_packets_map(void);
>  static void
>  run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
>                       struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
> +                     struct ovsdb_idl_index *sbrec_port_binding_by_name,
>                       const struct hmap *local_datapaths)
>      OVS_REQUIRES(pinctrl_mutex);
>  
> @@ -1430,6 +1431,9 @@ struct buffered_packets {
>      struct in6_addr ip;
>      struct eth_addr ea;
>  
> +    uint64_t dp_key;
> +    uint64_t port_key;
> +
>      long long int timestamp;
>  
>      struct buffer_info data[BUFFER_QUEUE_DEPTH];
> @@ -1556,19 +1560,30 @@ buffered_packets_map_gc(void)
>  }
>  
>  static struct buffered_packets *
> -pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash)
> +pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key,
> +                              const struct in6_addr *ip, uint32_t hash)
>  {
>      struct buffered_packets *qp;
> -
> -    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
> -                             &buffered_packets_map) {
> -        if (IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
> +    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, &buffered_packets_map) {
> +        if (qp->dp_key == dp_key && qp->port_key == port_key &&
> +            IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
>              return qp;
>          }
>      }
>      return NULL;
>  }
>  
> +static uint32_t
> +pinctrl_buffer_packet_hash(uint64_t dp_key, uint64_t port_key,
> +                           struct in6_addr *addr)
> +{
> +    uint32_t hash = 0;
> +    hash = hash_add64(hash, port_key);
> +    hash = hash_add64(hash, dp_key);
> +    hash = hash_bytes(&addr, sizeof addr, hash);
> +    return hash_finish(hash, 16);
> +}
> +
>  /* Called with in the pinctrl_handler thread context. */
>  static int
>  pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
> @@ -1578,6 +1593,8 @@ pinctrl_handle_buffered_packets(struct dp_packet 
> *pkt_in,
>      struct buffered_packets *bp;
>      struct dp_packet *clone;
>      struct in6_addr addr;
> +    uint64_t dp_key = ntohll(md->flow.metadata);
> +    uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0];
>  
>      if (is_arp) {
>          addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
> @@ -1586,8 +1603,8 @@ pinctrl_handle_buffered_packets(struct dp_packet 
> *pkt_in,
>          memcpy(&addr, &ip6, sizeof addr);
>      }
>  
> -    uint32_t hash = hash_bytes(&addr, sizeof addr, 0);
> -    bp = pinctrl_find_buffered_packets(&addr, hash);
> +    uint32_t hash = pinctrl_buffer_packet_hash(dp_key, oport_key, &addr);
> +    bp = pinctrl_find_buffered_packets(dp_key, oport_key, &addr, hash);
>      if (!bp) {
>          if (hmap_count(&buffered_packets_map) >= 1000) {
>              COVERAGE_INC(pinctrl_drop_buffered_packets_map);
> @@ -1598,6 +1615,8 @@ pinctrl_handle_buffered_packets(struct dp_packet 
> *pkt_in,
>          hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
>          bp->head = bp->tail = 0;
>          bp->ip = addr;
> +        bp->dp_key = dp_key;
> +        bp->port_key = oport_key;
>      }
>      bp->timestamp = time_msec();
>      /* clone the packet to send it later with correct L2 address */
> @@ -3586,6 +3605,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                    sbrec_ip_multicast_opts);
>      run_buffered_binding(sbrec_mac_binding_by_lport_ip,
>                           sbrec_port_binding_by_datapath,
> +                         sbrec_port_binding_by_name,
>                           local_datapaths);
>      sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, 
> sbrec_port_binding_by_name,
>                        chassis);
> @@ -4357,47 +4377,75 @@ run_put_mac_bindings(struct ovsdb_idl_txn 
> *ovnsb_idl_txn,
>  static void
>  run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
>                       struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
> +                     struct ovsdb_idl_index *sbrec_port_binding_by_name,
>                       const struct hmap *local_datapaths)
>      OVS_REQUIRES(pinctrl_mutex)
>  {
>      const struct local_datapath *ld;
> +    struct buffered_packets *qp;
>      bool notify = false;
>  
> -    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> -        /* MAC_Binding.logical_port will always belong to a
> -         * a router datapath. Hence we can skip logical switch
> -         * datapaths.
> -         * */
> -        if (ld->is_switch) {
> -            continue;
> -        }
> +    HMAP_FOR_EACH_SAFE (qp, hmap_node, &buffered_packets_map) {
> +        bool found = false;
> +        HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> +            /* MAC_Binding.logical_port will always belong to a
> +             * a router datapath. Hence we can skip logical switch
> +             * datapaths.
> +             */
> +            if (ld->is_switch) {
> +                continue;
> +            }
>  
> -        struct sbrec_port_binding *target =
> -            
> sbrec_port_binding_index_init_row(sbrec_port_binding_by_datapath);
> -        sbrec_port_binding_index_set_datapath(target, ld->datapath);
> -        const struct sbrec_port_binding *pb;
> -        SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
> -                                           sbrec_port_binding_by_datapath) {
> -            if (strcmp(pb->type, "patch") && strcmp(pb->type, "l3gateway")) {
> +            if (ld->datapath->tunnel_key != qp->dp_key) {
>                  continue;
>              }

Instead of doing a linear search in all datapaths you can just do:

get_local_datapath(local_datapaths, qp->dp_key);


> -            struct buffered_packets *cur_qp;
> -            HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) {
> +
> +            struct sbrec_port_binding *target =
> +                sbrec_port_binding_index_init_row(
> +                        sbrec_port_binding_by_datapath);
> +            sbrec_port_binding_index_set_datapath(target, ld->datapath);
> +            const struct sbrec_port_binding *pb;
> +            SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
> +                    sbrec_port_binding_by_datapath) {
> +                if (pb->tunnel_key != qp->port_key) {

Indexes can match on multiple columns.  You could just create an index
that uses tunnel_key too.

> +                    continue;
> +                }
> +
> +                const struct sbrec_port_binding *port = pb;
> +                if (!strcmp(pb->type, "chassisredirect")) {
> +                    const char *name = smap_get(&pb->options,
> +                                                "distributed-port");
> +                    if (name) {
> +                        port = 
> lport_lookup_by_name(sbrec_port_binding_by_name,
> +                                                    name);
> +                    }
> +                }
> +
> +                if (!port) {
> +                    continue;
> +                }
> +
>                  struct ds ip_s = DS_EMPTY_INITIALIZER;
> -                ipv6_format_mapped(&cur_qp->ip, &ip_s);
> +                ipv6_format_mapped(&qp->ip, &ip_s);
>                  const struct sbrec_mac_binding *b = mac_binding_lookup(
> -                        sbrec_mac_binding_by_lport_ip, pb->logical_port,
> +                        sbrec_mac_binding_by_lport_ip, port->logical_port,
>                          ds_cstr(&ip_s));
> +                ds_destroy(&ip_s);
> +
>                  if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT,
> -                                  ETH_ADDR_SCAN_ARGS(cur_qp->ea))) {
> -                    hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
> -                    ovs_list_push_back(&buffered_mac_bindings, 
> &cur_qp->list);
> -                    notify = true;
> +                                  ETH_ADDR_SCAN_ARGS(qp->ea))) {
> +                    hmap_remove(&buffered_packets_map, &qp->hmap_node);
> +                    ovs_list_push_back(&buffered_mac_bindings, &qp->list);
> +                    found = true;
> +                    break;
>                  }
> -                ds_destroy(&ip_s);
> +            }
> +            sbrec_port_binding_index_destroy_row(target);
> +            if (found) {
> +                notify = true;
> +                break;
>              }

No need for this "if".  You  could just move the "notify = true;" above
and replace "found = true".

>          }
> -        sbrec_port_binding_index_destroy_row(target);
>      }
>      buffered_packets_map_gc();
>  

Maybe as a follow up but it's probably more efficient to:

- walk all updated mac bindings:
  - lookup the exact buffered_packets_map record based on
    mac_binding datapath + port + ip
    - remove it from the map and add it to the list.

> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index cb3412717..159d22751 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at

I think all the comments I had on the system test in v3 still apply to v4.

Regards,
Dumitru

> @@ -9138,3 +9138,127 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port 
> patch-.*/d
>  /connection dropped.*/d"])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([IP buffering])
> +AT_KEYWORDS([ip-buffering])
> +AT_SKIP_IF([test $HAVE_TCPDUMP = no])
> +
> +ovn_start
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +ADD_BR([br-int])
> +ADD_BR([br-ext])
> +
> +ovs-ofctl add-flow br-ext action=normal
> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \
> +        -- set Open_vSwitch . external-ids:system-id=hv1 \
> +        -- set Open_vSwitch . 
> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +        -- set bridge br-int fail-mode=secure 
> other-config:disable-in-band=true
> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +
> +ADD_NAMESPACES(sw01)
> +ADD_VETH(sw01, sw01, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
> +         "192.168.1.1")
> +ADD_NAMESPACES(sw11)
> +ADD_VETH(sw11, sw11, br-int, "192.168.2.2/24", "f0:00:00:02:02:03", \
> +         "192.168.2.1")
> +ADD_NAMESPACES(remote)
> +ADD_VETH(remote, remote, br-ext, "172.16.1.2/24", "f0:00:00:01:02:05", \
> +         "172.16.1.1")
> +ADD_NAMESPACES(remote1)
> +ADD_VETH(remote1, remote1, br-ext, "172.16.1.4/24", "f0:00:00:01:02:06", \
> +         "172.16.1.1")
> +
> +NS_CHECK_EXEC([remote], [tcpdump -c 3 -nneei remote -Q in src 192.168.1.2 
> and dst 172.16.1.2 and icmp > icmp.pcap &])
> +NS_CHECK_EXEC([remote], [tcpdump -c 1 -nneei remote -Q in arp and 
> arp[[24:4]]==0xac100102 > arp.pcap &])
> +NS_CHECK_EXEC([remote1], [tcpdump -c 3 -nneei remote1 -Q in src 172.16.1.3 
> and dst 172.16.1.4 and icmp > icmp1.pcap 2>/dev/null &])
> +NS_CHECK_EXEC([remote1], [tcpdump -c 1 -nneei remote1 -Q in arp and 
> arp[[24:4]]==0xac100104 > arp1.pcap 2>/dev/null &])
> +
> +ovn-nbctl create Logical_Router name=R1 options:chassis=hv1
> +ovn-nbctl ls-add sw0
> +ovn-nbctl ls-add sw1
> +ovn-nbctl ls-add public
> +
> +ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
> +ovn-nbctl lrp-add R1 rp-sw1 00:00:03:01:02:03 192.168.2.1/24
> +ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24
> +
> +ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
> +    type=router options:router-port=rp-sw0 \
> +    -- lsp-set-addresses sw0-rp router
> +ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
> +    type=router options:router-port=rp-sw1 \
> +    -- lsp-set-addresses sw1-rp router
> +
> +ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \
> +    type=router options:router-port=rp-public \
> +    -- lsp-set-addresses public-rp router
> +
> +ovn-nbctl lsp-add sw0 sw01 \
> +    -- lsp-set-addresses sw01 "f0:00:00:01:02:03 192.168.1.2"
> +
> +ovn-nbctl lsp-add sw1 sw11 \
> +    -- lsp-set-addresses sw11 "f0:00:00:02:02:03 192.168.2.2"
> +
> +ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.2.2 sw11 
> 00:00:02:02:03:10
> +
> +AT_CHECK([ovs-vsctl set Open_vSwitch . 
> external-ids:ovn-bridge-mappings=phynet:br-ext])
> +ovn-nbctl lsp-add public public1 \
> +        -- lsp-set-addresses public1 unknown \
> +        -- lsp-set-type public1 localnet \
> +        -- lsp-set-options public1 network_name=phynet
> +
> +OVN_POPULATE_ARP
> +
> +NS_CHECK_EXEC([sw01], [ping -q -c 3 -i 0.3 -w 2 172.16.1.2 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +OVS_WAIT_UNTIL([
> +        total_arp_pkts=$(cat arp.pcap | wc -l)
> +        test "${total_arp_pkts}" = "1"
> +])
> +
> +OVS_WAIT_UNTIL([
> +        total_icmp_pkts=$(cat icmp.pcap | wc -l)
> +        test "${total_icmp_pkts}" = "3"
> +])
> +
> +NS_CHECK_EXEC([sw11], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +OVS_WAIT_UNTIL([
> +        total_arp1_pkts=$(cat arp1.pcap | wc -l)
> +        test "${total_arp1_pkts}" = "1"
> +])
> +
> +OVS_WAIT_UNTIL([
> +        total_icmp1_pkts=$(cat icmp1.pcap | wc -l)
> +        test "${total_icmp1_pkts}" = "3"
> +])
> +
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +
> +as ovn-sb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as ovn-nb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as northd
> +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
> +
> +as
> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
> +/connection dropped.*/d"])
> +AT_CLEANUP
> +])

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to