This seems really, really specific to the particular NIC.  Can you add a
generic tunnel offload interface to DPDK?  What would that look like?

On Thu, Mar 17, 2016 at 10:43:42PM +0000, Chandran, Sugesh wrote:
> Hi,
> 
> This patch proposes an approach that uses the Flow Director feature on Intel
> Fortville NICs to boost VxLAN tunneling performance. In our testing we
> verified that VxLAN performance is almost doubled with this patch.
> The solution programs the NIC to report the flow ID along with the VxLAN
> packets, and it is matched by OVS in software. There may be corner cases that
> need to be addressed in the approach. For example, there is a possibility of a
> race condition where the NIC reports a flow ID that matches a different flow in
> OVS. This happens when a rule is evicted by a new rule with the same flow ID +
> hash in the OVS software. Packets may hit the wrong (new) rule in OVS until the
> flow is deleted in the hardware too.
> 
> It is a hardware-specific implementation (it only works with Intel Fortville
> NICs) for now; however, the proposal works with any programmable NIC. This RFC
> proves that OVS can offer very high-speed tunneling performance using
> flow programmability in NICs. I am looking for comments/suggestions on adding
> this support (such as how to configure it, enabling it for all programmable
> NICs, etc.) to the OVS userspace datapath to improve performance.
> 
> Regards
> _Sugesh
> 
> 
> > -----Original Message-----
> > From: Chandran, Sugesh
> > Sent: Thursday, March 17, 2016 10:00 PM
> > To: dev@openvswitch.org
> > Cc: Chandran, Sugesh <sugesh.chand...@intel.com>
> > Subject: [RFC PATCH] tunneling: Improving vxlan performance using DPDK
> > flow director feature.
> > 
> > Optimize VxLAN tunneling performance in the userspace datapath using the
> > flow director feature of Fortville NIC DPDK ports. OVS uses metadata reported
> > by the NIC to improve flow lookup performance on VxLAN packets.
> > 
> > Signed-off-by: Sugesh Chandran <sugesh.chand...@intel.com>
> > ---
> >  lib/automake.mk      |   2 +
> >  lib/dpdk-i40e-ofld.c | 266
> > +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/dpdk-i40e-ofld.h |  59 ++++++++++++
> >  lib/dpif-netdev.c    | 118 ++++++++++++++++++++++-
> >  lib/netdev-dpdk.c    |  41 +++++++-
> >  5 files changed, 481 insertions(+), 5 deletions(-)  create mode 100644
> > lib/dpdk-i40e-ofld.c  create mode 100644 lib/dpdk-i40e-ofld.h
> > 
> > diff --git a/lib/automake.mk b/lib/automake.mk index 27a1669..da48479
> > 100644
> > --- a/lib/automake.mk
> > +++ b/lib/automake.mk
> > @@ -366,6 +366,8 @@ endif
> > 
> >  if DPDK_NETDEV
> >  lib_libopenvswitch_la_SOURCES += \
> > +       lib/dpdk-i40e-ofld.c \
> > +       lib/dpdk-i40e-ofld.h \
> >         lib/netdev-dpdk.c \
> >         lib/netdev-dpdk.h
> >  endif
> > diff --git a/lib/dpdk-i40e-ofld.c b/lib/dpdk-i40e-ofld.c new file mode 
> > 100644
> > index 0000000..3ea7084
> > --- /dev/null
> > +++ b/lib/dpdk-i40e-ofld.c
> > @@ -0,0 +1,266 @@
> > +/*
> > + * Copyright (c) 2016 Intel Corp.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> > implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#include <config.h>
> > +
> > +#include "dpdk-i40e-ofld.h"
> > +#include "errno.h"
> > +#include "ovs-thread.h"
> > +#include "openvswitch/vlog.h"
> > +#include "netdev-provider.h"
> > +#include "rte_pci_dev_ids.h"
> > +#include "rte_ethdev.h"
> > +
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +VLOG_DEFINE_THIS_MODULE(dpdk_hw_ofld);
> > +
> > +#define VXLAN_DST_PORT          4789
> > +#define VXLAN_HLEN                  50
> > +#define MAX_FDIR_RULES          8000
> > +
> > +static uint32_t total_fdir_ids;
> > +static struct ovs_mutex hw_ofld_mutex = OVS_MUTEX_INITIALIZER;
> > +
> > +/*
> > + * Returns '0' if FDIR IDs reaches max limit. Only 8000 entries are
> > + * supported in FVL.
> > + */
> > +static inline uint32_t
> > +i40e_fdir_entry_cnt_inc(void)
> > +{
> > +    if (total_fdir_ids < MAX_FDIR_RULES) {
> > +        ovs_mutex_lock(&hw_ofld_mutex);
> > +        total_fdir_ids++;
> > +        ovs_mutex_unlock(&hw_ofld_mutex);
> > +        return (total_fdir_ids);
> > +    }
> > +    return 0;
> > +}
> > +
> > +static inline void
> > +i40e_fdir_entry_cnt_decr(void)
> > +{
> > +    ovs_mutex_lock(&hw_ofld_mutex);
> > +    total_fdir_ids ? total_fdir_ids-- : 0;
> > +    ovs_mutex_unlock(&hw_ofld_mutex);
> > +}
> > +
> > +/*
> > + * Release the hardware offloading functionality from the dpdk-port.
> > + */
> > +int
> > +dpdk_hw_ofld_port_release(struct netdev_dpdk *dpdk_port) {
> > +    ovs_mutex_lock(&hw_ofld_mutex);
> > +    set_i40e_ofld_flag(dpdk_port, 0);
> > +    ovs_mutex_unlock(&hw_ofld_mutex);
> > +    return 0;
> > +}
> > +
> > +int
> > +dpdk_eth_dev_hw_ofld_init(struct netdev_dpdk *dev,
> > +                                        int n_rxq, int n_txq,
> > +                                        struct rte_eth_conf *port_conf)
> > +{
> > +    int err = 0;
> > +    struct rte_eth_dev_info info;
> > +    uint16_t vendor_id, device_id;
> > +
> > +    rte_eth_dev_info_get(get_dpdk_port_id(dev), &info);
> > +    vendor_id = info.pci_dev->id.vendor_id;
> > +    device_id = info.pci_dev->id.device_id;
> > +    /* Configure vxlan offload only if its FVL NIC */
> > +    if (vendor_id != PCI_VENDOR_ID_INTEL || device_id !=
> > +                                            I40E_DEV_ID_SFP_XL710) {
> > +        ovs_mutex_lock(&hw_ofld_mutex);
> > +        set_i40e_ofld_flag(dev, 0);
> > +        ovs_mutex_unlock(&hw_ofld_mutex);
> > +        err = rte_eth_dev_configure(get_dpdk_port_id(dev),
> > +                                    n_rxq, n_txq, port_conf);
> > +        return err;
> > +    }
> > +    ovs_mutex_lock(&hw_ofld_mutex);
> > +    set_i40e_ofld_flag(dev, 1);
> > +    ovs_mutex_unlock(&hw_ofld_mutex);
> > +    /* Configure FVL FDIR VxLAN tunnel handing */
> > +    port_conf->fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
> > +    port_conf->fdir_conf.flex_conf.nb_payloads = 1;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].type =
> > RTE_ETH_L4_PAYLOAD;
> > +    /* Need to initilize all the 16 flex bytes,no matter;
> > +     * what we really using, possibly a DPDK bug?? */
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[0] = 0;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[1] = 1;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[2] = 2;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[3] = 3;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[4] = 4;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[5] = 5;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[6] = 6;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[7] = 7;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[8] = 8;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[9] = 9;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[10] = 10;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[11] = 11;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[12] = 12;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[13] = 13;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[14] = 14;
> > +    port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[15] = 15;
> > +    err = rte_eth_dev_configure(get_dpdk_port_id(dev),
> > +                                n_rxq, n_txq, port_conf);
> > +    if (err) {
> > +        VLOG_ERR("Failed to configure DPDK port with hardware offload");
> > +        return err;
> > +    }
> > +    /*Clean all FDIR entries if any */
> > +    err = rte_eth_dev_filter_ctrl(get_dpdk_port_id(dev),
> > +            RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_FLUSH, NULL);
> > +    return err;
> > +}
> > +
> > +/*
> > + * Install rules for VxLAN packets in hardware  */ int
> > +set_up_hw_offload_port_rule(struct netdev *netdev__,
> > +                                const struct flow *flow,
> > +                                const uint32_t hw_flow_id,
> > +                                const bool is_add_rule) {
> > +    int err = 0;
> > +    uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN] = { 0 };
> > +    uint32_t *vni;
> > +    enum rte_filter_op filter_op;
> > +    struct rte_eth_fdir_filter entry = { 0 };
> > +    struct netdev_dpdk *netdev;
> > +
> > +    netdev = netdev_dpdk_cast(netdev__);
> > +    if (is_i40e_ofld_enable(netdev)) {
> > +        entry.soft_id = hw_flow_id;
> > +        if (!entry.soft_id) {
> > +            VLOG_DBG("Invalid flow ID, Cant install rule in the NIC for "
> > +                             "hardware offload");
> > +            err = ECANCELED;
> > +            return err;
> > +        }
> > +        /* Install rules in NIC only for VxLAN flows */
> > +        if (ntohs(flow->tp_dst) != VXLAN_DST_PORT) {
> > +            return 0;
> > +        }
> > +        entry.action.flex_off = 0;  /* use 0 by default */
> > +        entry.input.flow_ext.vlan_tci = 0; //! ignored by i40e fdir
> > +        entry.action.behavior = RTE_ETH_FDIR_PASSTHRU;
> > +        entry.action.report_status = RTE_ETH_FDIR_REPORT_ID_FLEX_4;
> > +        entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
> > +        entry.input.flow.ip4_flow.src_ip = flow->nw_src;
> > +        entry.input.flow.ip4_flow.dst_ip = flow->nw_dst;
> > +        entry.input.flow.udp4_flow.dst_port = htons(VXLAN_DST_PORT);
> > +        entry.input.flow.udp4_flow.src_port = flow->tp_src;
> > +        vni = (uint32_t *)&flexbytes[4];
> > +        *vni = flow->tunnel.tun_id << 8;
> > +        memcpy(entry.input.flow_ext.flexbytes, flexbytes,
> > +                      RTE_ETH_FDIR_MAX_FLEXLEN);
> > +        entry.action.rx_queue = 0;
> > +        filter_op = is_add_rule ? RTE_ETH_FILTER_ADD :
> > +                                              RTE_ETH_FILTER_DELETE;
> > +        err = rte_eth_dev_filter_ctrl(get_dpdk_port_id(netdev),
> > +                 RTE_ETH_FILTER_FDIR, filter_op, &entry);
> > +
> > +        /*
> > +         * XXX : Delayed the max limit check for flow director entries 
> > after
> > +         * the configuration. Anyway the rte_eth_dev_filter_ctrl will fail 
> > if
> > +         * max limit reaches. This can be used for tracking.
> > +         */
> > +        if (is_add_rule) {
> > +            if (!i40e_fdir_entry_cnt_inc()) {
> > +                VLOG_DBG("Cant configure rule on NIC, Flow director "
> > +                        "entries hits max limit");
> > +            }
> > +        }
> > +        else {
> > +            i40e_fdir_entry_cnt_decr();
> > +        }
> > +        if (err < 0) {
> > +            VLOG_DBG("flow director programming error in NIC: (%d)\n", 
> > err);
> > +            return err;
> > +        }
> > +    }
> > +    return err;
> > +}
> > +
> > +static int
> > +i40e_dpdk_port_get_hw_ofld_pkts(struct
> > +                 dp_netdev_pmd_thread *pmd, struct dp_packet
> > +                 **in_packets, struct dp_packet **hw_packets,
> > +                 struct dp_packet **non_hw_packets,
> > +                 uint32_t cnt)
> > +{
> > +    int i, hw_pkt_cnt = 0, norm_pkt_cnt = 0;
> > +    const struct dp_netdev_flow *flow;
> > +    struct rte_mbuf *mbuf;
> > +
> > +    for (i = 0; i < cnt; i++) {
> > +        mbuf = (struct rte_mbuf *)in_packets[i];
> > +        if (mbuf->ol_flags & PKT_RX_FDIR_ID) {
> > +            flow = lookup_hw_offload_flow_for_fdirid(pmd, mbuf,
> > +                                                     mbuf->hash.fdir.hi);
> > +            if (!flow) {
> > +                /* Bogus flow in hw, cannot find it in OVS EMC */
> > +                mbuf->ol_flags &= ~PKT_RX_FDIR_ID;
> > +                non_hw_packets[norm_pkt_cnt++] = in_packets[i];
> > +                continue;
> > +            }
> > +            dp_packet_reset_packet(in_packets[i], VXLAN_HLEN);
> > +            mbuf->ol_flags |= PKT_RX_RSS_HASH;
> > +            mbuf->hash.rss = hash_finish(mbuf->hash.rss, 1);
> > +            hw_packets[hw_pkt_cnt++] = in_packets[i];
> > +        }
> > +        else {
> > +            non_hw_packets[norm_pkt_cnt++] = in_packets[i];
> > +        }
> > +    }
> > +    return hw_pkt_cnt;
> > +}
> > +
> > +/*
> > + * Process the packets based on hardware offload configuration  */ void
> > +hw_ofld_dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
> > +                             struct netdev_rxq *netdev_rxq,
> > +                             struct dp_packet **packets, int cnt,
> > +                             odp_port_t port_no) {
> > +    int hw_pkt_cnt;
> > +    struct dp_packet *hw_ofld_packets[NETDEV_MAX_BURST] = { 0 };
> > +    struct dp_packet *orig_packets[NETDEV_MAX_BURST] = { 0 };
> > +    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_rxq->netdev);
> > +
> > +    if (is_i40e_ofld_enable(netdev)) {
> > +        hw_pkt_cnt = i40e_dpdk_port_get_hw_ofld_pkts(pmd, packets,
> > +                                                          hw_ofld_packets,
> > +                                                          orig_packets, 
> > cnt);
> > +        /* Process packet streams separately. */
> > +        if (hw_pkt_cnt) {
> > +            dp_netdev_input(pmd, hw_ofld_packets, hw_pkt_cnt, port_no);
> > +        }
> > +        if (cnt - hw_pkt_cnt) {
> > +            dp_netdev_input(pmd, orig_packets, (cnt - hw_pkt_cnt), 
> > port_no);
> > +        }
> > +    }
> > +    else {
> > +        dp_netdev_input(pmd, packets, cnt, port_no);
> > +    }
> > +}
> > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
> > diff --git a/lib/dpdk-i40e-ofld.h b/lib/dpdk-i40e-ofld.h new file mode 
> > 100644
> > index 0000000..1aad246
> > --- /dev/null
> > +++ b/lib/dpdk-i40e-ofld.h
> > @@ -0,0 +1,59 @@
> > +/*
> > + * Copyright (c) 2016 Intel Corp.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> > implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#ifndef DPDK_I40E_OFLD_H_
> > +#define DPDK_I40E_OFLD_H_
> > +
> > +#include <config.h>
> > +
> > +#include "dp-packet.h"
> > +#include "netdev.h"
> > +
> > +/*
> > + * Macro to enable/disable HW OFFLOAD feature for DPDK.
> > + * 1 :- Enable HW_OFFLOAD support in OVS
> > + * 0 :- Disable HW_OFFLOAD support in OVS  */
> > +#define DPDK_I40E_TNL_OFFLOAD_ENABLE        1
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +
> > +struct netdev_dpdk;
> > +struct dp_netdev_pmd_thread;
> > +struct dp_netdev_flow;
> > +
> > +struct netdev_dpdk *netdev_dpdk_cast(const struct netdev *netdev);
> > +extern inline bool is_i40e_ofld_enable(const struct netdev_dpdk
> > +*netdev); extern inline void set_i40e_ofld_flag(struct netdev_dpdk
> > +*netdev, bool flag); extern inline int get_dpdk_port_id(struct
> > +netdev_dpdk *dpdk_port); int dpdk_eth_dev_hw_ofld_init(struct
> > netdev_dpdk *dev, int n_rxq, int n_txq,
> > +                              struct rte_eth_conf *port_conf); int
> > +dpdk_hw_ofld_port_release(struct netdev_dpdk *dpdk_port); int
> > +set_up_hw_offload_port_rule(struct netdev *netdev__,
> > +                                const struct flow *flow,
> > +                                const uint32_t hw_flow_id,
> > +                                const bool is_add_rule); void
> > +hw_ofld_dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
> > +                             struct netdev_rxq *netdev_rxq,
> > +                             struct dp_packet **packets, int cnt,
> > +                             odp_port_t port_no); const struct
> > +dp_netdev_flow *lookup_hw_offload_flow_for_fdirid(
> > +                            const struct dp_netdev_pmd_thread *pmd,
> > +                            struct rte_mbuf *mbuf, uint32_t flow_id);
> > +void dp_netdev_input(struct dp_netdev_pmd_thread *, struct dp_packet
> > **,
> > +                     int cnt, odp_port_t port_no);
> > +
> > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +#endif /* DPDK_I40E_OFLD_H_ */
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index cf574ad..d79b239
> > 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -70,6 +70,7 @@
> >  #include "util.h"
> > 
> >  #include "openvswitch/vlog.h"
> > +#include "dpdk-i40e-ofld.h"
> > 
> >  VLOG_DEFINE_THIS_MODULE(dpif_netdev);
> > 
> > @@ -478,7 +479,7 @@ static void dp_netdev_execute_actions(struct
> > dp_netdev_pmd_thread *pmd,
> >                                        bool may_steal,
> >                                        const struct nlattr *actions,
> >                                        size_t actions_len); -static void 
> > dp_netdev_input(struct
> > dp_netdev_pmd_thread *,
> > +void dp_netdev_input(struct dp_netdev_pmd_thread *,
> >                              struct dp_packet **, int cnt, odp_port_t 
> > port_no);  static void
> > dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
> >                                    struct dp_packet **, int cnt); @@ 
> > -1455,6 +1456,28 @@
> > dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
> >      flow->dead = true;
> > 
> >      dp_netdev_flow_unref(flow);
> > +
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +    struct dp_netdev_port *dp_port;
> > +    int err;
> > +    odp_port_t in_port = flow->flow.in_port.odp_port;
> > +    err = get_port_by_number(pmd->dp, in_port, &dp_port);
> > +    if (err) {
> > +        VLOG_WARN("Cannot get the port information, hardware offload may
> > "
> > +                "not be functional");
> > +        return;
> > +    }
> > +    if(strcmp(dp_port->type, "dpdk")) {
> > +        /* No hardware offload on a non-DPDK port") */
> > +        return;
> > +    }
> > +    /* Remove the hardware offload rule if exists.*/
> > +    if(set_up_hw_offload_port_rule(dp_port->netdev, &flow->flow,
> > +            dp_netdev_flow_hash(&(flow->ufid)), 0)) {
> > +        VLOG_DBG("Failed to delete the hardware offload rule");
> > +        return;
> > +    }
> > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
> >  }
> > 
> >  static void
> > @@ -2059,6 +2082,32 @@ dp_netdev_flow_add(struct
> > dp_netdev_pmd_thread *pmd,
> >          ds_destroy(&ds);
> >      }
> > 
> > +    /*
> > +     * Configure the hardware offload for tunnel while flows are getting
> > +     * inserted in OVS.
> > +     */
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +    struct dp_netdev_port *dp_port;
> > +    int err;
> > +    odp_port_t in_port = flow->flow.in_port.odp_port;
> > +    err = get_port_by_number(pmd->dp, in_port, &dp_port);
> > +    if (err) {
> > +        VLOG_WARN("Cannot get the port information, Failed to configure "
> > +                            "hardware offload");
> > +        goto out;
> > +    }
> > +    if (strcmp(dp_port->type, "dpdk")) {
> > +        /* No hardware offload on a non-DPDK port */
> > +        goto out;
> > +    }
> > +    /* install the rule in hw, reduntant might overwrite if it exists*/
> > +    if (set_up_hw_offload_port_rule(dp_port->netdev, &flow->flow,
> > +            dp_netdev_flow_hash(&flow->ufid), 1)) {
> > +        VLOG_ERR("Failed to install the hardware offload rule");
> > +        goto out;
> > +    }
> > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +out:
> >      return flow;
> >  }
> > 
> > @@ -2575,7 +2624,19 @@ dp_netdev_process_rxq_port(struct
> > dp_netdev_pmd_thread *pmd,
> >          *recirc_depth_get() = 0;
> > 
> >          cycles_count_start(pmd);
> > +
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +        /* Check if the source port is DPDK */
> > +        if (packets[0]->source == DPBUF_DPDK) {
> > +            hw_ofld_dp_netdev_input(pmd, rxq, packets, cnt, port->port_no);
> > +        }
> > +        else {
> > +            dp_netdev_input(pmd, packets, cnt, port->port_no);
> > +        }
> > +#else
> >          dp_netdev_input(pmd, packets, cnt, port->port_no);
> > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +
> >          cycles_count_end(pmd, PMD_CYCLES_PROCESSING);
> >      } else if (error != EAGAIN && error != EOPNOTSUPP) {
> >          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -
> > 3321,7 +3382,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread
> > *pmd, struct dp_packet *packet_,
> >          flow->tunnel.metadata.present.len =
> > orig_tunnel.metadata.present.len;
> >          flow->tunnel.flags |= FLOW_TNL_F_UDPIF;
> >      }
> > -
> >      return err;
> >  }
> > 
> > @@ -3430,6 +3490,7 @@ emc_processing(struct dp_netdev_pmd_thread
> > *pmd, struct dp_packet **packets,
> >      struct emc_cache *flow_cache = &pmd->flow_cache;
> >      struct netdev_flow_key *key = &keys[0];
> >      size_t i, n_missed = 0, n_dropped = 0;
> > +    struct rte_mbuf *mbuf;
> > 
> >      for (i = 0; i < cnt; i++) {
> >          struct dp_netdev_flow *flow;
> > @@ -3454,7 +3515,18 @@ emc_processing(struct dp_netdev_pmd_thread
> > *pmd, struct dp_packet **packets,
> >          key->len = 0; /* Not computed yet. */
> >          key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf);
> > 
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +        mbuf = (struct rte_mbuf *)packet;
> > +        if (mbuf->ol_flags & PKT_RX_FDIR_ID) {
> > +            flow = lookup_hw_offload_flow_for_fdirid(pmd, mbuf, 0);
> > +        }
> > +        else {
> > +            flow = emc_lookup(flow_cache, key);
> > +        }
> > +#else
> >          flow = emc_lookup(flow_cache, key);
> > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +
> >          if (OVS_LIKELY(flow)) {
> >              dp_netdev_queue_batches(packet, flow, &key->mf, batches,
> >                                      n_batches); @@ -3651,7 +3723,7 @@
> > dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
> >      }
> >  }
> > 
> > -static void
> > +void
> >  dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
> >                  struct dp_packet **packets, int cnt,
> >                  odp_port_t port_no)
> > @@ -4290,3 +4362,43 @@ dpcls_lookup(const struct dpcls *cls, const struct
> > netdev_flow_key keys[],
> >      }
> >      return false;                     /* Some misses. */
> >  }
> > +
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +/*
> > + * EMC lookup function on 'flow id' reported by NIC.
> > + */
> > +const struct dp_netdev_flow *
> > +lookup_hw_offload_flow_for_fdirid(const struct
> > +                 dp_netdev_pmd_thread *pmd, struct rte_mbuf *mbuf,
> > +                 uint32_t flow_id)
> > +{
> > +    const struct emc_cache *flow_cache = &pmd->flow_cache;
> > +    struct netdev_flow_key key;
> > +    struct emc_entry *current_entry;
> > +
> > +    key.len = 0;
> > +    if (OVS_LIKELY(mbuf->ol_flags & PKT_RX_RSS_HASH)) {
> > +        key.hash = mbuf->hash.rss;
> > +    }
> > +    else {
> > +        return NULL;
> > +    }
> > +    EMC_FOR_EACH_POS_WITH_HASH(flow_cache, current_entry,
> > key.hash) {
> > +        if (current_entry->key.hash == key.hash
> > +            && emc_entry_alive(current_entry)) {
> > +            if (OVS_UNLIKELY(flow_id && dp_netdev_flow_hash(
> > +                                       &current_entry->flow->ufid) !=
> > +                                       flow_id)) {
> > +                /* Hash collision in emc, fallback to software path */
> > +                return NULL;
> > +            }
> > +            return current_entry->flow;
> > +        }
> > +    }
> > +    /* XXX :: An improved classifier lookup needed here without any 
> > miniflow
> > +     * extract to keep it performant.Until then fallback to software based
> > +     * packet forwarding on EMC miss.
> > +     */
> > +     return NULL;
> > +}
> > +#endif /* DPDK_I40E_TNL_OFFLOAD_ENABLE */
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index f402354..2954f83
> > 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -56,6 +56,7 @@
> >  #include "rte_mbuf.h"
> >  #include "rte_meter.h"
> >  #include "rte_virtio_net.h"
> > +#include "dpdk-i40e-ofld.h"
> > 
> >  VLOG_DEFINE_THIS_MODULE(dpdk);
> >  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> > @@ -112,7 +113,7 @@ static char *vhost_sock_dir = NULL;   /* Location of
> > vhost-user sockets */
> >   */
> >  #define VHOST_ENQ_RETRY_USECS 100
> > 
> > -static const struct rte_eth_conf port_conf = {
> > +static struct rte_eth_conf port_conf = {
> >      .rxmode = {
> >          .mq_mode = ETH_MQ_RX_RSS,
> >          .split_hdr_size = 0,
> > @@ -331,6 +332,9 @@ struct netdev_dpdk {
> > 
> >      /* Identifier used to distinguish vhost devices from each other */
> >      char vhost_id[PATH_MAX];
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +    bool i40e_ofld_enable; /* hardware/NIC offload flag*/ #endif
> > +//DPDK_I40E_TNL_OFFLOAD_ENABLE
> > 
> >      /* In dpdk_list. */
> >      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); @@ -346,6
> > +350,24 @@ struct netdev_rxq_dpdk {
> >      int port_id;
> >  };
> > 
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +inline bool is_i40e_ofld_enable(const struct netdev_dpdk *netdev) {
> > +    return netdev->i40e_ofld_enable;
> > +}
> > +
> > +inline void set_i40e_ofld_flag(struct netdev_dpdk *netdev,
> > +                                                bool flag) {
> > +    netdev->i40e_ofld_enable = flag;
> > +}
> > +
> > +inline int get_dpdk_port_id(struct netdev_dpdk *dpdk_port) {
> > +    return dpdk_port->port_id;
> > +}
> > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +
> >  static bool dpdk_thread_is_pmd(void);
> > 
> >  static int netdev_dpdk_construct(struct netdev *); @@ -539,10 +561,21 @@
> > dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int
> > n_txq)
> >              VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq);
> >          }
> > 
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +        diag = (!dev->i40e_ofld_enable && dev->type == DPDK_DEV_ETH) ?
> > +                    dpdk_eth_dev_hw_ofld_init(dev, n_rxq, n_txq, 
> > &port_conf) :
> > +                    rte_eth_dev_configure(dev->port_id,
> > +                    n_rxq, n_txq, &port_conf);
> > +        if (diag) {
> > +            /* rte_dev_configure error */
> > +            break;
> > +        }
> > +#else
> >          diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, 
> > &port_conf);
> >          if (diag) {
> >              break;
> >          }
> > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
> > 
> >          for (i = 0; i < n_txq; i++) {
> >              diag = rte_eth_tx_queue_setup(dev->port_id, i,
> > NIC_PORT_TX_Q_SIZE, @@ -637,7 +670,7 @@ dpdk_eth_dev_init(struct
> > netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
> >      return 0;
> >  }
> > 
> > -static struct netdev_dpdk *
> > +struct netdev_dpdk *
> >  netdev_dpdk_cast(const struct netdev *netdev)  {
> >      return CONTAINER_OF(netdev, struct netdev_dpdk, up); @@ -861,6
> > +894,10 @@ netdev_dpdk_destruct(struct netdev *netdev_)
> >      rte_free(dev->tx_q);
> >      list_remove(&dev->list_node);
> >      dpdk_mp_put(dev->dpdk_mp);
> > +
> > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
> > +        dpdk_hw_ofld_port_release(dev); #endif /*
> > +DPDK_I40E_TNL_OFFLOAD_ENABLE */
> >      ovs_mutex_unlock(&dpdk_mutex);
> >  }
> > 
> > --
> > 1.9.1
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to