This seems really, really specific to the particular NIC. Can you add a generic tunnel offload interface to DPDK? What would that look like?
On Thu, Mar 17, 2016 at 10:43:42PM +0000, Chandran, Sugesh wrote: > Hi, > > This patch proposes an approach that uses the Flow Director feature on the Intel > Fortville NICs to boost the VxLAN tunneling performance. In our testing we > verified that the VxLAN performance is almost doubled with this patch. > The solution programs the NIC to report the flow ID along with the VxLAN > packets, and it is matched by OVS in software. There may be corner cases that > need to be addressed in the approach. For example: there is a possibility of a race > condition where the NIC reports a flow ID that may match on a different flow in OVS. > This happens when a rule is evicted by a new rule with the same flow ID + hash in > the OVS software. The packets may hit on the wrong new rule in OVS until the flow > gets deleted in the hardware too. > > It is a hardware-specific implementation (only works with Intel Fortville > NICs) for now; however, the proposal works with any programmable NICs. This RFC > proves that OVS can offer very high-speed tunneling performance using > flow programmability in NICs. I am looking for comments/suggestions on adding > this support (such as configuring it, enabling it for all programmable NICs, etc.) > in the OVS userspace datapath for improving the performance. > > Regards > _Sugesh > > > > -----Original Message----- > > From: Chandran, Sugesh > > Sent: Thursday, March 17, 2016 10:00 PM > > To: dev@openvswitch.org > > Cc: Chandran, Sugesh <sugesh.chand...@intel.com> > > Subject: [RFC PATCH] tunneling: Improving vxlan performance using DPDK > > flow director feature. > > > > Optimizing vxlan tunneling performance in userspace datapath using flow > > director feature in Fortville NIC DPDK ports. OVS uses metadata reported by > > NIC to improve the flow lookup performance on VxLAN packets. 
> > > > Signed-off-by: Sugesh Chandran <sugesh.chand...@intel.com> > > --- > > lib/automake.mk | 2 + > > lib/dpdk-i40e-ofld.c | 266 > > +++++++++++++++++++++++++++++++++++++++++++++++++++ > > lib/dpdk-i40e-ofld.h | 59 ++++++++++++ > > lib/dpif-netdev.c | 118 ++++++++++++++++++++++- > > lib/netdev-dpdk.c | 41 +++++++- > > 5 files changed, 481 insertions(+), 5 deletions(-) create mode 100644 > > lib/dpdk-i40e-ofld.c create mode 100644 lib/dpdk-i40e-ofld.h > > > > diff --git a/lib/automake.mk b/lib/automake.mk index 27a1669..da48479 > > 100644 > > --- a/lib/automake.mk > > +++ b/lib/automake.mk > > @@ -366,6 +366,8 @@ endif > > > > if DPDK_NETDEV > > lib_libopenvswitch_la_SOURCES += \ > > + lib/dpdk-i40e-ofld.c \ > > + lib/dpdk-i40e-ofld.h \ > > lib/netdev-dpdk.c \ > > lib/netdev-dpdk.h > > endif > > diff --git a/lib/dpdk-i40e-ofld.c b/lib/dpdk-i40e-ofld.c new file mode > > 100644 > > index 0000000..3ea7084 > > --- /dev/null > > +++ b/lib/dpdk-i40e-ofld.c > > @@ -0,0 +1,266 @@ > > +/* > > + * Copyright (c) 2016 Intel Corp. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > > implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. 
> > + */ > > + > > +#include <config.h> > > + > > +#include "dpdk-i40e-ofld.h" > > +#include "errno.h" > > +#include "ovs-thread.h" > > +#include "openvswitch/vlog.h" > > +#include "netdev-provider.h" > > +#include "rte_pci_dev_ids.h" > > +#include "rte_ethdev.h" > > + > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > +VLOG_DEFINE_THIS_MODULE(dpdk_hw_ofld); > > + > > +#define VXLAN_DST_PORT 4789 > > +#define VXLAN_HLEN 50 > > +#define MAX_FDIR_RULES 8000 > > + > > +static uint32_t total_fdir_ids; > > +static struct ovs_mutex hw_ofld_mutex = OVS_MUTEX_INITIALIZER; > > + > > +/* > > + * Returns '0' if FDIR IDs reaches max limit. Only 8000 entries are > > + * supported in FVL. > > + */ > > +static inline uint32_t > > +i40e_fdir_entry_cnt_inc(void) > > +{ > > + if (total_fdir_ids < MAX_FDIR_RULES) { > > + ovs_mutex_lock(&hw_ofld_mutex); > > + total_fdir_ids++; > > + ovs_mutex_unlock(&hw_ofld_mutex); > > + return (total_fdir_ids); > > + } > > + return 0; > > +} > > + > > +static inline void > > +i40e_fdir_entry_cnt_decr(void) > > +{ > > + ovs_mutex_lock(&hw_ofld_mutex); > > + total_fdir_ids ? total_fdir_ids-- : 0; > > + ovs_mutex_unlock(&hw_ofld_mutex); > > +} > > + > > +/* > > + * Release the hardware offloading functionality from the dpdk-port. 
> > + */ > > +int > > +dpdk_hw_ofld_port_release(struct netdev_dpdk *dpdk_port) { > > + ovs_mutex_lock(&hw_ofld_mutex); > > + set_i40e_ofld_flag(dpdk_port, 0); > > + ovs_mutex_unlock(&hw_ofld_mutex); > > + return 0; > > +} > > + > > +int > > +dpdk_eth_dev_hw_ofld_init(struct netdev_dpdk *dev, > > + int n_rxq, int n_txq, > > + struct rte_eth_conf *port_conf) > > +{ > > + int err = 0; > > + struct rte_eth_dev_info info; > > + uint16_t vendor_id, device_id; > > + > > + rte_eth_dev_info_get(get_dpdk_port_id(dev), &info); > > + vendor_id = info.pci_dev->id.vendor_id; > > + device_id = info.pci_dev->id.device_id; > > + /* Configure vxlan offload only if its FVL NIC */ > > + if (vendor_id != PCI_VENDOR_ID_INTEL || device_id != > > + I40E_DEV_ID_SFP_XL710) { > > + ovs_mutex_lock(&hw_ofld_mutex); > > + set_i40e_ofld_flag(dev, 0); > > + ovs_mutex_unlock(&hw_ofld_mutex); > > + err = rte_eth_dev_configure(get_dpdk_port_id(dev), > > + n_rxq, n_txq, port_conf); > > + return err; > > + } > > + ovs_mutex_lock(&hw_ofld_mutex); > > + set_i40e_ofld_flag(dev, 1); > > + ovs_mutex_unlock(&hw_ofld_mutex); > > + /* Configure FVL FDIR VxLAN tunnel handing */ > > + port_conf->fdir_conf.mode = RTE_FDIR_MODE_PERFECT; > > + port_conf->fdir_conf.flex_conf.nb_payloads = 1; > > + port_conf->fdir_conf.flex_conf.flex_set[0].type = > > RTE_ETH_L4_PAYLOAD; > > + /* Need to initilize all the 16 flex bytes,no matter; > > + * what we really using, possibly a DPDK bug?? 
*/ > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[0] = 0; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[1] = 1; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[2] = 2; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[3] = 3; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[4] = 4; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[5] = 5; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[6] = 6; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[7] = 7; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[8] = 8; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[9] = 9; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[10] = 10; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[11] = 11; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[12] = 12; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[13] = 13; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[14] = 14; > > + port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[15] = 15; > > + err = rte_eth_dev_configure(get_dpdk_port_id(dev), > > + n_rxq, n_txq, port_conf); > > + if (err) { > > + VLOG_ERR("Failed to configure DPDK port with hardware offload"); > > + return err; > > + } > > + /*Clean all FDIR entries if any */ > > + err = rte_eth_dev_filter_ctrl(get_dpdk_port_id(dev), > > + RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_FLUSH, NULL); > > + return err; > > +} > > + > > +/* > > + * Install rules for VxLAN packets in hardware */ int > > +set_up_hw_offload_port_rule(struct netdev *netdev__, > > + const struct flow *flow, > > + const uint32_t hw_flow_id, > > + const bool is_add_rule) { > > + int err = 0; > > + uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN] = { 0 }; > > + uint32_t *vni; > > + enum rte_filter_op filter_op; > > + struct rte_eth_fdir_filter entry = { 0 }; > > + struct netdev_dpdk *netdev; > > + > > + netdev = netdev_dpdk_cast(netdev__); > > + if 
(is_i40e_ofld_enable(netdev)) { > > + entry.soft_id = hw_flow_id; > > + if (!entry.soft_id) { > > + VLOG_DBG("Invalid flow ID, Cant install rule in the NIC for " > > + "hardware offload"); > > + err = ECANCELED; > > + return err; > > + } > > + /* Install rules in NIC only for VxLAN flows */ > > + if (ntohs(flow->tp_dst) != VXLAN_DST_PORT) { > > + return 0; > > + } > > + entry.action.flex_off = 0; /* use 0 by default */ > > + entry.input.flow_ext.vlan_tci = 0; //! ignored by i40e fdir > > + entry.action.behavior = RTE_ETH_FDIR_PASSTHRU; > > + entry.action.report_status = RTE_ETH_FDIR_REPORT_ID_FLEX_4; > > + entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP; > > + entry.input.flow.ip4_flow.src_ip = flow->nw_src; > > + entry.input.flow.ip4_flow.dst_ip = flow->nw_dst; > > + entry.input.flow.udp4_flow.dst_port = htons(VXLAN_DST_PORT); > > + entry.input.flow.udp4_flow.src_port = flow->tp_src; > > + vni = (uint32_t *)&flexbytes[4]; > > + *vni = flow->tunnel.tun_id << 8; > > + memcpy(entry.input.flow_ext.flexbytes, flexbytes, > > + RTE_ETH_FDIR_MAX_FLEXLEN); > > + entry.action.rx_queue = 0; > > + filter_op = is_add_rule ? RTE_ETH_FILTER_ADD : > > + RTE_ETH_FILTER_DELETE; > > + err = rte_eth_dev_filter_ctrl(get_dpdk_port_id(netdev), > > + RTE_ETH_FILTER_FDIR, filter_op, &entry); > > + > > + /* > > + * XXX : Delayed the max limit check for flow director entries > > after > > + * the configuration. Anyway the rte_eth_dev_filter_ctrl will fail > > if > > + * max limit reaches. This can be used for tracking. 
> > + */ > > + if (is_add_rule) { > > + if (!i40e_fdir_entry_cnt_inc()) { > > + VLOG_DBG("Cant configure rule on NIC, Flow director " > > + "entries hits max limit"); > > + } > > + } > > + else { > > + i40e_fdir_entry_cnt_decr(); > > + } > > + if (err < 0) { > > + VLOG_DBG("flow director programming error in NIC: (%d)\n", > > err); > > + return err; > > + } > > + } > > + return err; > > +} > > + > > +static int > > +i40e_dpdk_port_get_hw_ofld_pkts(struct > > + dp_netdev_pmd_thread *pmd, struct dp_packet > > + **in_packets, struct dp_packet **hw_packets, > > + struct dp_packet **non_hw_packets, > > + uint32_t cnt) > > +{ > > + int i, hw_pkt_cnt = 0, norm_pkt_cnt = 0; > > + const struct dp_netdev_flow *flow; > > + struct rte_mbuf *mbuf; > > + > > + for (i = 0; i < cnt; i++) { > > + mbuf = (struct rte_mbuf *)in_packets[i]; > > + if (mbuf->ol_flags & PKT_RX_FDIR_ID) { > > + flow = lookup_hw_offload_flow_for_fdirid(pmd, mbuf, > > + mbuf->hash.fdir.hi); > > + if (!flow) { > > + /* Bogus flow in hw, cannot find it in OVS EMC */ > > + mbuf->ol_flags &= ~PKT_RX_FDIR_ID; > > + non_hw_packets[norm_pkt_cnt++] = in_packets[i]; > > + continue; > > + } > > + dp_packet_reset_packet(in_packets[i], VXLAN_HLEN); > > + mbuf->ol_flags |= PKT_RX_RSS_HASH; > > + mbuf->hash.rss = hash_finish(mbuf->hash.rss, 1); > > + hw_packets[hw_pkt_cnt++] = in_packets[i]; > > + } > > + else { > > + non_hw_packets[norm_pkt_cnt++] = in_packets[i]; > > + } > > + } > > + return hw_pkt_cnt; > > +} > > + > > +/* > > + * Process the packets based on hardware offload configuration */ void > > +hw_ofld_dp_netdev_input(struct dp_netdev_pmd_thread *pmd, > > + struct netdev_rxq *netdev_rxq, > > + struct dp_packet **packets, int cnt, > > + odp_port_t port_no) { > > + int hw_pkt_cnt; > > + struct dp_packet *hw_ofld_packets[NETDEV_MAX_BURST] = { 0 }; > > + struct dp_packet *orig_packets[NETDEV_MAX_BURST] = { 0 }; > > + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_rxq->netdev); > > + > > + if 
(is_i40e_ofld_enable(netdev)) { > > + hw_pkt_cnt = i40e_dpdk_port_get_hw_ofld_pkts(pmd, packets, > > + hw_ofld_packets, > > + orig_packets, > > cnt); > > + /* Process packet streams separately. */ > > + if (hw_pkt_cnt) { > > + dp_netdev_input(pmd, hw_ofld_packets, hw_pkt_cnt, port_no); > > + } > > + if (cnt - hw_pkt_cnt) { > > + dp_netdev_input(pmd, orig_packets, (cnt - hw_pkt_cnt), > > port_no); > > + } > > + } > > + else { > > + dp_netdev_input(pmd, packets, cnt, port_no); > > + } > > +} > > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE > > diff --git a/lib/dpdk-i40e-ofld.h b/lib/dpdk-i40e-ofld.h new file mode > > 100644 > > index 0000000..1aad246 > > --- /dev/null > > +++ b/lib/dpdk-i40e-ofld.h > > @@ -0,0 +1,59 @@ > > +/* > > + * Copyright (c) 2016 Intel Corp. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > > implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > + > > +#ifndef DPDK_I40E_OFLD_H_ > > +#define DPDK_I40E_OFLD_H_ > > + > > +#include <config.h> > > + > > +#include "dp-packet.h" > > +#include "netdev.h" > > + > > +/* > > + * Macro to enable/disable HW OFFLOAD feature for DPDK. 
> > + * 1 :- Enable HW_OFFLOAD support in OVS > > + * 0 :- Disable HW_OFFLOAD support in OVS */ > > +#define DPDK_I40E_TNL_OFFLOAD_ENABLE 1 > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > + > > +struct netdev_dpdk; > > +struct dp_netdev_pmd_thread; > > +struct dp_netdev_flow; > > + > > +struct netdev_dpdk *netdev_dpdk_cast(const struct netdev *netdev); > > +extern inline bool is_i40e_ofld_enable(const struct netdev_dpdk > > +*netdev); extern inline void set_i40e_ofld_flag(struct netdev_dpdk > > +*netdev, bool flag); extern inline int get_dpdk_port_id(struct > > +netdev_dpdk *dpdk_port); int dpdk_eth_dev_hw_ofld_init(struct > > netdev_dpdk *dev, int n_rxq, int n_txq, > > + struct rte_eth_conf *port_conf); int > > +dpdk_hw_ofld_port_release(struct netdev_dpdk *dpdk_port); int > > +set_up_hw_offload_port_rule(struct netdev *netdev__, > > + const struct flow *flow, > > + const uint32_t hw_flow_id, > > + const bool is_add_rule); void > > +hw_ofld_dp_netdev_input(struct dp_netdev_pmd_thread *pmd, > > + struct netdev_rxq *netdev_rxq, > > + struct dp_packet **packets, int cnt, > > + odp_port_t port_no); const struct > > +dp_netdev_flow *lookup_hw_offload_flow_for_fdirid( > > + const struct dp_netdev_pmd_thread *pmd, > > + struct rte_mbuf *mbuf, uint32_t flow_id); > > +void dp_netdev_input(struct dp_netdev_pmd_thread *, struct dp_packet > > **, > > + int cnt, odp_port_t port_no); > > + > > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE > > +#endif /* DPDK_I40E_OFLD_H_ */ > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index cf574ad..d79b239 > > 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -70,6 +70,7 @@ > > #include "util.h" > > > > #include "openvswitch/vlog.h" > > +#include "dpdk-i40e-ofld.h" > > > > VLOG_DEFINE_THIS_MODULE(dpif_netdev); > > > > @@ -478,7 +479,7 @@ static void dp_netdev_execute_actions(struct > > dp_netdev_pmd_thread *pmd, > > bool may_steal, > > const struct nlattr *actions, > > size_t actions_len); -static void > > 
dp_netdev_input(struct > > dp_netdev_pmd_thread *, > > +void dp_netdev_input(struct dp_netdev_pmd_thread *, > > struct dp_packet **, int cnt, odp_port_t > > port_no); static void > > dp_netdev_recirculate(struct dp_netdev_pmd_thread *, > > struct dp_packet **, int cnt); @@ > > -1455,6 +1456,28 @@ > > dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, > > flow->dead = true; > > > > dp_netdev_flow_unref(flow); > > + > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > + struct dp_netdev_port *dp_port; > > + int err; > > + odp_port_t in_port = flow->flow.in_port.odp_port; > > + err = get_port_by_number(pmd->dp, in_port, &dp_port); > > + if (err) { > > + VLOG_WARN("Cannot get the port information, hardware offload may > > " > > + "not be functional"); > > + return; > > + } > > + if(strcmp(dp_port->type, "dpdk")) { > > + /* No hardware offload on a non-DPDK port") */ > > + return; > > + } > > + /* Remove the hardware offload rule if exists.*/ > > + if(set_up_hw_offload_port_rule(dp_port->netdev, &flow->flow, > > + dp_netdev_flow_hash(&(flow->ufid)), 0)) { > > + VLOG_DBG("Failed to delete the hardware offload rule"); > > + return; > > + } > > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE > > } > > > > static void > > @@ -2059,6 +2082,32 @@ dp_netdev_flow_add(struct > > dp_netdev_pmd_thread *pmd, > > ds_destroy(&ds); > > } > > > > + /* > > + * Configure the hardware offload for tunnel while flows are getting > > + * inserted in OVS. 
> > + */ > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > + struct dp_netdev_port *dp_port; > > + int err; > > + odp_port_t in_port = flow->flow.in_port.odp_port; > > + err = get_port_by_number(pmd->dp, in_port, &dp_port); > > + if (err) { > > + VLOG_WARN("Cannot get the port information, Failed to configure " > > + "hardware offload"); > > + goto out; > > + } > > + if (strcmp(dp_port->type, "dpdk")) { > > + /* No hardware offload on a non-DPDK port */ > > + goto out; > > + } > > + /* install the rule in hw, reduntant might overwrite if it exists*/ > > + if (set_up_hw_offload_port_rule(dp_port->netdev, &flow->flow, > > + dp_netdev_flow_hash(&flow->ufid), 1)) { > > + VLOG_ERR("Failed to install the hardware offload rule"); > > + goto out; > > + } > > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE > > +out: > > return flow; > > } > > > > @@ -2575,7 +2624,19 @@ dp_netdev_process_rxq_port(struct > > dp_netdev_pmd_thread *pmd, > > *recirc_depth_get() = 0; > > > > cycles_count_start(pmd); > > + > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > + /* Check if the source port is DPDK */ > > + if (packets[0]->source == DPBUF_DPDK) { > > + hw_ofld_dp_netdev_input(pmd, rxq, packets, cnt, port->port_no); > > + } > > + else { > > + dp_netdev_input(pmd, packets, cnt, port->port_no); > > + } > > +#else > > dp_netdev_input(pmd, packets, cnt, port->port_no); > > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE > > + > > cycles_count_end(pmd, PMD_CYCLES_PROCESSING); > > } else if (error != EAGAIN && error != EOPNOTSUPP) { > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ - > > 3321,7 +3382,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread > > *pmd, struct dp_packet *packet_, > > flow->tunnel.metadata.present.len = > > orig_tunnel.metadata.present.len; > > flow->tunnel.flags |= FLOW_TNL_F_UDPIF; > > } > > - > > return err; > > } > > > > @@ -3430,6 +3490,7 @@ emc_processing(struct dp_netdev_pmd_thread > > *pmd, struct dp_packet **packets, > > struct emc_cache *flow_cache = &pmd->flow_cache; > 
> struct netdev_flow_key *key = &keys[0]; > > size_t i, n_missed = 0, n_dropped = 0; > > + struct rte_mbuf *mbuf; > > > > for (i = 0; i < cnt; i++) { > > struct dp_netdev_flow *flow; > > @@ -3454,7 +3515,18 @@ emc_processing(struct dp_netdev_pmd_thread > > *pmd, struct dp_packet **packets, > > key->len = 0; /* Not computed yet. */ > > key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf); > > > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > + mbuf = (struct rte_mbuf *)packet; > > + if (mbuf->ol_flags & PKT_RX_FDIR_ID) { > > + flow = lookup_hw_offload_flow_for_fdirid(pmd, mbuf, 0); > > + } > > + else { > > + flow = emc_lookup(flow_cache, key); > > + } > > +#else > > flow = emc_lookup(flow_cache, key); > > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE > > + > > if (OVS_LIKELY(flow)) { > > dp_netdev_queue_batches(packet, flow, &key->mf, batches, > > n_batches); @@ -3651,7 +3723,7 @@ > > dp_netdev_input__(struct dp_netdev_pmd_thread *pmd, > > } > > } > > > > -static void > > +void > > dp_netdev_input(struct dp_netdev_pmd_thread *pmd, > > struct dp_packet **packets, int cnt, > > odp_port_t port_no) > > @@ -4290,3 +4362,43 @@ dpcls_lookup(const struct dpcls *cls, const struct > > netdev_flow_key keys[], > > } > > return false; /* Some misses. */ > > } > > + > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > +/* > > + * EMC lookup function on 'flow id' reported by NIC. 
> > + */ > > +const struct dp_netdev_flow * > > +lookup_hw_offload_flow_for_fdirid(const struct > > + dp_netdev_pmd_thread *pmd, struct rte_mbuf *mbuf, > > + uint32_t flow_id) > > +{ > > + const struct emc_cache *flow_cache = &pmd->flow_cache; > > + struct netdev_flow_key key; > > + struct emc_entry *current_entry; > > + > > + key.len = 0; > > + if (OVS_LIKELY(mbuf->ol_flags & PKT_RX_RSS_HASH)) { > > + key.hash = mbuf->hash.rss; > > + } > > + else { > > + return NULL; > > + } > > + EMC_FOR_EACH_POS_WITH_HASH(flow_cache, current_entry, > > key.hash) { > > + if (current_entry->key.hash == key.hash > > + && emc_entry_alive(current_entry)) { > > + if (OVS_UNLIKELY(flow_id && dp_netdev_flow_hash( > > + ¤t_entry->flow->ufid) != > > + flow_id)) { > > + /* Hash collision in emc, fallback to software path */ > > + return NULL; > > + } > > + return current_entry->flow; > > + } > > + } > > + /* XXX :: An improved classifier lookup needed here without any > > miniflow > > + * extract to keep it performant.Until then fallback to software based > > + * packet forwarding on EMC miss. 
> > + */ > > + return NULL; > > +} > > +#endif /* DPDK_I40E_TNL_OFFLOAD_ENABLE */ > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index f402354..2954f83 > > 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -56,6 +56,7 @@ > > #include "rte_mbuf.h" > > #include "rte_meter.h" > > #include "rte_virtio_net.h" > > +#include "dpdk-i40e-ofld.h" > > > > VLOG_DEFINE_THIS_MODULE(dpdk); > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > @@ -112,7 +113,7 @@ static char *vhost_sock_dir = NULL; /* Location of > > vhost-user sockets */ > > */ > > #define VHOST_ENQ_RETRY_USECS 100 > > > > -static const struct rte_eth_conf port_conf = { > > +static struct rte_eth_conf port_conf = { > > .rxmode = { > > .mq_mode = ETH_MQ_RX_RSS, > > .split_hdr_size = 0, > > @@ -331,6 +332,9 @@ struct netdev_dpdk { > > > > /* Identifier used to distinguish vhost devices from each other */ > > char vhost_id[PATH_MAX]; > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > + bool i40e_ofld_enable; /* hardware/NIC offload flag*/ #endif > > +//DPDK_I40E_TNL_OFFLOAD_ENABLE > > > > /* In dpdk_list. 
*/ > > struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); @@ -346,6 > > +350,24 @@ struct netdev_rxq_dpdk { > > int port_id; > > }; > > > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > +inline bool is_i40e_ofld_enable(const struct netdev_dpdk *netdev) { > > + return netdev->i40e_ofld_enable; > > +} > > + > > +inline void set_i40e_ofld_flag(struct netdev_dpdk *netdev, > > + bool flag) { > > + netdev->i40e_ofld_enable = flag; > > +} > > + > > +inline int get_dpdk_port_id(struct netdev_dpdk *dpdk_port) { > > + return dpdk_port->port_id; > > +} > > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE > > + > > static bool dpdk_thread_is_pmd(void); > > > > static int netdev_dpdk_construct(struct netdev *); @@ -539,10 +561,21 @@ > > dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int > > n_txq) > > VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq); > > } > > > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > + diag = (!dev->i40e_ofld_enable && dev->type == DPDK_DEV_ETH) ? > > + dpdk_eth_dev_hw_ofld_init(dev, n_rxq, n_txq, > > &port_conf) : > > + rte_eth_dev_configure(dev->port_id, > > + n_rxq, n_txq, &port_conf); > > + if (diag) { > > + /* rte_dev_configure error */ > > + break; > > + } > > +#else > > diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, > > &port_conf); > > if (diag) { > > break; > > } > > +#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE > > > > for (i = 0; i < n_txq; i++) { > > diag = rte_eth_tx_queue_setup(dev->port_id, i, > > NIC_PORT_TX_Q_SIZE, @@ -637,7 +670,7 @@ dpdk_eth_dev_init(struct > > netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) > > return 0; > > } > > > > -static struct netdev_dpdk * > > +struct netdev_dpdk * > > netdev_dpdk_cast(const struct netdev *netdev) { > > return CONTAINER_OF(netdev, struct netdev_dpdk, up); @@ -861,6 > > +894,10 @@ netdev_dpdk_destruct(struct netdev *netdev_) > > rte_free(dev->tx_q); > > list_remove(&dev->list_node); > > dpdk_mp_put(dev->dpdk_mp); > > + > > +#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE > > + 
dpdk_hw_ofld_port_release(dev); #endif /* > > +DPDK_I40E_TNL_OFFLOAD_ENABLE */ > > ovs_mutex_unlock(&dpdk_mutex); > > } > > > > -- > > 1.9.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev