On 10 Oct 2018, at 18:14, Ophir Munk wrote:
1. Enable compilation and linkage with dpdk 18.08.0
The following dpdk commits which were introduced after dpdk 17.11.x
require OVS updates to accommodate to the dpdk changes.
- ce17edde ("ethdev: introduce Rx queue offloads API")
- ab3ce1e0 ("ethdev: remove old offload API")
- c06ddf96 ("meter: add configuration profile")
- e58638c3 ("ethdev: fix TPID handling in flow API")
- cd8c7c7c ("ethdev: replace bus specific struct with generic dev")
- ac8d22de ("ethdev: flatten RSS configuration in flow API")
2. Limit configured rss hash functions to only those supported
by the eth device.
3. Set default RSS key in struct action_rss_data, required by OVS
commit
- e8a2b5bf ("netdev-dpdk: implement flow offload with rte flow")
when configured with "other_config:hw-offload=true"
Remark: calling RSS with 0 length (default) key is rejected
in DPDK 18.08 and will be enabled in DPDK 18.11. It has no effect
when running in a "hw-offload=false" configuration.
4. Update references to DPDK version 18.08 in Documentation and in
travis linux-build script
5. There are currently warnings on DPDK deprecated functions calls:
- rte_eth_dev_attach
- rte_eth_dev_detach
- rte_eth_devargs_parse
The deprecated functions calls replacements will be added to
DPDK 18.11.
Signed-off-by: Ophir Munk <[email protected]>
---
v1:
First version
v2:
Avoid seg faults cases as described in
https://patchwork.ozlabs.org/patch/965451/
by using the patch in:
https://github.com/kevintraynor/ovs-dpdk-
master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
v3:
- rebase on latest dpdk-hwol branch
- Updates based on latest reviews to versions v1 & v2
v4:
This patch got lost in mailing list server due to administrative
issues and
is now obsolete
v5:
- updated commit message
- Address all reviews (some skipped by mistake) from recent versions
- it is suggested to ignore deprecated functions warnings as the
functions
replacements are missing in DPDK 18.08 and will be added to DPDK 18.11
.travis/linux-build.sh | 2 +-
Documentation/intro/install/dpdk.rst | 14 ++--
Documentation/topics/dpdk/vhost-user.rst | 6 +-
lib/netdev-dpdk.c | 130
++++++++++++++++++++-----------
4 files changed, 95 insertions(+), 57 deletions(-)
diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
index 4b9fc4a..4c9e952 100755
--- a/.travis/linux-build.sh
+++ b/.travis/linux-build.sh
@@ -83,7 +83,7 @@ fi
if [ "$DPDK" ]; then
if [ -z "$DPDK_VER" ]; then
- DPDK_VER="17.11.3"
+ DPDK_VER="18.08"
fi
install_dpdk $DPDK_VER
if [ "$CC" = "clang" ]; then
diff --git a/Documentation/intro/install/dpdk.rst
b/Documentation/intro/install/dpdk.rst
index 36501c6..73610ef 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -42,7 +42,7 @@ Build requirements
In addition to the requirements described in :doc:`general`, building
Open
vSwitch with DPDK will require the following:
-- DPDK 17.11.3
+- DPDK 18.08.0
- A `DPDK supported NIC`_
@@ -71,9 +71,9 @@ Install DPDK
#. Download the `DPDK sources`_, extract the file and set
``DPDK_DIR``::
$ cd /usr/src/
- $ wget http://fast.dpdk.org/rel/dpdk-17.11.3.tar.xz
- $ tar xf dpdk-17.11.3.tar.xz
- $ export DPDK_DIR=/usr/src/dpdk-stable-17.11.3
+ $ wget http://fast.dpdk.org/rel/dpdk-18.08.tar.xz
+ $ tar xf dpdk-18.08.tar.xz
+ $ export DPDK_DIR=/usr/src/dpdk-stable-18.08
$ cd $DPDK_DIR
#. (Optional) Configure DPDK as a shared library
@@ -283,9 +283,9 @@ with either the ovs-vswitchd logs, or by running
either of the commands::
$ ovs-vswitchd --version
ovs-vswitchd (Open vSwitch) 2.9.0
- DPDK 17.11.0
+ DPDK 18.08.0
$ ovs-vsctl get Open_vSwitch . dpdk_version
- "DPDK 17.11.0"
+ "DPDK 18.08.0"
At this point you can use ovs-vsctl to set up bridges and other Open
vSwitch
features. Seeing as we've configured the DPDK datapath, we will use
DPDK-type
@@ -673,7 +673,7 @@ Limitations
The latest list of validated firmware versions can be found in the
`DPDK
release notes`_.
-.. _DPDK release notes:
http://dpdk.org/doc/guides/rel_notes/release_17_11.html
+.. _DPDK release notes:
http://dpdk.org/doc/guides/rel_notes/release_18_08.html
- Upper bound MTU: DPDK device drivers differ in how the L2 frame for
a
given MTU value is calculated e.g. i40e driver includes 2 x vlan
headers in
diff --git a/Documentation/topics/dpdk/vhost-user.rst
b/Documentation/topics/dpdk/vhost-user.rst
index b1e2285..56f58ba 100644
--- a/Documentation/topics/dpdk/vhost-user.rst
+++ b/Documentation/topics/dpdk/vhost-user.rst
@@ -320,9 +320,9 @@ To begin, instantiate a guest as described in
:ref:`dpdk-vhost-user` or
DPDK sources to VM and build DPDK::
$ cd /root/dpdk/
- $ wget http://fast.dpdk.org/rel/dpdk-17.11.3.tar.xz
- $ tar xf dpdk-17.11.3.tar.xz
- $ export DPDK_DIR=/root/dpdk/dpdk-stable-17.11.3
+ $ wget http://fast.dpdk.org/rel/dpdk-18.08.tar.xz
+ $ tar xf dpdk-18.08.tar.xz
+ $ export DPDK_DIR=/root/dpdk/dpdk-stable-18.08
$ export DPDK_TARGET=x86_64-native-linuxapp-gcc
$ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET
$ cd $DPDK_DIR
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f91aa27..4dd0ec3 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
.rxmode = {
.mq_mode = ETH_MQ_RX_RSS,
.split_hdr_size = 0,
- .header_split = 0, /* Header Split disabled */
- .hw_ip_checksum = 0, /* IP checksum offload disabled */
- .hw_vlan_filter = 0, /* VLAN filtering disabled */
- .jumbo_frame = 0, /* Jumbo Frame Support disabled */
- .hw_strip_crc = 0,
+ .offloads = 0,
},
.rx_adv_conf = {
.rss_conf = {
@@ -364,6 +360,7 @@ struct dpdk_ring {
struct ingress_policer {
struct rte_meter_srtcm_params app_srtcm_params;
struct rte_meter_srtcm in_policer;
+ struct rte_meter_srtcm_profile in_prof;
rte_spinlock_t policer_lock;
};
@@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev,
int n_rxq, int n_txq)
struct rte_eth_dev_info info;
uint16_t conf_mtu;
+ rte_eth_dev_info_get(dev->port_id, &info);
+
/* As of DPDK 17.11.1 a few PMDs require to explicitly enable
* scatter to support jumbo RX. Checking the offload capabilities
* is not an option as PMDs are not required yet to report
@@ -901,20 +900,25 @@ dpdk_eth_dev_port_config(struct netdev_dpdk
*dev, int n_rxq, int n_txq)
* (testing or code review). Listing all such PMDs feels harder
* than highlighting the one known not to need scatter */
if (dev->mtu > ETHER_MTU) {
- rte_eth_dev_info_get(dev->port_id, &info);
if (strncmp(info.driver_name, "net_nfp", 7)) {
- conf.rxmode.enable_scatter = 1;
+ conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
}
}
conf.intr_conf.lsc = dev->lsc_interrupt_mode;
- conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
- NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
+
+ if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
+ conf.rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM;
+ }
if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
- conf.rxmode.hw_strip_crc = 1;
+ conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
}
+ /* Limit configured rss hash functions to only those supported
+ * by the eth device. */
+ conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
+
/* A device may report more queues than it makes available (this
has
* been observed for Intel xl710, which reserves some of them for
* SRIOV): rte_eth_*_queue_setup will fail if a queue is not
@@ -1932,16 +1936,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk
*dev, int qid,
static inline bool
netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
+ struct rte_meter_srtcm_profile
*profile,
struct rte_mbuf *pkt, uint64_t time)
{
uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
ether_hdr);
- return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
- e_RTE_METER_GREEN;
+ return rte_meter_srtcm_color_blind_check(meter, profile, time,
pkt_len) ==
+ e_RTE_METER_GREEN;
}
static int
netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
+ struct rte_meter_srtcm_profile *profile,
struct rte_mbuf **pkts, int pkt_cnt,
bool should_steal)
{
@@ -1953,7 +1959,8 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
*meter,
for (i = 0; i < pkt_cnt; i++) {
pkt = pkts[i];
/* Handle current packet */
- if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time))
{
+ if (netdev_dpdk_policer_pkt_handle(meter, profile,
+ pkt, current_time)) {
if (cnt != i) {
pkts[cnt] = pkt;
}
@@ -1975,8 +1982,8 @@ ingress_policer_run(struct ingress_policer
*policer, struct rte_mbuf **pkts,
int cnt = 0;
rte_spinlock_lock(&policer->policer_lock);
- cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
- pkt_cnt, should_steal);
+ cnt = netdev_dpdk_policer_run(&policer->in_policer,
&policer->in_prof,
+ pkts, pkt_cnt, should_steal);
rte_spinlock_unlock(&policer->policer_lock);
return cnt;
@@ -2767,8 +2774,12 @@ netdev_dpdk_policer_construct(uint32_t rate,
uint32_t burst)
policer->app_srtcm_params.cir = rate_bytes;
policer->app_srtcm_params.cbs = burst_bytes;
policer->app_srtcm_params.ebs = 0;
- err = rte_meter_srtcm_config(&policer->in_policer,
- &policer->app_srtcm_params);
+ err = rte_meter_srtcm_profile_config(&policer->in_prof,
+ &policer->app_srtcm_params);
+ if (!err) {
+ err = rte_meter_srtcm_config(&policer->in_policer,
+ &policer->in_prof);
+ }
if (err) {
VLOG_ERR("Could not create rte meter for ingress policer");
free(policer);
@@ -3043,13 +3054,18 @@ netdev_dpdk_get_status(const struct netdev
*netdev, struct smap *args)
smap_add_format(args, "if_descr", "%s %s", rte_version(),
dev_info.driver_name);
- if (dev_info.pci_dev) {
- smap_add_format(args, "pci-vendor_id", "0x%x",
- dev_info.pci_dev->id.vendor_id);
- smap_add_format(args, "pci-device_id", "0x%x",
- dev_info.pci_dev->id.device_id);
+ const struct rte_bus *bus;
+ const struct rte_pci_device *pci_dev;
Don’t we need to take the ovs_mutex_lock(&dev->mutex) lock here, we
are calling DPDK code?
+ bus = rte_bus_find_by_device(dev_info.device);van
+ if (bus && !strcmp(bus->name, "pci")) {
+ pci_dev = RTE_DEV_TO_PCI(dev_info.device);
+ if (pci_dev) {
+ smap_add_format(args, "pci-vendor_id", "0x%x",
+ pci_dev->id.vendor_id);
+ smap_add_format(args, "pci-device_id", "0x%x",
+ pci_dev->id.device_id);
+ }
}
-
return 0;
}
@@ -3727,6 +3743,7 @@ struct egress_policer {
struct qos_conf qos_conf;
struct rte_meter_srtcm_params app_srtcm_params;
struct rte_meter_srtcm egress_meter;
+ struct rte_meter_srtcm_profile egress_prof;
};
static void
@@ -3749,11 +3766,17 @@ egress_policer_qos_construct(const struct smap
*details,
policer = xmalloc(sizeof *policer);
qos_conf_init(&policer->qos_conf, &egress_policer_ops);
egress_policer_details_to_param(details,
&policer->app_srtcm_params);
- err = rte_meter_srtcm_config(&policer->egress_meter,
- &policer->app_srtcm_params);
+ err = rte_meter_srtcm_profile_config(&policer->egress_prof,
+ &policer->app_srtcm_params);
+ if (!err) {
+ err = rte_meter_srtcm_config(&policer->egress_meter,
+ &policer->egress_prof);
+ }
+
if (!err) {
*conf = &policer->qos_conf;
} else {
+ VLOG_ERR("Could not create rte meter for egress policer");
free(policer);
*conf = NULL;
err = -err;
@@ -3803,7 +3826,8 @@ egress_policer_run(struct qos_conf *conf, struct
rte_mbuf **pkts, int pkt_cnt,
struct egress_policer *policer =
CONTAINER_OF(conf, struct egress_policer, qos_conf);
- cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
+ cnt = netdev_dpdk_policer_run(&policer->egress_meter,
+ &policer->egress_prof, pkts,
pkt_cnt, should_steal);
return cnt;
@@ -3888,7 +3912,7 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk
*dev)
if (!err) {
/* A new mempool was created or re-used. */
netdev_change_seq_changed(&dev->up);
- } else if (err != EEXIST){
+ } else if (err != EEXIST) {
return err;
}
if (netdev_dpdk_get_vid(dev) >= 0) {
@@ -4103,15 +4127,15 @@ dump_flow_pattern(struct rte_flow_item *item)
VLOG_DBG("rte flow vlan pattern:\n");
if (vlan_spec) {
- VLOG_DBG(" Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
- ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
+ VLOG_DBG(" Spec: inner_type=0x%"PRIx16",
tci=0x%"PRIx16"\n",
+ ntohs(vlan_spec->inner_type),
ntohs(vlan_spec->tci));
} else {
VLOG_DBG(" Spec = null\n");
}
if (vlan_mask) {
- VLOG_DBG(" Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
- vlan_mask->tpid, vlan_mask->tci);
+ VLOG_DBG(" Mask: inner_type=0x%"PRIx16",
tci=0x%"PRIx16"\n",
+ vlan_mask->inner_type, vlan_mask->tci);
Should the vlan_mask also use htons()?
} else {
VLOG_DBG(" Mask = null\n");
}
@@ -4281,27 +4305,39 @@ add_flow_action(struct flow_actions *actions,
enum rte_flow_action_type type,
actions->cnt++;
}
+struct action_rss_data {
+ struct rte_flow_action_rss conf;
+ uint16_t queue[0];
+};
+
static struct rte_flow_action_rss *
add_flow_rss_action(struct flow_actions *actions,
struct netdev *netdev) {
int i;
- struct rte_flow_action_rss *rss;
-
- rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
- /*
- * Setting it to NULL will let the driver use the default RSS
- * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
- */
- rss->rss_conf = NULL;
- rss->num = netdev->n_rxq;
+ struct action_rss_data *rss_data;
+
+ rss_data = xmalloc(sizeof(struct action_rss_data) +
+ sizeof(uint16_t) * netdev->n_rxq);
+ *rss_data = (struct action_rss_data) {
+ .conf = (struct rte_flow_action_rss) {
+ .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+ .level = 0,
+ .types = ETH_RSS_IP,
+ .key_len = 0,
+ .queue_num = netdev->n_rxq,
+ .queue = rss_data->queue,
+ .key = NULL
If you have them in a different order than the structure, you might as
well group key_len and key together.
+ },
+ };
- for (i = 0; i < rss->num; i++) {
- rss->queue[i] = i;
+ /* Override queue array with default */
+ for (i = 0; i < netdev->n_rxq; i++) {
+ rss_data->queue[i] = i;
}
- add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
+ add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS,
&rss_data->conf);
- return rss;
+ return &rss_data->conf;
}
static int
@@ -4365,7 +4401,7 @@ netdev_dpdk_add_rte_flow_offload(struct netdev
*netdev,
vlan_mask.tci = match->wc.masks.vlans[0].tci &
~htons(VLAN_CFI);
/* match any protocols */
- vlan_mask.tpid = 0;
+ vlan_mask.inner_type = 0;
add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
&vlan_spec, &vlan_mask);
@@ -4520,7 +4556,9 @@ end_proto_check:
flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
actions.actions, &error);
- free(rss);
+ void *rss_cont;
+ rss_cont = container_of(rss, struct action_rss_data, conf);
+ free(rss_cont);
if (!flow) {
VLOG_ERR("rte flow creat error: %u : message : %s\n",
error.type, error.message);
--
1.8.3.1
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev