[dpdk-dev] [PATCH 3/3] net/mlx5: add rte_flow rule creation

2016-11-25 Thread Nelio Laranjeiro
Convert Ethernet, IPv4, IPv6, TCP and UDP layers into ibv_flow rules and
create those rules after validation (i.e. once the NIC is known to support
the rule).

VLAN is still not supported in this commit.

Signed-off-by: Nelio Laranjeiro 
---
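Note: the rte_flow to Verbs conversion code is truncated in this excerpt.
As an illustration only, converting a validated ETH item into an
experimental Verbs flow specification could look roughly like the sketch
below. The helper name flow_item_eth_to_verbs is hypothetical, and
ibv_exp_flow_spec_eth / IBV_EXP_FLOW_SPEC_ETH are assumed from the
experimental Verbs API already referenced by struct rte_flow in this patch;
this is not a copy of the actual implementation.

#include <string.h>
#include <stdint.h>
#include <infiniband/verbs_exp.h> /* experimental Verbs (assumption) */
#include <rte_flow.h>

/* Illustrative sketch: append an ETH spec at `offset` inside ibv_attr. */
static void
flow_item_eth_to_verbs(const struct rte_flow_item *item,
		       struct ibv_exp_flow_attr *ibv_attr,
		       unsigned int offset)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct ibv_exp_flow_spec_eth *eth =
		(void *)((uintptr_t)ibv_attr + offset);

	*eth = (struct ibv_exp_flow_spec_eth){
		.type = IBV_EXP_FLOW_SPEC_ETH,
		.size = sizeof(*eth),
	};
	++ibv_attr->num_of_specs;
	if (!spec)
		return;
	/* Copy MAC addresses from the item spec and, when present, mask. */
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, 6);
	memcpy(eth->val.src_mac, spec->src.addr_bytes, 6);
	if (mask) {
		memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, 6);
		memcpy(eth->mask.src_mac, mask->src.addr_bytes, 6);
	}
}
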
 drivers/net/mlx5/mlx5_flow.c | 645 ++-
 1 file changed, 631 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 54807ad..e948000 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -31,6 +31,17 @@
  */

 #include 
+#include 
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include 
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif

 #include 
 #include 
@@ -39,11 +50,82 @@

 #include "mlx5.h"

+/** Define a value to use as index for the drop queue. */
+#define MLX5_FLOW_DROP_QUEUE ((uint32_t)-1)
+
 struct rte_flow {
LIST_ENTRY(rte_flow) next;
+   struct ibv_exp_flow_attr *ibv_attr;
+   struct ibv_exp_rwq_ind_table *ind_table;
+   struct ibv_qp *qp;
+   struct ibv_exp_flow *ibv_flow;
+   struct ibv_exp_wq *wq;
+   struct ibv_cq *cq;
+   uint8_t drop;
 };

 /**
+ * Check support for a given item.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param mask[in]
+ *   Bit-mask covering supported fields to compare with spec, last and mask in
+ *   item.
+ * @param size
+ *   Bit-mask size in bytes.
+ *
+ * @return
+ *   0 on success.
+ */
+static int
+mlx5_flow_item_validate(const struct rte_flow_item *item,
+   const uint8_t *mask, unsigned int size)
+{
+   int ret = 0;
+
+   if (item->spec && !item->mask) {
+   unsigned int i;
+   const uint8_t *spec = item->spec;
+
+   for (i = 0; i < size; ++i)
+   if ((spec[i] | mask[i]) != mask[i])
+   return -1;
+   }
+   if (item->last && !item->mask) {
+   unsigned int i;
+   const uint8_t *spec = item->last;
+
+   for (i = 0; i < size; ++i)
+   if ((spec[i] | mask[i]) != mask[i])
+   return -1;
+   }
+   if (item->mask) {
+   unsigned int i;
+   const uint8_t *spec = item->mask;
+
+   for (i = 0; i < size; ++i)
+   if ((spec[i] | mask[i]) != mask[i])
+   return -1;
+   }
+   if (item->spec && item->last) {
+   uint8_t spec[size];
+   uint8_t last[size];
+   const uint8_t *apply = mask;
+   unsigned int i;
+
+   if (item->mask)
+   apply = item->mask;
+   for (i = 0; i < size; ++i) {
+   spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
+   last[i] = ((const uint8_t *)item->last)[i] & apply[i];
+   }
+   ret = memcmp(spec, last, size);
+   }
+   return ret;
+}
+
+/**
  * Validate a flow supported by the NIC.
  *
  * @param priv
@@ -67,9 +149,43 @@ priv_flow_validate(struct priv *priv,
   const struct rte_flow_action actions[],
   struct rte_flow_error *error)
 {
-   (void)priv;
const struct rte_flow_item *ilast = NULL;
const struct rte_flow_action *alast = NULL;
+   /* Supported mask. */
+   const struct rte_flow_item_eth eth_mask = {
+   .dst.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+   .src.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+   };
+   const struct rte_flow_item_ipv4 ipv4_mask = {
+   .hdr = {
+   .src_addr = -1,
+   .dst_addr = -1,
+   },
+   };
+   const struct rte_flow_item_ipv6 ipv6_mask = {
+   .hdr = {
+   .src_addr = {
+   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+   },
+   .dst_addr = {
+   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+   },
+   },
+   };
+   const struct rte_flow_item_udp udp_mask = {
+   .hdr = {
+   .src_port = -1,
+   .dst_port = -1,
+   },
+   };
+   const struct rte_flow_item_tcp tcp_mask = {
+   .hdr = {
+   .src_port = -1,
+   .dst_port = -1,
+   },
+   };

if (attr->

[dpdk-dev] [PATCH 2/3] net/mlx5: add software support for rte_flow

2016-11-25 Thread Nelio Laranjeiro
Introduce initial software validation for rte_flow rules.

Signed-off-by: Nelio Laranjeiro 
---
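Note: for context, this validation path is reached through the generic
rte_flow API proposed in [1] of the cover letter. A minimal, illustrative
caller (assuming that API, and assuming the QUEUE action is among those
accepted by the action loop truncated below) could look like this:

#include <stdio.h>
#include <rte_flow.h>

/* Sketch: ask port 0 whether an ingress ETH/IPv4/UDP rule directing
 * matching packets to Rx queue 0 would be supported. */
static int
check_rule(void)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err = { 0 };

	if (rte_flow_validate(0, &attr, pattern, actions, &err)) {
		printf("rule rejected: %s\n",
		       err.message ? err.message : "(no details)");
		return -1;
	}
	return 0;
}
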
 drivers/net/mlx5/mlx5.h |   1 +
 drivers/net/mlx5/mlx5_flow.c| 196 ++--
 drivers/net/mlx5/mlx5_trigger.c |   1 +
 3 files changed, 169 insertions(+), 29 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 04f4eaa..df0e77c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -136,6 +136,7 @@ struct priv {
unsigned int reta_idx_n; /* RETA index size. */
struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
+   LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
uint32_t link_speed_capa; /* Link speed capabilities. */
rte_spinlock_t lock; /* Lock for control functions. */
 };
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index a514dff..54807ad 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,11 +30,125 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

+#include 
+
 #include 
 #include 
 #include 
+#include 
+
 #include "mlx5.h"

+struct rte_flow {
+   LIST_ENTRY(rte_flow) next;
+};
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_validate(struct priv *priv,
+  const struct rte_flow_attr *attr,
+  const struct rte_flow_item items[],
+  const struct rte_flow_action actions[],
+  struct rte_flow_error *error)
+{
+   (void)priv;
+   const struct rte_flow_item *ilast = NULL;
+   const struct rte_flow_action *alast = NULL;
+
+   if (attr->group) {
+   rte_flow_error_set(error, ENOTSUP,
+  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+  NULL,
+  "groups are not supported");
+   return -rte_errno;
+   }
+   if (attr->priority) {
+   rte_flow_error_set(error, ENOTSUP,
+  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+  NULL,
+  "priorities are not supported");
+   return -rte_errno;
+   }
+   if (attr->egress) {
+   rte_flow_error_set(error, ENOTSUP,
+  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+  NULL,
+  "egress is not supported");
+   return -rte_errno;
+   }
+   if (!attr->ingress) {
+   rte_flow_error_set(error, ENOTSUP,
+  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+  NULL,
+  "only ingress is supported");
+   return -rte_errno;
+   }
+   for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+   if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
+   continue;
+   } else if (items->type == RTE_FLOW_ITEM_TYPE_ETH) {
+   if (ilast)
+   goto exit_item_not_supported;
+   ilast = items;
+   } else if ((items->type == RTE_FLOW_ITEM_TYPE_IPV4) ||
+  (items->type == RTE_FLOW_ITEM_TYPE_IPV6)) {
+   if (!ilast)
+   goto exit_item_not_supported;
+   else if (ilast->type != RTE_FLOW_ITEM_TYPE_ETH)
+   goto exit_item_not_supported;
+   ilast = items;
+   } else if ((items->type == RTE_FLOW_ITEM_TYPE_UDP) ||
+  (items->type == RTE_FLOW_ITEM_TYPE_TCP)) {
+   if (!ilast)
+   goto exit_item_not_supported;
+   else if ((ilast->type != RTE_FLOW_ITEM_TYPE_IPV4) &&
+(ilast->type != RTE_FLOW_ITEM_TYPE_IPV6))
+   goto exit_item_not_supported;
+   ilast = items;
+   } else {
+   goto exit_item_not_supported;
+   }
+   }
+   for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+   if (actions->

[dpdk-dev] [PATCH 1/3] net/mlx5: add preliminary support for rte_flow

2016-11-25 Thread Nelio Laranjeiro
Signed-off-by: Nelio Laranjeiro 
---
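Note: the RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET case added to
mlx5_dev_filter_ctrl() below is how the generic flow API is expected to
discover the driver callbacks. A sketch of the lookup done on the ethdev
side (an assumption about the rte_flow glue layer, not part of this patch)
is:

#include <errno.h>
#include <rte_ethdev.h>
#include <rte_flow_driver.h>

/* Hypothetical helper: fetch a port's rte_flow_ops via filter_ctrl. */
static const struct rte_flow_ops *
flow_ops_get(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
	const struct rte_flow_ops *ops = NULL;

	if (dev->dev_ops->filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				      RTE_ETH_FILTER_GET, &ops) || !ops) {
		rte_flow_error_set(error, ENOSYS,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL, "rte_flow is not supported");
		return NULL;
	}
	return ops;
}
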
 drivers/net/mlx5/Makefile|   1 +
 drivers/net/mlx5/mlx5.h  |  16 ++
 drivers/net/mlx5/mlx5_fdir.c |  15 ++
 drivers/net/mlx5/mlx5_flow.c | 122 +++
 4 files changed, 154 insertions(+)
 create mode 100644 drivers/net/mlx5/mlx5_flow.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index cf87f0b..6d1338a 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -48,6 +48,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c

 # Dependencies.
 DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += lib/librte_ether
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 79b7a60..04f4eaa 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -59,6 +59,7 @@
 #include 
 #include 
 #include 
+#include 
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -268,4 +269,19 @@ void priv_fdir_enable(struct priv *);
 int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
 enum rte_filter_op, void *);

+/* mlx5_flow.c */
+
+int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
+  const struct rte_flow_item [],
+  const struct rte_flow_action [],
+  struct rte_flow_error *);
+struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
+ const struct rte_flow_attr *,
+ const struct rte_flow_item [],
+ const struct rte_flow_action [],
+ struct rte_flow_error *);
+int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
+ struct rte_flow_error *);
+int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 1acf682..f80c58b 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -55,6 +55,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -1042,6 +1044,14 @@ priv_fdir_ctrl_func(struct priv *priv, enum 
rte_filter_op filter_op, void *arg)
return ret;
 }

+static const struct rte_flow_ops mlx5_flow_ops = {
+   .validate = mlx5_flow_validate,
+   .create = mlx5_flow_create,
+   .destroy = mlx5_flow_destroy,
+   .flush = mlx5_flow_flush,
+   .query = NULL,
+};
+
 /**
  * Manage filter operations.
  *
@@ -1067,6 +1077,11 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
struct priv *priv = dev->data->dev_private;

switch (filter_type) {
+   case RTE_ETH_FILTER_GENERIC:
+   if (filter_op != RTE_ETH_FILTER_GET)
+   return -EINVAL;
+   *(const void **)arg = &mlx5_flow_ops;
+   return 0;
case RTE_ETH_FILTER_FDIR:
priv_lock(priv);
ret = priv_fdir_ctrl_func(priv, filter_op, arg);
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
new file mode 100644
index 000..a514dff
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -0,0 +1,122 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,

[dpdk-dev] [PATCH 0/3] net/mlx5: support flow_rte

2016-11-25 Thread Nelio Laranjeiro
This series requires rte_flow [1].

It brings rte_flow support to the same level as flow director (FDIR) in mlx5.

 [1] http://dpdk.org/ml/archives/dev/2016-November/050262.html

Nelio Laranjeiro (3):
  net/mlx5: add preliminary support for rte_flow
  net/mlx5: add software support for rte_flow
  net/mlx5: add rte_flow rule creation

 drivers/net/mlx5/Makefile   |   1 +
 drivers/net/mlx5/mlx5.h |  17 +
 drivers/net/mlx5/mlx5_fdir.c|  15 +
 drivers/net/mlx5/mlx5_flow.c| 877 
 drivers/net/mlx5/mlx5_trigger.c |   1 +
 5 files changed, 911 insertions(+)
 create mode 100644 drivers/net/mlx5/mlx5_flow.c

-- 
2.1.4



[dpdk-dev] [PATCH 7/7] net/mlx5: remove inefficient prefetching

2016-11-24 Thread Nelio Laranjeiro
Prefetching completion queue entries is inefficient because too few CPU
cycles elapse between the prefetch and the actual use, so the accesses
result in cache misses anyway.

Signed-off-by: Nelio Laranjeiro 
Acked-by: Adrien Mazarguil 
---
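Note (illustration only, not code from this patch): a prefetch pays off
only when enough independent work happens between issuing it and touching
the data. Prefetching a CQE immediately before reading it, as the removed
tx_prefetch_cqe() calls effectively did, still stalls on the same cache
miss. The pattern that does help looks like:

#include <stdint.h>
#include <rte_prefetch.h>

/* Sketch: overlap the fetch of the next entry with work on the current. */
static uint64_t
sum_entries(const uint64_t *entries, unsigned int n)
{
	uint64_t sum = 0;
	unsigned int i;

	for (i = 0; i < n; ++i) {
		if (i + 1 < n)
			rte_prefetch0(&entries[i + 1]);
		sum += entries[i]; /* real work hides the prefetch latency */
	}
	return sum;
}
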
 drivers/net/mlx5/mlx5_rxtx.c | 21 -
 1 file changed, 21 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 4b8c197..9f74fd4 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -348,23 +348,6 @@ mlx5_tx_dbrec(struct txq *txq)
 }

 /**
- * Prefetch a CQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param cqe_ci
- *   CQE consumer index.
- */
-static inline void
-tx_prefetch_cqe(struct txq *txq, uint16_t ci)
-{
-   volatile struct mlx5_cqe *cqe;
-
-   cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
-   rte_prefetch0(cqe);
-}
-
-/**
  * DPDK callback for TX.
  *
  * @param dpdk_txq
@@ -395,8 +378,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
if (unlikely(!pkts_n))
return 0;
/* Prefetch first packet cacheline. */
-   tx_prefetch_cqe(txq, txq->cq_ci);
-   tx_prefetch_cqe(txq, txq->cq_ci + 1);
rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
@@ -733,7 +714,6 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
if (unlikely(!pkts_n))
return 0;
/* Prefetch first packet cacheline. */
-   tx_prefetch_cqe(txq, txq->cq_ci);
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
@@ -938,7 +918,6 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf 
**pkts,
if (unlikely(!pkts_n))
return 0;
/* Prefetch first packet cacheline. */
-   tx_prefetch_cqe(txq, txq->cq_ci);
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
-- 
2.1.4



[dpdk-dev] [PATCH 6/7] net/mlx5: optimize copy of Ethernet header

2016-11-24 Thread Nelio Laranjeiro
Use fewer instructions to copy the first two bytes of the Ethernet header
into work queue elements.

Signed-off-by: Nelio Laranjeiro 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_rxtx.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 0d0b807..4b8c197 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -409,7 +409,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
unsigned int ds = 0;
uintptr_t addr;
uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
-   uint8_t ehdr[2];
+   uint16_t ehdr;
uint8_t cs_flags = 0;
 #ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t total_length = 0;
@@ -436,8 +436,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
rte_prefetch0(*pkts);
addr = rte_pktmbuf_mtod(buf, uintptr_t);
length = DATA_LEN(buf);
-   ehdr[0] = ((uint8_t *)addr)[0];
-   ehdr[1] = ((uint8_t *)addr)[1];
+   ehdr = (((uint8_t *)addr)[1] << 8) |
+  ((uint8_t *)addr)[0];
 #ifdef MLX5_PMD_SOFT_COUNTERS
total_length = length;
 #endif
@@ -600,8 +600,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
0,
cs_flags,
0,
-   (ehdr[1] << 24) | (ehdr[0] << 16) |
-   htons(pkt_inline_sz),
+   (ehdr << 16) | htons(pkt_inline_sz),
};
txq->wqe_ci += (ds + 3) / 4;
 #ifdef MLX5_PMD_SOFT_COUNTERS
-- 
2.1.4



[dpdk-dev] [PATCH 5/7] net/mlx5: move static prototype

2016-11-24 Thread Nelio Laranjeiro
Gather function prototypes at the beginning of the file.

Signed-off-by: Nelio Laranjeiro 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_rxtx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 52733da..0d0b807 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -76,6 +76,9 @@ check_cqe(volatile struct mlx5_cqe *cqe,
  unsigned int cqes_n, const uint16_t ci)
  __attribute__((always_inline));

+static inline void
+txq_complete(struct txq *txq) __attribute__((always_inline));
+
 static inline uint32_t
 txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
__attribute__((always_inline));
@@ -192,9 +195,6 @@ tx_mlx5_wqe(struct txq *txq, uint16_t ci)
return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
 }

-static inline void
-txq_complete(struct txq *txq) __attribute__((always_inline));
-
 /**
  * Manage TX completions.
  *
-- 
2.1.4



[dpdk-dev] [PATCH 4/7] net/mlx5: fix missing inline attributes

2016-11-24 Thread Nelio Laranjeiro
These functions must be forced inline for better performance.

Fixes: 99c12dcca65d ("net/mlx5: handle Rx CQE compression")
Fixes: 1d88ba171942 ("net/mlx5: refactor Tx data path")
Fixes: 67fa62bc672d ("mlx5: support checksum offload")

CC: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_rxtx.c | 34 +-
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index e161cd9..52733da 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -71,6 +71,31 @@
 #include "mlx5_defs.h"
 #include "mlx5_prm.h"

+static inline int
+check_cqe(volatile struct mlx5_cqe *cqe,
+ unsigned int cqes_n, const uint16_t ci)
+ __attribute__((always_inline));
+
+static inline uint32_t
+txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
+   __attribute__((always_inline));
+
+static inline void
+mlx5_tx_dbrec(struct txq *txq) __attribute__((always_inline));
+
+static inline uint32_t
+rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
+   __attribute__((always_inline));
+
+static inline int
+mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+uint16_t cqe_cnt, uint32_t *rss_hash)
+__attribute__((always_inline));
+
+static inline uint32_t
+rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
+  __attribute__((always_inline));
+
 #ifndef NDEBUG

 /**
@@ -100,11 +125,6 @@ check_cqe_seen(volatile struct mlx5_cqe *cqe)

 #endif /* NDEBUG */

-static inline int
-check_cqe(volatile struct mlx5_cqe *cqe,
- unsigned int cqes_n, const uint16_t ci)
- __attribute__((always_inline));
-
 /**
  * Check whether CQE is valid.
  *
@@ -266,10 +286,6 @@ txq_mb2mp(struct rte_mbuf *buf)
return buf->pool;
 }

-static inline uint32_t
-txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
-   __attribute__((always_inline));
-
 /**
  * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].
  * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
-- 
2.1.4



[dpdk-dev] [PATCH 3/7] net/mlx5: use vector types to speed up processing

2016-11-24 Thread Nelio Laranjeiro
Let the compiler automatically use the vector capabilities of the target
machine to optimize instruction selection.

Signed-off-by: Nelio Laranjeiro 
Acked-by: Adrien Mazarguil 
---
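Note: rte_v128u32_t comes from the generic rte_vect.h introduced by the
companion EAL patch ("eal: define generic vector types"); its definition is
not visible in this excerpt. It is presumably a GCC vector-extension
typedef along the lines of the sketch below (an assumption, shown for
illustration), which is what lets a whole WQE segment be written with a
single 16-byte store:

#include <stdint.h>

/* Presumed shape of the generic 128-bit vector type (assumption). */
typedef uint32_t rte_v128u32_t
	__attribute__((vector_size(16), aligned(16)));

/* One aligned 16-byte store instead of four 32-bit stores. */
static inline void
wqe_seg_write(volatile rte_v128u32_t *seg,
	      uint32_t w0, uint32_t w1, uint32_t w2, uint32_t w3)
{
	*seg = (rte_v128u32_t){ w0, w1, w2, w3 };
}
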
 drivers/net/mlx5/mlx5_prm.h  |  7 +
 drivers/net/mlx5/mlx5_rxtx.c | 74 +++-
 2 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 3dd4cbe..9cd9fdf 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -44,6 +44,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif

+#include 
 #include "mlx5_autoconf.h"

 /* Get CQE owner bit. */
@@ -134,6 +135,12 @@ struct mlx5_wqe {
struct mlx5_wqe_eth_seg_small eseg;
 };

+/* Vectorize WQE header. */
+struct mlx5_wqe_v {
+   rte_v128u32_t ctrl;
+   rte_v128u32_t eseg;
+};
+
 /* WQE. */
 struct mlx5_wqe64 {
struct mlx5_wqe hdr;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ada8e74..e161cd9 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -371,7 +371,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
unsigned int j = 0;
unsigned int max;
unsigned int comp;
-   volatile struct mlx5_wqe *wqe = NULL;
+   volatile struct mlx5_wqe_v *wqe = NULL;
unsigned int segs_n = 0;
struct rte_mbuf *buf = NULL;
uint8_t *raw;
@@ -388,12 +388,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
if (max > elts_n)
max -= elts_n;
do {
-   volatile struct mlx5_wqe_data_seg *dseg = NULL;
+   volatile rte_v128u32_t *dseg = NULL;
uint32_t length;
unsigned int ds = 0;
uintptr_t addr;
uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
uint8_t ehdr[2];
+   uint8_t cs_flags = 0;
 #ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t total_length = 0;
 #endif
@@ -412,7 +413,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
--segs_n;
if (!segs_n)
--pkts_n;
-   wqe = (volatile struct mlx5_wqe *)
+   wqe = (volatile struct mlx5_wqe_v *)
tx_mlx5_wqe(txq, txq->wqe_ci);
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
if (pkts_n > 1)
@@ -438,11 +439,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-   wqe->eseg.cs_flags =
-   MLX5_ETH_WQE_L3_CSUM |
-   MLX5_ETH_WQE_L4_CSUM;
-   } else {
-   wqe->eseg.cs_flags = 0;
+   cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
}
raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
/*
@@ -498,12 +495,11 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
 */
ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
if (length > 0) {
-   dseg = (volatile struct mlx5_wqe_data_seg *)
+   dseg = (volatile rte_v128u32_t *)
((uintptr_t)wqe +
 (ds * MLX5_WQE_DWORD_SIZE));
if ((uintptr_t)dseg >= end)
-   dseg = (volatile struct
-   mlx5_wqe_data_seg *)
+   dseg = (volatile rte_v128u32_t *)
   txq->wqes;
goto use_dseg;
} else if (!segs_n) {
@@ -516,16 +512,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
 * No inline has been done in the packet, only the
 * Ethernet Header as been stored.
 */
-   wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
-   dseg = (volatile struct mlx5_wqe_data_seg *)
+   dseg = (volatile rte_v128u32_t *)
((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
ds = 3;
 use_dseg:
/* Add the remaining packet as a simple ds. */
-   *dseg = (volatile struct mlx5_wqe_data_seg) {
-   .addr = htonll(addr),
-   .byte_count = htonl(length),
-  

[dpdk-dev] [PATCH 2/7] net/mlx5: use work queue buffer as a raw buffer

2016-11-24 Thread Nelio Laranjeiro
Define a single work queue element (WQE) type that encompasses all segment
types. It groups the control segment, the Ethernet segment and raw data in
a single place.

Signed-off-by: Nelio Laranjeiro 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_prm.h  |  13 --
 drivers/net/mlx5/mlx5_rxtx.c | 103 ++-
 drivers/net/mlx5/mlx5_rxtx.h |   2 +-
 drivers/net/mlx5/mlx5_txq.c  |   8 ++--
 4 files changed, 68 insertions(+), 58 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 7f31a2f..3dd4cbe 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -114,12 +114,19 @@ struct mlx5_wqe_eth_seg_small {
uint32_t rsvd2;
uint16_t inline_hdr_sz;
uint8_t inline_hdr[2];
-};
+} __rte_aligned(MLX5_WQE_DWORD_SIZE);

 struct mlx5_wqe_inl_small {
uint32_t byte_cnt;
uint8_t raw;
-};
+} __rte_aligned(MLX5_WQE_DWORD_SIZE);
+
+struct mlx5_wqe_ctrl {
+   uint32_t ctrl0;
+   uint32_t ctrl1;
+   uint32_t ctrl2;
+   uint32_t ctrl3;
+} __rte_aligned(MLX5_WQE_DWORD_SIZE);

 /* Small common part of the WQE. */
 struct mlx5_wqe {
@@ -131,7 +138,7 @@ struct mlx5_wqe {
 struct mlx5_wqe64 {
struct mlx5_wqe hdr;
uint8_t raw[32];
-} __rte_aligned(64);
+} __rte_aligned(MLX5_WQE_SIZE);

 /* MPW session status. */
 enum mlx5_mpw_state {
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5dacd93..ada8e74 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -154,6 +154,24 @@ check_cqe(volatile struct mlx5_cqe *cqe,
return 0;
 }

+/**
+ * Return the address of the WQE.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param  wqe_ci
+ *   WQE consumer index.
+ *
+ * @return
+ *   WQE address.
+ */
+static inline uintptr_t *
+tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+{
+   ci &= ((1 << txq->wqe_n) - 1);
+   return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
+}
+
 static inline void
 txq_complete(struct txq *txq) __attribute__((always_inline));

@@ -175,7 +193,7 @@ txq_complete(struct txq *txq)
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
volatile struct mlx5_cqe *cqe = NULL;
-   volatile struct mlx5_wqe *wqe;
+   volatile struct mlx5_wqe_ctrl *ctrl;

do {
volatile struct mlx5_cqe *tmp;
@@ -201,9 +219,9 @@ txq_complete(struct txq *txq)
} while (1);
if (unlikely(cqe == NULL))
return;
-   wqe = &(*txq->wqes)[ntohs(cqe->wqe_counter) &
-   ((1 << txq->wqe_n) - 1)].hdr;
-   elts_tail = wqe->ctrl[3];
+   ctrl = (volatile struct mlx5_wqe_ctrl *)
+   tx_mlx5_wqe(txq, ntohs(cqe->wqe_counter));
+   elts_tail = ctrl->ctrl3;
assert(elts_tail < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
@@ -331,23 +349,6 @@ tx_prefetch_cqe(struct txq *txq, uint16_t ci)
 }

 /**
- * Prefetch a WQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param  wqe_ci
- *   WQE consumer index.
- */
-static inline void
-tx_prefetch_wqe(struct txq *txq, uint16_t ci)
-{
-   volatile struct mlx5_wqe64 *wqe;
-
-   wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
-   rte_prefetch0(wqe);
-}
-
-/**
  * DPDK callback for TX.
  *
  * @param dpdk_txq
@@ -411,9 +412,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
--segs_n;
if (!segs_n)
--pkts_n;
-   wqe = &(*txq->wqes)[txq->wqe_ci &
-   ((1 << txq->wqe_n) - 1)].hdr;
-   tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+   wqe = (volatile struct mlx5_wqe *)
+   tx_mlx5_wqe(txq, txq->wqe_ci);
+   rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
if (pkts_n > 1)
rte_prefetch0(*pkts);
addr = rte_pktmbuf_mtod(buf, uintptr_t);
@@ -464,8 +465,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
}
/* Inline if enough room. */
if (txq->max_inline != 0) {
-   uintptr_t end =
-   (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
+   uintptr_t end = (uintptr_t)
+   (((uintptr_t)txq->wqes) +
+(1 << txq->wqe_n) * MLX5_WQE_SIZE);
uint16_t max_inline =
txq->max_inline * RTE_CACHE_LINE_SIZE;
uint16_t room;
@@ -496,12 +498,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
 */
   

[dpdk-dev] [PATCH 1/7] net/mlx5: prepare Tx vectorization

2016-11-24 Thread Nelio Laranjeiro
Prepare the code to write the Work Queue Element with vectorized
instructions.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Elad Persiko 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_rxtx.c | 44 
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ffd09ac..5dacd93 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -391,6 +391,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
uint32_t length;
unsigned int ds = 0;
uintptr_t addr;
+   uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
+   uint8_t ehdr[2];
 #ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t total_length = 0;
 #endif
@@ -416,6 +418,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
rte_prefetch0(*pkts);
addr = rte_pktmbuf_mtod(buf, uintptr_t);
length = DATA_LEN(buf);
+   ehdr[0] = ((uint8_t *)addr)[0];
+   ehdr[1] = ((uint8_t *)addr)[1];
 #ifdef MLX5_PMD_SOFT_COUNTERS
total_length = length;
 #endif
@@ -439,24 +443,20 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
} else {
wqe->eseg.cs_flags = 0;
}
-   raw  = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
-   /* Start the know and common part of the WQE structure. */
-   wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-   wqe->ctrl[2] = 0;
-   wqe->ctrl[3] = 0;
-   wqe->eseg.rsvd0 = 0;
-   wqe->eseg.rsvd1 = 0;
-   wqe->eseg.mss = 0;
-   wqe->eseg.rsvd2 = 0;
-   /* Start by copying the Ethernet Header. */
-   memcpy((uint8_t *)raw, ((uint8_t *)addr), 16);
+   raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
+   /*
+* Start by copying the Ethernet header minus the first two
+* bytes which will be appended at the end of the Ethernet
+* segment.
+*/
+   memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2, 16);
length -= MLX5_WQE_DWORD_SIZE;
addr += MLX5_WQE_DWORD_SIZE;
/* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
uint32_t vlan = htonl(0x8100 | buf->vlan_tci);

-   memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE -
+   memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - 2 -
   sizeof(vlan)),
   &vlan, sizeof(vlan));
addr -= sizeof(vlan);
@@ -468,10 +468,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
(uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
uint16_t max_inline =
txq->max_inline * RTE_CACHE_LINE_SIZE;
-   uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
uint16_t room;

-   raw += MLX5_WQE_DWORD_SIZE;
+   /*
+* raw starts two bytes before the boundary to
+* continue the above copy of packet data.
+*/
+   raw += MLX5_WQE_DWORD_SIZE - 2;
room = end - (uintptr_t)raw;
if (room > max_inline) {
uintptr_t addr_end = (addr + max_inline) &
@@ -487,8 +490,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
/* Sanity check. */
assert(addr <= addr_end);
}
-   /* Store the inlined packet size in the WQE. */
-   wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
/*
 * 2 DWORDs consumed by the WQE header + 1 DSEG +
 * the size of the inline part of the packet.
@@ -570,7 +571,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
--pkts_n;
 next_pkt:
++i;
+   /* Initialize known and common part of the WQE structure. */
+   wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
+   wqe->ctrl[2] = 0;
+   wqe->ctrl[3] = 0;
+   wqe->eseg.rsvd0 = 0;
+   wqe->eseg.rsvd1 = 0;
+  

[dpdk-dev] [PATCH 0/7] net/mlx5: improve single core performance

2016-11-24 Thread Nelio Laranjeiro
This series applies on top of
"[PATCH] eal: define generic vector types" [1][2]

Using built-in vector types forces compilers to consider SIMD instructions in
specific places in order to improve performance on both IBM POWER8 and Intel
architectures.

For example, single-threaded testpmd I/O forwarding performance (packets
per second) improves by 6% on Intel platforms.

 [1] http://dpdk.org/ml/archives/dev/2016-November/050261.html
 [2] http://dpdk.org/dev/patchwork/patch/17024/

Nelio Laranjeiro (7):
  net/mlx5: prepare Tx vectorization
  net/mlx5: use work queue buffer as a raw buffer
  net/mlx5: use vector types to speed up processing
  net/mlx5: fix missing inline attributes
  net/mlx5: move static prototype
  net/mlx5: optimize copy of Ethernet header
  net/mlx5: remove inefficient prefetching

 drivers/net/mlx5/mlx5_prm.h  |  20 +++-
 drivers/net/mlx5/mlx5_rxtx.c | 243 +++
 drivers/net/mlx5/mlx5_rxtx.h |   2 +-
 drivers/net/mlx5/mlx5_txq.c  |   8 +-
 4 files changed, 150 insertions(+), 123 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH 3/3] net/mlx5: do not invalidate title CQE

2016-11-17 Thread Nelio Laranjeiro
We can leave the title completion queue entry untouched since its contents
are not modified.

Reported-by: Liming Sun 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 04860bb..ffd09ac 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1162,7 +1162,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct 
mlx5_cqe *cqe,
zip->na += 8;
}
if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
-   uint16_t idx = rxq->cq_ci;
+   uint16_t idx = rxq->cq_ci + 1;
uint16_t end = zip->cq_ci;

while (idx != end) {
-- 
2.1.4



[dpdk-dev] [PATCH 2/3] net/mlx5: fix wrong htons

2016-11-17 Thread Nelio Laranjeiro
Completion queue entry fields are stored in network byte order; ntoh*()
must be used to access them.

Fixes: c305090bbaf8 ("net/mlx5: replace countdown with threshold for Tx 
completions")

CC: stable@dpdk.org
Reported-by: Liming Sun 
Signed-off-by: Nelio Laranjeiro 
---
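Note (illustration, not part of the fix): htons() and ntohs() happen to
perform the same bit transformation, so the original code behaved correctly;
the change matters for expressing the intended direction of the conversion.
The field name below is taken from the diff:

#include <stdint.h>
#include <arpa/inet.h>

/* Sketch: a CQE counter stored in network byte order is read with
 * ntohs(), i.e. network-to-host, not host-to-network. */
static uint16_t
read_wqe_counter(uint16_t wqe_counter_be)
{
	return ntohs(wqe_counter_be);
}
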
 drivers/net/mlx5/mlx5_rxtx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 9bd4d80..04860bb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -201,7 +201,7 @@ txq_complete(struct txq *txq)
} while (1);
if (unlikely(cqe == NULL))
return;
-   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) &
+   wqe = &(*txq->wqes)[ntohs(cqe->wqe_counter) &
((1 << txq->wqe_n) - 1)].hdr;
elts_tail = wqe->ctrl[3];
assert(elts_tail < (1 << txq->wqe_n));
-- 
2.1.4



[dpdk-dev] [PATCH 1/3] net/mlx5: fix leak when starvation occurs

2016-11-17 Thread Nelio Laranjeiro
The list of segments to free was mishandled, resulting in only the first
segment being freed instead of all of them. The last one still belongs to
the NIC and thus must not be freed.

Fixes: a1bdb71a32da ("net/mlx5: fix crash in Rx")

CC: stable@dpdk.org
Reported-by: Liming Sun 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index beff580..9bd4d80 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1312,10 +1312,10 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
}
while (pkt != seg) {
assert(pkt != (*rxq->elts)[idx]);
-   seg = NEXT(pkt);
+   rep = NEXT(pkt);
rte_mbuf_refcnt_set(pkt, 0);
__rte_mbuf_raw_free(pkt);
-   pkt = seg;
+   pkt = rep;
}
break;
}
-- 
2.1.4



[dpdk-dev] [PATCH] eal: define generic vector types

2016-11-16 Thread Nelio Laranjeiro
Add common vector type definitions to all CPU architectures.

Signed-off-by: Nelio Laranjeiro 
---
 lib/librte_eal/common/Makefile |   1 +
 lib/librte_eal/common/include/arch/arm/rte_vect.h  |   1 +
 .../common/include/arch/ppc_64/rte_vect.h  |   1 +
 lib/librte_eal/common/include/arch/tile/rte_vect.h |  38 +
 lib/librte_eal/common/include/arch/x86/rte_vect.h  |   7 +-
 lib/librte_eal/common/include/generic/rte_vect.h   | 185 +
 6 files changed, 230 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_vect.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_vect.h

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index dfd64aa..8af06b1 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -48,6 +48,7 @@ endif

 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
+GENERIC_INC += rte_vect.h
 # defined in mk/arch/$(RTE_ARCH)/rte.vars.mk
 ARCH_DIR ?= $(RTE_ARCH)
 ARCH_INC := $(notdir $(wildcard 
$(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h 
b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index b86c2cf..4107c99 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -34,6 +34,7 @@
 #define _RTE_VECT_ARM_H_

 #include 
+#include "generic/rte_vect.h"
 #include "arm_neon.h"

 #ifdef __cplusplus
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
index 05209e5..99586e5 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
@@ -34,6 +34,7 @@
 #define _RTE_VECT_PPC_64_H_

 #include 
+#include "generic/rte_vect.h"

 #ifdef __cplusplus
 extern "C" {
diff --git a/lib/librte_eal/common/include/arch/tile/rte_vect.h 
b/lib/librte_eal/common/include/arch/tile/rte_vect.h
new file mode 100644
index 000..f1e1709
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_vect.h
@@ -0,0 +1,38 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VECT_TILE_H_
+#define _RTE_VECT_TILE_H_
+
+#include "generic/rte_vect.h"
+
+#endif /* _RTE_VECT_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h 
b/lib/librte_eal/common/include/arch/x86/rte_vect.h
index 77f2e25..1b4b85d 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -31,8 +31,8 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

-#ifndef _RTE_VECT_H_
-#define _RTE_VECT_H_
+#ifndef _RTE_VECT_X86_H_
+#define _RTE_VECT_X86_H_

 /**
  * @file
@@ -41,6 +41,7 @@
  */

 #include 
+#include "generic/rte_vect.h"

 #if (defined(__ICC) || (__GNUC__ == 4 &&  __GNUC_MINOR__ < 4))

@@ -133,4 +134,4 @@ __extension__ ({ \
 }
 #endif

-#endif /* _RTE_VECT_H_ */
+#endif /* _RTE_VECT_X86_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_vect.h 
b/lib/librte_eal/common/include/generic/rte_vect.h
new file mode 100644
index 000..d7b

[dpdk-dev] [PATCH] net: introduce big and little endian types

2016-11-09 Thread Nelio Laranjeiro
This commit introduces new rte_{le,be}{16,32,64}_t types and updates
rte_{le,be,cpu}_to_{le,be,cpu}_*() and network header structures
accordingly.

Specific big/little endian types avoid uncertainty and conversion mistakes.

No ABI change since these are simply typedefs to the original types.

Signed-off-by: Nelio Laranjeiro 
---
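Note: a short usage sketch of the new types (illustrative, based on the
typedefs and the rte_arp.h change in the diff below). The typed field makes
the required conversion obvious at the assignment site:

#include <stdint.h>
#include <rte_byteorder.h>
#include <rte_arp.h>

/* arp_sip is now declared as rte_be32_t: the field holds a big-endian
 * value and must be filled with rte_cpu_to_be_32(). */
static void
arp_set_sender_ip(struct arp_ipv4 *a, uint32_t ip_host_order)
{
	a->arp_sip = rte_cpu_to_be_32(ip_host_order);
}
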
 .../common/include/generic/rte_byteorder.h | 31 +++---
 lib/librte_net/rte_arp.h   | 15 +
 lib/librte_net/rte_ether.h | 10 +++---
 lib/librte_net/rte_gre.h   | 30 -
 lib/librte_net/rte_icmp.h  | 11 ---
 lib/librte_net/rte_ip.h| 38 +++---
 lib/librte_net/rte_net.c   | 10 +++---
 lib/librte_net/rte_sctp.h  |  9 ++---
 lib/librte_net/rte_tcp.h   | 19 ++-
 lib/librte_net/rte_udp.h   |  9 ++---
 10 files changed, 97 insertions(+), 85 deletions(-)

diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h 
b/lib/librte_eal/common/include/generic/rte_byteorder.h
index e00bccb..059c2a5 100644
--- a/lib/librte_eal/common/include/generic/rte_byteorder.h
+++ b/lib/librte_eal/common/include/generic/rte_byteorder.h
@@ -75,6 +75,13 @@
 #define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
 #endif

+typedef uint16_t rte_be16_t;
+typedef uint32_t rte_be32_t;
+typedef uint64_t rte_be64_t;
+typedef uint16_t rte_le16_t;
+typedef uint32_t rte_le32_t;
+typedef uint64_t rte_le64_t;
+
 /*
  * An internal function to swap bytes in a 16-bit value.
  *
@@ -143,65 +150,65 @@ static uint64_t rte_bswap64(uint64_t x);
 /**
  * Convert a 16-bit value from CPU order to little endian.
  */
-static uint16_t rte_cpu_to_le_16(uint16_t x);
+static rte_le16_t rte_cpu_to_le_16(uint16_t x);

 /**
  * Convert a 32-bit value from CPU order to little endian.
  */
-static uint32_t rte_cpu_to_le_32(uint32_t x);
+static rte_le32_t rte_cpu_to_le_32(uint32_t x);

 /**
  * Convert a 64-bit value from CPU order to little endian.
  */
-static uint64_t rte_cpu_to_le_64(uint64_t x);
+static rte_le64_t rte_cpu_to_le_64(uint64_t x);


 /**
  * Convert a 16-bit value from CPU order to big endian.
  */
-static uint16_t rte_cpu_to_be_16(uint16_t x);
+static rte_be16_t rte_cpu_to_be_16(uint16_t x);

 /**
  * Convert a 32-bit value from CPU order to big endian.
  */
-static uint32_t rte_cpu_to_be_32(uint32_t x);
+static rte_be32_t rte_cpu_to_be_32(uint32_t x);

 /**
  * Convert a 64-bit value from CPU order to big endian.
  */
-static uint64_t rte_cpu_to_be_64(uint64_t x);
+static rte_be64_t rte_cpu_to_be_64(uint64_t x);


 /**
  * Convert a 16-bit value from little endian to CPU order.
  */
-static uint16_t rte_le_to_cpu_16(uint16_t x);
+static uint16_t rte_le_to_cpu_16(rte_le16_t x);

 /**
  * Convert a 32-bit value from little endian to CPU order.
  */
-static uint32_t rte_le_to_cpu_32(uint32_t x);
+static uint32_t rte_le_to_cpu_32(rte_le32_t x);

 /**
  * Convert a 64-bit value from little endian to CPU order.
  */
-static uint64_t rte_le_to_cpu_64(uint64_t x);
+static uint64_t rte_le_to_cpu_64(rte_le64_t x);


 /**
  * Convert a 16-bit value from big endian to CPU order.
  */
-static uint16_t rte_be_to_cpu_16(uint16_t x);
+static uint16_t rte_be_to_cpu_16(rte_be16_t x);

 /**
  * Convert a 32-bit value from big endian to CPU order.
  */
-static uint32_t rte_be_to_cpu_32(uint32_t x);
+static uint32_t rte_be_to_cpu_32(rte_be32_t x);

 /**
  * Convert a 64-bit value from big endian to CPU order.
  */
-static uint64_t rte_be_to_cpu_64(uint64_t x);
+static uint64_t rte_be_to_cpu_64(rte_be64_t x);

 #endif /* __DOXYGEN__ */

diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
index 1836418..95f123e 100644
--- a/lib/librte_net/rte_arp.h
+++ b/lib/librte_net/rte_arp.h
@@ -40,6 +40,7 @@

 #include 
 #include 
+#include 

 #ifdef __cplusplus
 extern "C" {
@@ -50,22 +51,22 @@ extern "C" {
  */
 struct arp_ipv4 {
struct ether_addr arp_sha;  /**< sender hardware address */
-   uint32_t  arp_sip;  /**< sender IP address */
+   rte_be32_tarp_sip;  /**< sender IP address */
struct ether_addr arp_tha;  /**< target hardware address */
-   uint32_t  arp_tip;  /**< target IP address */
+   rte_be32_tarp_tip;  /**< target IP address */
 } __attribute__((__packed__));

 /**
  * ARP header.
  */
 struct arp_hdr {
-   uint16_t arp_hrd;/* format of hardware address */
+   rte_be16_t arp_hrd;  /* format of hardware address */
 #define ARP_HRD_ETHER 1  /* ARP Ethernet address format */

-   uint16_t arp_pro;/* format of protocol address */
-   uint8_t  arp_hln;/* length of hardware address */
-   uint8_t  arp_pln;/* length of protocol address */
-   uint16_t arp_op; /* ARP opcode (command) */
+   rte_be16_t a

[dpdk-dev] [PATCH v2] doc: add mlx5 release notes

2016-11-03 Thread Nelio Laranjeiro
Also add the list of tested and validated NICs.

Signed-off-by: Nelio Laranjeiro 
---
 doc/guides/rel_notes/release_16_11.rst | 122 +
 1 file changed, 122 insertions(+)

diff --git a/doc/guides/rel_notes/release_16_11.rst 
b/doc/guides/rel_notes/release_16_11.rst
index aa0c09a..e33d454 100644
--- a/doc/guides/rel_notes/release_16_11.rst
+++ b/doc/guides/rel_notes/release_16_11.rst
@@ -131,6 +131,13 @@ New Features
   The GCC 4.9 ``-march`` option supports the Intel processor code names.
   The config option ``RTE_MACHINE`` can be used to pass code names to the 
compiler as ``-march`` flag.

+* **Updated the mlx5 driver.**
+
+  The following changes were made to mlx5:
+
+  * Add support for RSS hash result
+  * Several performance improvements
+  * Several bug fixes

 Resolved Issues
 ---
@@ -277,6 +284,24 @@ Tested Platforms
This section is a comment. Make sure to start the actual text at the margin.


+#. Intel(R) Server board S2600WTT
+
+   - Processor: Intel(R) Xeon(R) CPU E5-2697 v2 @ 2.70GHz
+
+#. Intel(R) Server
+
+   - Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz
+
+#. Intel(R) Server
+
+   - Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz
+
+#. IBM(R) Power8(R)
+
+   - Machine type-model: 8247-22L
+   - Firmware FW810.21 (SV810_108)
+   - Processor: POWER8E (raw), AltiVec supported
+
 Tested NICs
 ---

@@ -291,6 +316,96 @@ Tested NICs

This section is a comment. Make sure to start the actual text at the margin.

+#. Mellanox(R) ConnectX(R)-4 10G MCX4111A-XCAT (1x10G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 10G MCX4121A-XCAT (2x10G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 25G MCX4111A-ACAT (1x25G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 25G MCX4121A-ACAT (2x25G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 40G MCX4131A-BCAT/MCX413A-BCAT (1x40G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 40G MCX415A-BCAT (1x40G)
+
+   * Host interface: PCI Express 3.0 x16
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 50G MCX4131A-GCAT/MCX413A-GCAT (1x50G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 50G MCX414A-BCAT (2x50G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 50G MCX415A-GCAT/MCX416A-BCAT/MCX416A-GCAT (2x50G)
+
+   * Host interface: PCI Express 3.0 x16
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 50G MCX415A-CCAT (1x100G)
+
+   * Host interface: PCI Express 3.0 x16
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 100G MCX416A-CCAT (2x100G)
+
+   * Host interface: PCI Express 3.0 x16
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 Lx 10G MCX4121A-XCAT (2x10G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1015
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 14.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 Lx 25G MCX4121A-ACAT (2x25G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1015
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 14.17.1010

 Tested OSes
 ---
@@ -309,3 +424,10 @@ Tested OSes
* Wind River Linux 8

This section is a comment. Make sure to start the actual text at the margin.
+
+* Red Hat Enterprise Linux Server release 6.7 (Santiago)
+* Red Hat Enterprise Linux Server release 7.0 (Maipo)
+* Red Hat Enterprise Linux Server release 7.2 (Maipo)
+* Wind River Linux 6.0.0.26
+* Ubuntu 14.04
+* Ubuntu 15.04
-- 
2.1.4



[dpdk-dev] [PATCH 2/2] doc: add mlx5 release notes

2016-11-02 Thread Nelio Laranjeiro
Also add the list of tested and validated NICs.

Signed-off-by: Nelio Laranjeiro 
---
 doc/guides/rel_notes/release_16_11.rst | 136 ++---
 1 file changed, 110 insertions(+), 26 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_11.rst 
b/doc/guides/rel_notes/release_16_11.rst
index aa0c09a..7447195 100644
--- a/doc/guides/rel_notes/release_16_11.rst
+++ b/doc/guides/rel_notes/release_16_11.rst
@@ -131,6 +131,13 @@ New Features
   The GCC 4.9 ``-march`` option supports the Intel processor code names.
   The config option ``RTE_MACHINE`` can be used to pass code names to the 
compiler as ``-march`` flag.

+* **Updated the mlx5 driver.**
+
+  The following changes were made to mlx5:
+
+  * Add support for RSS hash result
+  * Several performance improvements
+  * Several bug fixes

 Resolved Issues
 ---
@@ -265,47 +272,124 @@ The libraries prepended with a plus sign were 
incremented in this version.
 Tested Platforms
 

-.. This section should contain a list of platforms that were tested with this 
release.
+#. Intel(R) Server board S2600WTT

-   The format is:
+   - Processor: Intel(R) Xeon(R) CPU E5-2697 v2 @ 2.70GHz

-   #. Platform name.
+#. Intel(R) Server

-  * Platform details.
-  * Platform details.
+   - Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz

-   This section is a comment. Make sure to start the actual text at the margin.
+#. Intel(R) Server
+
+   - Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz

+#. IBM(R) Power8(R)
+
+   - Machine type-model: 8247-22L
+   - Firmware FW810.21 (SV810_108)
+   - Processor: POWER8E (raw), AltiVec supported

 Tested NICs
 ---

-.. This section should contain a list of NICs that were tested with this 
release.
+#. Mellanox(R) ConnectX(R)-4 10G MCX4111A-XCAT (1x10G)

-   The format is:
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010

-   #. NIC name.
+#. Mellanox(R) ConnectX(R)-4 10G MCX4121A-XCAT (2x10G)

-  * NIC details.
-  * NIC details.
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010

-   This section is a comment. Make sure to start the actual text at the margin.
+#. Mellanox(R) ConnectX(R)-4 25G MCX4111A-ACAT (1x25G)

+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010

-Tested OSes

+#. Mellanox(R) ConnectX(R)-4 25G MCX4121A-ACAT (2x25G)

-.. This section should contain a list of OSes that were tested with this 
release.
-   The format is as follows, in alphabetical order:
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010

-   * CentOS 7.0
-   * Fedora 23
-   * Fedora 24
-   * FreeBSD 10.3
-   * Red Hat Enterprise Linux 7.2
-   * SUSE Enterprise Linux 12
-   * Ubuntu 15.10
-   * Ubuntu 16.04 LTS
-   * Wind River Linux 8
+#. Mellanox(R) ConnectX(R)-4 40G MCX4131A-BCAT/MCX413A-BCAT (1x40G)

-   This section is a comment. Make sure to start the actual text at the margin.
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 40G MCX415A-BCAT (1x40G)
+
+   * Host interface: PCI Express 3.0 x16
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 50G MCX4131A-GCAT/MCX413A-GCAT (1x50G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 50G MCX414A-BCAT (2x50G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 50G MCX415A-GCAT/MCX416A-BCAT/MCX416A-GCAT (2x50G)
+
+   * Host interface: PCI Express 3.0 x16
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 50G MCX415A-CCAT (1x100G)
+
+   * Host interface: PCI Express 3.0 x16
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 100G MCX416A-CCAT (2x100G)
+
+   * Host interface: PCI Express 3.0 x16
+   * Device ID: 15b3:1013
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 12.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 Lx 10G MCX4121A-XCAT (2x10G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1015
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 14.17.1010
+
+#. Mellanox(R) ConnectX(R)-4 Lx 25G MCX4121A-ACAT (2x25G)
+
+   * Host interface: PCI Express 3.0 x8
+   * Device ID: 15b3:1015
+   * MLNX_OFED: 3.4-1.0.0.0
+   * Firmware version: 14.17.1010
+
+Tested OSes
+---
+
+   * Red Hat Enterprise Linux Server release 6.7 (Santiago

[dpdk-dev] [PATCH 1/2] doc: update mlx5 dependencies

2016-11-02 Thread Nelio Laranjeiro
Signed-off-by: Nelio Laranjeiro 
---
 doc/guides/nics/mlx5.rst | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 0d1fabb..98d1341 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -241,12 +241,12 @@ DPDK and must be installed separately:

 Currently supported by DPDK:

-- Mellanox OFED **3.3-1.0.0.0** and **3.3-2.0.0.0**.
+- Mellanox OFED **3.4-1.0.0.0**.

-- Minimum firmware version:
+- firmware version:

-  - ConnectX-4: **12.16.1006**
-  - ConnectX-4 Lx: **14.16.1006**
+  - ConnectX-4: **12.17.1010**
+  - ConnectX-4 Lx: **14.17.1010**

 Getting Mellanox OFED
 ~
-- 
2.1.4



[dpdk-dev] [PATCH 0/2] update mlx5 release note and guide

2016-11-02 Thread Nelio Laranjeiro
Nelio Laranjeiro (2):
  doc: update mlx5 dependencies
  doc: add mlx5 release notes

 doc/guides/nics/mlx5.rst   |   8 +-
 doc/guides/rel_notes/release_16_11.rst | 136 ++---
 2 files changed, 114 insertions(+), 30 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH 3/3] net/mlx: fix support for new Rx checksum flags

2016-11-02 Thread Nelio Laranjeiro
Fixes: 5842289a546c ("mbuf: add new Rx checksum flags")

Signed-off-by: Nelio Laranjeiro 
---
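Note: the TRANSPOSE() helper used throughout the diff below is defined in
the driver sources and is not shown in this excerpt. Conceptually it moves
a single flag bit from its position in the hardware completion flags to its
position in the mbuf ol_flags word. A plausible definition consistent with
that use (an assumption, not a verbatim copy) is:

/* Assumed shape of TRANSPOSE(): map single-bit mask `from` in `val`
 * onto single-bit mask `to` by scaling. */
#define TRANSPOSE(val, from, to) \
	(((from) >= (to)) ? \
	 (((val) & (from)) / ((from) / (to))) : \
	 (((val) & (from)) * ((to) / (from))))
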
 drivers/net/mlx4/mlx4.c  | 21 -
 drivers/net/mlx5/mlx5_rxtx.c | 25 ++---
 2 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index faa9acd..da61a85 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -2995,25 +2995,20 @@ rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t 
flags)

if (rxq->csum)
ol_flags |=
-   TRANSPOSE(~flags,
+   TRANSPOSE(flags,
  IBV_EXP_CQ_RX_IP_CSUM_OK,
- PKT_RX_IP_CKSUM_BAD) |
-   TRANSPOSE(~flags,
+ PKT_RX_IP_CKSUM_GOOD) |
+   TRANSPOSE(flags,
  IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK,
- PKT_RX_L4_CKSUM_BAD);
-   /*
-* PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
-* of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
-* (its value is 0).
-*/
+ PKT_RX_L4_CKSUM_GOOD);
if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
ol_flags |=
-   TRANSPOSE(~flags,
+   TRANSPOSE(flags,
  IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
- PKT_RX_IP_CKSUM_BAD) |
-   TRANSPOSE(~flags,
+ PKT_RX_IP_CKSUM_GOOD) |
+   TRANSPOSE(flags,
  IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
- PKT_RX_L4_CKSUM_BAD);
+ PKT_RX_L4_CKSUM_GOOD);
return ol_flags;
 }

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b6e0d65..beff580 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1239,29 +1239,24 @@ rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct 
mlx5_cqe *cqe)

if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) ||
(l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6))
-   ol_flags |=
-   (!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) *
-PKT_RX_IP_CKSUM_BAD);
+   ol_flags |= TRANSPOSE(cqe->hds_ip_ext,
+ MLX5_CQE_L3_OK,
+ PKT_RX_IP_CKSUM_GOOD);
if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) ||
(l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) ||
(l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) ||
(l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP))
-   ol_flags |=
-   (!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) *
-PKT_RX_L4_CKSUM_BAD);
-   /*
-* PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
-* of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
-* (its value is 0).
-*/
+   ol_flags |= TRANSPOSE(cqe->hds_ip_ext,
+ MLX5_CQE_L4_OK,
+ PKT_RX_L4_CKSUM_GOOD);
if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
ol_flags |=
-   TRANSPOSE(~cqe->l4_hdr_type_etc,
+   TRANSPOSE(cqe->l4_hdr_type_etc,
  MLX5_CQE_RX_OUTER_IP_CSUM_OK,
- PKT_RX_IP_CKSUM_BAD) |
-   TRANSPOSE(~cqe->l4_hdr_type_etc,
+ PKT_RX_IP_CKSUM_GOOD) |
+   TRANSPOSE(cqe->l4_hdr_type_etc,
  MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK,
- PKT_RX_L4_CKSUM_BAD);
+ PKT_RX_L4_CKSUM_GOOD);
return ol_flags;
 }

-- 
2.1.4



[dpdk-dev] [PATCH 2/3] net/mlx5: define explicit fields for Rx offloads

2016-11-02 Thread Nelio Laranjeiro
This commit redefines the completion queue element structure as the
original lacks the required fields.
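
As a rough illustration (field names follow the patch below, bit values are
assumed for the demo only), explicit fields let the Rx path test checksum
status with plain bit operations instead of casting through the old layout:

#include <stdint.h>
#include <stdio.h>

/* Reduced stand-in for the redefined CQE; only the fields used here. */
struct demo_cqe {
	uint8_t hds_ip_ext;      /* L3/L4 checksum status bits. */
	uint8_t l4_hdr_type_etc; /* L4 type and outer checksum bits. */
	uint32_t byte_cnt;       /* Byte count of the received packet. */
};

/* Assumed bit positions, for illustration only. */
#define DEMO_CQE_L3_OK (1u << 1)
#define DEMO_CQE_L4_OK (1u << 0)

int
main(void)
{
	struct demo_cqe cqe = {
		.hds_ip_ext = DEMO_CQE_L3_OK | DEMO_CQE_L4_OK,
		.byte_cnt = 64,
	};

	/* With named fields the checks become simple bit tests. */
	printf("l3_ok=%d l4_ok=%d len=%u\n",
	       !!(cqe.hds_ip_ext & DEMO_CQE_L3_OK),
	       !!(cqe.hds_ip_ext & DEMO_CQE_L4_OK),
	       cqe.byte_cnt);
	return 0;
}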

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_prm.h  | 16 -
 drivers/net/mlx5/mlx5_rxtx.c | 56 +---
 2 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 500f25a..7f31a2f 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -158,7 +158,21 @@ struct mlx5_cqe {
 #if (RTE_CACHE_LINE_SIZE == 128)
uint8_t padding[64];
 #endif
-   struct mlx5_cqe64 cqe64;
+   uint8_t pkt_info;
+   uint8_t rsvd0[11];
+   uint32_t rx_hash_res;
+   uint8_t rx_hash_type;
+   uint8_t rsvd1[11];
+   uint8_t hds_ip_ext;
+   uint8_t l4_hdr_type_etc;
+   uint16_t vlan_info;
+   uint8_t rsvd2[12];
+   uint32_t byte_cnt;
+   uint64_t timestamp;
+   uint8_t rsvd3[4];
+   uint16_t wqe_counter;
+   uint8_t rsvd4;
+   uint8_t op_own;
 };

 #endif /* RTE_PMD_MLX5_PRM_H_ */
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 7ebe557..b6e0d65 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -83,10 +83,10 @@
  *   0 the first time.
  */
 static inline int
-check_cqe64_seen(volatile struct mlx5_cqe64 *cqe)
+check_cqe_seen(volatile struct mlx5_cqe *cqe)
 {
static const uint8_t magic[] = "seen";
-   volatile uint8_t (*buf)[sizeof(cqe->rsvd40)] = &cqe->rsvd40;
+   volatile uint8_t (*buf)[sizeof(cqe->rsvd3)] = &cqe->rsvd3;
int ret = 1;
unsigned int i;

@@ -101,9 +101,9 @@ check_cqe64_seen(volatile struct mlx5_cqe64 *cqe)
 #endif /* NDEBUG */

 static inline int
-check_cqe64(volatile struct mlx5_cqe64 *cqe,
-   unsigned int cqes_n, const uint16_t ci)
-   __attribute__((always_inline));
+check_cqe(volatile struct mlx5_cqe *cqe,
+ unsigned int cqes_n, const uint16_t ci)
+ __attribute__((always_inline));

 /**
  * Check whether CQE is valid.
@@ -119,8 +119,8 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
  *   0 on success, 1 on failure.
  */
 static inline int
-check_cqe64(volatile struct mlx5_cqe64 *cqe,
-   unsigned int cqes_n, const uint16_t ci)
+check_cqe(volatile struct mlx5_cqe *cqe,
+ unsigned int cqes_n, const uint16_t ci)
 {
uint16_t idx = ci & cqes_n;
uint8_t op_own = cqe->op_own;
@@ -138,14 +138,14 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
(syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
return 0;
-   if (!check_cqe64_seen(cqe))
+   if (!check_cqe_seen(cqe))
ERROR("unexpected CQE error %u (0x%02x)"
  " syndrome 0x%02x",
  op_code, op_code, syndrome);
return 1;
} else if ((op_code != MLX5_CQE_RESP_SEND) &&
   (op_code != MLX5_CQE_REQ)) {
-   if (!check_cqe64_seen(cqe))
+   if (!check_cqe_seen(cqe))
ERROR("unexpected CQE opcode %u (0x%02x)",
  op_code, op_code);
return 1;
@@ -174,25 +174,25 @@ txq_complete(struct txq *txq)
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
-   volatile struct mlx5_cqe64 *cqe = NULL;
+   volatile struct mlx5_cqe *cqe = NULL;
volatile struct mlx5_wqe *wqe;

do {
-   volatile struct mlx5_cqe64 *tmp;
+   volatile struct mlx5_cqe *tmp;

-   tmp = &(*txq->cqes)[cq_ci & cqe_cnt].cqe64;
-   if (check_cqe64(tmp, cqe_n, cq_ci))
+   tmp = &(*txq->cqes)[cq_ci & cqe_cnt];
+   if (check_cqe(tmp, cqe_n, cq_ci))
break;
cqe = tmp;
 #ifndef NDEBUG
if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
-   if (!check_cqe64_seen(cqe))
+   if (!check_cqe_seen(cqe))
ERROR("unexpected compressed CQE, TX stopped");
return;
}
if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
(MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
-   if (!check_cqe64_seen(cqe))
+   if (!check_cqe_seen(cqe))
ERROR("unexpected error CQE, TX stopped");
return;
}
@@ -1090,13 +1090,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct 
rte_mbuf **pkts,
  *   Packet type for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_

[dpdk-dev] [PATCH 1/3] net/mlx5: fix Rx checksum macros

2016-11-02 Thread Nelio Laranjeiro
Add missing:

 - MLX5_CQE_RX_IPV4_PACKET
 - MLX5_CQE_RX_IPV6_PACKET
 - MLX5_CQE_RX_OUTER_IPV4_PACKET
 - MLX5_CQE_RX_OUTER_IPV6_PACKET
 - MLX5_CQE_RX_TUNNEL_PACKET
 - MLX5_CQE_RX_OUTER_IP_CSUM_OK
 - MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK

Fixes: 51a50a3d9b8f ("net/mlx5: add definitions for data path without Verbs")

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_prm.h  | 21 +
 drivers/net/mlx5/mlx5_rxtx.c | 16 
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 90b47f0..500f25a 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -84,6 +84,27 @@
 #define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */
 #endif

+/* IPv4 packet. */
+#define MLX5_CQE_RX_IPV4_PACKET (1u << 2)
+
+/* IPv6 packet. */
+#define MLX5_CQE_RX_IPV6_PACKET (1u << 3)
+
+/* Outer IPv4 packet. */
+#define MLX5_CQE_RX_OUTER_IPV4_PACKET (1u << 7)
+
+/* Outer IPv6 packet. */
+#define MLX5_CQE_RX_OUTER_IPV6_PACKET (1u << 8)
+
+/* Tunnel packet bit in the CQE. */
+#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 4)
+
+/* Outer IP checksum OK. */
+#define MLX5_CQE_RX_OUTER_IP_CSUM_OK (1u << 5)
+
+/* Outer UDP header and checksum OK. */
+#define MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK (1u << 6)
+
 /* Subset of struct mlx5_wqe_eth_seg. */
 struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ba8e202..7ebe557 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1096,19 +1096,19 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe)
uint8_t flags = cqe->l4_hdr_type_etc;
uint8_t info = cqe->rsvd0[0];

-   if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET)
+   if (info & MLX5_CQE_RX_TUNNEL_PACKET)
pkt_type =
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
+ MLX5_CQE_RX_OUTER_IPV4_PACKET,
  RTE_PTYPE_L3_IPV4) |
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
+ MLX5_CQE_RX_OUTER_IPV6_PACKET,
  RTE_PTYPE_L3_IPV6) |
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV4_PACKET,
+ MLX5_CQE_RX_IPV4_PACKET,
  RTE_PTYPE_INNER_L3_IPV4) |
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV6_PACKET,
+ MLX5_CQE_RX_IPV6_PACKET,
  RTE_PTYPE_INNER_L3_IPV6);
else
pkt_type =
@@ -1256,13 +1256,13 @@ rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct 
mlx5_cqe64 *cqe)
 * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
 * (its value is 0).
 */
-   if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
+   if ((info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
ol_flags |=
TRANSPOSE(~cqe->l4_hdr_type_etc,
- IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
+ MLX5_CQE_RX_OUTER_IP_CSUM_OK,
  PKT_RX_IP_CKSUM_BAD) |
TRANSPOSE(~cqe->l4_hdr_type_etc,
- IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
+ MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK,
  PKT_RX_L4_CKSUM_BAD);
return ol_flags;
 }
-- 
2.1.4



[dpdk-dev] [PATCH 0/3] fix Rx checksum offloads

2016-11-02 Thread Nelio Laranjeiro
Correctly fill the mbuf Rx offload flags.

Nelio Laranjeiro (3):
  net/mlx5: fix Rx checksum macros
  net/mlx5: define explicit fields for Rx offloads
  net/mlx: fix support for new Rx checksum flags

 drivers/net/mlx4/mlx4.c  | 21 --
 drivers/net/mlx5/mlx5_prm.h  | 37 +-
 drivers/net/mlx5/mlx5_rxtx.c | 93 
 3 files changed, 87 insertions(+), 64 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH] doc: fix mlx5 features overview

2016-10-27 Thread Nelio Laranjeiro
Fixes: 75ef62a94301 ("net/mlx5: fix link speed capability information")
Fixes: 188408719888 ("net/mlx5: fix support for newer link speeds")

Signed-off-by: Nelio Laranjeiro 
---
 doc/guides/nics/features/mlx5.ini | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/guides/nics/features/mlx5.ini 
b/doc/guides/nics/features/mlx5.ini
index e84612f..f811e3f 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -4,6 +4,7 @@
 ; Refer to default.ini for the full list of available PMD features.
 ;
 [Features]
+Speed capabilities   = Y
 Link status  = Y
 Link status event= Y
 Queue start/stop = Y
-- 
2.1.4



[dpdk-dev] [PATCH 2/2] net/mlx5: fix support for newer link speeds

2016-10-26 Thread Nelio Laranjeiro
Not all speed capabilities can be reported properly before Linux 4.8 (25G,
50G and 100G speeds are missing); moreover, the API to retrieve them only
exists since Linux 4.5.  This commit therefore implements compatibility code
for all versions.
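
A minimal sketch of the compatibility pattern, under stated assumptions: the
HAVE_ETHTOOL_LINK_MODE_25G macro name matches the build-time probe added to
the Makefile below, while the speed flag values and the fallback bit index
are purely illustrative.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the rte_ethdev speed flags, illustration only. */
#define DEMO_LINK_SPEED_10G (1u << 0)
#define DEMO_LINK_SPEED_25G (1u << 1)

static uint32_t
demo_speed_capa(uint64_t link_modes)
{
	uint32_t capa = 0;

	if (link_modes & (1ULL << 19)) /* hypothetical 10G link-mode bit */
		capa |= DEMO_LINK_SPEED_10G;
#ifdef HAVE_ETHTOOL_LINK_MODE_25G
	/* Compiled only when the probe found the 25G bit in <linux/ethtool.h>,
	 * so builds against older kernel headers still succeed and simply
	 * do not report 25G. */
	if (link_modes & (1ULL << ETHTOOL_LINK_MODE_25000baseCR_Full_BIT))
		capa |= DEMO_LINK_SPEED_25G;
#endif
	return capa;
}

int
main(void)
{
	printf("capa=0x%x\n", demo_speed_capa(1ULL << 19));
	return 0;
}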

Fixes: e274f5732225 ("ethdev: add speed capabilities")

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/Makefile  |  15 +
 drivers/net/mlx5/mlx5_ethdev.c | 123 -
 2 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 2c13c30..cf87f0b 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -121,6 +121,21 @@ mlx5_autoconf.h.new: $(RTE_SDK)/scripts/auto-config-h.sh
infiniband/mlx5_hw.h \
enum MLX5_OPCODE_TSO \
$(AUTOCONF_OUTPUT)
+   $Q sh -- '$<' '$@' \
+   HAVE_ETHTOOL_LINK_MODE_25G \
+   /usr/include/linux/ethtool.h \
+   enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \
+   $(AUTOCONF_OUTPUT)
+   $Q sh -- '$<' '$@' \
+   HAVE_ETHTOOL_LINK_MODE_50G \
+   /usr/include/linux/ethtool.h \
+   enum ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT \
+   $(AUTOCONF_OUTPUT)
+   $Q sh -- '$<' '$@' \
+   HAVE_ETHTOOL_LINK_MODE_100G \
+   /usr/include/linux/ethtool.h \
+   enum ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT \
+   $(AUTOCONF_OUTPUT)

 # Create mlx5_autoconf.h or update it in case it differs from the new one.

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 042c9bc..2d49f86 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -627,15 +627,15 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 }

 /**
- * DPDK callback to retrieve physical link information (unlocked version).
+ * Retrieve physical link information (unlocked version using legacy ioctl).
  *
  * @param dev
  *   Pointer to Ethernet device structure.
  * @param wait_to_complete
  *   Wait for request completion (ignored).
  */
-int
-mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
+static int
+mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
 {
struct priv *priv = mlx5_get_priv(dev);
struct ethtool_cmd edata = {
@@ -691,6 +691,123 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int 
wait_to_complete)
 }

 /**
+ * Retrieve physical link information (unlocked version using new ioctl from
+ * Linux 4.5).
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param wait_to_complete
+ *   Wait for request completion (ignored).
+ */
+static int
+mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
+{
+#ifdef ETHTOOL_GLINKSETTINGS
+   struct priv *priv = mlx5_get_priv(dev);
+   struct ethtool_link_settings edata = {
+   .cmd = ETHTOOL_GLINKSETTINGS,
+   };
+   struct ifreq ifr;
+   struct rte_eth_link dev_link;
+   uint64_t sc;
+
+   (void)wait_to_complete;
+   if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
+   WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
+   return -1;
+   }
+   memset(&dev_link, 0, sizeof(dev_link));
+   dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
+   (ifr.ifr_flags & IFF_RUNNING));
+   ifr.ifr_data = (void *)&edata;
+   if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
+   DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
+ strerror(errno));
+   return -1;
+   }
+   dev_link.link_speed = edata.speed;
+   sc = edata.link_mode_masks[0] |
+   ((uint64_t)edata.link_mode_masks[1] << 32);
+   priv->link_speed_capa = 0;
+   /* Link speeds available in kernel v4.5. */
+   if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT)
+   priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
+   if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT |
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))
+   priv->link_speed_capa |= ETH_LINK_SPEED_1G;
+   if (sc & (ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT |
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT |
+ ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))
+   priv->link_speed_capa |= ETH_LINK_SPEED_10G;
+   if (sc & (ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT |
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))
+   priv->link_speed_capa |= ETH_LINK_SPEED_20G;
+   if (sc & (ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT |
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT |
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT |
+ ETHTOOL_LINK_MODE_400

[dpdk-dev] [PATCH 1/2] net/mlx5: fix link speed capability information

2016-10-26 Thread Nelio Laranjeiro
Make hard-coded values dynamic to return correct link speed capabilities
(not all ConnectX-4 NICs support everything).
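
The idea, as a hedged sketch: read the ethtool advertisement mask once and
derive the capability mask from it.  The SUPPORTED_* bits come from
<linux/ethtool.h>; the DEMO_* flags stand in for the ETH_LINK_SPEED_* values
and are not the real ones.

#include <stdint.h>
#include <stdio.h>
#include <linux/ethtool.h>	/* SUPPORTED_* bits from the legacy GSET API */

/* Stand-ins for the DPDK speed-capability flags, illustration only. */
#define DEMO_LINK_SPEED_1G  (1u << 0)
#define DEMO_LINK_SPEED_10G (1u << 1)

static uint32_t
demo_capa_from_gset(uint32_t supported)
{
	uint32_t capa = 0;

	if (supported & (SUPPORTED_1000baseT_Full | SUPPORTED_1000baseKX_Full))
		capa |= DEMO_LINK_SPEED_1G;
	if (supported & SUPPORTED_10000baseKR_Full)
		capa |= DEMO_LINK_SPEED_10G;
	return capa;
}

int
main(void)
{
	printf("capa=0x%x\n", demo_capa_from_gset(SUPPORTED_10000baseKR_Full));
	return 0;
}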

Fixes: e274f5732225 ("ethdev: add speed capabilities")

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5.h|  1 +
 drivers/net/mlx5/mlx5_ethdev.c | 25 +++--
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 000fb38..d7976cb 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -139,6 +139,7 @@ struct priv {
unsigned int reta_idx_n; /* RETA index size. */
struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
+   uint32_t link_speed_capa; /* Link speed capabilities. */
rte_spinlock_t lock; /* Lock for control functions. */
 };

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index b8b3ea9..042c9bc 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -604,15 +604,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *info)
info->hash_key_size = ((*priv->rss_conf) ?
   (*priv->rss_conf)[0]->rss_key_len :
   0);
-   info->speed_capa =
-   ETH_LINK_SPEED_1G |
-   ETH_LINK_SPEED_10G |
-   ETH_LINK_SPEED_20G |
-   ETH_LINK_SPEED_25G |
-   ETH_LINK_SPEED_40G |
-   ETH_LINK_SPEED_50G |
-   ETH_LINK_SPEED_56G |
-   ETH_LINK_SPEED_100G;
+   info->speed_capa = priv->link_speed_capa;
priv_unlock(priv);
 }

@@ -647,7 +639,7 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int 
wait_to_complete)
 {
struct priv *priv = mlx5_get_priv(dev);
struct ethtool_cmd edata = {
-   .cmd = ETHTOOL_GSET
+   .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
};
struct ifreq ifr;
struct rte_eth_link dev_link;
@@ -672,6 +664,19 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int 
wait_to_complete)
dev_link.link_speed = 0;
else
dev_link.link_speed = link_speed;
+   priv->link_speed_capa = 0;
+   if (edata.supported & SUPPORTED_Autoneg)
+   priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
+   if (edata.supported & (SUPPORTED_1000baseT_Full |
+  SUPPORTED_1000baseKX_Full))
+   priv->link_speed_capa |= ETH_LINK_SPEED_1G;
+   if (edata.supported & SUPPORTED_10000baseKR_Full)
+   priv->link_speed_capa |= ETH_LINK_SPEED_10G;
+   if (edata.supported & (SUPPORTED_40000baseKR4_Full |
+  SUPPORTED_40000baseCR4_Full |
+  SUPPORTED_40000baseSR4_Full |
+  SUPPORTED_40000baseLR4_Full))
+   priv->link_speed_capa |= ETH_LINK_SPEED_40G;
dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
-- 
2.1.4



[dpdk-dev] [PATCH 0/2] mlx5: fix link speed capabilities

2016-10-26 Thread Nelio Laranjeiro
Make hard-coded values dynamic to return correct link speed capabilities
(not all ConnectX-4 NICs support everything).

Nelio Laranjeiro (2):
  net/mlx5: fix link speed capability information
  net/mlx5: fix support for newer link speeds

 drivers/net/mlx5/Makefile  |  15 +
 drivers/net/mlx5/mlx5.h|   1 +
 drivers/net/mlx5/mlx5_ethdev.c | 148 +
 3 files changed, 151 insertions(+), 13 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH] net/mlx5: fix link status report

2016-10-17 Thread Nelio Laranjeiro
From: Olga Shern 

This commit fixes the link status report on device start-up when the
LSC callback is configured.

Fixes: 62072098b54e ("mlx5: support setting link up or down")

Signed-off-by: Olga Shern 
---
 drivers/net/mlx5/mlx5.c| 1 +
 drivers/net/mlx5/mlx5.h| 1 +
 drivers/net/mlx5/mlx5_ethdev.c | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 2583a37..f066419 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -671,6 +671,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct 
rte_pci_device *pci_dev)
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
priv_set_flags(priv, ~IFF_UP, IFF_UP);
+   mlx5_link_update_unlocked(priv->dev, 1);
continue;

 port_error:
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d24c4f4..000fb38 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -191,6 +191,7 @@ int priv_set_flags(struct priv *, unsigned int, unsigned 
int);
 int mlx5_dev_configure(struct rte_eth_dev *);
 void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *);
 const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev);
+int mlx5_link_update_unlocked(struct rte_eth_dev *, int);
 int mlx5_link_update(struct rte_eth_dev *, int);
 int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t);
 int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 7ee2006..b8b3ea9 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -642,7 +642,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
  * @param wait_to_complete
  *   Wait for request completion (ignored).
  */
-static int
+int
 mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
 {
struct priv *priv = mlx5_get_priv(dev);
-- 
2.1.4



[dpdk-dev] [PATCH] net/mlx5: fix hash key size retrieval

2016-10-14 Thread Nelio Laranjeiro
Return RSS key size in struct rte_eth_dev_info.

Fixes: 0f6f219e7919 ("app/testpmd: fix RSS hash key size")

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_ethdev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index c1c2d26..b8b3ea9 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -601,6 +601,9 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *info)
 * size if it is not fixed.
 * The API should be updated to solve this problem. */
info->reta_size = priv->ind_table_max_size;
+   info->hash_key_size = ((*priv->rss_conf) ?
+  (*priv->rss_conf)[0]->rss_key_len :
+  0);
info->speed_capa =
ETH_LINK_SPEED_1G |
ETH_LINK_SPEED_10G |
-- 
2.1.4



[dpdk-dev] [PATCH] net/mlx5: fix Rx function selection

2016-10-11 Thread Nelio Laranjeiro
mlx5_rx_queue_setup() was setting the Rx function by itself instead of
using priv_select_rx_function() written for that purpose.

Fixes: cdab90cb5c8d ("net/mlx5: add Tx/Rx burst function selection wrapper")

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b9a5fe6..fe27d22 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1264,7 +1264,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t 
idx, uint16_t desc,
  (void *)dev, (void *)rxq_ctrl);
(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
/* Update receive callback. */
-   dev->rx_pkt_burst = mlx5_rx_burst;
+   priv_select_rx_function(priv);
}
priv_unlock(priv);
return -ret;
-- 
2.1.4



[dpdk-dev] [PATCH v2 6/6] net/mlx5: remove gather loop on segments

2016-09-14 Thread Nelio Laranjeiro
The Tx function was handling segmented packets with a double loop; this can
be done in a single one.
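
Conceptually (a sketch only, not the PMD code), the change amounts to walking
the segment chain once, posting one data segment per iteration, instead of an
outer per-packet loop plus an inner gather loop:

#include <stdio.h>

/* Toy segment chain standing in for an rte_mbuf chain. */
struct demo_seg {
	unsigned int len;
	struct demo_seg *next;
};

static unsigned int
demo_post_segments(struct demo_seg *seg)
{
	unsigned int ds = 0;

	while (seg != NULL) {
		/* ... a real Tx path would write one data segment here ... */
		ds++;
		seg = seg->next;
	}
	return ds;
}

int
main(void)
{
	struct demo_seg s2 = { .len = 60, .next = NULL };
	struct demo_seg s1 = { .len = 128, .next = &s2 };

	printf("descriptors used: %u\n", demo_post_segments(&s1));
	return 0;
}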

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Vasily Philipov 
---
 drivers/net/mlx5/mlx5_rxtx.c | 312 ++-
 1 file changed, 158 insertions(+), 154 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 9d00ddc..b91b644 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -288,112 +288,6 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
 }

 /**
- * Write a regular WQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param buf
- *   Buffer.
- * @param length
- *   Packet length.
- *
- * @return ds
- *   Number of DS elements consumed.
- */
-static inline unsigned int
-mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
-  struct rte_mbuf *buf, uint32_t length)
-{
-   uint8_t *raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
-   uint16_t ds;
-   uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
-   uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
-   struct mlx5_wqe_data_seg *dseg = NULL;
-
-   assert(length >= MLX5_WQE_DWORD_SIZE);
-   /* Start the know and common part of the WQE structure. */
-   wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-   wqe->ctrl[2] = 0;
-   wqe->ctrl[3] = 0;
-   wqe->eseg.rsvd0 = 0;
-   wqe->eseg.rsvd1 = 0;
-   wqe->eseg.mss = 0;
-   wqe->eseg.rsvd2 = 0;
-   /* Start by copying the Ethernet Header. */
-   rte_mov16((uint8_t *)raw, (uint8_t *)addr);
-   length -= MLX5_WQE_DWORD_SIZE;
-   addr += MLX5_WQE_DWORD_SIZE;
-   /* Replace the Ethernet type by the VLAN if necessary. */
-   if (buf->ol_flags & PKT_TX_VLAN_PKT) {
-   uint32_t vlan = htonl(0x8100 | buf->vlan_tci);
-
-   memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - sizeof(vlan)),
-  &vlan, sizeof(vlan));
-   addr -= sizeof(vlan);
-   length += sizeof(vlan);
-   }
-   /* Inline if enough room. */
-   if (txq->max_inline != 0) {
-   uintptr_t end = (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
-   uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
-   uint16_t room;
-
-   raw += MLX5_WQE_DWORD_SIZE;
-   room = end - (uintptr_t)raw;
-   if (room > max_inline) {
-   uintptr_t addr_end = (addr + max_inline) &
-   ~(RTE_CACHE_LINE_SIZE - 1);
-   uint16_t copy_b = ((addr_end - addr) > length) ?
- length :
- (addr_end - addr);
-
-   rte_memcpy((void *)raw, (void *)addr, copy_b);
-   addr += copy_b;
-   length -= copy_b;
-   pkt_inline_sz += copy_b;
-   /* Sanity check. */
-   assert(addr <= addr_end);
-   }
-   /* Store the inlined packet size in the WQE. */
-   wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
-   /*
-* 2 DWORDs consumed by the WQE header + 1 DSEG +
-* the size of the inline part of the packet.
-*/
-   ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
-   if (length > 0) {
-   dseg = (struct mlx5_wqe_data_seg *)
-   ((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
-   if ((uintptr_t)dseg >= end)
-   dseg = (struct mlx5_wqe_data_seg *)
-   ((uintptr_t)&(*txq->wqes)[0]);
-   goto use_dseg;
-   }
-   } else {
-   /* Add the remaining packet as a simple ds. */
-   ds = 3;
-   /*
-* No inline has been done in the packet, only the Ethernet
-* Header as been stored.
-*/
-   wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
-   dseg = (struct mlx5_wqe_data_seg *)
-   ((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
-use_dseg:
-   *dseg = (struct mlx5_wqe_data_seg) {
-   .addr = htonll(addr),
-   .byte_count = htonl(length),
-   .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
-   };
-   ++ds;
-   }
-   wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
-   return ds;
-}
-
-/**
  * Ring TX queue doorbell.
  *
  * @param txq
@@ -475,6 +369,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
unsigned int

[dpdk-dev] [PATCH v2 5/6] net/mlx5: reduce Tx and Rx structure size

2016-09-14 Thread Nelio Laranjeiro
The PMD only uses power-of-two numbers of Work Queue Elements; storing the
element count in log2 reduces the size of the field that holds it.
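
In short (a sketch with illustrative values): only the exponent is stored,
and the element count and ring index mask are recomputed on demand.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* 4 bits are enough for the exponent instead of a full 16-bit count. */
	uint16_t wqe_n_log2 = 9;                 /* 512 WQEs */
	unsigned int wqe_cnt = 1u << wqe_n_log2; /* element count */
	unsigned int mask = wqe_cnt - 1;         /* ring index mask */
	uint16_t wqe_ci = 515;                   /* consumer index */

	printf("count=%u slot=%u\n", wqe_cnt, wqe_ci & mask); /* 512 3 */
	return 0;
}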

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 23 ---
 drivers/net/mlx5/mlx5_rxtx.h |  2 +-
 drivers/net/mlx5/mlx5_txq.c  |  4 ++--
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 214922b..9d00ddc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -199,9 +199,10 @@ txq_complete(struct txq *txq)
} while (1);
if (unlikely(cqe == NULL))
return;
-   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) &
+   ((1 << txq->wqe_n) - 1)].hdr;
elts_tail = wqe->ctrl[3];
-   assert(elts_tail < txq->wqe_n);
+   assert(elts_tail < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
struct rte_mbuf *elt = (*txq->elts)[elts_free];
@@ -335,7 +336,7 @@ mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe 
*wqe,
}
/* Inline if enough room. */
if (txq->max_inline != 0) {
-   uintptr_t end = (uintptr_t)&(*txq->wqes)[txq->wqe_n];
+   uintptr_t end = (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
uint16_t room;

@@ -446,7 +447,7 @@ tx_prefetch_wqe(struct txq *txq, uint16_t ci)
 {
volatile struct mlx5_wqe64 *wqe;

-   wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
+   wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
rte_prefetch0(wqe);
 }

@@ -504,7 +505,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
max -= segs_n;
--pkts_n;
elts_head_next = (elts_head + 1) & (elts_n - 1);
-   wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)].hdr;
+   wqe = &(*txq->wqes)[txq->wqe_ci & ((1 << txq->wqe_n) - 1)].hdr;
tx_prefetch_wqe(txq, txq->wqe_ci);
tx_prefetch_wqe(txq, txq->wqe_ci + 1);
if (pkts_n)
@@ -540,7 +541,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE)))
dseg = (volatile void *)
&(*txq->wqes)[txq->wqe_ci++ &
- (txq->wqe_n - 1)];
+ ((1 << txq->wqe_n) - 1)];
else
++dseg;
++ds;
@@ -607,10 +608,10 @@ skip_segs:
 static inline void
 mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
-   uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+   uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
(volatile struct mlx5_wqe_data_seg (*)[])
-   (uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)];
+   (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)];

mpw->state = MLX5_MPW_STATE_OPENED;
mpw->pkts_n = 0;
@@ -815,7 +816,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
 static inline void
 mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
-   uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+   uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
struct mlx5_wqe_inl_small *inl;

mpw->state = MLX5_MPW_INL_STATE_OPENED;
@@ -1000,7 +1001,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf 
**pkts,
addr = rte_pktmbuf_mtod(buf, uintptr_t);
(*txq->elts)[elts_head] = buf;
/* Maximum number of bytes before wrapping. */
-   max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] -
+   max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] -
   (uintptr_t)mpw.data.raw);
if (length > max) {
rte_memcpy((void *)(uintptr_t)mpw.data.raw,
@@ -1019,7 +1020,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf 
**pkts,
mpw.data.raw += length;
}
if ((uintptr_t)mpw.data.

[dpdk-dev] [PATCH v2 4/6] net/mlx5: reduce Tx structure size

2016-09-14 Thread Nelio Laranjeiro
Blue Flame is a buffer allocated with a power of two value, its size is
returned by Verbs in log2.
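
A sketch of how the log2 value is used (values illustrative, not taken from
hardware): the doorbell code alternates between the two Blue Flame areas by
XOR-ing the offset with the buffer size, as the patch does with
(1 << bf_buf_size).

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint16_t bf_buf_size_log2 = 9; /* e.g. a 512-byte buffer, stored as 9 */
	uint16_t bf_offset = 0;
	int i;

	/* Each doorbell write flips to the other Blue Flame area. */
	for (i = 0; i < 4; i++) {
		printf("doorbell %d at offset %u\n", i, bf_offset);
		bf_offset ^= (uint16_t)(1u << bf_buf_size_log2);
	}
	return 0;
}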

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 2 +-
 drivers/net/mlx5/mlx5_rxtx.h | 2 +-
 drivers/net/mlx5/mlx5_txq.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 4f28aa9..214922b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -413,7 +413,7 @@ mlx5_tx_dbrec(struct txq *txq)
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
rte_mov16(dst, (uint8_t *)data);
-   txq->bf_offset ^= txq->bf_buf_size;
+   txq->bf_offset ^= (1 << txq->bf_buf_size);
 }

 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 224614e..3dca8ca 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -249,8 +249,8 @@ struct txq {
uint16_t wqe_n; /* Number of WQ elements. */
uint16_t elts_n:4; /* (*elts)[] length (in log2). */
uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
+   uint16_t bf_buf_size:4; /* Log2 Blueflame size. */
uint16_t bf_offset; /* Blueflame offset. */
-   uint16_t bf_buf_size; /* Blueflame size. */
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
uint32_t qp_num_8s; /* QP number shifted by 8. */
volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 6145b69..9919e37 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -221,7 +221,7 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
tmpl->txq.bf_reg = qp->gen_data.bf->reg;
tmpl->txq.bf_offset = qp->gen_data.bf->offset;
-   tmpl->txq.bf_buf_size = qp->gen_data.bf->buf_size;
+   tmpl->txq.bf_buf_size = log2above(qp->gen_data.bf->buf_size);
tmpl->txq.cq_db = cq->dbrec;
tmpl->txq.cqes =
(volatile struct mlx5_cqe (*)[])
-- 
2.1.4



[dpdk-dev] [PATCH v2 3/6] net/mlx5: reduce Tx and Rx structure size

2016-09-14 Thread Nelio Laranjeiro
The PMD only uses power-of-two numbers of Completion Queue Elements; storing
the element count in log2 reduces the size of the field that holds it.

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxq.c  | 2 +-
 drivers/net/mlx5/mlx5_rxtx.c | 8 
 drivers/net/mlx5/mlx5_rxtx.h | 4 ++--
 drivers/net/mlx5/mlx5_txq.c  | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d9db368..f6f4315 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -881,7 +881,7 @@ rxq_setup(struct rxq_ctrl *tmpl)
if (elts == NULL)
return ENOMEM;
tmpl->rxq.rq_db = rwq->rq.db;
-   tmpl->rxq.cqe_n = ibcq->cqe + 1;
+   tmpl->rxq.cqe_n = log2above(ibcq->cqe);
tmpl->rxq.cq_ci = 0;
tmpl->rxq.rq_ci = 0;
tmpl->rxq.cq_db = cq->dbrec;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index e132727..4f28aa9 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -166,8 +166,8 @@ txq_complete(struct txq *txq) 
__attribute__((always_inline));
 static inline void
 txq_complete(struct txq *txq)
 {
-   const unsigned int cqe_n = txq->cqe_n;
const unsigned int elts_n = 1 << txq->elts_n;
+   const unsigned int cqe_n = 1 << txq->cqe_n;
const unsigned int cqe_cnt = cqe_n - 1;
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
@@ -427,9 +427,9 @@ mlx5_tx_dbrec(struct txq *txq)
 static inline void
 tx_prefetch_cqe(struct txq *txq, uint16_t ci)
 {
-   volatile struct mlx5_cqe64 *cqe;
+   volatile struct mlx5_cqe *cqe;

-   cqe = &(*txq->cqes)[ci & (txq->cqe_n - 1)].cqe64;
+   cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
rte_prefetch0(cqe);
 }

@@ -1272,8 +1272,8 @@ uint16_t
 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
struct rxq *rxq = dpdk_rxq;
-   const unsigned int cqe_cnt = rxq->cqe_n - 1;
const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
+   const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
const unsigned int sges_n = rxq->sges_n;
struct rte_mbuf *pkt = NULL;
struct rte_mbuf *seg = NULL;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 3ba3913..224614e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -110,13 +110,13 @@ struct rxq {
unsigned int vlan_strip:1; /* Enable VLAN stripping. */
unsigned int crc_present:1; /* CRC must be subtracted. */
unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
+   unsigned int cqe_n:4; /* Log 2 of CQ elements. */
unsigned int elts_n:4; /* Log 2 of Mbufs. */
unsigned int port_id:8;
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t rq_ci;
uint16_t cq_ci;
-   uint16_t cqe_n; /* Number of CQ elements. */
volatile struct mlx5_wqe_data_seg(*wqes)[];
volatile struct mlx5_cqe(*cqes)[];
struct rxq_zip zip; /* Compressed context. */
@@ -245,10 +245,10 @@ struct txq {
uint16_t elts_tail; /* First element awaiting completion. */
uint16_t elts_comp; /* Counter since last completion request. */
uint16_t cq_ci; /* Consumer index for completion queue. */
-   uint16_t cqe_n; /* Number of CQ elements. */
uint16_t wqe_ci; /* Consumer index for work queue. */
uint16_t wqe_n; /* Number of WQ elements. */
uint16_t elts_n:4; /* (*elts)[] length (in log2). */
+   uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
uint16_t bf_offset; /* Blueflame offset. */
uint16_t bf_buf_size; /* Blueflame size. */
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 9055016..6145b69 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -212,7 +212,7 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
  "it should be set to %u", RTE_CACHE_LINE_SIZE);
return EINVAL;
}
-   tmpl->txq.cqe_n = ibcq->cqe + 1;
+   tmpl->txq.cqe_n = log2above(ibcq->cqe);
tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
tmpl->txq.wqes =
(volatile struct mlx5_wqe64 (*)[])
-- 
2.1.4



[dpdk-dev] [PATCH v2 2/6] net/mlx5: reduce Tx and Rx structure size

2016-09-14 Thread Nelio Laranjeiro
The PMD only uses power-of-two numbers of descriptors; storing the count in
log2 reduces the size of the field that holds it.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_ethdev.c |  4 ++--
 drivers/net/mlx5/mlx5_rxq.c| 10 +-
 drivers/net/mlx5/mlx5_rxtx.c   | 10 +-
 drivers/net/mlx5/mlx5_rxtx.h   | 12 +++-
 drivers/net/mlx5/mlx5_txq.c|  6 +++---
 5 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 1ae80e5..137ea66 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -808,7 +808,7 @@ recover:
if (rehash)
ret = rxq_rehash(dev, rxq_ctrl);
else
-   ret = rxq_ctrl_setup(dev, rxq_ctrl, rxq->elts_n,
+   ret = rxq_ctrl_setup(dev, rxq_ctrl, 1 << rxq->elts_n,
 rxq_ctrl->socket, NULL, rxq->mp);
if (!ret)
continue;
@@ -1314,7 +1314,7 @@ mlx5_secondary_data_setup(struct priv *priv)
if (txq_ctrl != NULL) {
if (txq_ctrl_setup(priv->dev,
   primary_txq_ctrl,
-  primary_txq->elts_n,
+  1 << primary_txq->elts_n,
   primary_txq_ctrl->socket,
   NULL) == 0) {
txq_ctrl->txq.stats.idx =
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 44889d1..d9db368 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -723,7 +723,7 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
if (rxq_ctrl->rxq.elts == NULL)
return;

-   for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) {
+   for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) {
if ((*rxq_ctrl->rxq.elts)[i] != NULL)
rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
(*rxq_ctrl->rxq.elts)[i] = NULL;
@@ -807,7 +807,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 int
 rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 {
-   unsigned int elts_n = rxq_ctrl->rxq.elts_n;
+   unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
unsigned int i;
struct ibv_exp_wq_attr mod;
int err;
@@ -870,7 +870,7 @@ rxq_setup(struct rxq_ctrl *tmpl)
struct ibv_cq *ibcq = tmpl->cq;
struct mlx5_cq *cq = to_mxxx(cq, cq);
struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
-   struct rte_mbuf *(*elts)[tmpl->rxq.elts_n] =
+   struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] =
rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);

if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) {
@@ -924,7 +924,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl 
*rxq_ctrl,
.priv = priv,
.socket = socket,
.rxq = {
-   .elts_n = desc,
+   .elts_n = log2above(desc),
.mp = mp,
},
};
@@ -1148,7 +1148,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl 
*rxq_ctrl,
}
/* Reuse buffers from original queue if possible. */
if (rxq_ctrl->rxq.elts_n) {
-   assert(rxq_ctrl->rxq.elts_n == desc);
+   assert(1 << rxq_ctrl->rxq.elts_n == desc);
assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
} else
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5feeb3f..e132727 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -166,8 +166,8 @@ txq_complete(struct txq *txq) 
__attribute__((always_inline));
 static inline void
 txq_complete(struct txq *txq)
 {
-   const unsigned int elts_n = txq->elts_n;
const unsigned int cqe_n = txq->cqe_n;
+   const unsigned int elts_n = 1 << txq->elts_n;
const unsigned int cqe_cnt = cqe_n - 1;
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
@@ -468,7 +468,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
 {
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
-   const unsigned int elts_n = txq->elts_n;
+   const unsigned int elts_n = 1 << txq->elts_n;
unsigned int i = 0;
unsigned int j = 0;
unsigned int max;
@@ -680,7 +680,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, 

[dpdk-dev] [PATCH v2 1/6] net/mlx5: rework hardware structures

2016-09-14 Thread Nelio Laranjeiro
Rework Work Queue Element (aka WQE) structures to fit PMD needs.
A WQE is an aggregation of 16 bytes elements known as "data segments"
(aka dseg).
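
For reference, a compile-time sketch of the size relationships the rework
relies on; the DEMO_* names are stand-ins, and the sizes mirror the
MLX5_WQE_DWORD_SIZE/MLX5_WQE_SIZE/MLX5_WQE_DS() constants added below.

#include <assert.h>
#include <stdio.h>

#define DEMO_WQE_DWORD_SIZE 16                  /* one data segment (dseg) */
#define DEMO_WQE_SIZE (4 * DEMO_WQE_DWORD_SIZE) /* a 64-byte WQE */
/* Number of 16-byte segments needed to hold n bytes (rounded up). */
#define DEMO_WQE_DS(n) \
	(((n) + DEMO_WQE_DWORD_SIZE - 1) / DEMO_WQE_DWORD_SIZE)

int
main(void)
{
	/* A 64-byte WQE is exactly four 16-byte dseg building blocks. */
	static_assert(DEMO_WQE_SIZE == 64, "unexpected WQE size");
	printf("DS(100 bytes)=%d\n", DEMO_WQE_DS(100)); /* 7 segments */
	return 0;
}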

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_prm.h  |  70 ++
 drivers/net/mlx5/mlx5_rxtx.c | 167 ++-
 drivers/net/mlx5/mlx5_rxtx.h |   2 +-
 drivers/net/mlx5/mlx5_txq.c  |   4 +-
 4 files changed, 111 insertions(+), 132 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 5db219b..042562c 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -65,8 +65,15 @@
 /* Maximum number of packets a multi-packet WQE can handle. */
 #define MLX5_MPW_DSEG_MAX 5

-/* Room for inline data in regular work queue element. */
-#define MLX5_WQE64_INL_DATA 12
+/* WQE DWORD size */
+#define MLX5_WQE_DWORD_SIZE 16
+
+/* WQE size */
+#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
+
+/* Compute the number of DS. */
+#define MLX5_WQE_DS(n) \
+   (((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)

 /* Room for inline data in multi-packet WQE. */
 #define MLX5_MWQE64_INL_DATA 28
@@ -79,59 +86,26 @@ struct mlx5_wqe_eth_seg_small {
uint16_t mss;
uint32_t rsvd2;
uint16_t inline_hdr_sz;
+   uint8_t inline_hdr[2];
 };

-/* Regular WQE. */
-struct mlx5_wqe_regular {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
-   struct mlx5_wqe_eth_seg eseg;
-   struct mlx5_wqe_data_seg dseg;
-} __rte_aligned(64);
-
-/* Inline WQE. */
-struct mlx5_wqe_inl {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
-   struct mlx5_wqe_eth_seg eseg;
+struct mlx5_wqe_inl_small {
uint32_t byte_cnt;
-   uint8_t data[MLX5_WQE64_INL_DATA];
-} __rte_aligned(64);
+   uint8_t raw;
+};

-/* Multi-packet WQE. */
-struct mlx5_wqe_mpw {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
+/* Small common part of the WQE. */
+struct mlx5_wqe {
+   uint32_t ctrl[4];
struct mlx5_wqe_eth_seg_small eseg;
-   struct mlx5_wqe_data_seg dseg[2];
-} __rte_aligned(64);
+};

-/* Multi-packet WQE with inline. */
-struct mlx5_wqe_mpw_inl {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
-   struct mlx5_wqe_eth_seg_small eseg;
-   uint32_t byte_cnt;
-   uint8_t data[MLX5_MWQE64_INL_DATA];
+/* WQE. */
+struct mlx5_wqe64 {
+   struct mlx5_wqe hdr;
+   uint8_t raw[32];
 } __rte_aligned(64);

-/* Union of all WQE types. */
-union mlx5_wqe {
-   struct mlx5_wqe_regular wqe;
-   struct mlx5_wqe_inl inl;
-   struct mlx5_wqe_mpw mpw;
-   struct mlx5_wqe_mpw_inl mpw_inl;
-   uint8_t data[64];
-};
-
 /* MPW session status. */
 enum mlx5_mpw_state {
MLX5_MPW_STATE_OPENED,
@@ -145,7 +119,7 @@ struct mlx5_mpw {
unsigned int pkts_n;
unsigned int len;
unsigned int total_len;
-   volatile union mlx5_wqe *wqe;
+   volatile struct mlx5_wqe *wqe;
union {
volatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX];
volatile uint8_t *raw;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ecc76ad..5feeb3f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -173,7 +173,7 @@ txq_complete(struct txq *txq)
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
volatile struct mlx5_cqe64 *cqe = NULL;
-   volatile union mlx5_wqe *wqe;
+   volatile struct mlx5_wqe *wqe;

do {
volatile struct mlx5_cqe64 *tmp;
@@ -199,8 +199,8 @@ txq_complete(struct txq *txq)
} while (1);
if (unlikely(cqe == NULL))
return;
-   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)];
-   elts_tail = wqe->wqe.ctrl.data[3];
+   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+   elts_tail = wqe->ctrl[3];
assert(elts_tail < txq->wqe_n);
/* Free buffers. */
while (elts_free != elts_tail) {
@@ -302,33 +302,33 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *   Number of DS elements consumed.
  */
 static inline unsigned int
-mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
+mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
   struct rte_mbuf *buf, uint32_t length)
 {
-   uintptr_t raw = (uintptr_t)&wqe->wqe.eseg.inline_hdr_start;
+   uint8_t *raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
uint16_t ds;
-   uint16_t pkt_inline_sz = 16;
+   uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
struct mlx5_wqe_data_seg *

[dpdk-dev] [PATCH v2 0/6] net/mlx5: performance improvement

2016-09-14 Thread Nelio Laranjeiro
 - Rework structure elements to reduce their size.
 - Removes a second useless loop in Tx burst function.

This series should be applied on top of "net/mlx5: various fixes".

Changes in v2:

 - rework serie to apply it on top of "net/mlx5: various fixes".

Nelio Laranjeiro (6):
  net/mlx5: rework hardware structures
  net/mlx5: reduce Tx and Rx structure size
  net/mlx5: reduce Tx and Rx structure size
  net/mlx5: reduce Tx structure size
  net/mlx5: reduce Tx and Rx structure size
  net/mlx5: remove gather loop on segments

 drivers/net/mlx5/mlx5_ethdev.c |   4 +-
 drivers/net/mlx5/mlx5_prm.h|  70 ++-
 drivers/net/mlx5/mlx5_rxq.c|  12 +-
 drivers/net/mlx5/mlx5_rxtx.c   | 460 +
 drivers/net/mlx5/mlx5_rxtx.h   |  22 +-
 drivers/net/mlx5/mlx5_txq.c|  18 +-
 6 files changed, 286 insertions(+), 300 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH V2 8/8] net/mlx5: fix inline logic

2016-09-14 Thread Nelio Laranjeiro
To improve performance, the NIC expects large packets to be referenced
through a cache-aligned address; the old inline code could break this
assumption, which hurts performance.
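
The constraint, sketched with illustrative values: the amount of data copied
inline is bounded so that the address handed to the NIC for the remainder of
the packet stays cache-line aligned, mirroring the addr_end computation in
the patch.

#include <stdint.h>
#include <stdio.h>

#define DEMO_CACHE_LINE_SIZE 64

int
main(void)
{
	uintptr_t addr = 0x1010;       /* current payload pointer */
	unsigned int length = 300;     /* bytes left to send */
	unsigned int max_inline = 128; /* inline budget */

	/* Round the inline end down to a cache line so the pointer used for
	 * the rest of the packet is aligned. */
	uintptr_t addr_end = (addr + max_inline) &
			     ~(uintptr_t)(DEMO_CACHE_LINE_SIZE - 1);
	unsigned int copy_b = (addr_end - addr > length) ?
			      length : (unsigned int)(addr_end - addr);

	printf("inline %u bytes, next pointer 0x%lx\n",
	       copy_b, (unsigned long)(addr + copy_b));
	return 0;
}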

Fixes: 2a66cf378954 ("net/mlx5: support inline send")

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Vasily Philipov 
---
 drivers/net/mlx5/mlx5_ethdev.c |   4 -
 drivers/net/mlx5/mlx5_rxtx.c   | 422 ++---
 drivers/net/mlx5/mlx5_rxtx.h   |   3 +-
 drivers/net/mlx5/mlx5_txq.c|   9 +-
 4 files changed, 103 insertions(+), 335 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 47f323e..1ae80e5 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1398,10 +1398,6 @@ priv_select_tx_function(struct priv *priv)
} else if ((priv->sriov == 0) && priv->mps) {
priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
DEBUG("selected MPW TX function");
-   } else if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
-   priv->dev->tx_pkt_burst = mlx5_tx_burst_inline;
-   DEBUG("selected inline TX function (%u >= %u queues)",
- priv->txqs_n, priv->txqs_inline);
}
 }

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index c7e538f..ecc76ad 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -297,179 +297,99 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *   Buffer.
  * @param length
  *   Packet length.
- * @param lkey
- *   Memory region lkey.
+ *
+ * @return ds
+ *   Number of DS elements consumed.
  */
-static inline void
+static inline unsigned int
 mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
-  struct rte_mbuf *buf, uint32_t length, uint32_t lkey)
+  struct rte_mbuf *buf, uint32_t length)
 {
+   uintptr_t raw = (uintptr_t)&wqe->wqe.eseg.inline_hdr_start;
+   uint16_t ds;
+   uint16_t pkt_inline_sz = 16;
uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
+   struct mlx5_wqe_data_seg *dseg = NULL;

-   rte_mov16((uint8_t *)&wqe->wqe.eseg.inline_hdr_start,
- (uint8_t *)addr);
-   addr += 16;
+   assert(length >= 16);
+   /* Start the know and common part of the WQE structure. */
+   wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
+   wqe->wqe.ctrl.data[2] = 0;
+   wqe->wqe.ctrl.data[3] = 0;
+   wqe->wqe.eseg.rsvd0 = 0;
+   wqe->wqe.eseg.rsvd1 = 0;
+   wqe->wqe.eseg.mss = 0;
+   wqe->wqe.eseg.rsvd2 = 0;
+   /* Start by copying the Ethernet Header. */
+   rte_mov16((uint8_t *)raw, (uint8_t *)addr);
length -= 16;
-   /* Need to insert VLAN ? */
+   addr += 16;
+   /* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
uint32_t vlan = htonl(0x8100 | buf->vlan_tci);

-   memcpy((uint8_t *)&wqe->wqe.eseg.inline_hdr_start + 12,
+   memcpy((uint8_t *)(raw + 16 - sizeof(vlan)),
   &vlan, sizeof(vlan));
addr -= sizeof(vlan);
length += sizeof(vlan);
}
-   /* Write the WQE. */
-   wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-   wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
-   wqe->wqe.ctrl.data[2] = 0;
-   wqe->wqe.ctrl.data[3] = 0;
-   wqe->inl.eseg.rsvd0 = 0;
-   wqe->inl.eseg.rsvd1 = 0;
-   wqe->inl.eseg.mss = 0;
-   wqe->inl.eseg.rsvd2 = 0;
-   wqe->wqe.eseg.inline_hdr_sz = htons(16);
-   /* Store remaining data in data segment. */
-   wqe->wqe.dseg.byte_count = htonl(length);
-   wqe->wqe.dseg.lkey = lkey;
-   wqe->wqe.dseg.addr = htonll(addr);
-   /* Increment consumer index. */
-   ++txq->wqe_ci;
-}
-
-/**
- * Write a inline WQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
- * @param length
- *   Packet length.
- * @param lkey
- *   Memory region lkey.
- */
-static inline void
-mlx5_wqe_write_inline(struct txq *txq, volatile union mlx5_wqe *wqe,
- uintptr_t addr, uint32_t length)
-{
-   uint32_t size;
-   uint16_t wqe_cnt = txq->wqe_n - 1;
-   uint16_t wqe_ci = txq->wqe_ci + 1;
-
-   /* Copy the first 16 bytes into inline header. */
-   rte_memcpy((void *)(uintptr_t)wqe->inl.eseg.inline_hdr_start,
-  (void *)(uintptr_t)addr,
-  MLX5_ETH_INLINE_HEADER_SIZE);
-   addr += MLX5_ETH_INLINE_HEADER_SIZE;
-   length -= MLX5_ETH_INLINE_HEADER_SIZE;
-   size = 3 + ((4 + length + 15) / 16);
-   wqe->inl.by

[dpdk-dev] [PATCH V2 7/8] net/mlx5: re-factorize functions

2016-09-14 Thread Nelio Laranjeiro
Rework the logic of wqe_write() and wqe_write_vlan(), which are very
similar, to keep a single function.

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 98 ++--
 1 file changed, 22 insertions(+), 76 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5c39cbb..c7e538f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -293,8 +293,8 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *   Pointer to TX queue structure.
  * @param wqe
  *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
+ * @param buf
+ *   Buffer.
  * @param length
  *   Packet length.
  * @param lkey
@@ -302,54 +302,24 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  */
 static inline void
 mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
-  uintptr_t addr, uint32_t length, uint32_t lkey)
-{
-   wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-   wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
-   wqe->wqe.ctrl.data[2] = 0;
-   wqe->wqe.ctrl.data[3] = 0;
-   wqe->inl.eseg.rsvd0 = 0;
-   wqe->inl.eseg.rsvd1 = 0;
-   wqe->inl.eseg.mss = 0;
-   wqe->inl.eseg.rsvd2 = 0;
-   wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE);
-   /* Copy the first 16 bytes into inline header. */
-   rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
-  (uint8_t *)(uintptr_t)addr,
-  MLX5_ETH_INLINE_HEADER_SIZE);
-   addr += MLX5_ETH_INLINE_HEADER_SIZE;
-   length -= MLX5_ETH_INLINE_HEADER_SIZE;
-   /* Store remaining data in data segment. */
-   wqe->wqe.dseg.byte_count = htonl(length);
-   wqe->wqe.dseg.lkey = lkey;
-   wqe->wqe.dseg.addr = htonll(addr);
-   /* Increment consumer index. */
-   ++txq->wqe_ci;
-}
-
-/**
- * Write a regular WQE with VLAN.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
- * @param length
- *   Packet length.
- * @param lkey
- *   Memory region lkey.
- * @param vlan_tci
- *   VLAN field to insert in packet.
- */
-static inline void
-mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
-   uintptr_t addr, uint32_t length, uint32_t lkey,
-   uint16_t vlan_tci)
+  struct rte_mbuf *buf, uint32_t length, uint32_t lkey)
 {
-   uint32_t vlan = htonl(0x8100 | vlan_tci);
-
+   uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
+
+   rte_mov16((uint8_t *)&wqe->wqe.eseg.inline_hdr_start,
+ (uint8_t *)addr);
+   addr += 16;
+   length -= 16;
+   /* Need to insert VLAN ? */
+   if (buf->ol_flags & PKT_TX_VLAN_PKT) {
+   uint32_t vlan = htonl(0x8100 | buf->vlan_tci);
+
+   memcpy((uint8_t *)&wqe->wqe.eseg.inline_hdr_start + 12,
+  &vlan, sizeof(vlan));
+   addr -= sizeof(vlan);
+   length += sizeof(vlan);
+   }
+   /* Write the WQE. */
wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
wqe->wqe.ctrl.data[2] = 0;
@@ -358,20 +328,7 @@ mlx5_wqe_write_vlan(struct txq *txq, volatile union 
mlx5_wqe *wqe,
wqe->inl.eseg.rsvd1 = 0;
wqe->inl.eseg.mss = 0;
wqe->inl.eseg.rsvd2 = 0;
-   wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE);
-   /*
-* Copy 12 bytes of source & destination MAC address.
-* Copy 4 bytes of VLAN.
-* Copy 2 bytes of Ether type.
-*/
-   rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
-  (uint8_t *)(uintptr_t)addr, 12);
-   rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 12),
-  &vlan, sizeof(vlan));
-   rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 16),
-  (uint8_t *)((uintptr_t)addr + 12), 2);
-   addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
-   length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
+   wqe->wqe.eseg.inline_hdr_sz = htons(16);
/* Store remaining data in data segment. */
wqe->wqe.dseg.byte_count = htonl(length);
wqe->wqe.dseg.lkey = lkey;
@@ -612,7 +569,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
do {
struct rte_mbuf *buf = *(pkts++);
unsigned int elts_head_next;
-   uintptr_t addr;
uint32_t length;
uint32_t lkey;
unsigned int segs_n = buf->nb_segs;
@@ -634,8 +590,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkt

[dpdk-dev] [PATCH V2 6/8] net/mlx5: force inline for completion function

2016-09-14 Thread Nelio Laranjeiro
This function was supposed to be inlined, but was not because several
functions call it.  It should always be inlined to avoid external function
calls and to optimize code in the data path.
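
The pattern is the usual GCC/Clang idiom, sketched here on a dummy function
(not the driver's txq_complete()): the forward declaration carries the
attribute, the definition follows.

#include <stdio.h>

/* Forward declaration carries the attribute so calls are always inlined. */
static inline int demo_complete(int x) __attribute__((always_inline));

static inline int
demo_complete(int x)
{
	return x + 1; /* stands in for the completion processing */
}

int
main(void)
{
	printf("%d\n", demo_complete(41));
	return 0;
}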

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 3757366..5c39cbb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -152,6 +152,9 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
return 0;
 }

+static inline void
+txq_complete(struct txq *txq) __attribute__((always_inline));
+
 /**
  * Manage TX completions.
  *
@@ -160,7 +163,7 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
  * @param txq
  *   Pointer to TX queue structure.
  */
-static void
+static inline void
 txq_complete(struct txq *txq)
 {
const unsigned int elts_n = txq->elts_n;
-- 
2.1.4



[dpdk-dev] [PATCH V2 5/8] net/mlx5: fix support for flow director drop mode

2016-09-14 Thread Nelio Laranjeiro
From: Yaacov Hazan <yaac...@mellanox.com>

Packet rejection was routed to a polled queue.  This patch routes rejected
packets to a dummy queue which is not polled.

Fixes: 76f5c99e6840 ("mlx5: support flow director")

Signed-off-by: Yaacov Hazan 
Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
---
 doc/guides/nics/mlx5.rst |  3 ++-
 drivers/net/mlx5/mlx5.h  |  1 +
 drivers/net/mlx5/mlx5_fdir.c | 41 +++--
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 5c10cd3..8923173 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -84,7 +84,8 @@ Features
 - Promiscuous mode.
 - Multicast promiscuous mode.
 - Hardware checksum offloads.
-- Flow director (RTE_FDIR_MODE_PERFECT and RTE_FDIR_MODE_PERFECT_MAC_VLAN).
+- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
+  RTE_ETH_FDIR_REJECT).
 - Secondary process TX is supported.
 - KVM and VMware ESX SR-IOV modes are supported.

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index fa78623..8349e5b 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -134,6 +134,7 @@ struct priv {
unsigned int (*reta_idx)[]; /* RETA index table. */
unsigned int reta_idx_n; /* RETA index size. */
struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
+   struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
rte_spinlock_t lock; /* Lock for control functions. */
 };

diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 802669b..25c8c52 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -75,6 +75,7 @@ struct fdir_flow_desc {
 struct mlx5_fdir_filter {
LIST_ENTRY(mlx5_fdir_filter) next;
uint16_t queue; /* Queue assigned to if FDIR match. */
+   enum rte_eth_fdir_behavior behavior;
struct fdir_flow_desc desc;
struct ibv_exp_flow *flow;
 };
@@ -567,6 +568,33 @@ priv_get_fdir_queue(struct priv *priv, uint16_t idx)
 }

 /**
+ * Get the flow director drop queue. Create it if it does not exist.
+ *
+ * @param priv
+ *   Private structure.
+ *
+ * @return
+ *   Flow director drop queue on success, NULL otherwise.
+ */
+static struct fdir_queue *
+priv_get_fdir_drop_queue(struct priv *priv)
+{
+   struct fdir_queue *fdir_queue = priv->fdir_drop_queue;
+
+   if (fdir_queue == NULL) {
+   unsigned int socket = SOCKET_ID_ANY;
+
+   /* Select a known NUMA socket if possible. */
+   if (priv->rxqs_n && (*priv->rxqs)[0])
+   socket = container_of((*priv->rxqs)[0],
+ struct rxq_ctrl, rxq)->socket;
+   fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
+   priv->fdir_drop_queue = fdir_queue;
+   }
+   return fdir_queue;
+}
+
+/**
  * Enable flow director filter and create steering rules.
  *
  * @param priv
@@ -588,7 +616,11 @@ priv_fdir_filter_enable(struct priv *priv,
return 0;

/* Get fdir_queue for specific queue. */
-   fdir_queue = priv_get_fdir_queue(priv, mlx5_fdir_filter->queue);
+   if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
+   fdir_queue = priv_get_fdir_drop_queue(priv);
+   else
+   fdir_queue = priv_get_fdir_queue(priv,
+mlx5_fdir_filter->queue);

if (fdir_queue == NULL) {
ERROR("failed to create flow director rxq for queue %d",
@@ -707,6 +739,10 @@ priv_fdir_disable(struct priv *priv)
priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
rxq_ctrl->fdir_queue = NULL;
}
+   if (priv->fdir_drop_queue) {
+   priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
+   priv->fdir_drop_queue = NULL;
+   }
 }

 /**
@@ -807,8 +843,9 @@ priv_fdir_filter_add(struct priv *priv,
return err;
}

-   /* Set queue. */
+   /* Set action parameters. */
mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
+   mlx5_fdir_filter->behavior = fdir_filter->action.behavior;

/* Convert to mlx5 filter descriptor. */
fdir_filter_to_flow_desc(fdir_filter,
-- 
2.1.4



[dpdk-dev] [PATCH V2 4/8] net/mlx5: refactor allocation of flow director queues

2016-09-14 Thread Nelio Laranjeiro
From: Yaacov Hazan 

This is done to prepare support for drop queues, which are not related to
existing RX queues and need to be managed separately.

Signed-off-by: Yaacov Hazan 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5.h  |   1 +
 drivers/net/mlx5/mlx5_fdir.c | 229 ---
 drivers/net/mlx5/mlx5_rxq.c  |   2 +
 drivers/net/mlx5/mlx5_rxtx.h |   4 +-
 4 files changed, 156 insertions(+), 80 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3a86609..fa78623 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -257,6 +257,7 @@ void mlx5_dev_stop(struct rte_eth_dev *);

 /* mlx5_fdir.c */

+void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *);
 int fdir_init_filters_list(struct priv *);
 void priv_fdir_delete_filters_list(struct priv *);
 void priv_fdir_disable(struct priv *);
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 8207573..802669b 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -400,6 +400,145 @@ create_flow:
 }

 /**
+ * Destroy a flow director queue.
+ *
+ * @param fdir_queue
+ *   Flow director queue to be destroyed.
+ */
+void
+priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
+{
+   struct mlx5_fdir_filter *fdir_filter;
+
+   /* Disable filter flows still applying to this queue. */
+   LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
+   unsigned int idx = fdir_filter->queue;
+   struct rxq_ctrl *rxq_ctrl =
+   container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
+
+   assert(idx < priv->rxqs_n);
+   if (fdir_queue == rxq_ctrl->fdir_queue &&
+   fdir_filter->flow != NULL) {
+   claim_zero(ibv_exp_destroy_flow(fdir_filter->flow));
+   fdir_filter->flow = NULL;
+   }
+   }
+   assert(fdir_queue->qp);
+   claim_zero(ibv_destroy_qp(fdir_queue->qp));
+   assert(fdir_queue->ind_table);
+   claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table));
+   if (fdir_queue->wq)
+   claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
+   if (fdir_queue->cq)
+   claim_zero(ibv_destroy_cq(fdir_queue->cq));
+#ifndef NDEBUG
+   memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
+#endif
+   rte_free(fdir_queue);
+}
+
+/**
+ * Create a flow director queue.
+ *
+ * @param priv
+ *   Private structure.
+ * @param wq
+ *   Work queue to route matched packets to, NULL if one needs to
+ *   be created.
+ *
+ * @return
+ *   Related flow director queue on success, NULL otherwise.
+ */
+static struct fdir_queue *
+priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq,
+  unsigned int socket)
+{
+   struct fdir_queue *fdir_queue;
+
+   fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
+  0, socket);
+   if (!fdir_queue) {
+   ERROR("cannot allocate flow director queue");
+   return NULL;
+   }
+   assert(priv->pd);
+   assert(priv->ctx);
+   if (!wq) {
+   fdir_queue->cq = ibv_exp_create_cq(
+   priv->ctx, 1, NULL, NULL, 0,
+   &(struct ibv_exp_cq_init_attr){
+   .comp_mask = 0,
+   });
+   if (!fdir_queue->cq) {
+   ERROR("cannot create flow director CQ");
+   goto error;
+   }
+   fdir_queue->wq = ibv_exp_create_wq(
+   priv->ctx,
+   &(struct ibv_exp_wq_init_attr){
+   .wq_type = IBV_EXP_WQT_RQ,
+   .max_recv_wr = 1,
+   .max_recv_sge = 1,
+   .pd = priv->pd,
+   .cq = fdir_queue->cq,
+   });
+   if (!fdir_queue->wq) {
+   ERROR("cannot create flow director WQ");
+   goto error;
+   }
+   wq = fdir_queue->wq;
+   }
+   fdir_queue->ind_table = ibv_exp_create_rwq_ind_table(
+   priv->ctx,
+   &(struct ibv_exp_rwq_ind_table_init_attr){
+   .pd = priv->pd,
+   .log_ind_tbl_size = 0,
+   .ind_tbl = &wq,
+   .comp_mask = 0,
+   });
+   if (!fdir_queue->ind_table) {
+   ERROR("cannot create flow director indirection table");
+   goto error;
+   }
+   fdir_queue->qp = ibv_exp_create_qp(
+   priv->ctx,
+   &(struct ibv_exp_qp_init_attr){
+   .qp_type = IBV_QPT_RAW_PACKET,
+   .comp_mask =
+ 

[dpdk-dev] [PATCH V2 3/8] net/mlx5: fix removing VLAN filter

2016-09-14 Thread Nelio Laranjeiro
From: Raslan Darawsheh 

memmove was given the number of remaining elements as its byte count, while
it should be given that count multiplied by the size of each element.
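
A minimal standalone sketch of the correct call (array and types are
illustrative, not the driver's):

#include <string.h>
#include <stdint.h>

/* Remove element i from a packed array of n uint16_t entries.
 * memmove() takes a size in bytes, so the number of remaining
 * elements must be multiplied by the size of one element. */
static void
remove_entry(uint16_t *arr, unsigned int n, unsigned int i)
{
        memmove(&arr[i], &arr[i + 1], sizeof(arr[0]) * (n - 1 - i));
        arr[n - 1] = 0; /* clear the vacated slot */
}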

Fixes: e9086978 ("mlx5: support VLAN filtering")

Signed-off-by: Raslan Darawsheh 
---
 drivers/net/mlx5/mlx5_vlan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 4719e69..fb730e5 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -87,7 +87,8 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, 
int on)
--priv->vlan_filter_n;
memmove(&priv->vlan_filter[i],
&priv->vlan_filter[i + 1],
-   priv->vlan_filter_n - i);
+   sizeof(priv->vlan_filter[i]) *
+   (priv->vlan_filter_n - i));
priv->vlan_filter[priv->vlan_filter_n] = 0;
} else {
assert(i == priv->vlan_filter_n);
-- 
2.1.4



[dpdk-dev] [PATCH V2 2/8] net/mlx5: fix Rx VLAN offload capability report

2016-09-14 Thread Nelio Laranjeiro
From: Adrien Mazarguil 

This capability is implemented but not reported.

Fixes: f3db9489188a ("mlx5: support Rx VLAN stripping")

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 130e15d..47f323e 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -583,7 +583,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *info)
 (DEV_RX_OFFLOAD_IPV4_CKSUM |
  DEV_RX_OFFLOAD_UDP_CKSUM |
  DEV_RX_OFFLOAD_TCP_CKSUM) :
-0);
+0) |
+   (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0);
if (!priv->mps)
info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
if (priv->hw_csum)
-- 
2.1.4



[dpdk-dev] [PATCH V2 1/8] net/mlx5: fix inconsistent return value in Flow Director

2016-09-14 Thread Nelio Laranjeiro
From: Yaacov Hazan 

In DPDK, the return value on failure is a negative errno. Internal
functions in the mlx5 driver return positive values, so the value must
be negated before being returned to the DPDK layer.
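
A short sketch of the convention (the function and its dispatch are
illustrative, not the driver's):

#include <errno.h>

/* Internal mlx5 helpers return positive errno values; the ethdev
 * entry point negates them so DPDK sees a negative errno. */
static int
filter_ctrl_sketch(int arg_ok)
{
        int ret = EINVAL;       /* positive errno used internally */

        if (arg_ok)
                ret = 0;        /* handler succeeded */
        return -ret;            /* negative errno towards DPDK */
}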

Fixes: 76f5c99 ("mlx5: support flow director")

Signed-off-by: Yaacov Hazan 
---
 drivers/net/mlx5/mlx5_fdir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 73eb00e..8207573 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -955,7 +955,7 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 enum rte_filter_op filter_op,
 void *arg)
 {
-   int ret = -EINVAL;
+   int ret = EINVAL;
struct priv *priv = dev->data->dev_private;

switch (filter_type) {
@@ -970,5 +970,5 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
break;
}

-   return ret;
+   return -ret;
 }
-- 
2.1.4



[dpdk-dev] [PATCH V2 0/8] net/mlx5: various fixes

2016-09-14 Thread Nelio Laranjeiro
 - Flow director
 - Rx Capabilities
 - Inline

Changes in V2:

 - Fix a compilation error.

Adrien Mazarguil (1):
  net/mlx5: fix Rx VLAN offload capability report

Nelio Laranjeiro (3):
  net/mlx5: force inline for completion function
  net/mlx5: re-factorize functions
  net/mlx5: fix inline logic

Raslan Darawsheh (1):
  net/mlx5: fix removing VLAN filter

Yaacov Hazan (3):
  net/mlx5: fix inconsistent return value in Flow Director
  net/mlx5: refactor allocation of flow director queues
  net/mlx5: fix support for flow director drop mode

 doc/guides/nics/mlx5.rst   |   3 +-
 drivers/net/mlx5/mlx5.h|   2 +
 drivers/net/mlx5/mlx5_ethdev.c |   7 +-
 drivers/net/mlx5/mlx5_fdir.c   | 270 +++---
 drivers/net/mlx5/mlx5_rxq.c|   2 +
 drivers/net/mlx5/mlx5_rxtx.c   | 497 +
 drivers/net/mlx5/mlx5_rxtx.h   |   7 +-
 drivers/net/mlx5/mlx5_txq.c|   9 +-
 drivers/net/mlx5/mlx5_vlan.c   |   3 +-
 9 files changed, 317 insertions(+), 483 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH] net/mlx5: return RSS hash result in mbuf

2016-09-14 Thread Nelio Laranjeiro
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxq.c  | 1 +
 drivers/net/mlx5/mlx5_rxtx.c | 6 +-
 drivers/net/mlx5/mlx5_rxtx.h | 2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index f6f4315..65c264b 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -926,6 +926,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl 
*rxq_ctrl,
.rxq = {
.elts_n = log2above(desc),
.mp = mp,
+   .rss_hash = !!(priv->rxqs_n > 1),
},
};
struct ibv_exp_wq_attr mod;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b91b644..17ae5e4 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1338,12 +1338,16 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
/* Update packet information. */
pkt->packet_type = 0;
pkt->ol_flags = 0;
+   if (rxq->rss_hash) {
+   pkt->hash.rss = ntohl(cqe->rx_hash_res);
+   pkt->ol_flags = PKT_RX_RSS_HASH;
+   }
if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
rxq->crc_present) {
if (rxq->csum) {
pkt->packet_type =
rxq_cq_to_pkt_type(cqe);
-   pkt->ol_flags =
+   pkt->ol_flags |=
rxq_cq_to_ol_flags(rxq, cqe);
}
if (cqe->l4_hdr_type_etc &
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 9828aef..e813f38 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -113,6 +113,8 @@ struct rxq {
unsigned int cqe_n:4; /* Log 2 of CQ elements. */
unsigned int elts_n:4; /* Log 2 of Mbufs. */
unsigned int port_id:8;
+   unsigned int rss_hash:1; /* RSS hash result is enabled. */
+   unsigned int :9; /* Remaining bits. */
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t rq_ci;
-- 
2.1.4



[dpdk-dev] [PATCH] net/mlx5: return RSS hash result in mbuf

2016-09-14 Thread Nelio Laranjeiro
Fill RSS hash result in mbuf.

To be applied on top of 
 "[PATCH 0/6] net/mlx5: performance improvement"

Nelio Laranjeiro (1):
  net/mlx5: return RSS hash result in mbuf

 drivers/net/mlx5/mlx5_rxq.c  | 1 +
 drivers/net/mlx5/mlx5_rxtx.c | 6 +-
 drivers/net/mlx5/mlx5_rxtx.h | 2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)

-- 
2.1.4



[dpdk-dev] [PATCH 6/6] net/mlx5: remove gather loop on segments

2016-09-07 Thread Nelio Laranjeiro
The Tx function used a double loop to send segmented packets; a single
loop is enough.
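
A hedged sketch of the idea (not the driver's actual descriptor layout):
walk the mbuf chain once and emit one data segment per mbuf segment.

#include <rte_mbuf.h>

struct dseg_sketch {    /* illustrative 16-byte data segment */
        uint64_t addr;
        uint32_t len;
        uint32_t lkey;
};

/* Fill one data segment per mbuf segment in a single pass. */
static unsigned int
gather_segments(struct rte_mbuf *buf, struct dseg_sketch *dseg,
                unsigned int max_segs)
{
        unsigned int n = 0;

        for (; buf != NULL && n != max_segs; buf = buf->next, ++n) {
                dseg[n].addr = rte_pktmbuf_mtod(buf, uintptr_t);
                dseg[n].len = rte_pktmbuf_data_len(buf);
                dseg[n].lkey = 0;       /* memory key lookup omitted */
        }
        return n;
}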

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Vasily Philipov 
---
 drivers/net/mlx5/mlx5_rxtx.c | 312 ++-
 1 file changed, 158 insertions(+), 154 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 9d00ddc..b91b644 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -288,112 +288,6 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
 }

 /**
- * Write a regular WQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param buf
- *   Buffer.
- * @param length
- *   Packet length.
- *
- * @return ds
- *   Number of DS elements consumed.
- */
-static inline unsigned int
-mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
-  struct rte_mbuf *buf, uint32_t length)
-{
-   uint8_t *raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
-   uint16_t ds;
-   uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
-   uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
-   struct mlx5_wqe_data_seg *dseg = NULL;
-
-   assert(length >= MLX5_WQE_DWORD_SIZE);
-   /* Start the know and common part of the WQE structure. */
-   wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-   wqe->ctrl[2] = 0;
-   wqe->ctrl[3] = 0;
-   wqe->eseg.rsvd0 = 0;
-   wqe->eseg.rsvd1 = 0;
-   wqe->eseg.mss = 0;
-   wqe->eseg.rsvd2 = 0;
-   /* Start by copying the Ethernet Header. */
-   rte_mov16((uint8_t *)raw, (uint8_t *)addr);
-   length -= MLX5_WQE_DWORD_SIZE;
-   addr += MLX5_WQE_DWORD_SIZE;
-   /* Replace the Ethernet type by the VLAN if necessary. */
-   if (buf->ol_flags & PKT_TX_VLAN_PKT) {
-   uint32_t vlan = htonl(0x8100 | buf->vlan_tci);
-
-   memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - sizeof(vlan)),
-  &vlan, sizeof(vlan));
-   addr -= sizeof(vlan);
-   length += sizeof(vlan);
-   }
-   /* Inline if enough room. */
-   if (txq->max_inline != 0) {
-   uintptr_t end = (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
-   uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
-   uint16_t room;
-
-   raw += MLX5_WQE_DWORD_SIZE;
-   room = end - (uintptr_t)raw;
-   if (room > max_inline) {
-   uintptr_t addr_end = (addr + max_inline) &
-   ~(RTE_CACHE_LINE_SIZE - 1);
-   uint16_t copy_b = ((addr_end - addr) > length) ?
- length :
- (addr_end - addr);
-
-   rte_memcpy((void *)raw, (void *)addr, copy_b);
-   addr += copy_b;
-   length -= copy_b;
-   pkt_inline_sz += copy_b;
-   /* Sanity check. */
-   assert(addr <= addr_end);
-   }
-   /* Store the inlined packet size in the WQE. */
-   wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
-   /*
-* 2 DWORDs consumed by the WQE header + 1 DSEG +
-* the size of the inline part of the packet.
-*/
-   ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
-   if (length > 0) {
-   dseg = (struct mlx5_wqe_data_seg *)
-   ((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
-   if ((uintptr_t)dseg >= end)
-   dseg = (struct mlx5_wqe_data_seg *)
-   ((uintptr_t)&(*txq->wqes)[0]);
-   goto use_dseg;
-   }
-   } else {
-   /* Add the remaining packet as a simple ds. */
-   ds = 3;
-   /*
-* No inline has been done in the packet, only the Ethernet
-* Header as been stored.
-*/
-   wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
-   dseg = (struct mlx5_wqe_data_seg *)
-   ((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
-use_dseg:
-   *dseg = (struct mlx5_wqe_data_seg) {
-   .addr = htonll(addr),
-   .byte_count = htonl(length),
-   .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
-   };
-   ++ds;
-   }
-   wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
-   return ds;
-}
-
-/**
  * Ring TX queue doorbell.
  *
  * @param txq
@@ -475,6 +369,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
unsigned int

[dpdk-dev] [PATCH 5/6] net/mlx5: reduce Tx and Rx structure size

2016-09-07 Thread Nelio Laranjeiro
The PMD only uses power-of-two numbers of Work Queue Elements, so storing
that number as its log2 shrinks the field needed to hold it.
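
The idea, as a small sketch (field names are illustrative): keep log2(n)
in a 4-bit field and rebuild the count or wrap-around mask on use.

#include <stdint.h>

struct ring_sketch {
        uint16_t wqe_n:4;       /* log2 of the number of WQ elements */
};

static inline unsigned int
ring_size(const struct ring_sketch *r)
{
        return 1u << r->wqe_n;          /* number of elements */
}

static inline unsigned int
ring_mask(const struct ring_sketch *r)
{
        return (1u << r->wqe_n) - 1;    /* mask for index wrap-around */
}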

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 23 ---
 drivers/net/mlx5/mlx5_rxtx.h |  2 +-
 drivers/net/mlx5/mlx5_txq.c  |  4 ++--
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 214922b..9d00ddc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -199,9 +199,10 @@ txq_complete(struct txq *txq)
} while (1);
if (unlikely(cqe == NULL))
return;
-   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) &
+   ((1 << txq->wqe_n) - 1)].hdr;
elts_tail = wqe->ctrl[3];
-   assert(elts_tail < txq->wqe_n);
+   assert(elts_tail < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
struct rte_mbuf *elt = (*txq->elts)[elts_free];
@@ -335,7 +336,7 @@ mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe 
*wqe,
}
/* Inline if enough room. */
if (txq->max_inline != 0) {
-   uintptr_t end = (uintptr_t)&(*txq->wqes)[txq->wqe_n];
+   uintptr_t end = (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
uint16_t room;

@@ -446,7 +447,7 @@ tx_prefetch_wqe(struct txq *txq, uint16_t ci)
 {
volatile struct mlx5_wqe64 *wqe;

-   wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
+   wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
rte_prefetch0(wqe);
 }

@@ -504,7 +505,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
max -= segs_n;
--pkts_n;
elts_head_next = (elts_head + 1) & (elts_n - 1);
-   wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)].hdr;
+   wqe = &(*txq->wqes)[txq->wqe_ci & ((1 << txq->wqe_n) - 1)].hdr;
tx_prefetch_wqe(txq, txq->wqe_ci);
tx_prefetch_wqe(txq, txq->wqe_ci + 1);
if (pkts_n)
@@ -540,7 +541,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE)))
dseg = (volatile void *)
&(*txq->wqes)[txq->wqe_ci++ &
- (txq->wqe_n - 1)];
+ ((1 << txq->wqe_n) - 1)];
else
++dseg;
++ds;
@@ -607,10 +608,10 @@ skip_segs:
 static inline void
 mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
-   uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+   uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
(volatile struct mlx5_wqe_data_seg (*)[])
-   (uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)];
+   (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)];

mpw->state = MLX5_MPW_STATE_OPENED;
mpw->pkts_n = 0;
@@ -815,7 +816,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
 static inline void
 mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
-   uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+   uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
struct mlx5_wqe_inl_small *inl;

mpw->state = MLX5_MPW_INL_STATE_OPENED;
@@ -1000,7 +1001,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf 
**pkts,
addr = rte_pktmbuf_mtod(buf, uintptr_t);
(*txq->elts)[elts_head] = buf;
/* Maximum number of bytes before wrapping. */
-   max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] -
+   max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] -
   (uintptr_t)mpw.data.raw);
if (length > max) {
rte_memcpy((void *)(uintptr_t)mpw.data.raw,
@@ -1019,7 +1020,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf 
**pkts,
mpw.data.raw += length;
}
if ((uintptr_t)mpw.data.

[dpdk-dev] [PATCH 4/6] net/mlx5: reduce Tx structure size

2016-09-07 Thread Nelio Laranjeiro
The Blue Flame buffer is allocated with a power-of-two size, so its size
can be stored as a log2 value to reduce the structure size.

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 2 +-
 drivers/net/mlx5/mlx5_rxtx.h | 2 +-
 drivers/net/mlx5/mlx5_txq.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 4f28aa9..214922b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -413,7 +413,7 @@ mlx5_tx_dbrec(struct txq *txq)
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
rte_mov16(dst, (uint8_t *)data);
-   txq->bf_offset ^= txq->bf_buf_size;
+   txq->bf_offset ^= (1 << txq->bf_buf_size);
 }

 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 224614e..3dca8ca 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -249,8 +249,8 @@ struct txq {
uint16_t wqe_n; /* Number of WQ elements. */
uint16_t elts_n:4; /* (*elts)[] length (in log2). */
uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
+   uint16_t bf_buf_size:4; /* Log2 Blueflame size. */
uint16_t bf_offset; /* Blueflame offset. */
-   uint16_t bf_buf_size; /* Blueflame size. */
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
uint32_t qp_num_8s; /* QP number shifted by 8. */
volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 6145b69..9919e37 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -221,7 +221,7 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
tmpl->txq.bf_reg = qp->gen_data.bf->reg;
tmpl->txq.bf_offset = qp->gen_data.bf->offset;
-   tmpl->txq.bf_buf_size = qp->gen_data.bf->buf_size;
+   tmpl->txq.bf_buf_size = log2above(qp->gen_data.bf->buf_size);
tmpl->txq.cq_db = cq->dbrec;
tmpl->txq.cqes =
(volatile struct mlx5_cqe (*)[])
-- 
2.1.4



[dpdk-dev] [PATCH 3/6] net/mlx5: reduce Tx and Rx structure size

2016-09-07 Thread Nelio Laranjeiro
The PMD only uses power-of-two numbers of Completion Queue Elements, so
storing that number as its log2 shrinks the field needed to hold it.

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxq.c  | 2 +-
 drivers/net/mlx5/mlx5_rxtx.c | 8 
 drivers/net/mlx5/mlx5_rxtx.h | 4 ++--
 drivers/net/mlx5/mlx5_txq.c  | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d9db368..f6f4315 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -881,7 +881,7 @@ rxq_setup(struct rxq_ctrl *tmpl)
if (elts == NULL)
return ENOMEM;
tmpl->rxq.rq_db = rwq->rq.db;
-   tmpl->rxq.cqe_n = ibcq->cqe + 1;
+   tmpl->rxq.cqe_n = log2above(ibcq->cqe);
tmpl->rxq.cq_ci = 0;
tmpl->rxq.rq_ci = 0;
tmpl->rxq.cq_db = cq->dbrec;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index e132727..4f28aa9 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -166,8 +166,8 @@ txq_complete(struct txq *txq) 
__attribute__((always_inline));
 static inline void
 txq_complete(struct txq *txq)
 {
-   const unsigned int cqe_n = txq->cqe_n;
const unsigned int elts_n = 1 << txq->elts_n;
+   const unsigned int cqe_n = 1 << txq->cqe_n;
const unsigned int cqe_cnt = cqe_n - 1;
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
@@ -427,9 +427,9 @@ mlx5_tx_dbrec(struct txq *txq)
 static inline void
 tx_prefetch_cqe(struct txq *txq, uint16_t ci)
 {
-   volatile struct mlx5_cqe64 *cqe;
+   volatile struct mlx5_cqe *cqe;

-   cqe = &(*txq->cqes)[ci & (txq->cqe_n - 1)].cqe64;
+   cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
rte_prefetch0(cqe);
 }

@@ -1272,8 +1272,8 @@ uint16_t
 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
struct rxq *rxq = dpdk_rxq;
-   const unsigned int cqe_cnt = rxq->cqe_n - 1;
const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
+   const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
const unsigned int sges_n = rxq->sges_n;
struct rte_mbuf *pkt = NULL;
struct rte_mbuf *seg = NULL;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 3ba3913..224614e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -110,13 +110,13 @@ struct rxq {
unsigned int vlan_strip:1; /* Enable VLAN stripping. */
unsigned int crc_present:1; /* CRC must be subtracted. */
unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
+   unsigned int cqe_n:4; /* Log 2 of CQ elements. */
unsigned int elts_n:4; /* Log 2 of Mbufs. */
unsigned int port_id:8;
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t rq_ci;
uint16_t cq_ci;
-   uint16_t cqe_n; /* Number of CQ elements. */
volatile struct mlx5_wqe_data_seg(*wqes)[];
volatile struct mlx5_cqe(*cqes)[];
struct rxq_zip zip; /* Compressed context. */
@@ -245,10 +245,10 @@ struct txq {
uint16_t elts_tail; /* First element awaiting completion. */
uint16_t elts_comp; /* Counter since last completion request. */
uint16_t cq_ci; /* Consumer index for completion queue. */
-   uint16_t cqe_n; /* Number of CQ elements. */
uint16_t wqe_ci; /* Consumer index for work queue. */
uint16_t wqe_n; /* Number of WQ elements. */
uint16_t elts_n:4; /* (*elts)[] length (in log2). */
+   uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
uint16_t bf_offset; /* Blueflame offset. */
uint16_t bf_buf_size; /* Blueflame size. */
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 9055016..6145b69 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -212,7 +212,7 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
  "it should be set to %u", RTE_CACHE_LINE_SIZE);
return EINVAL;
}
-   tmpl->txq.cqe_n = ibcq->cqe + 1;
+   tmpl->txq.cqe_n = log2above(ibcq->cqe);
tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
tmpl->txq.wqes =
(volatile struct mlx5_wqe64 (*)[])
-- 
2.1.4



[dpdk-dev] [PATCH 2/6] net/mlx5: reduce Tx and Rx structure size

2016-09-07 Thread Nelio Laranjeiro
The PMD only uses power-of-two numbers of descriptors, so storing that
number as its log2 shrinks the field needed to hold it.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_ethdev.c |  4 ++--
 drivers/net/mlx5/mlx5_rxq.c| 10 +-
 drivers/net/mlx5/mlx5_rxtx.c   | 10 +-
 drivers/net/mlx5/mlx5_rxtx.h   | 12 +++-
 drivers/net/mlx5/mlx5_txq.c|  6 +++---
 5 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 1ae80e5..137ea66 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -808,7 +808,7 @@ recover:
if (rehash)
ret = rxq_rehash(dev, rxq_ctrl);
else
-   ret = rxq_ctrl_setup(dev, rxq_ctrl, rxq->elts_n,
+   ret = rxq_ctrl_setup(dev, rxq_ctrl, 1 << rxq->elts_n,
 rxq_ctrl->socket, NULL, rxq->mp);
if (!ret)
continue;
@@ -1314,7 +1314,7 @@ mlx5_secondary_data_setup(struct priv *priv)
if (txq_ctrl != NULL) {
if (txq_ctrl_setup(priv->dev,
   primary_txq_ctrl,
-  primary_txq->elts_n,
+  1 << primary_txq->elts_n,
   primary_txq_ctrl->socket,
   NULL) == 0) {
txq_ctrl->txq.stats.idx =
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 44889d1..d9db368 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -723,7 +723,7 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
if (rxq_ctrl->rxq.elts == NULL)
return;

-   for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) {
+   for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) {
if ((*rxq_ctrl->rxq.elts)[i] != NULL)
rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
(*rxq_ctrl->rxq.elts)[i] = NULL;
@@ -807,7 +807,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 int
 rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 {
-   unsigned int elts_n = rxq_ctrl->rxq.elts_n;
+   unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
unsigned int i;
struct ibv_exp_wq_attr mod;
int err;
@@ -870,7 +870,7 @@ rxq_setup(struct rxq_ctrl *tmpl)
struct ibv_cq *ibcq = tmpl->cq;
struct mlx5_cq *cq = to_mxxx(cq, cq);
struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
-   struct rte_mbuf *(*elts)[tmpl->rxq.elts_n] =
+   struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] =
rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);

if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) {
@@ -924,7 +924,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl 
*rxq_ctrl,
.priv = priv,
.socket = socket,
.rxq = {
-   .elts_n = desc,
+   .elts_n = log2above(desc),
.mp = mp,
},
};
@@ -1148,7 +1148,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl 
*rxq_ctrl,
}
/* Reuse buffers from original queue if possible. */
if (rxq_ctrl->rxq.elts_n) {
-   assert(rxq_ctrl->rxq.elts_n == desc);
+   assert(1 << rxq_ctrl->rxq.elts_n == desc);
assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
} else
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5feeb3f..e132727 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -166,8 +166,8 @@ txq_complete(struct txq *txq) 
__attribute__((always_inline));
 static inline void
 txq_complete(struct txq *txq)
 {
-   const unsigned int elts_n = txq->elts_n;
const unsigned int cqe_n = txq->cqe_n;
+   const unsigned int elts_n = 1 << txq->elts_n;
const unsigned int cqe_cnt = cqe_n - 1;
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
@@ -468,7 +468,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
 {
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
-   const unsigned int elts_n = txq->elts_n;
+   const unsigned int elts_n = 1 << txq->elts_n;
unsigned int i = 0;
unsigned int j = 0;
unsigned int max;
@@ -680,7 +680,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, 

[dpdk-dev] [PATCH 1/6] net/mlx5: rework hardware structures

2016-09-07 Thread Nelio Laranjeiro
Rework the Work Queue Element (aka WQE) structures to fit PMD needs.
A WQE is an aggregation of 16-byte elements known as "data segments"
(aka dsegs).
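
For reference, a worked example of the data-segment arithmetic mirrored
by MLX5_WQE_DS() below (names shortened here):

/* Each dseg is 16 bytes; round a byte count up to 16-byte units. */
#define DWORD_SIZE 16
#define WQE_DS(n) (((n) + DWORD_SIZE - 1) / DWORD_SIZE)

/* WQE_DS(16) == 1, WQE_DS(17) == 2, WQE_DS(60) == 4. */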

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_prm.h  |  70 ++
 drivers/net/mlx5/mlx5_rxtx.c | 165 ++-
 drivers/net/mlx5/mlx5_rxtx.h |   2 +-
 drivers/net/mlx5/mlx5_txq.c  |   4 +-
 4 files changed, 110 insertions(+), 131 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 5db219b..042562c 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -65,8 +65,15 @@
 /* Maximum number of packets a multi-packet WQE can handle. */
 #define MLX5_MPW_DSEG_MAX 5

-/* Room for inline data in regular work queue element. */
-#define MLX5_WQE64_INL_DATA 12
+/* WQE DWORD size */
+#define MLX5_WQE_DWORD_SIZE 16
+
+/* WQE size */
+#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
+
+/* Compute the number of DS. */
+#define MLX5_WQE_DS(n) \
+   (((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)

 /* Room for inline data in multi-packet WQE. */
 #define MLX5_MWQE64_INL_DATA 28
@@ -79,59 +86,26 @@ struct mlx5_wqe_eth_seg_small {
uint16_t mss;
uint32_t rsvd2;
uint16_t inline_hdr_sz;
+   uint8_t inline_hdr[2];
 };

-/* Regular WQE. */
-struct mlx5_wqe_regular {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
-   struct mlx5_wqe_eth_seg eseg;
-   struct mlx5_wqe_data_seg dseg;
-} __rte_aligned(64);
-
-/* Inline WQE. */
-struct mlx5_wqe_inl {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
-   struct mlx5_wqe_eth_seg eseg;
+struct mlx5_wqe_inl_small {
uint32_t byte_cnt;
-   uint8_t data[MLX5_WQE64_INL_DATA];
-} __rte_aligned(64);
+   uint8_t raw;
+};

-/* Multi-packet WQE. */
-struct mlx5_wqe_mpw {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
+/* Small common part of the WQE. */
+struct mlx5_wqe {
+   uint32_t ctrl[4];
struct mlx5_wqe_eth_seg_small eseg;
-   struct mlx5_wqe_data_seg dseg[2];
-} __rte_aligned(64);
+};

-/* Multi-packet WQE with inline. */
-struct mlx5_wqe_mpw_inl {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
-   struct mlx5_wqe_eth_seg_small eseg;
-   uint32_t byte_cnt;
-   uint8_t data[MLX5_MWQE64_INL_DATA];
+/* WQE. */
+struct mlx5_wqe64 {
+   struct mlx5_wqe hdr;
+   uint8_t raw[32];
 } __rte_aligned(64);

-/* Union of all WQE types. */
-union mlx5_wqe {
-   struct mlx5_wqe_regular wqe;
-   struct mlx5_wqe_inl inl;
-   struct mlx5_wqe_mpw mpw;
-   struct mlx5_wqe_mpw_inl mpw_inl;
-   uint8_t data[64];
-};
-
 /* MPW session status. */
 enum mlx5_mpw_state {
MLX5_MPW_STATE_OPENED,
@@ -145,7 +119,7 @@ struct mlx5_mpw {
unsigned int pkts_n;
unsigned int len;
unsigned int total_len;
-   volatile union mlx5_wqe *wqe;
+   volatile struct mlx5_wqe *wqe;
union {
volatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX];
volatile uint8_t *raw;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 67e0f37..5feeb3f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -173,7 +173,7 @@ txq_complete(struct txq *txq)
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
volatile struct mlx5_cqe64 *cqe = NULL;
-   volatile union mlx5_wqe *wqe;
+   volatile struct mlx5_wqe *wqe;

do {
volatile struct mlx5_cqe64 *tmp;
@@ -199,8 +199,8 @@ txq_complete(struct txq *txq)
} while (1);
if (unlikely(cqe == NULL))
return;
-   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)];
-   elts_tail = wqe->wqe.ctrl.data[3];
+   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+   elts_tail = wqe->ctrl[3];
assert(elts_tail < txq->wqe_n);
/* Free buffers. */
while (elts_free != elts_tail) {
@@ -302,33 +302,33 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *   Number of DS elements consumed.
  */
 static inline unsigned int
-mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
+mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
   struct rte_mbuf *buf, uint32_t length)
 {
-   uintptr_t raw = (uintptr_t)&wqe->wqe.eseg.inline_hdr_start;
+   uint8_t *raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
uint16_t ds;
-   uint16_t pkt_inline_sz = 16;
+   uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
struct mlx5_wqe_data_seg *

[dpdk-dev] [PATCH 0/6] net/mlx5: performance improvement

2016-09-07 Thread Nelio Laranjeiro
 - Rework structure elements to reduce their size.
 - Remove a useless second loop in the Tx burst function.

This series should be applied on top of "net/mlx5: various fixes".

Nelio Laranjeiro (6):
  net/mlx5: rework hardware structures
  net/mlx5: reduce Tx and Rx structure size
  net/mlx5: reduce Tx and Rx structure size
  net/mlx5: reduce Tx structure size
  net/mlx5: reduce Tx and Rx structure size
  net/mlx5: remove gather loop on segments

 drivers/net/mlx5/mlx5_ethdev.c |   4 +-
 drivers/net/mlx5/mlx5_prm.h|  70 ++-
 drivers/net/mlx5/mlx5_rxq.c|  12 +-
 drivers/net/mlx5/mlx5_rxtx.c   | 458 +
 drivers/net/mlx5/mlx5_rxtx.h   |  22 +-
 drivers/net/mlx5/mlx5_txq.c|  18 +-
 6 files changed, 285 insertions(+), 299 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH 8/8] net/mlx5: fix inline logic

2016-09-07 Thread Nelio Laranjeiro
To perform well, the NIC expects the data pointer of large packets to be
cache-line aligned; the old inline code could break this assumption and
hurt performance.
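
A minimal sketch of the constraint, assuming a 64-byte cache line: the
inline copy is cut so that the address left for the NIC's data segment
stays cache-line aligned.

#include <stdint.h>

#define CACHE_LINE 64   /* RTE_CACHE_LINE_SIZE on most targets */

/* Inline at most max_inline bytes, stopping on a cache-line boundary
 * so the remaining data address handed to the NIC stays aligned. */
static uint32_t
inline_copy_len(uintptr_t addr, uint32_t length, uint32_t max_inline)
{
        uintptr_t addr_end = (addr + max_inline) & ~(uintptr_t)(CACHE_LINE - 1);
        uint32_t room = (addr_end > addr) ? (uint32_t)(addr_end - addr) : 0;

        return (room > length) ? length : room;
}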

Fixes: 2a66cf378954 ("net/mlx5: support inline send")

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Vasily Philipov 
---
 drivers/net/mlx5/mlx5_ethdev.c |   4 -
 drivers/net/mlx5/mlx5_rxtx.c   | 424 ++---
 drivers/net/mlx5/mlx5_rxtx.h   |   3 +-
 drivers/net/mlx5/mlx5_txq.c|   9 +-
 4 files changed, 104 insertions(+), 336 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 47f323e..1ae80e5 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1398,10 +1398,6 @@ priv_select_tx_function(struct priv *priv)
} else if ((priv->sriov == 0) && priv->mps) {
priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
DEBUG("selected MPW TX function");
-   } else if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
-   priv->dev->tx_pkt_burst = mlx5_tx_burst_inline;
-   DEBUG("selected inline TX function (%u >= %u queues)",
- priv->txqs_n, priv->txqs_inline);
}
 }

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index c7e538f..67e0f37 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -297,179 +297,99 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *   Buffer.
  * @param length
  *   Packet length.
- * @param lkey
- *   Memory region lkey.
+ *
+ * @return ds
+ *   Number of DS elements consumed.
  */
-static inline void
+static inline unsigned int
 mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
-  struct rte_mbuf *buf, uint32_t length, uint32_t lkey)
+  struct rte_mbuf *buf, uint32_t length)
 {
+   uintptr_t raw = (uintptr_t)&wqe->wqe.eseg.inline_hdr_start;
+   uint16_t ds;
+   uint16_t pkt_inline_sz = 16;
uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
+   struct mlx5_wqe_data_seg *dseg = NULL;

-   rte_mov16((uint8_t *)&wqe->wqe.eseg.inline_hdr_start,
- (uint8_t *)addr);
-   addr += 16;
+   assert(length >= 16);
+   /* Start the know and common part of the WQE structure. */
+   wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
+   wqe->wqe.ctrl.data[2] = 0;
+   wqe->wqe.ctrl.data[3] = 0;
+   wqe->wqe.eseg.rsvd0 = 0;
+   wqe->wqe.eseg.rsvd1 = 0;
+   wqe->wqe.eseg.mss = 0;
+   wqe->wqe.eseg.rsvd2 = 0;
+   /* Start by copying the Ethernet Header. */
+   rte_mov16((uint8_t *)raw, (uint8_t *)addr);
length -= 16;
-   /* Need to insert VLAN ? */
+   addr += 16;
+   /* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
uint32_t vlan = htonl(0x8100 | buf->vlan_tci);

-   memcpy((uint8_t *)&wqe->wqe.eseg.inline_hdr_start + 12,
+   memcpy((uint8_t *)(raw + 16 - sizeof(vlan)),
   &vlan, sizeof(vlan));
addr -= sizeof(vlan);
length += sizeof(vlan);
}
-   /* Write the WQE. */
-   wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-   wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
-   wqe->wqe.ctrl.data[2] = 0;
-   wqe->wqe.ctrl.data[3] = 0;
-   wqe->inl.eseg.rsvd0 = 0;
-   wqe->inl.eseg.rsvd1 = 0;
-   wqe->inl.eseg.mss = 0;
-   wqe->inl.eseg.rsvd2 = 0;
-   wqe->wqe.eseg.inline_hdr_sz = htons(16);
-   /* Store remaining data in data segment. */
-   wqe->wqe.dseg.byte_count = htonl(length);
-   wqe->wqe.dseg.lkey = lkey;
-   wqe->wqe.dseg.addr = htonll(addr);
-   /* Increment consumer index. */
-   ++txq->wqe_ci;
-}
-
-/**
- * Write a inline WQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
- * @param length
- *   Packet length.
- * @param lkey
- *   Memory region lkey.
- */
-static inline void
-mlx5_wqe_write_inline(struct txq *txq, volatile union mlx5_wqe *wqe,
- uintptr_t addr, uint32_t length)
-{
-   uint32_t size;
-   uint16_t wqe_cnt = txq->wqe_n - 1;
-   uint16_t wqe_ci = txq->wqe_ci + 1;
-
-   /* Copy the first 16 bytes into inline header. */
-   rte_memcpy((void *)(uintptr_t)wqe->inl.eseg.inline_hdr_start,
-  (void *)(uintptr_t)addr,
-  MLX5_ETH_INLINE_HEADER_SIZE);
-   addr += MLX5_ETH_INLINE_HEADER_SIZE;
-   length -= MLX5_ETH_INLINE_HEADER_SIZE;
-   size = 3 + ((4 + length + 15) / 16);
-   wqe->inl.by

[dpdk-dev] [PATCH 7/8] net/mlx5: re-factorize functions

2016-09-07 Thread Nelio Laranjeiro
Rework the logic of wqe_write() and wqe_write_vlan(), which are very
similar, to keep a single function.

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 98 ++--
 1 file changed, 22 insertions(+), 76 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5c39cbb..c7e538f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -293,8 +293,8 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *   Pointer to TX queue structure.
  * @param wqe
  *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
+ * @param buf
+ *   Buffer.
  * @param length
  *   Packet length.
  * @param lkey
@@ -302,54 +302,24 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  */
 static inline void
 mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
-  uintptr_t addr, uint32_t length, uint32_t lkey)
-{
-   wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-   wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
-   wqe->wqe.ctrl.data[2] = 0;
-   wqe->wqe.ctrl.data[3] = 0;
-   wqe->inl.eseg.rsvd0 = 0;
-   wqe->inl.eseg.rsvd1 = 0;
-   wqe->inl.eseg.mss = 0;
-   wqe->inl.eseg.rsvd2 = 0;
-   wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE);
-   /* Copy the first 16 bytes into inline header. */
-   rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
-  (uint8_t *)(uintptr_t)addr,
-  MLX5_ETH_INLINE_HEADER_SIZE);
-   addr += MLX5_ETH_INLINE_HEADER_SIZE;
-   length -= MLX5_ETH_INLINE_HEADER_SIZE;
-   /* Store remaining data in data segment. */
-   wqe->wqe.dseg.byte_count = htonl(length);
-   wqe->wqe.dseg.lkey = lkey;
-   wqe->wqe.dseg.addr = htonll(addr);
-   /* Increment consumer index. */
-   ++txq->wqe_ci;
-}
-
-/**
- * Write a regular WQE with VLAN.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
- * @param length
- *   Packet length.
- * @param lkey
- *   Memory region lkey.
- * @param vlan_tci
- *   VLAN field to insert in packet.
- */
-static inline void
-mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
-   uintptr_t addr, uint32_t length, uint32_t lkey,
-   uint16_t vlan_tci)
+  struct rte_mbuf *buf, uint32_t length, uint32_t lkey)
 {
-   uint32_t vlan = htonl(0x8100 | vlan_tci);
-
+   uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
+
+   rte_mov16((uint8_t *)&wqe->wqe.eseg.inline_hdr_start,
+ (uint8_t *)addr);
+   addr += 16;
+   length -= 16;
+   /* Need to insert VLAN ? */
+   if (buf->ol_flags & PKT_TX_VLAN_PKT) {
+   uint32_t vlan = htonl(0x8100 | buf->vlan_tci);
+
+   memcpy((uint8_t *)&wqe->wqe.eseg.inline_hdr_start + 12,
+  &vlan, sizeof(vlan));
+   addr -= sizeof(vlan);
+   length += sizeof(vlan);
+   }
+   /* Write the WQE. */
wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
wqe->wqe.ctrl.data[2] = 0;
@@ -358,20 +328,7 @@ mlx5_wqe_write_vlan(struct txq *txq, volatile union 
mlx5_wqe *wqe,
wqe->inl.eseg.rsvd1 = 0;
wqe->inl.eseg.mss = 0;
wqe->inl.eseg.rsvd2 = 0;
-   wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE);
-   /*
-* Copy 12 bytes of source & destination MAC address.
-* Copy 4 bytes of VLAN.
-* Copy 2 bytes of Ether type.
-*/
-   rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
-  (uint8_t *)(uintptr_t)addr, 12);
-   rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 12),
-  &vlan, sizeof(vlan));
-   rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 16),
-  (uint8_t *)((uintptr_t)addr + 12), 2);
-   addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
-   length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
+   wqe->wqe.eseg.inline_hdr_sz = htons(16);
/* Store remaining data in data segment. */
wqe->wqe.dseg.byte_count = htonl(length);
wqe->wqe.dseg.lkey = lkey;
@@ -612,7 +569,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
do {
struct rte_mbuf *buf = *(pkts++);
unsigned int elts_head_next;
-   uintptr_t addr;
uint32_t length;
uint32_t lkey;
unsigned int segs_n = buf->nb_segs;
@@ -634,8 +590,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkt

[dpdk-dev] [PATCH 6/8] net/mlx5: force inline for completion function

2016-09-07 Thread Nelio Laranjeiro
This function was supposed to be inlined, but was not because several
functions call it.  It should always be inlined to avoid external
function calls and to optimize code in the data path.
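
The pattern used is the usual GCC one, sketched below: a forward
declaration carries the attribute and the definition is marked inline.

/* Forward declaration with the attribute... */
static inline void txq_complete_sketch(void) __attribute__((always_inline));

/* ...and an inline definition, so every caller gets the body inlined. */
static inline void
txq_complete_sketch(void)
{
        /* completion handling would go here */
}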

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 3757366..5c39cbb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -152,6 +152,9 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
return 0;
 }

+static inline void
+txq_complete(struct txq *txq) __attribute__((always_inline));
+
 /**
  * Manage TX completions.
  *
@@ -160,7 +163,7 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
  * @param txq
  *   Pointer to TX queue structure.
  */
-static void
+static inline void
 txq_complete(struct txq *txq)
 {
const unsigned int elts_n = txq->elts_n;
-- 
2.1.4



[dpdk-dev] [PATCH 5/8] net/mlx5: fix support for flow director drop mode

2016-09-07 Thread Nelio Laranjeiro
From: Yaacov Hazan <yaac...@mellanox.com>

Rejected packets were previously routed to a polled queue.  This patch routes
them to a dummy queue which is never polled.

Fixes: 76f5c99e6840 ("mlx5: support flow director")

Signed-off-by: Yaacov Hazan 
Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
---
 doc/guides/nics/mlx5.rst |  3 ++-
 drivers/net/mlx5/mlx5.h  |  1 +
 drivers/net/mlx5/mlx5_fdir.c | 41 +++--
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 5c10cd3..8923173 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -84,7 +84,8 @@ Features
 - Promiscuous mode.
 - Multicast promiscuous mode.
 - Hardware checksum offloads.
-- Flow director (RTE_FDIR_MODE_PERFECT and RTE_FDIR_MODE_PERFECT_MAC_VLAN).
+- Flow director (RTE_FDIR_MODE_PERFECT, RTE_FDIR_MODE_PERFECT_MAC_VLAN and
+  RTE_ETH_FDIR_REJECT).
 - Secondary process TX is supported.
 - KVM and VMware ESX SR-IOV modes are supported.

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index fa78623..8349e5b 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -134,6 +134,7 @@ struct priv {
unsigned int (*reta_idx)[]; /* RETA index table. */
unsigned int reta_idx_n; /* RETA index size. */
struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
+   struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
rte_spinlock_t lock; /* Lock for control functions. */
 };

diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 802669b..25c8c52 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -75,6 +75,7 @@ struct fdir_flow_desc {
 struct mlx5_fdir_filter {
LIST_ENTRY(mlx5_fdir_filter) next;
uint16_t queue; /* Queue assigned to if FDIR match. */
+   enum rte_eth_fdir_behavior behavior;
struct fdir_flow_desc desc;
struct ibv_exp_flow *flow;
 };
@@ -567,6 +568,33 @@ priv_get_fdir_queue(struct priv *priv, uint16_t idx)
 }

 /**
+ * Get the flow director drop queue. Create it if it does not exist.
+ *
+ * @param priv
+ *   Private structure.
+ *
+ * @return
+ *   Flow director drop queue on success, NULL otherwise.
+ */
+static struct fdir_queue *
+priv_get_fdir_drop_queue(struct priv *priv)
+{
+   struct fdir_queue *fdir_queue = priv->fdir_drop_queue;
+
+   if (fdir_queue == NULL) {
+   unsigned int socket = SOCKET_ID_ANY;
+
+   /* Select a known NUMA socket if possible. */
+   if (priv->rxqs_n && (*priv->rxqs)[0])
+   socket = container_of((*priv->rxqs)[0],
+ struct rxq_ctrl, rxq)->socket;
+   fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
+   priv->fdir_drop_queue = fdir_queue;
+   }
+   return fdir_queue;
+}
+
+/**
  * Enable flow director filter and create steering rules.
  *
  * @param priv
@@ -588,7 +616,11 @@ priv_fdir_filter_enable(struct priv *priv,
return 0;

/* Get fdir_queue for specific queue. */
-   fdir_queue = priv_get_fdir_queue(priv, mlx5_fdir_filter->queue);
+   if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
+   fdir_queue = priv_get_fdir_drop_queue(priv);
+   else
+   fdir_queue = priv_get_fdir_queue(priv,
+mlx5_fdir_filter->queue);

if (fdir_queue == NULL) {
ERROR("failed to create flow director rxq for queue %d",
@@ -707,6 +739,10 @@ priv_fdir_disable(struct priv *priv)
priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
rxq_ctrl->fdir_queue = NULL;
}
+   if (priv->fdir_drop_queue) {
+   priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
+   priv->fdir_drop_queue = NULL;
+   }
 }

 /**
@@ -807,8 +843,9 @@ priv_fdir_filter_add(struct priv *priv,
return err;
}

-   /* Set queue. */
+   /* Set action parameters. */
mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
+   mlx5_fdir_filter->behavior = fdir_filter->action.behavior;

/* Convert to mlx5 filter descriptor. */
fdir_filter_to_flow_desc(fdir_filter,
-- 
2.1.4



[dpdk-dev] [PATCH 4/8] net/mlx5: refactor allocation of flow director queues

2016-09-07 Thread Nelio Laranjeiro
From: Yaacov Hazan 

This is done to prepare support for drop queues, which are not related to
existing RX queues and need to be managed separately.

Signed-off-by: Yaacov Hazan 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5.h  |   1 +
 drivers/net/mlx5/mlx5_fdir.c | 229 ---
 drivers/net/mlx5/mlx5_rxq.c  |   2 +
 drivers/net/mlx5/mlx5_rxtx.h |   4 +-
 4 files changed, 156 insertions(+), 80 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3a86609..fa78623 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -257,6 +257,7 @@ void mlx5_dev_stop(struct rte_eth_dev *);

 /* mlx5_fdir.c */

+void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *);
 int fdir_init_filters_list(struct priv *);
 void priv_fdir_delete_filters_list(struct priv *);
 void priv_fdir_disable(struct priv *);
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 8207573..802669b 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -400,6 +400,145 @@ create_flow:
 }

 /**
+ * Destroy a flow director queue.
+ *
+ * @param fdir_queue
+ *   Flow director queue to be destroyed.
+ */
+void
+priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
+{
+   struct mlx5_fdir_filter *fdir_filter;
+
+   /* Disable filter flows still applying to this queue. */
+   LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
+   unsigned int idx = fdir_filter->queue;
+   struct rxq_ctrl *rxq_ctrl =
+   container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
+
+   assert(idx < priv->rxqs_n);
+   if (fdir_queue == rxq_ctrl->fdir_queue &&
+   fdir_filter->flow != NULL) {
+   claim_zero(ibv_exp_destroy_flow(fdir_filter->flow));
+   fdir_filter->flow = NULL;
+   }
+   }
+   assert(fdir_queue->qp);
+   claim_zero(ibv_destroy_qp(fdir_queue->qp));
+   assert(fdir_queue->ind_table);
+   claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table));
+   if (fdir_queue->wq)
+   claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
+   if (fdir_queue->cq)
+   claim_zero(ibv_destroy_cq(fdir_queue->cq));
+#ifndef NDEBUG
+   memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
+#endif
+   rte_free(fdir_queue);
+}
+
+/**
+ * Create a flow director queue.
+ *
+ * @param priv
+ *   Private structure.
+ * @param wq
+ *   Work queue to route matched packets to, NULL if one needs to
+ *   be created.
+ *
+ * @return
+ *   Related flow director queue on success, NULL otherwise.
+ */
+static struct fdir_queue *
+priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq,
+  unsigned int socket)
+{
+   struct fdir_queue *fdir_queue;
+
+   fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
+  0, socket);
+   if (!fdir_queue) {
+   ERROR("cannot allocate flow director queue");
+   return NULL;
+   }
+   assert(priv->pd);
+   assert(priv->ctx);
+   if (!wq) {
+   fdir_queue->cq = ibv_exp_create_cq(
+   priv->ctx, 1, NULL, NULL, 0,
+   &(struct ibv_exp_cq_init_attr){
+   .comp_mask = 0,
+   });
+   if (!fdir_queue->cq) {
+   ERROR("cannot create flow director CQ");
+   goto error;
+   }
+   fdir_queue->wq = ibv_exp_create_wq(
+   priv->ctx,
+   &(struct ibv_exp_wq_init_attr){
+   .wq_type = IBV_EXP_WQT_RQ,
+   .max_recv_wr = 1,
+   .max_recv_sge = 1,
+   .pd = priv->pd,
+   .cq = fdir_queue->cq,
+   });
+   if (!fdir_queue->wq) {
+   ERROR("cannot create flow director WQ");
+   goto error;
+   }
+   wq = fdir_queue->wq;
+   }
+   fdir_queue->ind_table = ibv_exp_create_rwq_ind_table(
+   priv->ctx,
+   &(struct ibv_exp_rwq_ind_table_init_attr){
+   .pd = priv->pd,
+   .log_ind_tbl_size = 0,
+   .ind_tbl = &wq,
+   .comp_mask = 0,
+   });
+   if (!fdir_queue->ind_table) {
+   ERROR("cannot create flow director indirection table");
+   goto error;
+   }
+   fdir_queue->qp = ibv_exp_create_qp(
+   priv->ctx,
+   &(struct ibv_exp_qp_init_attr){
+   .qp_type = IBV_QPT_RAW_PACKET,
+   .comp_mask =
+ 

[dpdk-dev] [PATCH 3/8] net/mlx5: fix removing VLAN filter

2016-09-07 Thread Nelio Laranjeiro
From: Raslan Darawsheh 

memmove was given the number of remaining elements as its byte count, while
it should be given that count multiplied by the size of each element.

Fixes: e9086978 ("mlx5: support VLAN filtering")

Signed-off-by: Raslan Darawsheh 
---
 drivers/net/mlx5/mlx5_vlan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 4719e69..fb730e5 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -87,7 +87,8 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, 
int on)
--priv->vlan_filter_n;
memmove(&priv->vlan_filter[i],
&priv->vlan_filter[i + 1],
-   priv->vlan_filter_n - i);
+   sizeof(priv->vlan_filter[i]) *
+   (priv->vlan_filter_n - i));
priv->vlan_filter[priv->vlan_filter_n] = 0;
} else {
assert(i == priv->vlan_filter_n);
-- 
2.1.4



[dpdk-dev] [PATCH 2/8] net/mlx5: fix Rx VLAN offload capability report

2016-09-07 Thread Nelio Laranjeiro
From: Adrien Mazarguil 

This capability is implemented but not reported.

Fixes: f3db9489188a ("mlx5: support Rx VLAN stripping")

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 130e15d..47f323e 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -583,7 +583,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *info)
 (DEV_RX_OFFLOAD_IPV4_CKSUM |
  DEV_RX_OFFLOAD_UDP_CKSUM |
  DEV_RX_OFFLOAD_TCP_CKSUM) :
-0);
+0) |
+   (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0);
if (!priv->mps)
info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
if (priv->hw_csum)
-- 
2.1.4



[dpdk-dev] [PATCH 1/8] net/mlx5: fix inconsistent return value in Flow Director

2016-09-07 Thread Nelio Laranjeiro
From: Yaacov Hazan 

In DPDK, the return value on failure is a negative errno. Internal
functions in the mlx5 driver return positive values, so the value must
be negated before being returned to the DPDK layer.

Fixes: 76f5c99 ("mlx5: support flow director")

Signed-off-by: Yaacov Hazan 
---
 drivers/net/mlx5/mlx5_fdir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 73eb00e..8207573 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -955,7 +955,7 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 enum rte_filter_op filter_op,
 void *arg)
 {
-   int ret = -EINVAL;
+   int ret = EINVAL;
struct priv *priv = dev->data->dev_private;

switch (filter_type) {
@@ -970,5 +970,5 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
break;
}

-   return ret;
+   return -ret;
 }
-- 
2.1.4



[dpdk-dev] [PATCH 0/8] net/mlx5: various fixes

2016-09-07 Thread Nelio Laranjeiro
 - Flow director
 - Rx Capabilities
 - Inline

Adrien Mazarguil (1):
  net/mlx5: fix Rx VLAN offload capability report

Nelio Laranjeiro (3):
  net/mlx5: force inline for completion function
  net/mlx5: re-factorize functions
  net/mlx5: fix inline logic

Raslan Darawsheh (1):
  net/mlx5: fix removing VLAN filter

Yaacov Hazan (3):
  net/mlx5: fix inconsistent return value in Flow Director
  net/mlx5: refactor allocation of flow director queues
  net/mlx5: fix support for flow director drop mode

 doc/guides/nics/mlx5.rst   |   3 +-
 drivers/net/mlx5/mlx5.h|   2 +
 drivers/net/mlx5/mlx5_ethdev.c |   7 +-
 drivers/net/mlx5/mlx5_fdir.c   | 270 +++---
 drivers/net/mlx5/mlx5_rxq.c|   2 +
 drivers/net/mlx5/mlx5_rxtx.c   | 499 +
 drivers/net/mlx5/mlx5_rxtx.h   |   7 +-
 drivers/net/mlx5/mlx5_txq.c|   9 +-
 drivers/net/mlx5/mlx5_vlan.c   |   3 +-
 9 files changed, 318 insertions(+), 484 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH] net/mlx5: fix a segmentation fault in Rx

2016-07-08 Thread Nelio Laranjeiro
The fixed issue could occur when mbuf starvation happened in the middle of
receiving a segmented packet; in such a situation, the PMD has to release
all segments of that packet.  The end condition was wrong and caused an
mbuf still handled by the NIC to be freed.

Fixes: 9964b965ad69 ("net/mlx5: re-add Rx scatter support")

Reported-by: Yongseok Koh 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 0c352f3..3564937 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1572,7 +1572,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
rte_prefetch0(wqe);
rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
-   while (pkt) {
+   while (pkt != seg) {
+   assert(pkt != (*rxq->elts)[idx]);
seg = NEXT(pkt);
rte_mbuf_refcnt_set(pkt, 0);
__rte_mbuf_raw_free(pkt);
-- 
2.1.4



[dpdk-dev] [PATCH v7 25/25] mlx5: resurrect Rx scatter support

2016-06-24 Thread Nelio Laranjeiro
This commit brings back Rx scatter, together with its support in the MTU
update function. The maximum number of segments per packet is no longer a
fixed value (previously MLX5_PMD_SGE_WR_N, 4 by default), which caused
performance issues when fewer segments were actually needed and limited
the maximum packet size that could be received with the default mbuf size
(at most 8576 bytes).

These limitations are now lifted as the number of SGEs is derived from the
MTU (which implies MRU) at queue initialization and during MTU update.
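
As a worked example of that computation (values are illustrative: MTU 9000,
mbuf pool created with RTE_MBUF_DEFAULT_BUF_SIZE, i.e. a 2176-byte data
room):

    max_frame_len = ETHER_HDR_LEN + 9000 + ETHER_CRC_LEN;     /* 9018 */
    size = RTE_PKTMBUF_HEADROOM + max_frame_len;              /* 9146 */
    mb_len = 2176;                     /* rte_pktmbuf_data_room_size() */
    sges_n = log2above((size / mb_len) + !!(size % mb_len));  /* = 3   */
    /* The queue is therefore configured with 1 << 3 = 8 SGEs per packet. */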

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Vasily Philipov 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_ethdev.c |  90 ++
 drivers/net/mlx5/mlx5_rxq.c|  77 ++-
 drivers/net/mlx5/mlx5_rxtx.c   | 139 -
 drivers/net/mlx5/mlx5_rxtx.h   |   1 +
 4 files changed, 225 insertions(+), 82 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 698a50e..72f0826 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -725,6 +725,9 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
unsigned int i;
uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
mlx5_rx_burst;
+   unsigned int max_frame_len;
+   int rehash;
+   int restart = priv->started;

if (mlx5_is_secondary())
return -E_RTE_SECONDARY;
@@ -738,7 +741,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
goto out;
} else
DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
-   priv->mtu = mtu;
/* Temporarily replace RX handler with a fake one, assuming it has not
 * been copied elsewhere. */
dev->rx_pkt_burst = removed_rx_burst;
@@ -746,28 +748,94 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
 * removed_rx_burst() instead. */
rte_wmb();
usleep(1000);
+   /* MTU does not include header and CRC. */
+   max_frame_len = ETHER_HDR_LEN + mtu + ETHER_CRC_LEN;
+   /* Check if at least one queue is going to need a SGE update. */
+   for (i = 0; i != priv->rxqs_n; ++i) {
+   struct rxq *rxq = (*priv->rxqs)[i];
+   unsigned int mb_len;
+   unsigned int size = RTE_PKTMBUF_HEADROOM + max_frame_len;
+   unsigned int sges_n;
+
+   if (rxq == NULL)
+   continue;
+   mb_len = rte_pktmbuf_data_room_size(rxq->mp);
+   assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+   /*
+* Determine the number of SGEs needed for a full packet
+* and round it to the next power of two.
+*/
+   sges_n = log2above((size / mb_len) + !!(size % mb_len));
+   if (sges_n != rxq->sges_n)
+   break;
+   }
+   /*
+* If all queues have the right number of SGEs, a simple rehash
+* of their buffers is enough, otherwise SGE information can only
+* be updated in a queue by recreating it. All resources that depend
+* on queues (flows, indirection tables) must be recreated as well in
+* that case.
+*/
+   rehash = (i == priv->rxqs_n);
+   if (!rehash) {
+   /* Clean up everything as with mlx5_dev_stop(). */
+   priv_special_flow_disable_all(priv);
+   priv_mac_addrs_disable(priv);
+   priv_destroy_hash_rxqs(priv);
+   priv_fdir_disable(priv);
+   priv_dev_interrupt_handler_uninstall(priv, dev);
+   }
+recover:
/* Reconfigure each RX queue. */
for (i = 0; (i != priv->rxqs_n); ++i) {
struct rxq *rxq = (*priv->rxqs)[i];
-   unsigned int mb_len;
-   unsigned int max_frame_len;
+   struct rxq_ctrl *rxq_ctrl =
+   container_of(rxq, struct rxq_ctrl, rxq);
int sp;
+   unsigned int mb_len;
+   unsigned int tmp;

if (rxq == NULL)
continue;
-   /* Calculate new maximum frame length according to MTU and
-* toggle scattered support (sp) if necessary. */
-   max_frame_len = (priv->mtu + ETHER_HDR_LEN +
-(ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
mb_len = rte_pktmbuf_data_room_size(rxq->mp);
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+   /* Toggle scattered support (sp) if necessary. */
sp = (max_frame_len > (mb_len - RTE_PKTMBUF_HEADROOM));
-   if (sp) {
-   ERROR("%p: RX scatter is not supported", (void *)dev);
-   ret = ENOTSUP;
-   

[dpdk-dev] [PATCH v7 24/25] mlx5: make Rx queue reinitialization safer

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil 

The primary purpose of the rxq_rehash() function is to stop and restart
reception on a queue after re-posting buffers. This may fail if the array
that temporarily stores existing buffers for reuse cannot be allocated.

Update rxq_rehash() to work on the target queue directly (not through a
template copy) and avoid this allocation.

rxq_alloc_elts() is modified accordingly to take buffers from an existing
queue directly and update their refcount.

Unlike rxq_rehash(), rxq_setup() must work on a temporary structure but
should not allocate new mbufs from the pool while reinitializing an
existing queue. This is achieved by using the refcount-aware
rxq_alloc_elts() before overwriting queue data.
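
A minimal sketch of the refcount trick described above (simplified from the
diff below): taking the buffers back through rxq_alloc_elts() bumps each
refcount to 2, so dropping the extra reference afterwards leaves the ring
as the sole owner without touching the mempool.

    /* Re-post the existing buffers; each refcount goes from 1 to 2. */
    claim_zero(rxq_alloc_elts(rxq_ctrl, elts_n, rxq_ctrl->rxq.elts));
    for (i = 0; i != elts_n; ++i) {
            struct rte_mbuf *buf = (*rxq_ctrl->rxq.elts)[i];

            /* One reference held by the ring, one by this function. */
            assert(rte_mbuf_refcnt_read(buf) == 2);
            /* Drop the extra reference without returning buf to the pool. */
            rte_pktmbuf_free_seg(buf);
    }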

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Vasily Philipov 
---
 drivers/net/mlx5/mlx5_rxq.c | 83 ++---
 1 file changed, 41 insertions(+), 42 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index fbf14fa..b2ddd0d 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -642,7 +642,7 @@ priv_rehash_flows(struct priv *priv)
  */
 static int
 rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
-  struct rte_mbuf **pool)
+  struct rte_mbuf *(*pool)[])
 {
unsigned int i;
int ret = 0;
@@ -654,9 +654,10 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int 
elts_n,
&(*rxq_ctrl->rxq.wqes)[i];

if (pool != NULL) {
-   buf = *(pool++);
+   buf = (*pool)[i];
assert(buf != NULL);
rte_pktmbuf_reset(buf);
+   rte_pktmbuf_refcnt_update(buf, 1);
} else
buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
if (buf == NULL) {
@@ -781,7 +782,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 }

 /**
- * Reconfigure a RX queue with new parameters.
+ * Reconfigure RX queue buffers.
  *
  * rxq_rehash() does not allocate mbufs, which, if not done from the right
  * thread (such as a control thread), may corrupt the pool.
@@ -798,67 +799,48 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 int
 rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 {
-   struct rxq_ctrl tmpl = *rxq_ctrl;
-   unsigned int mbuf_n;
-   unsigned int desc_n;
-   struct rte_mbuf **pool;
-   unsigned int i, k;
+   unsigned int elts_n = rxq_ctrl->rxq.elts_n;
+   unsigned int i;
struct ibv_exp_wq_attr mod;
int err;

DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq_ctrl);
-   /* Number of descriptors and mbufs currently allocated. */
-   desc_n = tmpl.rxq.elts_n;
-   mbuf_n = desc_n;
/* From now on, any failure will render the queue unusable.
 * Reinitialize WQ. */
mod = (struct ibv_exp_wq_attr){
.attr_mask = IBV_EXP_WQ_ATTR_STATE,
.wq_state = IBV_EXP_WQS_RESET,
};
-   err = ibv_exp_modify_wq(tmpl.wq, &mod);
+   err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
if (err) {
ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
assert(err > 0);
return err;
}
-   /* Allocate pool. */
-   pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
-   if (pool == NULL) {
-   ERROR("%p: cannot allocate memory", (void *)dev);
-   return ENOBUFS;
-   }
/* Snatch mbufs from original queue. */
-   k = 0;
-   for (i = 0; (i != desc_n); ++i)
-   pool[k++] = (*rxq_ctrl->rxq.elts)[i];
-   assert(k == mbuf_n);
-   rte_free(pool);
+   claim_zero(rxq_alloc_elts(rxq_ctrl, elts_n, rxq_ctrl->rxq.elts));
+   for (i = 0; i != elts_n; ++i) {
+   struct rte_mbuf *buf = (*rxq_ctrl->rxq.elts)[i];
+
+   assert(rte_mbuf_refcnt_read(buf) == 2);
+   rte_pktmbuf_free_seg(buf);
+   }
/* Change queue state to ready. */
mod = (struct ibv_exp_wq_attr){
.attr_mask = IBV_EXP_WQ_ATTR_STATE,
.wq_state = IBV_EXP_WQS_RDY,
};
-   err = ibv_exp_modify_wq(tmpl.wq, &mod);
+   err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
if (err) {
ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
  (void *)dev, strerror(err));
goto error;
}
-   /* Post SGEs. */
-   err = rxq_alloc_elts(&tmpl, desc_n, pool);
-   if (err) {
-   ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
-   rte_free(pool);
-   assert(err > 0);
-   return err;
-   }
/* Update doorbell counter. */
-   rxq_ctrl->rxq.rq_ci = desc_n;
+   rxq_ctrl->rxq.rq_ci = elts_n;
rte_wmb();
*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
 

[dpdk-dev] [PATCH v7 22/25] mlx5: work around spurious compilation errors

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil 

Since commit "mlx5: resurrect Tx gather support", older GCC versions (such
as 4.8.5) may complain about the following:

 mlx5_rxtx.c: In function `mlx5_tx_burst':
 mlx5_rxtx.c:705:25: error: `wqe' may be used uninitialized in this
 function [-Werror=maybe-uninitialized]

 mlx5_rxtx.c: In function `mlx5_tx_burst_inline':
 mlx5_rxtx.c:864:25: error: `wqe' may be used uninitialized in this
 function [-Werror=maybe-uninitialized]

In both cases, this code cannot be reached when wqe is not initialized.

Considering older GCC versions are still widely used, work around this
issue by initializing wqe preemptively, even if it should not be necessary.

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_rxtx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index c72e7ce..8b67949 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -593,7 +593,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
unsigned int j = 0;
unsigned int max;
unsigned int comp;
-   volatile union mlx5_wqe *wqe;
+   volatile union mlx5_wqe *wqe = NULL;

if (unlikely(!pkts_n))
return 0;
@@ -741,7 +741,7 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf 
**pkts, uint16_t pkts_n)
unsigned int j = 0;
unsigned int max;
unsigned int comp;
-   volatile union mlx5_wqe *wqe;
+   volatile union mlx5_wqe *wqe = NULL;
unsigned int max_inline = txq->max_inline;

if (unlikely(!pkts_n))
-- 
2.1.4



[dpdk-dev] [PATCH v7 21/25] mlx5: resurrect Tx gather support

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil <adrien.mazarg...@6wind.com>

Compared to its previous incarnation, the software limit on the number of
mbuf segments is gone (previously MLX5_PMD_SGE_WR_N, set to 4 by default);
hence there is no need for the linearization code and related buffers that
permanently consumed a non-negligible amount of memory to handle oversized
mbufs.

The resulting code is both lighter and faster.

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 235 +--
 drivers/net/mlx5/mlx5_txq.c  |   8 +-
 2 files changed, 188 insertions(+), 55 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index fadc182..c72e7ce 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -303,6 +303,7 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe 
*wqe,
 {
wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
+   wqe->wqe.ctrl.data[2] = 0;
wqe->wqe.ctrl.data[3] = 0;
wqe->inl.eseg.rsvd0 = 0;
wqe->inl.eseg.rsvd1 = 0;
@@ -348,6 +349,7 @@ mlx5_wqe_write_vlan(struct txq *txq, volatile union 
mlx5_wqe *wqe,

wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
+   wqe->wqe.ctrl.data[2] = 0;
wqe->wqe.ctrl.data[3] = 0;
wqe->inl.eseg.rsvd0 = 0;
wqe->inl.eseg.rsvd1 = 0;
@@ -425,6 +427,7 @@ mlx5_wqe_write_inline(struct txq *txq, volatile union 
mlx5_wqe *wqe,
assert(size < 64);
wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
+   wqe->inl.ctrl.data[2] = 0;
wqe->inl.ctrl.data[3] = 0;
wqe->inl.eseg.rsvd0 = 0;
wqe->inl.eseg.rsvd1 = 0;
@@ -498,6 +501,7 @@ mlx5_wqe_write_inline_vlan(struct txq *txq, volatile union 
mlx5_wqe *wqe,
assert(size < 64);
wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
+   wqe->inl.ctrl.data[2] = 0;
wqe->inl.ctrl.data[3] = 0;
wqe->inl.eseg.rsvd0 = 0;
wqe->inl.eseg.rsvd1 = 0;
@@ -586,6 +590,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
uint16_t elts_head = txq->elts_head;
const unsigned int elts_n = txq->elts_n;
unsigned int i = 0;
+   unsigned int j = 0;
unsigned int max;
unsigned int comp;
volatile union mlx5_wqe *wqe;
@@ -602,23 +607,27 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
if (max > elts_n)
max -= elts_n;
do {
-   struct rte_mbuf *buf;
+   struct rte_mbuf *buf = *(pkts++);
unsigned int elts_head_next;
uintptr_t addr;
uint32_t length;
uint32_t lkey;
+   unsigned int segs_n = buf->nb_segs;
+   volatile struct mlx5_wqe_data_seg *dseg;
+   unsigned int ds = sizeof(*wqe) / 16;

/*
 * Make sure there is enough room to store this packet and
 * that one ring entry remains unused.
 */
-   if (max < 1 + 1)
+   assert(segs_n);
+   if (max < segs_n + 1)
break;
-   --max;
+   max -= segs_n;
--pkts_n;
-   buf = *(pkts++);
elts_head_next = (elts_head + 1) & (elts_n - 1);
wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
+   dseg = &wqe->wqe.dseg;
rte_prefetch0(wqe);
if (pkts_n)
rte_prefetch0(*pkts);
@@ -638,7 +647,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
buf->vlan_tci);
else
mlx5_wqe_write(txq, wqe, addr, length, lkey);
-   wqe->wqe.ctrl.data[2] = 0;
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
@@ -648,6 +656,37 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
} else {
wqe->wqe.eseg.cs_flags = 0;
}
+   while (--segs_n) {
+   /*
+* Spill on next WQE when the current one does not have
+* enough room left. Size of WQE must a be a multiple
+

[dpdk-dev] [PATCH v7 20/25] mlx5: check remaining space while processing Tx burst

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil <adrien.mazarg...@6wind.com>

The space necessary to store segmented packets cannot be known in advance
and must be verified for each of them.
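
A hedged sketch of the resulting loop shape (simplified from the diff below;
the "+ 1" keeps one ring entry unused, as the ring design requires):

    do {
            if (max < 1 + 1)
                    break;          /* not enough room, stop the burst */
            --max;
            --pkts_n;
            /* ... build the WQE for *(pkts++) ... */
            ++i;
    } while (pkts_n);
    /* i packets were actually posted, possibly fewer than requested. */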

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 144 +++
 1 file changed, 78 insertions(+), 66 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ed2b5fe..fadc182 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -585,50 +585,51 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const unsigned int elts_n = txq->elts_n;
-   unsigned int i;
+   unsigned int i = 0;
unsigned int max;
unsigned int comp;
volatile union mlx5_wqe *wqe;
-   struct rte_mbuf *buf;

if (unlikely(!pkts_n))
return 0;
-   buf = pkts[0];
/* Prefetch first packet cacheline. */
tx_prefetch_cqe(txq, txq->cq_ci);
tx_prefetch_cqe(txq, txq->cq_ci + 1);
-   rte_prefetch0(buf);
+   rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
-   assert(max >= 1);
-   assert(max <= elts_n);
-   /* Always leave one free entry in the ring. */
-   --max;
-   if (max == 0)
-   return 0;
-   if (max > pkts_n)
-   max = pkts_n;
-   for (i = 0; (i != max); ++i) {
-   unsigned int elts_head_next = (elts_head + 1) & (elts_n - 1);
+   do {
+   struct rte_mbuf *buf;
+   unsigned int elts_head_next;
uintptr_t addr;
uint32_t length;
uint32_t lkey;

+   /*
+* Make sure there is enough room to store this packet and
+* that one ring entry remains unused.
+*/
+   if (max < 1 + 1)
+   break;
+   --max;
+   --pkts_n;
+   buf = *(pkts++);
+   elts_head_next = (elts_head + 1) & (elts_n - 1);
wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
rte_prefetch0(wqe);
-   if (i + 1 < max)
-   rte_prefetch0(pkts[i + 1]);
+   if (pkts_n)
+   rte_prefetch0(*pkts);
/* Retrieve buffer information. */
addr = rte_pktmbuf_mtod(buf, uintptr_t);
length = DATA_LEN(buf);
/* Update element. */
(*txq->elts)[elts_head] = buf;
/* Prefetch next buffer data. */
-   if (i + 1 < max)
-   rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 1],
+   if (pkts_n)
+   rte_prefetch0(rte_pktmbuf_mtod(*pkts,
   volatile void *));
/* Retrieve Memory Region key for this memory pool. */
lkey = txq_mp2mr(txq, txq_mb2mp(buf));
@@ -652,8 +653,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
txq->stats.obytes += length;
 #endif
elts_head = elts_head_next;
-   buf = pkts[i + 1];
-   }
+   ++i;
+   } while (pkts_n);
/* Take a shortcut if nothing must be sent. */
if (unlikely(i == 0))
return 0;
@@ -697,44 +698,45 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf 
**pkts, uint16_t pkts_n)
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const unsigned int elts_n = txq->elts_n;
-   unsigned int i;
+   unsigned int i = 0;
unsigned int max;
unsigned int comp;
volatile union mlx5_wqe *wqe;
-   struct rte_mbuf *buf;
unsigned int max_inline = txq->max_inline;

if (unlikely(!pkts_n))
return 0;
-   buf = pkts[0];
/* Prefetch first packet cacheline. */
tx_prefetch_cqe(txq, txq->cq_ci);
tx_prefetch_cqe(txq, txq->cq_ci + 1);
-   rte_prefetch0(buf);
+   rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
-   assert(max >= 1);
-   assert(max <= elts_n);
-   /* Always leave one free entry in the ring. */
-   --max;
-   if (max == 0)
-   return 0;
-   if (max > pkts_n)
-   max = pkts_n;
-   for (i = 0; (i != max); ++i) {
-   unsigned int elts_head_next = (elts_he

[dpdk-dev] [PATCH v7 19/25] mlx5: add debugging information about Tx queues capabilities

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil 

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_txq.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4f17fb0..bae9f3d 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -343,6 +343,11 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl 
*txq_ctrl,
  (void *)dev, strerror(ret));
goto error;
}
+   DEBUG("TX queue capabilities: max_send_wr=%u, max_send_sge=%u,"
+ " max_inline_data=%u",
+ attr.init.cap.max_send_wr,
+ attr.init.cap.max_send_sge,
+ attr.init.cap.max_inline_data);
attr.mod = (struct ibv_exp_qp_attr){
/* Move the QP to this state. */
.qp_state = IBV_QPS_INIT,
-- 
2.1.4



[dpdk-dev] [PATCH v7 18/25] mlx5: add support for multi-packet send

2016-06-24 Thread Nelio Laranjeiro
This feature enables the TX burst function to emit up to 5 packets using
only two WQEs on devices that support it. This saves PCI bandwidth and
improves performance.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
Signed-off-by: Olga Shern 
---
 doc/guides/nics/mlx5.rst   |  10 +
 drivers/net/mlx5/mlx5.c|  14 +-
 drivers/net/mlx5/mlx5_ethdev.c |  15 +-
 drivers/net/mlx5/mlx5_rxtx.c   | 407 +
 drivers/net/mlx5/mlx5_rxtx.h   |   2 +
 drivers/net/mlx5/mlx5_txq.c|   2 +-
 6 files changed, 446 insertions(+), 4 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 9ada221..063c4a5 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -171,6 +171,16 @@ Run-time configuration

   This option should be used in combination with ``txq_inline`` above.

+- ``txq_mpw_en`` parameter [int]
+
+  A nonzero value enables multi-packet send. This feature allows the TX
+  burst function to pack up to five packets in two descriptors in order to
+  save PCI bandwidth and improve performance at the cost of a slightly
+  higher CPU usage.
+
+  It is currently only supported on the ConnectX-4 Lx family of adapters.
+  Enabled by default.
+
 Prerequisites
 -

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 73069d2..5aa4adc 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -81,6 +81,9 @@
  */
 #define MLX5_TXQS_MIN_INLINE "txqs_min_inline"

+/* Device parameter to enable multi-packet send WQEs. */
+#define MLX5_TXQ_MPW_EN "txq_mpw_en"
+
 /**
  * Retrieve integer value from environment variable.
  *
@@ -282,6 +285,8 @@ mlx5_args_check(const char *key, const char *val, void 
*opaque)
priv->txq_inline = tmp;
} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
priv->txqs_inline = tmp;
+   } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
+   priv->mps = !!tmp;
} else {
WARN("%s: unknown parameter", key);
return -EINVAL;
@@ -307,6 +312,7 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs)
MLX5_RXQ_CQE_COMP_EN,
MLX5_TXQ_INLINE,
MLX5_TXQS_MIN_INLINE,
+   MLX5_TXQ_MPW_EN,
NULL,
};
struct rte_kvargs *kvlist;
@@ -503,6 +509,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct 
rte_pci_device *pci_dev)
priv->port = port;
priv->pd = pd;
priv->mtu = ETHER_MTU;
+   priv->mps = mps; /* Enable MPW by default if supported. */
priv->cqe_comp = 1; /* Enable compression by default. */
err = mlx5_args(priv, pci_dev->devargs);
if (err) {
@@ -551,7 +558,12 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct 
rte_pci_device *pci_dev)

priv_get_num_vfs(priv, &num_vfs);
priv->sriov = (num_vfs || sriov);
-   priv->mps = mps;
+   if (priv->mps && !mps) {
+   ERROR("multi-packet send not supported on this device"
+ " (" MLX5_TXQ_MPW_EN ")");
+   err = ENOTSUP;
+   goto port_error;
+   }
/* Allocate and register default RSS hash keys. */
priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
sizeof((*priv->rss_conf)[0]), 0);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index aeea4ff..698a50e 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -584,7 +584,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *info)
  DEV_RX_OFFLOAD_UDP_CKSUM |
  DEV_RX_OFFLOAD_TCP_CKSUM) :
 0);
-   info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
+   if (!priv->mps)
+   info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
if (priv->hw_csum)
info->tx_offload_capa |=
(DEV_TX_OFFLOAD_IPV4_CKSUM |
@@ -1318,7 +1319,17 @@ void
 priv_select_tx_function(struct priv *priv)
 {
priv->dev->tx_pkt_burst = mlx5_tx_burst;
-   if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+   /* Display warning for unsupported configurations. */
+   if (priv->sriov && priv->mps)
+   WARN("multi-packet send WQE cannot be used on a SR-IOV setup");
+   /* Select appropriate TX function. */
+   if ((priv->sriov == 0) && priv->mps && priv->txq_inline) {
+   priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+   DEBUG("sele

[dpdk-dev] [PATCH v7 17/25] mlx5: add support for inline send

2016-06-24 Thread Nelio Laranjeiro
From: Yaacov Hazan <yaac...@mellanox.com>

Implement the send inline feature, which copies packet data directly into
WQEs for improved latency. The maximum packet size and the minimum number
of Tx queues required to qualify for inline send are user-configurable.

This feature is effective when HW causes a performance bottleneck.
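
A simplified sketch of the per-packet decision (using the helpers introduced
by this patch; the VLAN variants and the actual data copy are omitted):

    if (length <= max_inline)
            /* Copy the packet bytes straight into the WQE. */
            mlx5_wqe_write_inline(txq, wqe, addr, length);
    else
            /* Otherwise post a regular data segment pointing to the mbuf. */
            mlx5_wqe_write(txq, wqe, addr, length, lkey);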

Signed-off-by: Yaacov Hazan 
Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
---
 doc/guides/nics/mlx5.rst   |  17 +++
 drivers/net/mlx5/mlx5.c|  15 +++
 drivers/net/mlx5/mlx5.h|   2 +
 drivers/net/mlx5/mlx5_ethdev.c |   5 +
 drivers/net/mlx5/mlx5_rxtx.c   | 273 +
 drivers/net/mlx5/mlx5_rxtx.h   |   2 +
 drivers/net/mlx5/mlx5_txq.c|   4 +
 7 files changed, 318 insertions(+)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 756153b..9ada221 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -154,6 +154,23 @@ Run-time configuration
   allows to save PCI bandwidth and improve performance at the cost of a
   slightly higher CPU usage.  Enabled by default.

+- ``txq_inline`` parameter [int]
+
+  Amount of data to be inlined during TX operations. Improves latency.
+  Can improve PPS performance when PCI back pressure is detected and may be
+  useful for scenarios involving heavy traffic on many queues.
+
+  It is not enabled by default (set to 0) since the additional software
+  logic necessary to handle this mode can lower performance when back
+  pressure is not expected.
+
+- ``txqs_min_inline`` parameter [int]
+
+  Enable inline send only when the number of TX queues is greater or equal
+  to this value.
+
+  This option should be used in combination with ``txq_inline`` above.
+
 Prerequisites
 -

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 630e5e4..73069d2 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -72,6 +72,15 @@
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"

+/* Device parameter to configure inline send. */
+#define MLX5_TXQ_INLINE "txq_inline"
+
+/*
+ * Device parameter to configure the number of TX queues threshold for
+ * enabling inline send.
+ */
+#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"
+
 /**
  * Retrieve integer value from environment variable.
  *
@@ -269,6 +278,10 @@ mlx5_args_check(const char *key, const char *val, void 
*opaque)
}
if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
priv->cqe_comp = !!tmp;
+   } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
+   priv->txq_inline = tmp;
+   } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
+   priv->txqs_inline = tmp;
} else {
WARN("%s: unknown parameter", key);
return -EINVAL;
@@ -292,6 +305,8 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs)
 {
const char **params = (const char *[]){
MLX5_RXQ_CQE_COMP_EN,
+   MLX5_TXQ_INLINE,
+   MLX5_TXQS_MIN_INLINE,
NULL,
};
struct rte_kvargs *kvlist;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8f5a6df..3a86609 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -113,6 +113,8 @@ struct priv {
unsigned int mps:1; /* Whether multi-packet send is supported. */
unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */
unsigned int pending_alarm:1; /* An alarm is pending. */
+   unsigned int txq_inline; /* Maximum packet size for inlining. */
+   unsigned int txqs_inline; /* Queue number threshold for inlining. */
/* RX/TX queues. */
unsigned int rxqs_n; /* RX queues array size. */
unsigned int txqs_n; /* TX queues array size. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 47e64b2..aeea4ff 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1318,6 +1318,11 @@ void
 priv_select_tx_function(struct priv *priv)
 {
priv->dev->tx_pkt_burst = mlx5_tx_burst;
+   if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+   priv->dev->tx_pkt_burst = mlx5_tx_burst_inline;
+   DEBUG("selected inline TX function (%u >= %u queues)",
+ priv->txqs_n, priv->txqs_inline);
+   }
 }

 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 9d992c3..daa22d9 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -376,6 +376,139 @@ mlx5_wqe_write_vlan(struct txq *txq, volatile union 
mlx5_wqe *wqe,
 }

 /**
+ * Write an inline WQE.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param wqe
+ *   Pointer to the WQE to fill.
+ * @param addr
+ *   Buffer data address.

[dpdk-dev] [PATCH v7 16/25] mlx5: replace countdown with threshold for Tx completions

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil <adrien.mazarg...@6wind.com>

Replacing the variable countdown (which depends on the number of
descriptors) with a fixed relative threshold known at compile time improves
performance by reducing the TX queue structure footprint and the amount of
code to manage completions during a burst.

Completions are now requested at most once per burst, once the threshold
has been reached.
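
A sketch of the scheme, consistent with the diff below (elts_comp now counts
descriptors posted since the last completion request):

    comp = txq->elts_comp + i;
    if (comp >= MLX5_TX_COMP_THRESH) {
            /* Request a completion on the last WQE of this burst. */
            wqe->wqe.ctrl.data[2] = htonl(8);
            /* Save elts_head so txq_complete() knows how far to free. */
            wqe->wqe.ctrl.data[3] = elts_head;
            txq->elts_comp = 0;
    } else {
            txq->elts_comp = comp;
    }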

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Vasily Philipov 
---
 drivers/net/mlx5/mlx5_defs.h |  7 +--
 drivers/net/mlx5/mlx5_rxtx.c | 44 +---
 drivers/net/mlx5/mlx5_rxtx.h |  5 ++---
 drivers/net/mlx5/mlx5_txq.c  | 21 -
 4 files changed, 44 insertions(+), 33 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 8d2ec7a..cc2a6f3 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -48,8 +48,11 @@
 /* Maximum number of special flows. */
 #define MLX5_MAX_SPECIAL_FLOWS 4

-/* Request send completion once in every 64 sends, might be less. */
-#define MLX5_PMD_TX_PER_COMP_REQ 64
+/*
+ * Request TX completion every time descriptors reach this threshold since
+ * the previous request. Must be a power of two for performance reasons.
+ */
+#define MLX5_TX_COMP_THRESH 32

 /* RSS Indirection table size. */
 #define RSS_INDIRECTION_TABLE_SIZE 256
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 43236f5..9d992c3 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -156,9 +156,6 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
  * Manage TX completions.
  *
  * When sending a burst, mlx5_tx_burst() posts several WRs.
- * To improve performance, a completion event is only required once every
- * MLX5_PMD_TX_PER_COMP_REQ sends. Doing so discards completion information
- * for other WRs, but this information would not be used anyway.
  *
  * @param txq
  *   Pointer to TX queue structure.
@@ -172,14 +169,16 @@ txq_complete(struct txq *txq)
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
-   unsigned int wqe_ci = (unsigned int)-1;
+   volatile struct mlx5_cqe64 *cqe = NULL;
+   volatile union mlx5_wqe *wqe;

do {
-   unsigned int idx = cq_ci & cqe_cnt;
-   volatile struct mlx5_cqe64 *cqe = &(*txq->cqes)[idx].cqe64;
+   volatile struct mlx5_cqe64 *tmp;

-   if (check_cqe64(cqe, cqe_n, cq_ci) == 1)
+   tmp = &(*txq->cqes)[cq_ci & cqe_cnt].cqe64;
+   if (check_cqe64(tmp, cqe_n, cq_ci))
break;
+   cqe = tmp;
 #ifndef NDEBUG
if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
if (!check_cqe64_seen(cqe))
@@ -193,14 +192,15 @@ txq_complete(struct txq *txq)
return;
}
 #endif /* NDEBUG */
-   wqe_ci = ntohs(cqe->wqe_counter);
++cq_ci;
} while (1);
-   if (unlikely(wqe_ci == (unsigned int)-1))
+   if (unlikely(cqe == NULL))
return;
+   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)];
+   elts_tail = wqe->wqe.ctrl.data[3];
+   assert(elts_tail < txq->wqe_n);
/* Free buffers. */
-   elts_tail = (wqe_ci + 1) & (elts_n - 1);
-   do {
+   while (elts_free != elts_tail) {
struct rte_mbuf *elt = (*txq->elts)[elts_free];
unsigned int elts_free_next =
(elts_free + 1) & (elts_n - 1);
@@ -216,7 +216,7 @@ txq_complete(struct txq *txq)
/* Only one segment needs to be freed. */
rte_pktmbuf_free_seg(elt);
elts_free = elts_free_next;
-   } while (elts_free != elts_tail);
+   }
txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
/* Update the consumer index. */
@@ -437,6 +437,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
const unsigned int elts_n = txq->elts_n;
unsigned int i;
unsigned int max;
+   unsigned int comp;
volatile union mlx5_wqe *wqe;
struct rte_mbuf *buf;

@@ -486,13 +487,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
buf->vlan_tci);
else
mlx5_wqe_write(txq, wqe, addr, length, lkey);
-   /* Request completion if needed. */
-   if (unlikely(--txq->elts_comp == 0)) {
-   wqe->wqe.ctrl.data[2] = htonl(8);
-   txq->elts_comp = txq->elts_comp_cd_init;
-   } else {
-   wqe->wqe.ctrl.data[2] = 0;
-   }
+   wqe->wqe.

[dpdk-dev] [PATCH v7 15/25] mlx5: handle Rx CQE compression

2016-06-24 Thread Nelio Laranjeiro
Mini (compressed) CQEs are returned by the NIC when PCI back pressure is
detected, in which case the first CQE64 contains common packet information
followed by a number of CQE8 providing the rest, followed by a matching
number of empty CQE64 entries to be used by software for decompression.

Before decompression:

      0           1           2                  6           7           8
  +-------+  +---------+  +-------+        +-------+  +-------+  +-------+
  | CQE64 |  |  CQE64  |  | CQE64 |        | CQE64 |  | CQE64 |  | CQE64 |
  |-------|  |---------|  |-------|        |-------|  |-------|  |-------|
  | ..... |  | cqe8[0] |  |       |   .    |       |  |       |  | ..... |
  | ..... |  | cqe8[1] |  |       |   .    |       |  |       |  | ..... |
  | ..... |  | ....... |  |       |   .    |       |  |       |  | ..... |
  | ..... |  | cqe8[7] |  |       |        |       |  |       |  | ..... |
  +-------+  +---------+  +-------+        +-------+  +-------+  +-------+

After decompression:

      0          1       ...       8
  +-------+  +-------+         +-------+
  | CQE64 |  | CQE64 |         | CQE64 |
  |-------|  |-------|         |-------|
  | ..... |  | ..... |    .    | ..... |
  | ..... |  | ..... |    .    | ..... |
  | ..... |  | ..... |    .    | ..... |
  | ..... |  | ..... |         | ..... |
  +-------+  +-------+         +-------+

This patch does not perform the entire decompression step, as it would be
really expensive; instead, the first CQE64 is consumed and an internal
context is maintained to interpret the following CQE8 entries directly.

Intermediate empty CQE64 entries are handed back to HW without further
processing.
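
A hedged sketch of that control flow (the helper names below are purely
illustrative and do not appear in the patch):

    if (zip_session_open(rxq)) {
            /* Continue consuming mini-CQEs from the stored title CQE64. */
            len = next_mini_cqe_byte_count(rxq);
    } else {
            cqe = next_cqe64(rxq);
            if (cqe_is_compressed(cqe)) {
                    /* Keep an internal context instead of decompressing. */
                    open_zip_session(rxq, cqe);
                    len = next_mini_cqe_byte_count(rxq);
            } else {
                    len = ntohl(cqe->byte_cnt);
            }
    }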

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
Signed-off-by: Olga Shern 
Signed-off-by: Vasily Philipov 
---
 doc/guides/nics/mlx5.rst |   6 +
 drivers/net/mlx5/mlx5.c  |  24 +++-
 drivers/net/mlx5/mlx5.h  |   1 +
 drivers/net/mlx5/mlx5_rxq.c  |   9 +-
 drivers/net/mlx5/mlx5_rxtx.c | 265 +--
 drivers/net/mlx5/mlx5_rxtx.h |  11 ++
 drivers/net/mlx5/mlx5_txq.c  |   5 +
 7 files changed, 253 insertions(+), 68 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 3a07928..756153b 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -148,6 +148,12 @@ Run-time configuration

 - **ethtool** operations on related kernel interfaces also affect the PMD.

+- ``rxq_cqe_comp_en`` parameter [int]
+
+  A nonzero value enables the compression of CQE on RX side. This feature
+  allows to save PCI bandwidth and improve performance at the cost of a
+  slightly higher CPU usage.  Enabled by default.
+
 Prerequisites
 -

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index d08d4ac..630e5e4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -69,6 +69,9 @@
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"

+/* Device parameter to enable RX completion queue compression. */
+#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
+
 /**
  * Retrieve integer value from environment variable.
  *
@@ -256,12 +259,21 @@ static int
 mlx5_args_check(const char *key, const char *val, void *opaque)
 {
struct priv *priv = opaque;
+   unsigned long tmp;

-   /* No parameters are expected at the moment. */
-   (void)priv;
-   (void)val;
-   WARN("%s: unknown parameter", key);
-   return -EINVAL;
+   errno = 0;
+   tmp = strtoul(val, NULL, 0);
+   if (errno) {
+   WARN("%s: \"%s\" is not a valid integer", key, val);
+   return errno;
+   }
+   if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
+   priv->cqe_comp = !!tmp;
+   } else {
+   WARN("%s: unknown parameter", key);
+   return -EINVAL;
+   }
+   return 0;
 }

 /**
@@ -279,6 +291,7 @@ static int
 mlx5_args(struct priv *priv, struct rte_devargs *devargs)
 {
const char **params = (const char *[]){
+   MLX5_RXQ_CQE_COMP_EN,
NULL,
};
struct rte_kvargs *kvlist;
@@ -475,6 +488,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct 
rte_pci_device *pci_dev)
priv->port = port;
priv->pd = pd;
priv->mtu = ETHER_MTU;
+   priv->cqe_comp = 1; /* Enable compression by default. */
err = mlx5_args(priv, pci_dev->devargs);
if (err) {
ERROR("failed to process device arguments: %s",
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3dca03d..8f5a6df 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -111,6 +111,7 @@ struct priv {
unsigned int hw_padding:1; /* End alignment padding is supported. */
unsigned int sriov:1; /* This is a VF or PF with VF devices. */
unsigned int mps:1; /* Whether multi-packet send is supported. */
+   unsigned int cqe_comp:1; /* W

[dpdk-dev] [PATCH v7 14/25] mlx5: refactor Tx data path

2016-06-24 Thread Nelio Laranjeiro
Bypass Verbs to improve Tx performance.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Yaacov Hazan 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/Makefile  |   5 -
 drivers/net/mlx5/mlx5_ethdev.c |  10 +-
 drivers/net/mlx5/mlx5_mr.c |   4 +-
 drivers/net/mlx5/mlx5_rxtx.c   | 361 ++---
 drivers/net/mlx5/mlx5_rxtx.h   |  52 +++---
 drivers/net/mlx5/mlx5_txq.c| 219 +
 6 files changed, 347 insertions(+), 304 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index dc99797..66687e8 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -106,11 +106,6 @@ mlx5_autoconf.h.new: FORCE
 mlx5_autoconf.h.new: $(RTE_SDK)/scripts/auto-config-h.sh
$Q $(RM) -f -- '$@'
$Q sh -- '$<' '$@' \
-   HAVE_VERBS_VLAN_INSERTION \
-   infiniband/verbs.h \
-   enum IBV_EXP_RECEIVE_WQ_CVLAN_INSERTION \
-   $(AUTOCONF_OUTPUT)
-   $Q sh -- '$<' '$@' \
HAVE_VERBS_IBV_EXP_CQ_COMPRESSED_CQE \
infiniband/verbs_exp.h \
enum IBV_EXP_CQ_COMPRESSED_CQE \
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 16b05d3..47e64b2 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1242,11 +1242,11 @@ mlx5_secondary_data_setup(struct priv *priv)
txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl), 0,
 primary_txq_ctrl->socket);
if (txq_ctrl != NULL) {
-   if (txq_setup(priv->dev,
- primary_txq_ctrl,
- primary_txq->elts_n,
- primary_txq_ctrl->socket,
- NULL) == 0) {
+   if (txq_ctrl_setup(priv->dev,
+  primary_txq_ctrl,
+  primary_txq->elts_n,
+  primary_txq_ctrl->socket,
+  NULL) == 0) {
txq_ctrl->txq.stats.idx =
primary_txq->stats.idx;
tx_queues[i] = &txq_ctrl->txq;
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 1d8bf72..67dfefa 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -190,7 +190,7 @@ txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, 
unsigned int idx)
/* Add a new entry, register MR first. */
DEBUG("%p: discovered new memory pool \"%s\" (%p)",
  (void *)txq_ctrl, mp->name, (void *)mp);
-   mr = mlx5_mp2mr(txq_ctrl->txq.priv->pd, mp);
+   mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
if (unlikely(mr == NULL)) {
DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
  (void *)txq_ctrl);
@@ -209,7 +209,7 @@ txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, 
unsigned int idx)
/* Store the new entry. */
txq_ctrl->txq.mp2mr[idx].mp = mp;
txq_ctrl->txq.mp2mr[idx].mr = mr;
-   txq_ctrl->txq.mp2mr[idx].lkey = mr->lkey;
+   txq_ctrl->txq.mp2mr[idx].lkey = htonl(mr->lkey);
DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
  (void *)txq_ctrl, mp->name, (void *)mp,
  txq_ctrl->txq.mp2mr[idx].lkey);
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index f2d00bf..2372fce 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -119,68 +119,52 @@ get_cqe64(volatile struct mlx5_cqe cqes[],
  *
  * @param txq
  *   Pointer to TX queue structure.
- *
- * @return
- *   0 on success, -1 on failure.
  */
-static int
+static void
 txq_complete(struct txq *txq)
 {
-   unsigned int elts_comp = txq->elts_comp;
-   unsigned int elts_tail = txq->elts_tail;
-   unsigned int elts_free = txq->elts_tail;
const unsigned int elts_n = txq->elts_n;
-   int wcs_n;
-
-   if (unlikely(elts_comp == 0))
-   return 0;
-#ifdef DEBUG_SEND
-   DEBUG("%p: processing %u work requests completions",
- (void *)txq, elts_comp);
-#endif
-   wcs_n = txq->poll_cnt(txq->cq, elts_comp);
-   if (unlikely(wcs_n == 0))
-   return 0;
-   if (unlikely(wcs_n < 0)) {
-   DEBUG("%p: ibv_poll_cq() failed (wcs_n=%d)",
- (void *)txq, wcs_n);
-   return -1;
+   const unsigned int cqe_n = txq->cqe_n;
+   uint16_t elts_free = txq->elts_tail;
+   uint16_t elts_tail;
+   uint16_t cq_ci = txq->cq_ci;
+   unsig

[dpdk-dev] [PATCH v7 13/25] mlx5: refactor Rx data path

2016-06-24 Thread Nelio Laranjeiro
Bypass Verbs to improve RX performance.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Yaacov Hazan 
Signed-off-by: Adrien Mazarguil 
Signed-off-by: Vasily Philipov 
---
 drivers/net/mlx5/mlx5_ethdev.c |   4 +-
 drivers/net/mlx5/mlx5_fdir.c   |   2 +-
 drivers/net/mlx5/mlx5_rxq.c| 303 -
 drivers/net/mlx5/mlx5_rxtx.c   | 289 ---
 drivers/net/mlx5/mlx5_rxtx.h   |  38 +++---
 drivers/net/mlx5/mlx5_vlan.c   |   3 +-
 6 files changed, 326 insertions(+), 313 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 759434e..16b05d3 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1263,7 +1263,9 @@ mlx5_secondary_data_setup(struct priv *priv)
}
/* RX queues. */
for (i = 0; i != nb_rx_queues; ++i) {
-   struct rxq *primary_rxq = (*sd->primary_priv->rxqs)[i];
+   struct rxq_ctrl *primary_rxq =
+   container_of((*sd->primary_priv->rxqs)[i],
+struct rxq_ctrl, rxq);

if (primary_rxq == NULL)
continue;
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 1850218..73eb00e 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -431,7 +431,7 @@ priv_get_fdir_queue(struct priv *priv, uint16_t idx)
ind_init_attr = (struct ibv_exp_rwq_ind_table_init_attr){
.pd = priv->pd,
.log_ind_tbl_size = 0,
-   .ind_tbl = &((*priv->rxqs)[idx]->wq),
+   .ind_tbl = &rxq_ctrl->wq,
.comp_mask = 0,
};

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b474a18..b1d6cfe 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -43,6 +43,8 @@
 #pragma GCC diagnostic ignored "-pedantic"
 #endif
 #include 
+#include 
+#include 
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -373,8 +375,13 @@ priv_create_hash_rxqs(struct priv *priv)
DEBUG("indirection table extended to assume %u WQs",
  priv->reta_idx_n);
}
-   for (i = 0; (i != priv->reta_idx_n); ++i)
-   wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq;
+   for (i = 0; (i != priv->reta_idx_n); ++i) {
+   struct rxq_ctrl *rxq_ctrl;
+
+   rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
+   struct rxq_ctrl, rxq);
+   wqs[i] = rxq_ctrl->wq;
+   }
/* Get number of hash RX queues to configure. */
for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
hash_rxqs_n += ind_table_init[i].hash_types_n;
@@ -638,21 +645,13 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int 
elts_n,
   struct rte_mbuf **pool)
 {
unsigned int i;
-   struct rxq_elt (*elts)[elts_n] =
-   rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq_ctrl->socket);
int ret = 0;

-   if (elts == NULL) {
-   ERROR("%p: can't allocate packets array", (void *)rxq_ctrl);
-   ret = ENOMEM;
-   goto error;
-   }
/* For each WR (packet). */
for (i = 0; (i != elts_n); ++i) {
-   struct rxq_elt *elt = &(*elts)[i];
-   struct ibv_sge *sge = &(*elts)[i].sge;
struct rte_mbuf *buf;
+   volatile struct mlx5_wqe_data_seg *scat =
+   &(*rxq_ctrl->rxq.wqes)[i];

if (pool != NULL) {
buf = *(pool++);
@@ -666,40 +665,36 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int 
elts_n,
ret = ENOMEM;
goto error;
}
-   elt->buf = buf;
/* Headroom is reserved by rte_pktmbuf_alloc(). */
assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
/* Buffer is supposed to be empty. */
assert(rte_pktmbuf_data_len(buf) == 0);
assert(rte_pktmbuf_pkt_len(buf) == 0);
-   /* sge->addr must be able to store a pointer. */
-   assert(sizeof(sge->addr) >= sizeof(uintptr_t));
-   /* SGE keeps its headroom. */
-   sge->addr = (uintptr_t)
-   ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
-   sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
-   sge->lkey = rxq_ctrl->mr->lkey;
-   /* Redundant check for tailroom. */
-   assert(sge->length == rte_pktmbuf_tailroom(buf));
+   assert(!buf->next);
+   P

[dpdk-dev] [PATCH v7 12/25] mlx5: add Tx/Rx burst function selection wrapper

2016-06-24 Thread Nelio Laranjeiro
These wrappers are meant to prevent code duplication later.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5.h|  2 ++
 drivers/net/mlx5/mlx5_ethdev.c | 34 --
 drivers/net/mlx5/mlx5_txq.c|  2 +-
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 935e1b0..3dca03d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -196,6 +196,8 @@ void priv_dev_interrupt_handler_install(struct priv *, 
struct rte_eth_dev *);
 int mlx5_set_link_down(struct rte_eth_dev *dev);
 int mlx5_set_link_up(struct rte_eth_dev *dev);
 struct priv *mlx5_secondary_data_setup(struct priv *priv);
+void priv_select_tx_function(struct priv *);
+void priv_select_rx_function(struct priv *);

 /* mlx5_mac.c */

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 4095a06..759434e 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1099,8 +1099,8 @@ priv_set_link(struct priv *priv, int up)
err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
if (err)
return err;
-   dev->rx_pkt_burst = mlx5_rx_burst;
-   dev->tx_pkt_burst = mlx5_tx_burst;
+   priv_select_tx_function(priv);
+   priv_select_rx_function(priv);
} else {
err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
if (err)
@@ -1290,13 +1290,11 @@ mlx5_secondary_data_setup(struct priv *priv)
rte_mb();
priv->dev->data = &sd->data;
rte_mb();
-   priv->dev->tx_pkt_burst = mlx5_tx_burst;
-   priv->dev->rx_pkt_burst = removed_rx_burst;
+   priv_select_tx_function(priv);
+   priv_select_rx_function(priv);
priv_unlock(priv);
 end:
/* More sanity checks. */
-   assert(priv->dev->tx_pkt_burst == mlx5_tx_burst);
-   assert(priv->dev->rx_pkt_burst == removed_rx_burst);
assert(priv->dev->data == &sd->data);
rte_spinlock_unlock(&sd->lock);
return priv;
@@ -1307,3 +1305,27 @@ error:
rte_spinlock_unlock(&sd->lock);
return NULL;
 }
+
+/**
+ * Configure the TX function to use.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+priv_select_tx_function(struct priv *priv)
+{
+   priv->dev->tx_pkt_burst = mlx5_tx_burst;
+}
+
+/**
+ * Configure the RX function to use.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+priv_select_rx_function(struct priv *priv)
+{
+   priv->dev->rx_pkt_burst = mlx5_rx_burst;
+}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 51210f2..ec4488a 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -478,7 +478,7 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, 
uint16_t desc,
  (void *)dev, (void *)txq_ctrl);
(*priv->txqs)[idx] = &txq_ctrl->txq;
/* Update send callback. */
-   dev->tx_pkt_burst = mlx5_tx_burst;
+   priv_select_tx_function(priv);
}
priv_unlock(priv);
return -ret;
-- 
2.1.4



[dpdk-dev] [PATCH v7 11/25] mlx5: add support for configuration through kvargs

2016-06-24 Thread Nelio Laranjeiro
The intent is to replace the remaining compile-time options and environment
variables with a common means of runtime configuration. This commit only
adds the kvargs handling code; subsequent commits will update the rest.
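
Usage note (illustrative only; the PCI address is a placeholder and the
parameter shown, rxq_cqe_comp_en, is only registered by a later patch in
this series): such parameters are expected to be supplied as device
arguments on the EAL command line, e.g. "-w 0000:83:00.0,rxq_cqe_comp_en=0".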

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5.c | 73 +
 1 file changed, 73 insertions(+)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 3f45d84..d08d4ac 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 /* Verbs header. */
@@ -57,6 +58,7 @@
 #include 
 #include 
 #include 
+#include 
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -237,6 +239,71 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr)
return ret;
 }

+/**
+ * Verify and store value for device argument.
+ *
+ * @param[in] key
+ *   Key argument to verify.
+ * @param[in] val
+ *   Value associated with key.
+ * @param opaque
+ *   User data.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mlx5_args_check(const char *key, const char *val, void *opaque)
+{
+   struct priv *priv = opaque;
+
+   /* No parameters are expected at the moment. */
+   (void)priv;
+   (void)val;
+   WARN("%s: unknown parameter", key);
+   return -EINVAL;
+}
+
+/**
+ * Parse device parameters.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param devargs
+ *   Device arguments structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+mlx5_args(struct priv *priv, struct rte_devargs *devargs)
+{
+   const char **params = (const char *[]){
+   NULL,
+   };
+   struct rte_kvargs *kvlist;
+   int ret = 0;
+   int i;
+
+   if (devargs == NULL)
+   return 0;
+   /* Following UGLY cast is done to pass checkpatch. */
+   kvlist = rte_kvargs_parse(devargs->args, params);
+   if (kvlist == NULL)
+   return 0;
+   /* Process parameters. */
+   for (i = 0; (params[i] != NULL); ++i) {
+   if (rte_kvargs_count(kvlist, params[i])) {
+   ret = rte_kvargs_process(kvlist, params[i],
+mlx5_args_check, priv);
+   if (ret != 0)
+   return ret;
+   }
+   }
+   rte_kvargs_free(kvlist);
+   return 0;
+}
+
 static struct eth_driver mlx5_driver;

 /**
@@ -408,6 +475,12 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct 
rte_pci_device *pci_dev)
priv->port = port;
priv->pd = pd;
priv->mtu = ETHER_MTU;
+   err = mlx5_args(priv, pci_dev->devargs);
+   if (err) {
+   ERROR("failed to process device arguments: %s",
+ strerror(err));
+   goto port_error;
+   }
if (ibv_exp_query_device(ctx, _device_attr)) {
ERROR("ibv_exp_query_device() failed");
goto port_error;
-- 
2.1.4



[dpdk-dev] [PATCH v7 10/25] mlx5: add definitions for data path without Verbs

2016-06-24 Thread Nelio Laranjeiro
These structures and macros extend those exposed by libmlx5 (in mlx5_hw.h)
to let the PMD manage work queue and completion queue elements directly.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_prm.h | 163 
 1 file changed, 163 insertions(+)
 create mode 100644 drivers/net/mlx5/mlx5_prm.h

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
new file mode 100644
index 000..5db219b
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -0,0 +1,163 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX5_PRM_H_
+#define RTE_PMD_MLX5_PRM_H_
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include 
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+/* Get CQE owner bit. */
+#define MLX5_CQE_OWNER(op_own) ((op_own) & MLX5_CQE_OWNER_MASK)
+
+/* Get CQE format. */
+#define MLX5_CQE_FORMAT(op_own) (((op_own) & MLX5E_CQE_FORMAT_MASK) >> 2)
+
+/* Get CQE opcode. */
+#define MLX5_CQE_OPCODE(op_own) (((op_own) & 0xf0) >> 4)
+
+/* Get CQE solicited event. */
+#define MLX5_CQE_SE(op_own) (((op_own) >> 1) & 1)
+
+/* Invalidate a CQE. */
+#define MLX5_CQE_INVALIDATE (MLX5_CQE_INVALID << 4)
+
+/* CQE value to inform that VLAN is stripped. */
+#define MLX5_CQE_VLAN_STRIPPED 0x1
+
+/* Maximum number of packets a multi-packet WQE can handle. */
+#define MLX5_MPW_DSEG_MAX 5
+
+/* Room for inline data in regular work queue element. */
+#define MLX5_WQE64_INL_DATA 12
+
+/* Room for inline data in multi-packet WQE. */
+#define MLX5_MWQE64_INL_DATA 28
+
+/* Subset of struct mlx5_wqe_eth_seg. */
+struct mlx5_wqe_eth_seg_small {
+   uint32_t rsvd0;
+   uint8_t cs_flags;
+   uint8_t rsvd1;
+   uint16_t mss;
+   uint32_t rsvd2;
+   uint16_t inline_hdr_sz;
+};
+
+/* Regular WQE. */
+struct mlx5_wqe_regular {
+   union {
+   struct mlx5_wqe_ctrl_seg ctrl;
+   uint32_t data[4];
+   } ctrl;
+   struct mlx5_wqe_eth_seg eseg;
+   struct mlx5_wqe_data_seg dseg;
+} __rte_aligned(64);
+
+/* Inline WQE. */
+struct mlx5_wqe_inl {
+   union {
+   struct mlx5_wqe_ctrl_seg ctrl;
+   uint32_t data[4];
+   } ctrl;
+   struct mlx5_wqe_eth_seg eseg;
+   uint32_t byte_cnt;
+   uint8_t data[MLX5_WQE64_INL_DATA];
+} __rte_aligned(64);
+
+/* Multi-packet WQE. */
+struct mlx5_wqe_mpw {
+   union {
+   struct mlx5_wqe_ctrl_seg ctrl;
+   uint32_t data[4];
+   } ctrl;
+   struct mlx5_wqe_eth_seg_small eseg;
+   struct mlx5_wqe_data_seg dseg[2];
+} __rte_aligned(64);
+
+/* Multi-packet WQE with inline. */
+struct mlx5_wqe_mpw_inl {
+   union {
+   struct mlx5_wqe_ctrl_seg ctrl;
+   uint32_t data[4];
+   } ctrl;
+   struct mlx5_wqe_eth_seg_small eseg;
+   uint32_t byte_cnt;
+   uint8_t data[MLX5_MWQE64_INL_DATA];
+} __rte_aligned(64);
+
+/* Union of all WQE types. */
+union mlx5_wqe {
+   struct mlx5_wqe_regular wqe;
+   struct mlx5_wqe_inl inl;
+   struct mlx5_wqe_mpw mpw;
+   struct mlx5_wqe_mpw_inl mpw_inl;
+   u

[dpdk-dev] [PATCH v7 09/25] mlx5: update prerequisites for upcoming enhancements

2016-06-24 Thread Nelio Laranjeiro
The latest version of Mellanox OFED exposes hardware definitions necessary
to implement data path operation bypassing Verbs. Update the minimum
version requirement to MLNX_OFED >= 3.3 and clean up compatibility checks
for previous releases.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 doc/guides/nics/mlx5.rst   | 44 +++---
 drivers/net/mlx5/Makefile  | 39 -
 drivers/net/mlx5/mlx5.c| 23 --
 drivers/net/mlx5/mlx5.h|  5 +
 drivers/net/mlx5/mlx5_defs.h   |  9 -
 drivers/net/mlx5/mlx5_fdir.c   | 10 --
 drivers/net/mlx5/mlx5_rxmode.c |  8 
 drivers/net/mlx5/mlx5_rxq.c| 30 
 drivers/net/mlx5/mlx5_rxtx.c   |  4 
 drivers/net/mlx5/mlx5_rxtx.h   |  8 
 drivers/net/mlx5/mlx5_txq.c|  2 --
 drivers/net/mlx5/mlx5_vlan.c   |  3 ---
 12 files changed, 16 insertions(+), 169 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 77fa957..3a07928 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -125,16 +125,6 @@ These options can be modified in the ``.config`` file.
 Environment variables
 ~

-- ``MLX5_ENABLE_CQE_COMPRESSION``
-
-  A nonzero value lets ConnectX-4 return smaller completion entries to
-  improve performance when PCI backpressure is detected. It is most useful
-  for scenarios involving heavy traffic on many queues.
-
-  Since the additional software logic necessary to handle this mode can
-  lower performance when there is no backpressure, it is not enabled by
-  default.
-
 - ``MLX5_PMD_ENABLE_PADDING``

   Enables HW packet padding in PCI bus transactions.
@@ -211,40 +201,12 @@ DPDK and must be installed separately:

 Currently supported by DPDK:

-- Mellanox OFED **3.1-1.0.3**, **3.1-1.5.7.1** or **3.2-2.0.0.0** depending
-  on usage.
-
-The following features are supported with version **3.1-1.5.7.1** and
-above only:
-
-- IPv6, UPDv6, TCPv6 RSS.
-- RX checksum offloads.
-- IBM POWER8.
-
-The following features are supported with version **3.2-2.0.0.0** and
-above only:
-
-- Flow director.
-- RX VLAN stripping.
-- TX VLAN insertion.
-- RX CRC stripping configuration.
+- Mellanox OFED **3.3-1.0.0.0**.

 - Minimum firmware version:

-  With MLNX_OFED **3.1-1.0.3**:
-
-  - ConnectX-4: **12.12.1240**
-  - ConnectX-4 Lx: **14.12.1100**
-
-  With MLNX_OFED **3.1-1.5.7.1**:
-
-  - ConnectX-4: **12.13.0144**
-  - ConnectX-4 Lx: **14.13.0144**
-
-  With MLNX_OFED **3.2-2.0.0.0**:
-
-  - ConnectX-4: **12.14.2036**
-  - ConnectX-4 Lx: **14.14.2036**
+  - ConnectX-4: **12.16.1006**
+  - ConnectX-4 Lx: **14.16.1006**

 Getting Mellanox OFED
 ~
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 289c85e..dc99797 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -106,42 +106,19 @@ mlx5_autoconf.h.new: FORCE
 mlx5_autoconf.h.new: $(RTE_SDK)/scripts/auto-config-h.sh
$Q $(RM) -f -- '$@'
$Q sh -- '$<' '$@' \
-   HAVE_EXP_QUERY_DEVICE \
-   infiniband/verbs.h \
-   type 'struct ibv_exp_device_attr' $(AUTOCONF_OUTPUT)
-   $Q sh -- '$<' '$@' \
-   HAVE_FLOW_SPEC_IPV6 \
-   infiniband/verbs.h \
-   type 'struct ibv_exp_flow_spec_ipv6' $(AUTOCONF_OUTPUT)
-   $Q sh -- '$<' '$@' \
-   HAVE_EXP_QP_BURST_CREATE_ENABLE_MULTI_PACKET_SEND_WR \
-   infiniband/verbs.h \
-   enum IBV_EXP_QP_BURST_CREATE_ENABLE_MULTI_PACKET_SEND_WR \
-   $(AUTOCONF_OUTPUT)
-   $Q sh -- '$<' '$@' \
-   HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS \
-   infiniband/verbs.h \
-   enum IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS \
-   $(AUTOCONF_OUTPUT)
-   $Q sh -- '$<' '$@' \
-   HAVE_EXP_CQ_RX_TCP_PACKET \
+   HAVE_VERBS_VLAN_INSERTION \
infiniband/verbs.h \
-   enum IBV_EXP_CQ_RX_TCP_PACKET \
+   enum IBV_EXP_RECEIVE_WQ_CVLAN_INSERTION \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
-   HAVE_VERBS_FCS \
-   infiniband/verbs.h \
-   enum IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS \
+   HAVE_VERBS_IBV_EXP_CQ_COMPRESSED_CQE \
+   infiniband/verbs_exp.h \
+   enum IBV_EXP_CQ_COMPRESSED_CQE \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
-   HAVE_VERBS_RX_END_PADDING \
-   infiniband/verbs.h \
-   enum IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING \
-   $(AUTOCONF_OUTPUT)
-   $Q sh -- '$<' '$@' \
-   HAVE_VERBS_VLAN_INSERTION \
-   infiniband/verbs.h \
-   enum IBV_EXP

[dpdk-dev] [PATCH v7 08/25] mlx5: split Rx queue structure

2016-06-24 Thread Nelio Laranjeiro
To keep the data path as efficient as possible, move fields only useful to
the control path into a new structure, rxq_ctrl.
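
The split relies on embedding the data path structure inside the control
structure so that container_of() can recover one from the other, as the diff
below does for rxq/rxq_ctrl. A minimal self-contained sketch of that pattern,
with illustrative field names and a hypothetical rxq_to_ctrl() helper rather
than the exact layout from this patch:

    #include <stddef.h>

    /* Equivalent of the container_of() helper available to the driver. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    /* Data path fields only (illustrative). */
    struct rxq {
            unsigned int elts_n;
    };

    /* Control path fields wrapping the data path structure (illustrative). */
    struct rxq_ctrl {
            unsigned int socket;
            struct rxq rxq;         /* embedded data path structure */
    };

    /* Hypothetical helper: recover the control structure from a data path
     * pointer, which is what container_of(rxq, struct rxq_ctrl, rxq) does
     * throughout the patch. */
    static inline struct rxq_ctrl *
    rxq_to_ctrl(struct rxq *rxq)
    {
            return container_of(rxq, struct rxq_ctrl, rxq);
    }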

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5.c  |   6 +-
 drivers/net/mlx5/mlx5_fdir.c |   8 +-
 drivers/net/mlx5/mlx5_rxq.c  | 252 ++-
 drivers/net/mlx5/mlx5_rxtx.c |   1 -
 drivers/net/mlx5/mlx5_rxtx.h |  13 ++-
 5 files changed, 150 insertions(+), 130 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 3d30e00..27a7a30 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -122,12 +122,14 @@ mlx5_dev_close(struct rte_eth_dev *dev)
usleep(1000);
for (i = 0; (i != priv->rxqs_n); ++i) {
struct rxq *rxq = (*priv->rxqs)[i];
+   struct rxq_ctrl *rxq_ctrl;

if (rxq == NULL)
continue;
+   rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
(*priv->rxqs)[i] = NULL;
-   rxq_cleanup(rxq);
-   rte_free(rxq);
+   rxq_cleanup(rxq_ctrl);
+   rte_free(rxq_ctrl);
}
priv->rxqs_n = 0;
priv->rxqs = NULL;
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 63e43ad..e3b97ba 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -424,7 +424,9 @@ create_flow:
 static struct fdir_queue *
 priv_get_fdir_queue(struct priv *priv, uint16_t idx)
 {
-   struct fdir_queue *fdir_queue = &(*priv->rxqs)[idx]->fdir_queue;
+   struct rxq_ctrl *rxq_ctrl =
+   container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
+   struct fdir_queue *fdir_queue = &rxq_ctrl->fdir_queue;
struct ibv_exp_rwq_ind_table *ind_table = NULL;
struct ibv_qp *qp = NULL;
struct ibv_exp_rwq_ind_table_init_attr ind_init_attr;
@@ -629,8 +631,10 @@ priv_fdir_disable(struct priv *priv)
/* Run on every RX queue to destroy related flow director QP and
 * indirection table. */
for (i = 0; (i != priv->rxqs_n); i++) {
-   fdir_queue = &(*priv->rxqs)[i]->fdir_queue;
+   struct rxq_ctrl *rxq_ctrl =
+   container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq);

+   fdir_queue = &rxq_ctrl->fdir_queue;
if (fdir_queue->qp != NULL) {
claim_zero(ibv_destroy_qp(fdir_queue->qp));
fdir_queue->qp = NULL;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 4000624..05b7c91 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -636,7 +636,7 @@ priv_rehash_flows(struct priv *priv)
 /**
  * Allocate RX queue elements.
  *
- * @param rxq
+ * @param rxq_ctrl
  *   Pointer to RX queue structure.
  * @param elts_n
  *   Number of elements to allocate.
@@ -648,16 +648,17 @@ priv_rehash_flows(struct priv *priv)
  *   0 on success, errno value on failure.
  */
 static int
-rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
+rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
+  struct rte_mbuf **pool)
 {
unsigned int i;
struct rxq_elt (*elts)[elts_n] =
rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq->socket);
+ rxq_ctrl->socket);
int ret = 0;

if (elts == NULL) {
-   ERROR("%p: can't allocate packets array", (void *)rxq);
+   ERROR("%p: can't allocate packets array", (void *)rxq_ctrl);
ret = ENOMEM;
goto error;
}
@@ -672,10 +673,10 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, 
struct rte_mbuf **pool)
assert(buf != NULL);
rte_pktmbuf_reset(buf);
} else
-   buf = rte_pktmbuf_alloc(rxq->mp);
+   buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
if (buf == NULL) {
assert(pool == NULL);
-   ERROR("%p: empty mbuf pool", (void *)rxq);
+   ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
ret = ENOMEM;
goto error;
}
@@ -691,15 +692,15 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, 
struct rte_mbuf **pool)
sge->addr = (uintptr_t)
((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
-   sge->lkey = rxq->mr->lkey;
+   sge-

[dpdk-dev] [PATCH v7 07/25] mlx5: split Tx queue structure

2016-06-24 Thread Nelio Laranjeiro
To keep the data path as efficient as possible, move fields only useful to
the control path into a new structure, txq_ctrl.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5.c|  21 +++--
 drivers/net/mlx5/mlx5_ethdev.c |  28 +++---
 drivers/net/mlx5/mlx5_mr.c |  39 
 drivers/net/mlx5/mlx5_rxtx.h   |   9 +-
 drivers/net/mlx5/mlx5_txq.c| 200 +
 5 files changed, 160 insertions(+), 137 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 350028b..3d30e00 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -98,7 +98,6 @@ static void
 mlx5_dev_close(struct rte_eth_dev *dev)
 {
struct priv *priv = mlx5_get_priv(dev);
-   void *tmp;
unsigned int i;

priv_lock(priv);
@@ -122,12 +121,13 @@ mlx5_dev_close(struct rte_eth_dev *dev)
/* XXX race condition if mlx5_rx_burst() is still running. */
usleep(1000);
for (i = 0; (i != priv->rxqs_n); ++i) {
-   tmp = (*priv->rxqs)[i];
-   if (tmp == NULL)
+   struct rxq *rxq = (*priv->rxqs)[i];
+
+   if (rxq == NULL)
continue;
(*priv->rxqs)[i] = NULL;
-   rxq_cleanup(tmp);
-   rte_free(tmp);
+   rxq_cleanup(rxq);
+   rte_free(rxq);
}
priv->rxqs_n = 0;
priv->rxqs = NULL;
@@ -136,12 +136,15 @@ mlx5_dev_close(struct rte_eth_dev *dev)
/* XXX race condition if mlx5_tx_burst() is still running. */
usleep(1000);
for (i = 0; (i != priv->txqs_n); ++i) {
-   tmp = (*priv->txqs)[i];
-   if (tmp == NULL)
+   struct txq *txq = (*priv->txqs)[i];
+   struct txq_ctrl *txq_ctrl;
+
+   if (txq == NULL)
continue;
+   txq_ctrl = container_of(txq, struct txq_ctrl, txq);
(*priv->txqs)[i] = NULL;
-   txq_cleanup(tmp);
-   rte_free(tmp);
+   txq_cleanup(txq_ctrl);
+   rte_free(txq_ctrl);
}
priv->txqs_n = 0;
priv->txqs = NULL;
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index ca57021..4095a06 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1232,28 +1232,32 @@ mlx5_secondary_data_setup(struct priv *priv)
/* TX queues. */
for (i = 0; i != nb_tx_queues; ++i) {
struct txq *primary_txq = (*sd->primary_priv->txqs)[i];
-   struct txq *txq;
+   struct txq_ctrl *primary_txq_ctrl;
+   struct txq_ctrl *txq_ctrl;

if (primary_txq == NULL)
continue;
-   txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0,
-   primary_txq->socket);
-   if (txq != NULL) {
+   primary_txq_ctrl = container_of(primary_txq,
+   struct txq_ctrl, txq);
+   txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl), 0,
+primary_txq_ctrl->socket);
+   if (txq_ctrl != NULL) {
if (txq_setup(priv->dev,
- txq,
+ primary_txq_ctrl,
  primary_txq->elts_n,
- primary_txq->socket,
+ primary_txq_ctrl->socket,
  NULL) == 0) {
-   txq->stats.idx = primary_txq->stats.idx;
-   tx_queues[i] = txq;
+   txq_ctrl->txq.stats.idx =
+   primary_txq->stats.idx;
+   tx_queues[i] = &txq_ctrl->txq;
continue;
}
-   rte_free(txq);
+   rte_free(txq_ctrl);
}
while (i) {
-   txq = tx_queues[--i];
-   txq_cleanup(txq);
-   rte_free(txq);
+   txq_ctrl = tx_queues[--i];
+   txq_cleanup(txq_ctrl);
+   rte_free(txq_ctrl);
}
goto error;
}
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index bb44041..1d8bf72 100644
--- a/driv

[dpdk-dev] [PATCH v7 06/25] mlx5: remove inline Tx support

2016-06-24 Thread Nelio Laranjeiro
Inline TX will be fully managed by the PMD after Verbs is bypassed in the
data path. Remove the current code until then.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 config/common_base   |  1 -
 doc/guides/nics/mlx5.rst | 10 --
 drivers/net/mlx5/Makefile|  4 ---
 drivers/net/mlx5/mlx5_defs.h |  5 ---
 drivers/net/mlx5/mlx5_rxtx.c | 75 +++-
 drivers/net/mlx5/mlx5_rxtx.h |  9 --
 drivers/net/mlx5/mlx5_txq.c  | 19 ++-
 7 files changed, 27 insertions(+), 96 deletions(-)

diff --git a/config/common_base b/config/common_base
index 39e6333..5fbac47 100644
--- a/config/common_base
+++ b/config/common_base
@@ -207,7 +207,6 @@ CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS=1
 #
 CONFIG_RTE_LIBRTE_MLX5_PMD=n
 CONFIG_RTE_LIBRTE_MLX5_DEBUG=n
-CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE=0
 CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8

 #
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 84c35a0..77fa957 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -114,16 +114,6 @@ These options can be modified in the ``.config`` file.
   adds additional run-time checks and debugging messages at the cost of
   lower performance.

-- ``CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE`` (default **0**)
-
-  Amount of data to be inlined during TX operations. Improves latency.
-  Can improve PPS performance when PCI backpressure is detected and may be
-  useful for scenarios involving heavy traffic on many queues.
-
-  Since the additional software logic necessary to handle this mode can
-  lower performance when there is no backpressure, it is not enabled by
-  default.
-
 - ``CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE`` (default **8**)

   Maximum number of cached memory pools (MPs) per TX queue. Each MP from
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 656a6e1..289c85e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -86,10 +86,6 @@ else
 CFLAGS += -DNDEBUG -UPEDANTIC
 endif

-ifdef CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE
-CFLAGS += -DMLX5_PMD_MAX_INLINE=$(CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE)
-endif
-
 ifdef CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE
 CFLAGS += -DMLX5_PMD_TX_MP_CACHE=$(CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE)
 endif
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index da1c90e..9a19835 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -54,11 +54,6 @@
 /* RSS Indirection table size. */
 #define RSS_INDIRECTION_TABLE_SIZE 256

-/* Maximum size for inline data. */
-#ifndef MLX5_PMD_MAX_INLINE
-#define MLX5_PMD_MAX_INLINE 0
-#endif
-
 /*
  * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP
  * from which buffers are to be transmitted will have to be mapped by this
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index f67cbf4..4ba88ea 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -329,58 +329,33 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
rte_prefetch0((volatile void *)
  (uintptr_t)buf_next_addr);
}
-   /* Put packet into send queue. */
-#if MLX5_PMD_MAX_INLINE > 0
-   if (length <= txq->max_inline) {
-#ifdef HAVE_VERBS_VLAN_INSERTION
-   if (insert_vlan)
-   err = txq->send_pending_inline_vlan
-   (txq->qp,
-(void *)addr,
-length,
-send_flags,
-&buf->vlan_tci);
-   else
-#endif /* HAVE_VERBS_VLAN_INSERTION */
-   err = txq->send_pending_inline
-   (txq->qp,
-(void *)addr,
-length,
-send_flags);
-   } else
-#endif
-   {
-   /*
-* Retrieve Memory Region key for this
-* memory pool.
-*/
-   lkey = txq_mp2mr(txq, txq_mb2mp(buf));
-   if (unlikely(lkey == (uint32_t)-1)) {
-   /* MR does not exist. */
-   DEBUG("%p: unable to get MP <-> MR"
- " association", (void *)txq);
-   /* Clean up TX element. */
-   elt->buf = NULL;
-   goto stop;
-   }
+   /* Retrieve Memory Region key for this memory pool. */
+   lkey = txq_mp2mr(txq, txq_mb2mp(buf));
+   if (unlikely(lkey =

[dpdk-dev] [PATCH v7 05/25] mlx5: remove configuration variable

2016-06-24 Thread Nelio Laranjeiro
There is no scatter/gather support anymore, CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N
has no purpose and can be removed.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 config/common_base   | 1 -
 doc/guides/nics/mlx5.rst | 7 ---
 drivers/net/mlx5/Makefile| 4 
 drivers/net/mlx5/mlx5_defs.h | 5 -
 drivers/net/mlx5/mlx5_rxq.c  | 4 
 drivers/net/mlx5/mlx5_txq.c  | 4 
 6 files changed, 25 deletions(-)

diff --git a/config/common_base b/config/common_base
index ead5984..39e6333 100644
--- a/config/common_base
+++ b/config/common_base
@@ -207,7 +207,6 @@ CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS=1
 #
 CONFIG_RTE_LIBRTE_MLX5_PMD=n
 CONFIG_RTE_LIBRTE_MLX5_DEBUG=n
-CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N=4
 CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE=0
 CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index d9196d1..84c35a0 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -114,13 +114,6 @@ These options can be modified in the ``.config`` file.
   adds additional run-time checks and debugging messages at the cost of
   lower performance.

-- ``CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N`` (default **4**)
-
-  Number of scatter/gather elements (SGEs) per work request (WR). Lowering
-  this number improves performance but also limits the ability to receive
-  scattered packets (packets that do not fit a single mbuf). The default
-  value is a safe tradeoff.
-
 - ``CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE`` (default **0**)

   Amount of data to be inlined during TX operations. Improves latency.
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 999ada5..656a6e1 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -86,10 +86,6 @@ else
 CFLAGS += -DNDEBUG -UPEDANTIC
 endif

-ifdef CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N
-CFLAGS += -DMLX5_PMD_SGE_WR_N=$(CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N)
-endif
-
 ifdef CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE
 CFLAGS += -DMLX5_PMD_MAX_INLINE=$(CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE)
 endif
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 09207d9..da1c90e 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -54,11 +54,6 @@
 /* RSS Indirection table size. */
 #define RSS_INDIRECTION_TABLE_SIZE 256

-/* Maximum number of Scatter/Gather Elements per Work Request. */
-#ifndef MLX5_PMD_SGE_WR_N
-#define MLX5_PMD_SGE_WR_N 4
-#endif
-
 /* Maximum size for inline data. */
 #ifndef MLX5_PMD_MAX_INLINE
 #define MLX5_PMD_MAX_INLINE 0
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 38ff9fd..4000624 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -976,10 +976,6 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, 
uint16_t desc,
ERROR("%p: invalid number of RX descriptors", (void *)dev);
return EINVAL;
}
-   if (MLX5_PMD_SGE_WR_N > 1) {
-   ERROR("%p: RX scatter is not supported", (void *)dev);
-   return ENOTSUP;
-   }
/* Toggle RX checksum offload if hardware supports it. */
if (priv->hw_csum)
tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5a248c9..59974c5 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -264,10 +264,6 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, 
uint16_t desc,
ERROR("%p: invalid number of TX descriptors", (void *)dev);
return EINVAL;
}
-   if (MLX5_PMD_SGE_WR_N > 1) {
-   ERROR("%p: TX gather is not supported", (void *)dev);
-   return EINVAL;
-   }
/* MRs will be registered in mp2mr[] later. */
attr.rd = (struct ibv_exp_res_domain_init_attr){
.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
-- 
2.1.4



[dpdk-dev] [PATCH v7 04/25] mlx5: remove Rx scatter support

2016-06-24 Thread Nelio Laranjeiro
This is done in preparation of bypassing Verbs entirely for the data path
as a performance improvement. RX scatter cannot be maintained during the
transition and will be reimplemented later.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_ethdev.c |  31 +---
 drivers/net/mlx5/mlx5_rxq.c| 314 ++---
 drivers/net/mlx5/mlx5_rxtx.c   | 211 +--
 drivers/net/mlx5/mlx5_rxtx.h   |  13 +-
 4 files changed, 53 insertions(+), 516 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 280a90a..ca57021 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -623,8 +623,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)

};

-   if (dev->rx_pkt_burst == mlx5_rx_burst ||
-   dev->rx_pkt_burst == mlx5_rx_burst_sp)
+   if (dev->rx_pkt_burst == mlx5_rx_burst)
return ptypes;
return NULL;
 }
@@ -762,19 +761,11 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
mb_len = rte_pktmbuf_data_room_size(rxq->mp);
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
sp = (max_frame_len > (mb_len - RTE_PKTMBUF_HEADROOM));
-   /* Provide new values to rxq_setup(). */
-   dev->data->dev_conf.rxmode.jumbo_frame = sp;
-   dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
-   ret = rxq_rehash(dev, rxq);
-   if (ret) {
-   /* Force SP RX if that queue requires it and abort. */
-   if (rxq->sp)
-   rx_func = mlx5_rx_burst_sp;
-   break;
+   if (sp) {
+   ERROR("%p: RX scatter is not supported", (void *)dev);
+   ret = ENOTSUP;
+   goto out;
}
-   /* Scattered burst function takes priority. */
-   if (rxq->sp)
-   rx_func = mlx5_rx_burst_sp;
}
/* Burst functions can now be called again. */
rte_wmb();
@@ -1103,22 +1094,12 @@ priv_set_link(struct priv *priv, int up)
 {
struct rte_eth_dev *dev = priv->dev;
int err;
-   unsigned int i;

if (up) {
err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
if (err)
return err;
-   for (i = 0; i < priv->rxqs_n; i++)
-   if ((*priv->rxqs)[i]->sp)
-   break;
-   /* Check if an sp queue exists.
-* Note: Some old frames might be received.
-*/
-   if (i == priv->rxqs_n)
-   dev->rx_pkt_burst = mlx5_rx_burst;
-   else
-   dev->rx_pkt_burst = mlx5_rx_burst_sp;
+   dev->rx_pkt_burst = mlx5_rx_burst;
dev->tx_pkt_burst = mlx5_tx_burst;
} else {
err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 0bcf55b..38ff9fd 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -634,145 +634,6 @@ priv_rehash_flows(struct priv *priv)
 }

 /**
- * Allocate RX queue elements with scattered packets support.
- *
- * @param rxq
- *   Pointer to RX queue structure.
- * @param elts_n
- *   Number of elements to allocate.
- * @param[in] pool
- *   If not NULL, fetch buffers from this array instead of allocating them
- *   with rte_pktmbuf_alloc().
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
- struct rte_mbuf **pool)
-{
-   unsigned int i;
-   struct rxq_elt_sp (*elts)[elts_n] =
-   rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq->socket);
-   int ret = 0;
-
-   if (elts == NULL) {
-   ERROR("%p: can't allocate packets array", (void *)rxq);
-   ret = ENOMEM;
-   goto error;
-   }
-   /* For each WR (packet). */
-   for (i = 0; (i != elts_n); ++i) {
-   unsigned int j;
-   struct rxq_elt_sp *elt = &(*elts)[i];
-   struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;
-
-   /* These two arrays must have the same size. */
-   assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
-   /* For each SGE (segment). */
-   for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
-   struct ibv_sge *sge = &(*sges)[j];
-   struct rte_mbuf *buf;
-
-   if (pool != NULL) {
-   b

[dpdk-dev] [PATCH v7 03/25] mlx5: remove Tx gather support

2016-06-24 Thread Nelio Laranjeiro
This is done in preparation of bypassing Verbs entirely for the data path
as a performance improvement. TX gather cannot be maintained during the
transition and will be reimplemented later.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_ethdev.c |   2 +-
 drivers/net/mlx5/mlx5_rxtx.c   | 317 -
 drivers/net/mlx5/mlx5_rxtx.h   |  17 ---
 drivers/net/mlx5/mlx5_txq.c|  49 ++-
 4 files changed, 71 insertions(+), 314 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 0a881b6..280a90a 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1260,7 +1260,7 @@ mlx5_secondary_data_setup(struct priv *priv)
if (txq != NULL) {
if (txq_setup(priv->dev,
  txq,
- primary_txq->elts_n * MLX5_PMD_SGE_WR_N,
+ primary_txq->elts_n,
  primary_txq->socket,
  NULL) == 0) {
txq->stats.idx = primary_txq->stats.idx;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 616cf7a..c4487b9 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -228,156 +228,6 @@ insert_vlan_sw(struct rte_mbuf *buf)
return 0;
 }

-#if MLX5_PMD_SGE_WR_N > 1
-
-/**
- * Copy scattered mbuf contents to a single linear buffer.
- *
- * @param[out] linear
- *   Linear output buffer.
- * @param[in] buf
- *   Scattered input buffer.
- *
- * @return
- *   Number of bytes copied to the output buffer or 0 if not large enough.
- */
-static unsigned int
-linearize_mbuf(linear_t *linear, struct rte_mbuf *buf)
-{
-   unsigned int size = 0;
-   unsigned int offset;
-
-   do {
-   unsigned int len = DATA_LEN(buf);
-
-   offset = size;
-   size += len;
-   if (unlikely(size > sizeof(*linear)))
-   return 0;
-   memcpy(&(*linear)[offset],
-  rte_pktmbuf_mtod(buf, uint8_t *),
-  len);
-   buf = NEXT(buf);
-   } while (buf != NULL);
-   return size;
-}
-
-/**
- * Handle scattered buffers for mlx5_tx_burst().
- *
- * @param txq
- *   TX queue structure.
- * @param segs
- *   Number of segments in buf.
- * @param elt
- *   TX queue element to fill.
- * @param[in] buf
- *   Buffer to process.
- * @param elts_head
- *   Index of the linear buffer to use if necessary (normally txq->elts_head).
- * @param[out] sges
- *   Array filled with SGEs on success.
- *
- * @return
- *   A structure containing the processed packet size in bytes and the
- *   number of SGEs. Both fields are set to (unsigned int)-1 in case of
- *   failure.
- */
-static struct tx_burst_sg_ret {
-   unsigned int length;
-   unsigned int num;
-}
-tx_burst_sg(struct txq *txq, unsigned int segs, struct txq_elt *elt,
-   struct rte_mbuf *buf, unsigned int elts_head,
-   struct ibv_sge (*sges)[MLX5_PMD_SGE_WR_N])
-{
-   unsigned int sent_size = 0;
-   unsigned int j;
-   int linearize = 0;
-
-   /* When there are too many segments, extra segments are
-* linearized in the last SGE. */
-   if (unlikely(segs > RTE_DIM(*sges))) {
-   segs = (RTE_DIM(*sges) - 1);
-   linearize = 1;
-   }
-   /* Update element. */
-   elt->buf = buf;
-   /* Register segments as SGEs. */
-   for (j = 0; (j != segs); ++j) {
-   struct ibv_sge *sge = &(*sges)[j];
-   uint32_t lkey;
-
-   /* Retrieve Memory Region key for this memory pool. */
-   lkey = txq_mp2mr(txq, txq_mb2mp(buf));
-   if (unlikely(lkey == (uint32_t)-1)) {
-   /* MR does not exist. */
-   DEBUG("%p: unable to get MP <-> MR association",
- (void *)txq);
-   /* Clean up TX element. */
-   elt->buf = NULL;
-   goto stop;
-   }
-   /* Update SGE. */
-   sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
-   if (txq->priv->sriov)
-   rte_prefetch0((volatile void *)
- (uintptr_t)sge->addr);
-   sge->length = DATA_LEN(buf);
-   sge->lkey = lkey;
-   sent_size += sge->length;
-   buf = NEXT(buf);
-   }
-   /* If buf is not NULL here and is not going to be linearized,
-* nb_segs is not valid. */
-   assert(j == segs);
-   assert((buf == NULL) || (linearize));
-   /* Linearize extra segments. */
-   if (linearize) {
- 

[dpdk-dev] [PATCH v7 02/25] mlx5: split memory registration function

2016-06-24 Thread Nelio Laranjeiro
Except for the first time when memory registration occurs, the lkey is
always cached. Since memory registration is slow and performs system calls,
performance can be improved by moving that code to its own function outside
of the data path so only the lookup code is left in the original inlined
function.
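
In practice this leaves a small inline lookup over a per-queue lkey cache on
the hot path, while a separate non-inline function performs the expensive
registration on a miss. A rough self-contained sketch of that split, using
simplified stand-in types and signatures (txq_mp2mr_reg() is only a stub
here; the real function registers the mempool and manages the cache):

    #include <stdint.h>
    #include <stddef.h>

    struct mempool;                         /* stand-in for struct rte_mempool */

    struct mr_cache_entry {
            const struct mempool *mp;
            uint32_t lkey;
    };

    struct txq {
            struct mr_cache_entry mp2mr[8]; /* per-queue MR cache slots */
    };

    /* Slow path (not inlined): registers the pool, fills slot i, returns
     * its lkey. */
    uint32_t txq_mp2mr_reg(struct txq *txq, const struct mempool *mp,
                           unsigned int i);

    /* Fast path kept inline in the data path: cache lookup only. */
    static inline uint32_t
    txq_mp2mr(struct txq *txq, const struct mempool *mp)
    {
            unsigned int i;

            for (i = 0; i != sizeof(txq->mp2mr) / sizeof(txq->mp2mr[0]); ++i) {
                    if (txq->mp2mr[i].mp == NULL)
                            break;                     /* cache not full: miss */
                    if (txq->mp2mr[i].mp == mp)
                            return txq->mp2mr[i].lkey; /* hit */
            }
            return txq_mp2mr_reg(txq, mp, i);          /* miss: slow path */
    }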

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/Makefile|   1 +
 drivers/net/mlx5/mlx5_mr.c   | 280 +++
 drivers/net/mlx5/mlx5_rxtx.c | 209 ++--
 drivers/net/mlx5/mlx5_rxtx.h |   8 +-
 4 files changed, 298 insertions(+), 200 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_mr.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 82558aa..999ada5 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -47,6 +47,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_vlan.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c

 # Dependencies.
 DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += lib/librte_ether
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
new file mode 100644
index 000..bb44041
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -0,0 +1,280 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include 
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+/* DPDK headers don't like -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include 
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+#include "mlx5.h"
+#include "mlx5_rxtx.h"
+
+struct mlx5_check_mempool_data {
+   int ret;
+   char *start;
+   char *end;
+};
+
+/* Called by mlx5_check_mempool() when iterating the memory chunks. */
+static void
+mlx5_check_mempool_cb(struct rte_mempool *mp,
+ void *opaque, struct rte_mempool_memhdr *memhdr,
+ unsigned int mem_idx)
+{
+   struct mlx5_check_mempool_data *data = opaque;
+
+   (void)mp;
+   (void)mem_idx;
+
+   /* It already failed, skip the next chunks. */
+   if (data->ret != 0)
+   return;
+   /* It is the first chunk. */
+   if (data->start == NULL && data->end == NULL) {
+   data->start = memhdr->addr;
+   data->end = data->start + memhdr->len;
+   return;
+   }
+   if (data->end == memhdr->addr) {
+   data->end += memhdr->len;
+   return;
+   }
+   if (data->start == (char *)memhdr->addr + memhdr->len) {
+   data->start -= memhdr->len;
+   return;
+   }
+   /* Error, mempool is not virtually contiguous. */
+   data->ret = -1;
+}
+
+/**
+ * Check if a mempool can be used: it must be virtually contiguous.
+ *
+ * @param[in] mp
+ *   Pointer to memory pool.
+ * @param[out] start
+ *   Pointer to the star

[dpdk-dev] [PATCH v7 01/25] drivers: fix PCI class id support

2016-06-24 Thread Nelio Laranjeiro
Fixes: 701c8d80c820 ("pci: support class id probing")

Signed-off-by: Nelio Laranjeiro 
---
 drivers/crypto/qat/rte_qat_cryptodev.c |  5 +
 drivers/net/mlx4/mlx4.c| 18 ++
 drivers/net/mlx5/mlx5.c| 24 
 drivers/net/nfp/nfp_net.c  | 12 
 4 files changed, 19 insertions(+), 40 deletions(-)

diff --git a/drivers/crypto/qat/rte_qat_cryptodev.c 
b/drivers/crypto/qat/rte_qat_cryptodev.c
index a7912f5..f46ec85 100644
--- a/drivers/crypto/qat/rte_qat_cryptodev.c
+++ b/drivers/crypto/qat/rte_qat_cryptodev.c
@@ -69,10 +69,7 @@ static struct rte_cryptodev_ops crypto_qat_ops = {

 static struct rte_pci_id pci_id_qat_map[] = {
{
-   .vendor_id = 0x8086,
-   .device_id = 0x0443,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem_device_id = PCI_ANY_ID
+   RTE_PCI_DEVICE(0x8086, 0x0443),
},
{.device_id = 0},
 };
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 9e94630..6228688 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -5807,22 +5807,16 @@ error:

 static const struct rte_pci_id mlx4_pci_id_map[] = {
{
-   .vendor_id = PCI_VENDOR_ID_MELLANOX,
-   .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX3,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem_device_id = PCI_ANY_ID
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+  PCI_DEVICE_ID_MELLANOX_CONNECTX3)
},
{
-   .vendor_id = PCI_VENDOR_ID_MELLANOX,
-   .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem_device_id = PCI_ANY_ID
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+  PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO)
},
{
-   .vendor_id = PCI_VENDOR_ID_MELLANOX,
-   .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX3VF,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem_device_id = PCI_ANY_ID
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+  PCI_DEVICE_ID_MELLANOX_CONNECTX3VF)
},
{
.vendor_id = 0
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 67a541c..350028b 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -610,28 +610,20 @@ error:

 static const struct rte_pci_id mlx5_pci_id_map[] = {
{
-   .vendor_id = PCI_VENDOR_ID_MELLANOX,
-   .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX4,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem_device_id = PCI_ANY_ID
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+  PCI_DEVICE_ID_MELLANOX_CONNECTX4)
},
{
-   .vendor_id = PCI_VENDOR_ID_MELLANOX,
-   .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX4VF,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem_device_id = PCI_ANY_ID
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+  PCI_DEVICE_ID_MELLANOX_CONNECTX4VF)
},
{
-   .vendor_id = PCI_VENDOR_ID_MELLANOX,
-   .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX4LX,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem_device_id = PCI_ANY_ID
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+  PCI_DEVICE_ID_MELLANOX_CONNECTX4LX)
},
{
-   .vendor_id = PCI_VENDOR_ID_MELLANOX,
-   .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem_device_id = PCI_ANY_ID
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+  PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)
},
{
.vendor_id = 0
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index ea5a2a3..dd0c559 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -2446,16 +2446,12 @@ nfp_net_init(struct rte_eth_dev *eth_dev)

 static struct rte_pci_id pci_id_nfp_net_map[] = {
{
-   .vendor_id = PCI_VENDOR_ID_NETRONOME,
-   .device_id = PCI_DEVICE_ID_NFP6000_PF_NIC,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem_device_id = PCI_ANY_ID,
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
+  PCI_DEVICE_ID_NFP6000_PF_NIC)
},
{
-   .vendor_id = PCI_VENDOR_ID_NETRONOME,
-   .device_id = PCI_DEVICE_ID_NFP6000_VF_NIC,
-   .subsystem_vendor_id = PCI_ANY_ID,
-   .subsystem

[dpdk-dev] [PATCH v7 00/25] Refactor mlx5 to improve performance

2016-06-24 Thread Nelio Laranjeiro
Enhance mlx5 with a data path that bypasses Verbs.

The first half of this patchset removes support for functionality completely
rewritten in the second half (scatter/gather, inline send), while the data
path is refactored without Verbs.

The PMD remains usable during the transition.

This patchset must be applied after "Miscellaneous fixes for mlx4 and mlx5".

Changes in v7:
- Fixed a bug introduced by change in v5.

Changes in v6:
- None.

Changes in v5:
- Fixed checkpatches errors.

Changes in v4:
- Fixed errno return value of mlx5_args().
- Fixed long line above 80 characters.

Changes in v3:
- Rebased patchset on top of next-net/rel_16_07.

Changes in v2:
- Rebased patchset on top of dpdk/master.
- Fixed CQE size on Power8.
- Fixed mbuf assertion failure in debug mode.
- Fixed missing class_id field in rte_pci_id by using RTE_PCI_DEVICE.

Adrien Mazarguil (7):
  mlx5: replace countdown with threshold for Tx completions
  mlx5: add debugging information about Tx queues capabilities
  mlx5: check remaining space while processing Tx burst
  mlx5: resurrect Tx gather support
  mlx5: work around spurious compilation errors
  mlx5: remove redundant Rx queue initialization code
  mlx5: make Rx queue reinitialization safer

Nelio Laranjeiro (17):
  drivers: fix PCI class id support
  mlx5: split memory registration function
  mlx5: remove Tx gather support
  mlx5: remove Rx scatter support
  mlx5: remove configuration variable
  mlx5: remove inline Tx support
  mlx5: split Tx queue structure
  mlx5: split Rx queue structure
  mlx5: update prerequisites for upcoming enhancements
  mlx5: add definitions for data path without Verbs
  mlx5: add support for configuration through kvargs
  mlx5: add Tx/Rx burst function selection wrapper
  mlx5: refactor Rx data path
  mlx5: refactor Tx data path
  mlx5: handle Rx CQE compression
  mlx5: add support for multi-packet send
  mlx5: resurrect Rx scatter support

Yaacov Hazan (1):
  mlx5: add support for inline send

 config/common_base |2 -
 doc/guides/nics/mlx5.rst   |   94 +-
 drivers/crypto/qat/rte_qat_cryptodev.c |5 +-
 drivers/net/mlx4/mlx4.c|   18 +-
 drivers/net/mlx5/Makefile  |   49 +-
 drivers/net/mlx5/mlx5.c|  186 ++-
 drivers/net/mlx5/mlx5.h|   10 +
 drivers/net/mlx5/mlx5_defs.h   |   26 +-
 drivers/net/mlx5/mlx5_ethdev.c |  195 ++-
 drivers/net/mlx5/mlx5_fdir.c   |   20 +-
 drivers/net/mlx5/mlx5_mr.c |  283 
 drivers/net/mlx5/mlx5_prm.h|  163 +++
 drivers/net/mlx5/mlx5_rxmode.c |8 -
 drivers/net/mlx5/mlx5_rxq.c|  767 ---
 drivers/net/mlx5/mlx5_rxtx.c   | 2232 +++-
 drivers/net/mlx5/mlx5_rxtx.h   |  176 ++-
 drivers/net/mlx5/mlx5_txq.c|  370 +++---
 drivers/net/mlx5/mlx5_vlan.c   |6 +-
 drivers/net/nfp/nfp_net.c  |   12 +-
 19 files changed, 2671 insertions(+), 1951 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_mr.c
 create mode 100644 drivers/net/mlx5/mlx5_prm.h

-- 
2.1.4



[dpdk-dev] [PATCH v6 25/25] mlx5: resurrect Rx scatter support

2016-06-24 Thread Nelio Laranjeiro
This commit brings back Rx scatter and the related support in the MTU update
function. The maximum number of segments per packet is no longer a fixed
value (previously MLX5_PMD_SGE_WR_N, set to 4 by default), which caused
performance issues when fewer segments were actually needed and limited the
maximum packet size that could be received with the default mbuf size (at
most 8576 bytes).

These limitations are now lifted as the number of SGEs is derived from the
MTU (which implies MRU) at queue initialization and during MTU update.
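
As an example of how the segment count follows the MTU, the sketch below
computes the per-queue number of SGEs rounded up to a power of two. It is
self-contained; the 9000-byte MTU, 2048-byte mbuf data room and 128-byte
headroom are assumed example values, and log2above() is reimplemented
locally rather than taken from the driver headers:

    #include <stdio.h>

    /* Smallest n such that (1 << n) >= v (equivalent to log2above()). */
    static unsigned int
    log2above(unsigned int v)
    {
            unsigned int n = 0;

            while ((1u << n) < v)
                    ++n;
            return n;
    }

    int
    main(void)
    {
            unsigned int max_frame_len = 14 + 9000 + 4; /* ETH hdr + MTU + CRC */
            unsigned int size = 128 + max_frame_len;    /* headroom + frame */
            unsigned int mb_len = 2048;                 /* mbuf data room */
            unsigned int sges_n =
                    log2above((size / mb_len) + !!(size % mb_len));

            /* 9146 bytes over 2048-byte buffers -> 5 segments, rounded to 8. */
            printf("SGEs per packet: %u\n", 1u << sges_n);
            return 0;
    }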

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Vasily Philipov 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_ethdev.c |  90 ++
 drivers/net/mlx5/mlx5_rxq.c|  77 ++-
 drivers/net/mlx5/mlx5_rxtx.c   | 139 -
 drivers/net/mlx5/mlx5_rxtx.h   |   1 +
 4 files changed, 225 insertions(+), 82 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 698a50e..72f0826 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -725,6 +725,9 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
unsigned int i;
uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
mlx5_rx_burst;
+   unsigned int max_frame_len;
+   int rehash;
+   int restart = priv->started;

if (mlx5_is_secondary())
return -E_RTE_SECONDARY;
@@ -738,7 +741,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
goto out;
} else
DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
-   priv->mtu = mtu;
/* Temporarily replace RX handler with a fake one, assuming it has not
 * been copied elsewhere. */
dev->rx_pkt_burst = removed_rx_burst;
@@ -746,28 +748,94 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
 * removed_rx_burst() instead. */
rte_wmb();
usleep(1000);
+   /* MTU does not include header and CRC. */
+   max_frame_len = ETHER_HDR_LEN + mtu + ETHER_CRC_LEN;
+   /* Check if at least one queue is going to need a SGE update. */
+   for (i = 0; i != priv->rxqs_n; ++i) {
+   struct rxq *rxq = (*priv->rxqs)[i];
+   unsigned int mb_len;
+   unsigned int size = RTE_PKTMBUF_HEADROOM + max_frame_len;
+   unsigned int sges_n;
+
+   if (rxq == NULL)
+   continue;
+   mb_len = rte_pktmbuf_data_room_size(rxq->mp);
+   assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+   /*
+* Determine the number of SGEs needed for a full packet
+* and round it to the next power of two.
+*/
+   sges_n = log2above((size / mb_len) + !!(size % mb_len));
+   if (sges_n != rxq->sges_n)
+   break;
+   }
+   /*
+* If all queues have the right number of SGEs, a simple rehash
+* of their buffers is enough, otherwise SGE information can only
+* be updated in a queue by recreating it. All resources that depend
+* on queues (flows, indirection tables) must be recreated as well in
+* that case.
+*/
+   rehash = (i == priv->rxqs_n);
+   if (!rehash) {
+   /* Clean up everything as with mlx5_dev_stop(). */
+   priv_special_flow_disable_all(priv);
+   priv_mac_addrs_disable(priv);
+   priv_destroy_hash_rxqs(priv);
+   priv_fdir_disable(priv);
+   priv_dev_interrupt_handler_uninstall(priv, dev);
+   }
+recover:
/* Reconfigure each RX queue. */
for (i = 0; (i != priv->rxqs_n); ++i) {
struct rxq *rxq = (*priv->rxqs)[i];
-   unsigned int mb_len;
-   unsigned int max_frame_len;
+   struct rxq_ctrl *rxq_ctrl =
+   container_of(rxq, struct rxq_ctrl, rxq);
int sp;
+   unsigned int mb_len;
+   unsigned int tmp;

if (rxq == NULL)
continue;
-   /* Calculate new maximum frame length according to MTU and
-* toggle scattered support (sp) if necessary. */
-   max_frame_len = (priv->mtu + ETHER_HDR_LEN +
-(ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
mb_len = rte_pktmbuf_data_room_size(rxq->mp);
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+   /* Toggle scattered support (sp) if necessary. */
sp = (max_frame_len > (mb_len - RTE_PKTMBUF_HEADROOM));
-   if (sp) {
-   ERROR("%p: RX scatter is not supported", (void *)dev);
-   ret = ENOTSUP;
-   

[dpdk-dev] [PATCH v6 24/25] mlx5: make Rx queue reinitialization safer

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil 

The primary purpose of the rxq_rehash() function is to stop and restart
reception on a queue after re-posting buffers. This may fail if the array
that temporarily stores existing buffers for reuse cannot be allocated.

Update rxq_rehash() to work on the target queue directly (not through a
template copy) and avoid this allocation.

rxq_alloc_elts() is modified accordingly to take buffers from an existing
queue directly and update their refcount.

Unlike rxq_rehash(), rxq_setup() must work on a temporary structure but
should not allocate new mbufs from the pool while reinitializing an
existing queue. This is achieved by using the refcount-aware
rxq_alloc_elts() before overwriting queue data.

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Vasily Philipov 
---
 drivers/net/mlx5/mlx5_rxq.c | 83 ++---
 1 file changed, 41 insertions(+), 42 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index fbf14fa..b2ddd0d 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -642,7 +642,7 @@ priv_rehash_flows(struct priv *priv)
  */
 static int
 rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
-  struct rte_mbuf **pool)
+  struct rte_mbuf *(*pool)[])
 {
unsigned int i;
int ret = 0;
@@ -654,9 +654,10 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int 
elts_n,
&(*rxq_ctrl->rxq.wqes)[i];

if (pool != NULL) {
-   buf = *(pool++);
+   buf = (*pool)[i];
assert(buf != NULL);
rte_pktmbuf_reset(buf);
+   rte_pktmbuf_refcnt_update(buf, 1);
} else
buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
if (buf == NULL) {
@@ -781,7 +782,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 }

 /**
- * Reconfigure a RX queue with new parameters.
+ * Reconfigure RX queue buffers.
  *
  * rxq_rehash() does not allocate mbufs, which, if not done from the right
  * thread (such as a control thread), may corrupt the pool.
@@ -798,67 +799,48 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 int
 rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 {
-   struct rxq_ctrl tmpl = *rxq_ctrl;
-   unsigned int mbuf_n;
-   unsigned int desc_n;
-   struct rte_mbuf **pool;
-   unsigned int i, k;
+   unsigned int elts_n = rxq_ctrl->rxq.elts_n;
+   unsigned int i;
struct ibv_exp_wq_attr mod;
int err;

DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq_ctrl);
-   /* Number of descriptors and mbufs currently allocated. */
-   desc_n = tmpl.rxq.elts_n;
-   mbuf_n = desc_n;
/* From now on, any failure will render the queue unusable.
 * Reinitialize WQ. */
mod = (struct ibv_exp_wq_attr){
.attr_mask = IBV_EXP_WQ_ATTR_STATE,
.wq_state = IBV_EXP_WQS_RESET,
};
-   err = ibv_exp_modify_wq(tmpl.wq, &mod);
+   err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
if (err) {
ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
assert(err > 0);
return err;
}
-   /* Allocate pool. */
-   pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
-   if (pool == NULL) {
-   ERROR("%p: cannot allocate memory", (void *)dev);
-   return ENOBUFS;
-   }
/* Snatch mbufs from original queue. */
-   k = 0;
-   for (i = 0; (i != desc_n); ++i)
-   pool[k++] = (*rxq_ctrl->rxq.elts)[i];
-   assert(k == mbuf_n);
-   rte_free(pool);
+   claim_zero(rxq_alloc_elts(rxq_ctrl, elts_n, rxq_ctrl->rxq.elts));
+   for (i = 0; i != elts_n; ++i) {
+   struct rte_mbuf *buf = (*rxq_ctrl->rxq.elts)[i];
+
+   assert(rte_mbuf_refcnt_read(buf) == 2);
+   rte_pktmbuf_free_seg(buf);
+   }
/* Change queue state to ready. */
mod = (struct ibv_exp_wq_attr){
.attr_mask = IBV_EXP_WQ_ATTR_STATE,
.wq_state = IBV_EXP_WQS_RDY,
};
-   err = ibv_exp_modify_wq(tmpl.wq, &mod);
+   err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
if (err) {
ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
  (void *)dev, strerror(err));
goto error;
}
-   /* Post SGEs. */
-   err = rxq_alloc_elts(&tmpl, desc_n, pool);
-   if (err) {
-   ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
-   rte_free(pool);
-   assert(err > 0);
-   return err;
-   }
/* Update doorbell counter. */
-   rxq_ctrl->rxq.rq_ci = desc_n;
+   rxq_ctrl->rxq.rq_ci = elts_n;
rte_wmb();
*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
 

[dpdk-dev] [PATCH v6 23/25] mlx5: remove redundant Rx queue initialization code

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil <adrien.mazarg...@6wind.com>

Toggling RX checksum offloads is already done at initialization time. This
code does not belong in rxq_rehash().

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxq.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b2f8f9a..fbf14fa 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -798,7 +798,6 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 int
 rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 {
-   struct priv *priv = rxq_ctrl->priv;
struct rxq_ctrl tmpl = *rxq_ctrl;
unsigned int mbuf_n;
unsigned int desc_n;
@@ -811,16 +810,6 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl 
*rxq_ctrl)
/* Number of descriptors and mbufs currently allocated. */
desc_n = tmpl.rxq.elts_n;
mbuf_n = desc_n;
-   /* Toggle RX checksum offload if hardware supports it. */
-   if (priv->hw_csum) {
-   tmpl.rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
-   rxq_ctrl->rxq.csum = tmpl.rxq.csum;
-   }
-   if (priv->hw_csum_l2tun) {
-   tmpl.rxq.csum_l2tun =
-   !!dev->data->dev_conf.rxmode.hw_ip_checksum;
-   rxq_ctrl->rxq.csum_l2tun = tmpl.rxq.csum_l2tun;
-   }
/* From now on, any failure will render the queue unusable.
 * Reinitialize WQ. */
mod = (struct ibv_exp_wq_attr){
-- 
2.1.4



[dpdk-dev] [PATCH v6 22/25] mlx5: work around spurious compilation errors

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil 

Since commit "mlx5: resurrect Tx gather support", older GCC versions (such
as 4.8.5) may complain about the following:

 mlx5_rxtx.c: In function `mlx5_tx_burst':
 mlx5_rxtx.c:705:25: error: `wqe' may be used uninitialized in this
 function [-Werror=maybe-uninitialized]

 mlx5_rxtx.c: In function `mlx5_tx_burst_inline':
 mlx5_rxtx.c:864:25: error: `wqe' may be used uninitialized in this
 function [-Werror=maybe-uninitialized]

In both cases, this code cannot be reached when wqe is not initialized.

Considering older GCC versions are still widely used, work around this
issue by initializing wqe preemptively, even if it should not be necessary.

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_rxtx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index c72e7ce..8b67949 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -593,7 +593,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
unsigned int j = 0;
unsigned int max;
unsigned int comp;
-   volatile union mlx5_wqe *wqe;
+   volatile union mlx5_wqe *wqe = NULL;

if (unlikely(!pkts_n))
return 0;
@@ -741,7 +741,7 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf 
**pkts, uint16_t pkts_n)
unsigned int j = 0;
unsigned int max;
unsigned int comp;
-   volatile union mlx5_wqe *wqe;
+   volatile union mlx5_wqe *wqe = NULL;
unsigned int max_inline = txq->max_inline;

if (unlikely(!pkts_n))
-- 
2.1.4



[dpdk-dev] [PATCH v6 21/25] mlx5: resurrect Tx gather support

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil <adrien.mazarg...@6wind.com>

Compared to its previous incarnation, the software limit on the number of
mbuf segments is gone (previously MLX5_PMD_SGE_WR_N, set to 4 by default),
hence there is no need for the linearization code and related buffers that
permanently consumed a non-negligible amount of memory to handle oversized
mbufs.

The resulting code is both lighter and faster.

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 235 +--
 drivers/net/mlx5/mlx5_txq.c  |   8 +-
 2 files changed, 188 insertions(+), 55 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index fadc182..c72e7ce 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -303,6 +303,7 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe 
*wqe,
 {
wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
+   wqe->wqe.ctrl.data[2] = 0;
wqe->wqe.ctrl.data[3] = 0;
wqe->inl.eseg.rsvd0 = 0;
wqe->inl.eseg.rsvd1 = 0;
@@ -348,6 +349,7 @@ mlx5_wqe_write_vlan(struct txq *txq, volatile union 
mlx5_wqe *wqe,

wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
+   wqe->wqe.ctrl.data[2] = 0;
wqe->wqe.ctrl.data[3] = 0;
wqe->inl.eseg.rsvd0 = 0;
wqe->inl.eseg.rsvd1 = 0;
@@ -425,6 +427,7 @@ mlx5_wqe_write_inline(struct txq *txq, volatile union 
mlx5_wqe *wqe,
assert(size < 64);
wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
+   wqe->inl.ctrl.data[2] = 0;
wqe->inl.ctrl.data[3] = 0;
wqe->inl.eseg.rsvd0 = 0;
wqe->inl.eseg.rsvd1 = 0;
@@ -498,6 +501,7 @@ mlx5_wqe_write_inline_vlan(struct txq *txq, volatile union 
mlx5_wqe *wqe,
assert(size < 64);
wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
+   wqe->inl.ctrl.data[2] = 0;
wqe->inl.ctrl.data[3] = 0;
wqe->inl.eseg.rsvd0 = 0;
wqe->inl.eseg.rsvd1 = 0;
@@ -586,6 +590,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
uint16_t elts_head = txq->elts_head;
const unsigned int elts_n = txq->elts_n;
unsigned int i = 0;
+   unsigned int j = 0;
unsigned int max;
unsigned int comp;
volatile union mlx5_wqe *wqe;
@@ -602,23 +607,27 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
if (max > elts_n)
max -= elts_n;
do {
-   struct rte_mbuf *buf;
+   struct rte_mbuf *buf = *(pkts++);
unsigned int elts_head_next;
uintptr_t addr;
uint32_t length;
uint32_t lkey;
+   unsigned int segs_n = buf->nb_segs;
+   volatile struct mlx5_wqe_data_seg *dseg;
+   unsigned int ds = sizeof(*wqe) / 16;

/*
 * Make sure there is enough room to store this packet and
 * that one ring entry remains unused.
 */
-   if (max < 1 + 1)
+   assert(segs_n);
+   if (max < segs_n + 1)
break;
-   --max;
+   max -= segs_n;
--pkts_n;
-   buf = *(pkts++);
elts_head_next = (elts_head + 1) & (elts_n - 1);
wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
+   dseg = &wqe->wqe.dseg;
rte_prefetch0(wqe);
if (pkts_n)
rte_prefetch0(*pkts);
@@ -638,7 +647,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
buf->vlan_tci);
else
mlx5_wqe_write(txq, wqe, addr, length, lkey);
-   wqe->wqe.ctrl.data[2] = 0;
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
@@ -648,6 +656,37 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
} else {
wqe->wqe.eseg.cs_flags = 0;
}
+   while (--segs_n) {
+   /*
+* Spill on next WQE when the current one does not have
+* enough room left. Size of WQE must a be a multiple
+

[dpdk-dev] [PATCH v6 20/25] mlx5: check remaining space while processing Tx burst

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil <adrien.mazarg...@6wind.com>

The space necessary to store segmented packets cannot be known in advance
and must be verified for each of them.
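
The check boils down to reserving one ring entry and accounting for each
packet's segments before its WQEs are built. A minimal sketch of that
bookkeeping with a hypothetical helper, mirroring the
"if (max < segs_n + 1) break; max -= segs_n;" logic used by this series
rather than the driver's exact code:

    #include <stdbool.h>

    /* Hypothetical helper: returns false when a packet with segs_n segments
     * no longer fits while keeping one ring entry unused, otherwise consumes
     * the remaining space. */
    static inline bool
    tx_ring_fits(unsigned int *space, unsigned int segs_n)
    {
            if (*space < segs_n + 1)
                    return false;
            *space -= segs_n;
            return true;
    }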

Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_rxtx.c | 144 +++
 1 file changed, 78 insertions(+), 66 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ed2b5fe..fadc182 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -585,50 +585,51 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const unsigned int elts_n = txq->elts_n;
-   unsigned int i;
+   unsigned int i = 0;
unsigned int max;
unsigned int comp;
volatile union mlx5_wqe *wqe;
-   struct rte_mbuf *buf;

if (unlikely(!pkts_n))
return 0;
-   buf = pkts[0];
/* Prefetch first packet cacheline. */
tx_prefetch_cqe(txq, txq->cq_ci);
tx_prefetch_cqe(txq, txq->cq_ci + 1);
-   rte_prefetch0(buf);
+   rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
-   assert(max >= 1);
-   assert(max <= elts_n);
-   /* Always leave one free entry in the ring. */
-   --max;
-   if (max == 0)
-   return 0;
-   if (max > pkts_n)
-   max = pkts_n;
-   for (i = 0; (i != max); ++i) {
-   unsigned int elts_head_next = (elts_head + 1) & (elts_n - 1);
+   do {
+   struct rte_mbuf *buf;
+   unsigned int elts_head_next;
uintptr_t addr;
uint32_t length;
uint32_t lkey;

+   /*
+* Make sure there is enough room to store this packet and
+* that one ring entry remains unused.
+*/
+   if (max < 1 + 1)
+   break;
+   --max;
+   --pkts_n;
+   buf = *(pkts++);
+   elts_head_next = (elts_head + 1) & (elts_n - 1);
wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
rte_prefetch0(wqe);
-   if (i + 1 < max)
-   rte_prefetch0(pkts[i + 1]);
+   if (pkts_n)
+   rte_prefetch0(*pkts);
/* Retrieve buffer information. */
addr = rte_pktmbuf_mtod(buf, uintptr_t);
length = DATA_LEN(buf);
/* Update element. */
(*txq->elts)[elts_head] = buf;
/* Prefetch next buffer data. */
-   if (i + 1 < max)
-   rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 1],
+   if (pkts_n)
+   rte_prefetch0(rte_pktmbuf_mtod(*pkts,
   volatile void *));
/* Retrieve Memory Region key for this memory pool. */
lkey = txq_mp2mr(txq, txq_mb2mp(buf));
@@ -652,8 +653,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
txq->stats.obytes += length;
 #endif
elts_head = elts_head_next;
-   buf = pkts[i + 1];
-   }
+   ++i;
+   } while (pkts_n);
/* Take a shortcut if nothing must be sent. */
if (unlikely(i == 0))
return 0;
@@ -697,44 +698,45 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf 
**pkts, uint16_t pkts_n)
struct txq *txq = (struct txq *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const unsigned int elts_n = txq->elts_n;
-   unsigned int i;
+   unsigned int i = 0;
unsigned int max;
unsigned int comp;
volatile union mlx5_wqe *wqe;
-   struct rte_mbuf *buf;
unsigned int max_inline = txq->max_inline;

if (unlikely(!pkts_n))
return 0;
-   buf = pkts[0];
/* Prefetch first packet cacheline. */
tx_prefetch_cqe(txq, txq->cq_ci);
tx_prefetch_cqe(txq, txq->cq_ci + 1);
-   rte_prefetch0(buf);
+   rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
-   assert(max >= 1);
-   assert(max <= elts_n);
-   /* Always leave one free entry in the ring. */
-   --max;
-   if (max == 0)
-   return 0;
-   if (max > pkts_n)
-   max = pkts_n;
-   for (i = 0; (i != max); ++i) {
-   unsigned int elts_head_next = (elts_he

[dpdk-dev] [PATCH v6 19/25] mlx5: add debugging information about Tx queues capabilities

2016-06-24 Thread Nelio Laranjeiro
From: Adrien Mazarguil 

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx5/mlx5_txq.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4f17fb0..bae9f3d 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -343,6 +343,11 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl 
*txq_ctrl,
  (void *)dev, strerror(ret));
goto error;
}
+   DEBUG("TX queue capabilities: max_send_wr=%u, max_send_sge=%u,"
+ " max_inline_data=%u",
+ attr.init.cap.max_send_wr,
+ attr.init.cap.max_send_sge,
+ attr.init.cap.max_inline_data);
attr.mod = (struct ibv_exp_qp_attr){
/* Move the QP to this state. */
.qp_state = IBV_QPS_INIT,
-- 
2.1.4



[dpdk-dev] [PATCH v6 18/25] mlx5: add support for multi-packet send

2016-06-24 Thread Nelio Laranjeiro
This feature enables the TX burst function to emit up to 5 packets using
only two WQEs on devices that support it. This saves PCI bandwidth and
improves performance.

Signed-off-by: Nelio Laranjeiro 
Signed-off-by: Adrien Mazarguil 
Signed-off-by: Olga Shern 
---
 doc/guides/nics/mlx5.rst   |  10 +
 drivers/net/mlx5/mlx5.c|  14 +-
 drivers/net/mlx5/mlx5_ethdev.c |  15 +-
 drivers/net/mlx5/mlx5_rxtx.c   | 407 +
 drivers/net/mlx5/mlx5_rxtx.h   |   2 +
 drivers/net/mlx5/mlx5_txq.c|   2 +-
 6 files changed, 446 insertions(+), 4 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 9ada221..063c4a5 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -171,6 +171,16 @@ Run-time configuration

   This option should be used in combination with ``txq_inline`` above.

+- ``txq_mpw_en`` parameter [int]
+
+  A nonzero value enables multi-packet send. This feature allows the TX
+  burst function to pack up to five packets in two descriptors in order to
+  save PCI bandwidth and improve performance at the cost of a slightly
+  higher CPU usage.
+
+  It is currently only supported on the ConnectX-4 Lx family of adapters.
+  Enabled by default.
+
 Prerequisites
 -

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 2d63a48..0e83dd5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -81,6 +81,9 @@
  */
 #define MLX5_TXQS_MIN_INLINE "txqs_min_inline"

+/* Device parameter to enable multi-packet send WQEs. */
+#define MLX5_TXQ_MPW_EN "txq_mpw_en"
+
 /**
  * Retrieve integer value from environment variable.
  *
@@ -282,6 +285,8 @@ mlx5_args_check(const char *key, const char *val, void 
*opaque)
priv->txq_inline = tmp;
} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
priv->txqs_inline = tmp;
+   } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
+   priv->mps = !!tmp;
} else {
WARN("%s: unknown parameter", key);
return -EINVAL;
@@ -307,6 +312,7 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs)
MLX5_RXQ_CQE_COMP_EN,
MLX5_TXQ_INLINE,
MLX5_TXQS_MIN_INLINE,
+   MLX5_TXQ_MPW_EN,
};
struct rte_kvargs *kvlist;
int ret = 0;
@@ -502,6 +508,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct 
rte_pci_device *pci_dev)
priv->port = port;
priv->pd = pd;
priv->mtu = ETHER_MTU;
+   priv->mps = mps; /* Enable MPW by default if supported. */
priv->cqe_comp = 1; /* Enable compression by default. */
err = mlx5_args(priv, pci_dev->devargs);
if (err) {
@@ -550,7 +557,12 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct 
rte_pci_device *pci_dev)

priv_get_num_vfs(priv, &num_vfs);
priv->sriov = (num_vfs || sriov);
-   priv->mps = mps;
+   if (priv->mps && !mps) {
+   ERROR("multi-packet send not supported on this device"
+ " (" MLX5_TXQ_MPW_EN ")");
+   err = ENOTSUP;
+   goto port_error;
+   }
/* Allocate and register default RSS hash keys. */
priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
sizeof((*priv->rss_conf)[0]), 0);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index aeea4ff..698a50e 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -584,7 +584,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *info)
  DEV_RX_OFFLOAD_UDP_CKSUM |
  DEV_RX_OFFLOAD_TCP_CKSUM) :
 0);
-   info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
+   if (!priv->mps)
+   info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
if (priv->hw_csum)
info->tx_offload_capa |=
(DEV_TX_OFFLOAD_IPV4_CKSUM |
@@ -1318,7 +1319,17 @@ void
 priv_select_tx_function(struct priv *priv)
 {
priv->dev->tx_pkt_burst = mlx5_tx_burst;
-   if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+   /* Display warning for unsupported configurations. */
+   if (priv->sriov && priv->mps)
+   WARN("multi-packet send WQE cannot be used on a SR-IOV setup");
+   /* Select appropriate TX function. */
+   if ((priv->sriov == 0) && priv->mps && priv->txq_inline) {
+   priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+   DEBUG("sele

  1   2   3   >