[dpdk-dev] [PATCH v7 17/25] mlx5: add support for inline send

2016-06-27 Thread Nélio Laranjeiro
On Mon, Jun 27, 2016 at 01:17:42PM +0100, Bruce Richardson wrote:
> On Fri, Jun 24, 2016 at 03:17:56PM +0200, Nelio Laranjeiro wrote:
> > From: Yaacov Hazan 
> > 
> > Implement send inline feature which copies packet data directly into WQEs
> > for improved latency. The maximum packet size and the minimum number of Tx
> > queues to qualify for inline send are user-configurable.
> > 
> 
> WQE?

WQE is a Work Queue Entry.
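A Work Queue Entry is the descriptor the NIC fetches from the send queue for
each transmit operation. A rough sketch of the difference between a regular
(pointer-based) data segment and an inline segment is below; the struct and
field names are illustrative assumptions, not the driver's actual definitions.

#include <stdint.h>

/* Regular data segment: the WQE only references the packet buffer, which
 * the NIC fetches with a separate DMA read (illustrative layout). */
struct example_wqe_data_seg {
	uint32_t byte_count; /* Length of the referenced buffer. */
	uint32_t lkey;       /* Memory region key covering the buffer. */
	uint64_t addr;       /* DMA address of the packet data. */
};

/* Inline segment: the packet bytes are copied into the WQE itself, so no
 * extra DMA read is needed to fetch them (illustrative layout). */
struct example_wqe_inline_seg {
	uint32_t byte_count; /* Data length, typically with an inline flag bit. */
	uint8_t  data[];     /* Packet bytes stored directly in the WQE. */
};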

-- 
Nélio Laranjeiro
6WIND


[dpdk-dev] [PATCH v7 17/25] mlx5: add support for inline send

2016-06-27 Thread Bruce Richardson
On Fri, Jun 24, 2016 at 03:17:56PM +0200, Nelio Laranjeiro wrote:
> From: Yaacov Hazan 
> 
> Implement send inline feature which copies packet data directly into WQEs
> for improved latency. The maximum packet size and the minimum number of Tx
> queues to qualify for inline send are user-configurable.
> 

WQE?


[dpdk-dev] [PATCH v7 17/25] mlx5: add support for inline send

2016-06-24 Thread Nelio Laranjeiro
From: Yaacov Hazan 

Implement send inline feature which copies packet data directly into WQEs
for improved latency. The maximum packet size and the minimum number of Tx
queues to qualify for inline send are user-configurable.

This feature is effective when HW causes a performance bottleneck.
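Conceptually, the per-packet decision in the TX burst path is the one sketched
below. The struct, field and helper names here are hypothetical and only
illustrate the idea; they are not the functions added by this patch, and
txqs_min_inline additionally gates whether the inline burst function is
selected at all (see priv_select_tx_function further down).

#include <rte_mbuf.h>

/* Hypothetical types and helpers, for illustration only. */
struct example_txq {
	unsigned int max_inline; /* Bytes allowed inline (txq_inline), 0 = off. */
};

void example_wqe_write(struct example_txq *, void *, uint32_t, uint32_t);
void example_wqe_write_inline(struct example_txq *, void *, uint32_t);
uint32_t example_lkey(struct example_txq *, struct rte_mbuf *);

static void
example_post_tx(struct example_txq *txq, struct rte_mbuf *pkt)
{
	uint32_t len = rte_pktmbuf_data_len(pkt);

	if (txq->max_inline && len <= txq->max_inline) {
		/* Small packet: copy its bytes straight into the WQE,
		 * trading a memcpy for one less DMA read by the NIC. */
		example_wqe_write_inline(txq, rte_pktmbuf_mtod(pkt, void *),
					 len);
	} else {
		/* Large packet: keep the usual pointer-based descriptor. */
		example_wqe_write(txq, rte_pktmbuf_mtod(pkt, void *), len,
				  example_lkey(txq, pkt));
	}
}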

Signed-off-by: Yaacov Hazan 
Signed-off-by: Adrien Mazarguil 
Signed-off-by: Nelio Laranjeiro 
---
 doc/guides/nics/mlx5.rst   |  17 +++
 drivers/net/mlx5/mlx5.c|  15 +++
 drivers/net/mlx5/mlx5.h|   2 +
 drivers/net/mlx5/mlx5_ethdev.c |   5 +
 drivers/net/mlx5/mlx5_rxtx.c   | 273 +
 drivers/net/mlx5/mlx5_rxtx.h   |   2 +
 drivers/net/mlx5/mlx5_txq.c|   4 +
 7 files changed, 318 insertions(+)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 756153b..9ada221 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -154,6 +154,23 @@ Run-time configuration
   allows to save PCI bandwidth and improve performance at the cost of a
   slightly higher CPU usage.  Enabled by default.

+- ``txq_inline`` parameter [int]
+
+  Amount of data to be inlined during TX operations. Improves latency.
+  Can improve PPS performance when PCI back pressure is detected and may be
+  useful for scenarios involving heavy traffic on many queues.
+
+  It is not enabled by default (set to 0) since the additional software
+  logic necessary to handle this mode can lower performance when back
+  pressure is not expected.
+
+- ``txqs_min_inline`` parameter [int]
+
+  Enable inline send only when the number of TX queues is greater than or
+  equal to this value.
+
+  This option should be used in combination with ``txq_inline`` above.
+
 Prerequisites
 -------------

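In practice the two new parameters are meant to be used together. Assuming the
PCI whitelist device-argument syntax used for the other mlx5 run-time
parameters, enabling the feature could look like the following (PCI address
and values are placeholders):

  testpmd -c 0x3 -n 4 -w 0000:83:00.0,txq_inline=128,txqs_min_inline=4 -- -i

i.e. inline at most 128 bytes of each packet, and only once the port is
configured with at least 4 TX queues.
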
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 630e5e4..73069d2 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -72,6 +72,15 @@
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"

+/* Device parameter to configure inline send. */
+#define MLX5_TXQ_INLINE "txq_inline"
+
+/*
+ * Device parameter to configure the number of TX queues threshold for
+ * enabling inline send.
+ */
+#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"
+
 /**
  * Retrieve integer value from environment variable.
  *
@@ -269,6 +278,10 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
}
if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
priv->cqe_comp = !!tmp;
+   } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
+   priv->txq_inline = tmp;
+   } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
+   priv->txqs_inline = tmp;
} else {
WARN("%s: unknown parameter", key);
return -EINVAL;
@@ -292,6 +305,8 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs)
 {
const char **params = (const char *[]){
MLX5_RXQ_CQE_COMP_EN,
+   MLX5_TXQ_INLINE,
+   MLX5_TXQS_MIN_INLINE,
NULL,
};
struct rte_kvargs *kvlist;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8f5a6df..3a86609 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -113,6 +113,8 @@ struct priv {
unsigned int mps:1; /* Whether multi-packet send is supported. */
unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */
unsigned int pending_alarm:1; /* An alarm is pending. */
+   unsigned int txq_inline; /* Maximum packet size for inlining. */
+   unsigned int txqs_inline; /* Queue number threshold for inlining. */
/* RX/TX queues. */
unsigned int rxqs_n; /* RX queues array size. */
unsigned int txqs_n; /* TX queues array size. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 47e64b2..aeea4ff 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1318,6 +1318,11 @@ void
 priv_select_tx_function(struct priv *priv)
 {
priv->dev->tx_pkt_burst = mlx5_tx_burst;
+   if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+   priv->dev->tx_pkt_burst = mlx5_tx_burst_inline;
+   DEBUG("selected inline TX function (%u >= %u queues)",
+ priv->txqs_n, priv->txqs_inline);
+   }
 }

 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 9d992c3..daa22d9 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -376,6 +376,139 @@ mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
 }

 /**
+ * Write an inline WQE.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param wqe
+ *   Pointer to the WQE to fill.
+ * @param addr
+ *   Buffer data address.
+ * @param length
+ *   Packet length.
+ * @param lkey
+ *   Memory region lkey.
+ */
+static inline void