[dpdk-dev] [PATCH v12 6/6] testpmd: use Tx preparation in csum engine

2016-11-23 Thread Tomasz Kulasek
Added "csum txprep (on|off)" command which allows to switch to the
tx path using Tx preparation API.

By default unchanged implementation is used.

Using Tx preparation path, pseudo header calculation for udp/tcp/tso
packets from application, and used Tx preparation API for
packet preparation and verification.

Adding additional step to the csum engine costs about 3-4% of performance
drop, on my setup with ixgbe driver. It's caused mostly by the need
of reaccessing and modification of packet data.
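
For illustration, a hypothetical testpmd session exercising the new
path might look like this ("csum txprep" is the command added by this
patch; "set fwd csum" and "start" are existing testpmd commands):

    testpmd> csum txprep on
    testpmd> set fwd csum
    testpmd> start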

Signed-off-by: Tomasz Kulasek 
Acked-by: Konstantin Ananyev 
---
 app/test-pmd/cmdline.c  |   49 +++
 app/test-pmd/csumonly.c |   33 ---
 app/test-pmd/testpmd.c  |5 +
 app/test-pmd/testpmd.h  |2 ++
 4 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 63b55dc..373fc59 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -366,6 +366,10 @@ static void cmd_help_long_parsed(void *parsed_result,
"csum show (port_id)\n"
"Display tx checksum offload configuration\n\n"

+   "csum txprep (on|off)"
+   "Enable tx preparation path in csum forward engine"
+   "\n\n"
+
"tso set (segsize) (portid)\n"
"Enable TCP Segmentation Offload in csum forward"
" engine.\n"
@@ -3523,6 +3527,50 @@ struct cmd_csum_tunnel_result {
},
 };

+/* Enable/disable tx preparation path */
+struct cmd_csum_txprep_result {
+   cmdline_fixed_string_t csum;
+   cmdline_fixed_string_t parse;
+   cmdline_fixed_string_t onoff;
+};
+
+static void
+cmd_csum_txprep_parsed(void *parsed_result,
+  __attribute__((unused)) struct cmdline *cl,
+  __attribute__((unused)) void *data)
+{
+   struct cmd_csum_txprep_result *res = parsed_result;
+
+   if (!strcmp(res->onoff, "on"))
+   tx_prepare = 1;
+   else
+   tx_prepare = 0;
+
+}
+
+cmdline_parse_token_string_t cmd_csum_txprep_csum =
+   TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result,
+   csum, "csum");
+cmdline_parse_token_string_t cmd_csum_txprep_parse =
+   TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result,
+   parse, "txprep");
+cmdline_parse_token_string_t cmd_csum_txprep_onoff =
+   TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result,
+   onoff, "on#off");
+
+cmdline_parse_inst_t cmd_csum_txprep = {
+   .f = cmd_csum_txprep_parsed,
+   .data = NULL,
+   .help_str = "enable/disable tx preparation path for csum engine: "
+   "csum txprep on|off",
+   .tokens = {
+   (void *)&cmd_csum_txprep_csum,
+   (void *)&cmd_csum_txprep_parse,
+   (void *)&cmd_csum_txprep_onoff,
+   NULL,
+   },
+};
+
 /* *** ENABLE HARDWARE SEGMENTATION IN TX NON-TUNNELED PACKETS *** */
 struct cmd_tso_set_result {
cmdline_fixed_string_t tso;
@@ -11470,6 +11518,7 @@ struct cmd_set_vf_mac_addr_result {
(cmdline_parse_inst_t *)&cmd_csum_set,
(cmdline_parse_inst_t *)&cmd_csum_show,
(cmdline_parse_inst_t *)&cmd_csum_tunnel,
+   (cmdline_parse_inst_t *)&cmd_csum_txprep,
(cmdline_parse_inst_t *)&cmd_tso_set,
(cmdline_parse_inst_t *)&cmd_tso_show,
(cmdline_parse_inst_t *)&cmd_tunnel_tso_set,
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..3afa9ab 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -372,8 +372,10 @@ struct simple_gre_hdr {
udp_hdr->dgram_cksum = 0;
if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
+   if (!tx_prepare)
+   udp_hdr->dgram_cksum = get_psd_sum(
+   l3_hdr, info->ethertype,
+   ol_flags);
} else {
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
@@ -385,12 +387,15 @@ struct simple_gre_hdr {
tcp_hdr->cksum = 0;
if (tso_segsz) {
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);

[dpdk-dev] [PATCH v12 5/6] ixgbe: add Tx preparation

2016-11-23 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
Acked-by: Konstantin Ananyev 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   56 ++
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 ++
 4 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index edc9b22..a75f59d 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -517,6 +517,8 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1103,6 +1105,7 @@ struct rte_ixgbe_xstats_name_off {
eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..e229cf5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index b2d9f45..dbe83e7 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -70,6 +70,7 @@
 #include 
 #include 
 #include 
+#include <rte_net.h>

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -87,6 +88,9 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -905,6 +909,56 @@ static inline int __attribute__((always_inline))

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_net_intel_cksum_prepare(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/*
+ *
  *  RX functions
  *
  **/
@@ -2282,6 +2336,7 @@ void __attribute__((cold))
if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
&& (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+   dev->tx_pkt_prepare = NULL;
 #ifdef RTE_IXGBE_INC_VECTOR
if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
(rte_eal_process_type() != RTE_PROC_PRIMARY

[dpdk-dev] [PATCH v12 4/6] i40e: add Tx preparation

2016-11-23 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
Acked-by: Konstantin Ananyev 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 67778ba..5761357 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -943,6 +943,7 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
dev->dev_ops = _eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prepare = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2645,6 +2646,8 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..5827f2f 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include <rte_ip.h>
+#include <rte_net.h>

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1411,6 +1424,61 @@ static inline int __attribute__((always_inline))
return nb_tx;
 }

+/*
+ *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) are considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_net_intel_cksum_prepare(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -2763,9 +2831,11 @@ void __attribute__((cold))
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prepare = NULL;

[dpdk-dev] [PATCH v12 3/6] fm10k: add Tx preparation

2016-11-23 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
Acked-by: Konstantin Ananyev 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
#define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ uint16_t fm10k_recv_scattered_pkts(void *rx_queue,
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 923690c..a116822 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1447,6 +1447,8 @@ static int fm10k_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2755,8 +2757,10 @@ static void __attribute__((cold))
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prepare = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prepare = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2835,6 +2839,7 @@ static void __attribute__((cold))
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prepare = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 32cc7ff..144e5e6 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include <rte_net.h>
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -597,3 +607,41 @@ static inline void tx_xmit_pkt(struct fm10k_tx_queue *q, struct rte_mbuf *mb)

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_net_intel_cksum_prepare(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v12 2/6] e1000: add Tx preparation

2016-11-23 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
Acked-by: Konstantin Ananyev 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ int eth_igb_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ int eth_em_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index aee3d34..a004ee9 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev,
eth_dev->dev_ops = &eth_em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;
+   eth_dev->tx_pkt_prepare = (eth_tx_prep_t)&eth_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1079,6 +1080,8 @@ static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev,
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..7e271ad 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include <rte_net.h>
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ struct em_tx_queue {

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+ 

[dpdk-dev] [PATCH v12 1/6] ethdev: add Tx preparation

2016-11-23 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prepare`

uint16_t rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Added functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for the tx offloads set in the mbuf
  of a packet, such as flag completeness. In the current implementation
  this function is called optionally, when RTE_LIBRTE_ETHDEV_DEBUG is
  enabled.

int rte_net_intel_cksum_prepare(struct rte_mbuf *m)
to fix the pseudo-header checksum of TSO and non-TSO tcp/udp packets
before the hardware tx checksum offload is applied.
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   computed and set.
 - for TSO the IP payload length is not included.
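
A minimal sketch of the intended call sequence from an application
(assuming an already configured port; the error handling shown is
illustrative only):

    uint16_t nb_prep, nb_tx;

    /* Verify and fix up offload metadata; stops at the first bad packet. */
    nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
    if (nb_prep != nb_pkts)
        printf("packet %u failed preparation: %s\n", nb_prep,
                rte_strerror(rte_errno));

    /* Transmit only the packets that passed preparation. */
    nb_tx = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);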

PERFORMANCE TESTS
-----------------

This feature was tested with modified csum engine from test-pmd.

The packet checksum preparation was moved from application to Tx
preparation step placed before burst.

We may expect some overhead costs caused by:
1) using additional callback before burst,
2) rescanning burst,
3) additional condition checking (packet validation),
4) worse optimization (e.g. packet data access, etc.)

We tested it using ixgbe Tx preparation implementation with some parts
disabled to have comparable information about the impact of different
parts of implementation.

IMPACT:

1) For an unimplemented Tx preparation callback the performance impact
   is negligible,
2) For the packet condition check without checksum modifications
   (nb_segs, available offloads, etc.) the result is
   14626628/14252168 pps (~2.62% drop),
3) For full support in the ixgbe driver (point 2 + packet checksum
   initialization) the result is 14060924/13588094 pps (~3.48% drop)

Signed-off-by: Tomasz Kulasek 
Acked-by: Konstantin Ananyev 
Acked-by: Olivier Matz 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |  106 +
 lib/librte_mbuf/rte_mbuf.h|   64 +
 lib/librte_net/rte_net.h  |   85 +
 4 files changed, 256 insertions(+)

diff --git a/config/common_base b/config/common_base
index 4bff83a..d609a88 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREPARE=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 9678179..4ffc1b3 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@
 #include 
 #include 
 #include 
+#include <rte_errno.h>
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -702,6 +703,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1191,6 +1194,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1625,6 +1633,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2819,6 +2828,103 @@ int rte_eth_dev_set_vlan_ether_type(uint8_t port_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prepare() 

[dpdk-dev] [PATCH v12 0/6] add Tx preparation

2016-11-23 Thread Tomasz Kulasek
v6 changes:
 - added performance impact test results to the patch description

v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to default behavior (NULL) for simple/vector path
   in fm10k, i40e and ixgbe drivers to increase performance, when
   Tx offloads are not intentionally available

v3 changes:
 - reworked csum testpmd engine instead adding new one,
 - fixed checksum initialization procedure to include also outer
   checksum offloads,
 - some minor formattings and optimalizations

v2 changes:
 - rte_eth_tx_prep() returns number of packets when device doesn't
   support tx_prep functionality,
 - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing to turn off tx_prep
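
A rough sketch of that fallback, assuming the callback-based dispatch
added in patch 1/6 (simplified, debug checks omitted; this v12 series
renames the API to rte_eth_tx_prepare()/tx_pkt_prepare):

    static inline uint16_t
    rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id,
            struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
    {
        struct rte_eth_dev *dev = &rte_eth_devices[port_id];

        /* No prepare callback: treat the whole burst as ready. */
        if (dev->tx_pkt_prepare == NULL)
            return nb_pkts;

        return (*dev->tx_pkt_prepare)(dev->data->tx_queues[queue_id],
                tx_pkts, nb_pkts);
    }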


Tomasz Kulasek (6):
  ethdev: add Tx preparation
  e1000: add Tx preparation
  fm10k: add Tx preparation
  i40e: add Tx preparation
  ixgbe: add Tx preparation
  testpmd: use Tx preparation in csum engine

 app/test-pmd/cmdline.c   |   49 ++
 app/test-pmd/csumonly.c  |   33 +---
 app/test-pmd/testpmd.c   |5 ++
 app/test-pmd/testpmd.h   |2 +
 config/common_base   |1 +
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +-
 drivers/net/e1000/em_rxtx.c  |   48 -
 drivers/net/e1000/igb_ethdev.c   |4 ++
 drivers/net/e1000/igb_rxtx.c |   52 ++-
 drivers/net/fm10k/fm10k.h|6 +++
 drivers/net/fm10k/fm10k_ethdev.c |5 ++
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 drivers/net/i40e/i40e_ethdev.c   |3 ++
 drivers/net/i40e/i40e_rxtx.c |   72 +-
 drivers/net/i40e/i40e_rxtx.h |8 +++
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   56 
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 +
 lib/librte_ether/rte_ethdev.h|  106 ++
 lib/librte_mbuf/rte_mbuf.h   |   64 +++
 lib/librte_net/rte_net.h |   85 ++
 23 files changed, 662 insertions(+), 13 deletions(-)

-- 
1.7.9.5



[dpdk-dev] [PATCH v11 6/6] testpmd: use Tx preparation in csum engine

2016-10-26 Thread Tomasz Kulasek
Removed the pseudo-header checksum calculation for udp/tcp/tso packets
from the application and used the Tx preparation API for packet
preparation and verification instead.

Adding this additional step to the csum engine costs about 3-4% of
performance on my setup with the ixgbe driver. The drop is caused
mostly by the need to re-access and modify packet data.
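
The engine itself only reports a failed preparation (see the forward
hunk below); an application that wants to recover could drop the
offending mbuf and re-run the preparation on the remainder. A
hypothetical sketch, not part of this patch:

    uint16_t nb_prep = rte_eth_tx_prep(port, queue, pkts, nb_rx);

    while (nb_prep < nb_rx) {
        /* pkts[nb_prep] failed; drop it and prepare the rest. */
        rte_pktmbuf_free(pkts[nb_prep]);
        memmove(&pkts[nb_prep], &pkts[nb_prep + 1],
                (nb_rx - nb_prep - 1) * sizeof(pkts[0]));
        nb_rx--;
        nb_prep += rte_eth_tx_prep(port, queue,
                &pkts[nb_prep], nb_rx - nb_prep);
    }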

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/csumonly.c |   36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..6f33ae9 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));

 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-   if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-   else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
/* do not recalculate udp cksum if it was 0 */
if (udp_hdr->dgram_cksum != 0) {
udp_hdr->dgram_cksum = 0;
-   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
-   } else {
+   else
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
info->ethertype);
-   }
}
} else if (info->l4_proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
tcp_hdr->cksum = 0;
-   if (tso_segsz) {
+   if (tso_segsz)
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+   else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
ol_flags |= PKT_TX_TCP_CKSUM;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else {
+   else
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
info->ethertype);
-   }
} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
sctp_hdr->cksum = 0;
@@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+   uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
printf("\n");
}
}
-   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+   nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst,
+   nb_rx);
+   if (nb_prep != nb_rx)
+   printf("Preparing packet burst to transmit failed: %s\n",
+   rte_strerror(rte_errno));
+
+   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep);
/*
 * Retry if necessary
 */
-- 
1.7.9.5



[dpdk-dev] [PATCH v11 5/6] ixgbe: add Tx preparation

2016-10-26 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 ++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 4ca5747..4c6a8e1 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   eth_dev->tx_pkt_prep = &ixgbe_prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..e229cf5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 2ce8234..031414c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -70,6 +70,7 @@
 #include 
 #include 
 #include 
+#include <rte_net.h>

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -87,6 +88,9 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -905,6 +909,56 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/*
+ *
  *  RX functions
  *
  **/
@@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
&& (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+   dev->tx_pkt_prep = NULL;

[dpdk-dev] [PATCH v11 4/6] i40e: add Tx preparation

2016-10-26 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5af0e43..dab0d48 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = _eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..7f6d3d8 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include <rte_ip.h>
+#include <rte_net.h>

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*
+ *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) are considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prep = NULL;
} else {

[dpdk-dev] [PATCH v11 3/6] fm10k: add Tx preparation

2016-10-26 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
#define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c804436..dffb6d1 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prep = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 32cc7ff..5fc4d5a 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include <rte_net.h>
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v11 2/6] e1000: add Tx preparation

2016-10-26 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7cf5f0c..17b45cb 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &eth_em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;
+   eth_dev->tx_pkt_prep = (eth_tx_prep_t)&eth_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..5bd3c99 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include <rte_net.h>
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   

[dpdk-dev] [PATCH v11 1/6] ethdev: add Tx preparation

2016-10-26 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Added functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for the tx offloads set in the mbuf
  of a packet, such as flag completeness. In the current implementation
  this function is called optionally, when RTE_LIBRTE_ETHDEV_DEBUG is
  enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix the pseudo-header checksum of TSO and non-TSO tcp/udp packets
before the hardware tx checksum offload is applied.
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   computed and set.
 - for TSO the IP payload length is not included.
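
For an IPv4/TCP packet this amounts conceptually to the following
(a simplified sketch; the real helper also covers IPv6 and UDP, and
derives the header offsets from the mbuf metadata):

    struct ipv4_hdr *ip;
    struct tcp_hdr *tcp;

    ip = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
    tcp = (struct tcp_hdr *)((char *)ip + m->l3_len);
    /* rte_ipv4_phdr_cksum() leaves out the payload length when
     * PKT_TX_TCP_SEG is set, matching the TSO rule above. */
    tcp->cksum = rte_ipv4_phdr_cksum(ip, m->ol_flags);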

PERFORMANCE TESTS
-----------------

This feature was tested with modified csum engine from test-pmd.

The packet checksum preparation was moved from application to Tx
preparation step placed before burst.

We may expect some overhead costs caused by:
1) using additional callback before burst,
2) rescanning burst,
3) additional condition checking (packet validation),
4) worse optimization (e.g. packet data access, etc.)

We tested it using ixgbe Tx preparation implementation with some parts
disabled to have comparable information about the impact of different
parts of implementation.

IMPACT:

1) For an unimplemented Tx preparation callback the performance impact
   is negligible,
2) For the packet condition check without checksum modifications
   (nb_segs, available offloads, etc.) the result is
   14626628/14252168 pps (~2.62% drop),
3) For full support in the ixgbe driver (point 2 + packet checksum
   initialization) the result is 14060924/13588094 pps (~3.48% drop)

Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |  103 +
 lib/librte_mbuf/rte_mbuf.h|   64 +
 lib/librte_net/rte_net.h  |   85 ++
 4 files changed, 253 insertions(+)

diff --git a/config/common_base b/config/common_base
index c7fd3db..619284b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 38641e8..cf6f68e 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include <rte_errno.h>
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2816,6 +2825,100 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ *

[dpdk-dev] [PATCH v11 0/6] add Tx preparation

2016-10-26 Thread Tomasz Kulasek
v4 changes:
 - tx_prep is now set to default behavior (NULL) for simple/vector path
   in fm10k, i40e and ixgbe drivers to increase performance, when
   Tx offloads are not intentionally available

v3 changes:
 - reworked csum testpmd engine instead adding new one,
 - fixed checksum initialization procedure to include also outer
   checksum offloads,
 - some minor formattings and optimalizations

v2 changes:
 - rte_eth_tx_prep() returns number of packets when device doesn't
   support tx_prep functionality,
 - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing to turn off tx_prep


Tomasz Kulasek (6):
  ethdev: add Tx preparation
  e1000: add Tx preparation
  fm10k: add Tx preparation
  i40e: add Tx preparation
  ixgbe: add Tx preparation
  testpmd: use Tx preparation in csum engine

 app/test-pmd/csumonly.c  |   36 +
 config/common_base   |1 +
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +-
 drivers/net/e1000/em_rxtx.c  |   48 +-
 drivers/net/e1000/igb_ethdev.c   |4 ++
 drivers/net/e1000/igb_rxtx.c |   52 ++-
 drivers/net/fm10k/fm10k.h|6 +++
 drivers/net/fm10k/fm10k_ethdev.c |5 ++
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 drivers/net/i40e/i40e_ethdev.c   |3 ++
 drivers/net/i40e/i40e_rxtx.c |   72 +-
 drivers/net/i40e/i40e_rxtx.h |8 +++
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 -
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 +
 lib/librte_ether/rte_ethdev.h|  103 ++
 lib/librte_mbuf/rte_mbuf.h   |   64 +++
 lib/librte_net/rte_net.h |   85 +++
 20 files changed, 591 insertions(+), 30 deletions(-)

-- 
1.7.9.5



[dpdk-dev] [PATCH v10 6/6] testpmd: use Tx preparation in csum engine

2016-10-24 Thread Tomasz Kulasek
Removed the pseudo-header checksum calculation for udp/tcp/tso packets
from the application and used the Tx preparation API for packet
preparation and verification instead.

Adding this additional step to the csum engine costs about 3-4% of
performance on my setup with the ixgbe driver. The drop is caused
mostly by the need to re-access and modify packet data.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/csumonly.c |   36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..6f33ae9 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));

 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-   if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-   else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
/* do not recalculate udp cksum if it was 0 */
if (udp_hdr->dgram_cksum != 0) {
udp_hdr->dgram_cksum = 0;
-   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
-   } else {
+   else
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
info->ethertype);
-   }
}
} else if (info->l4_proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
tcp_hdr->cksum = 0;
-   if (tso_segsz) {
+   if (tso_segsz)
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+   else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
ol_flags |= PKT_TX_TCP_CKSUM;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else {
+   else
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
info->ethertype);
-   }
} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
sctp_hdr->cksum = 0;
@@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+   uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
printf("\n");
}
}
-   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+   nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst,
+   nb_rx);
+   if (nb_prep != nb_rx)
+   printf("Preparing packet burst to transmit failed: %s\n",
+   rte_strerror(rte_errno));
+
+   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep);
/*
 * Retry if necessary
 */
-- 
1.7.9.5



[dpdk-dev] [PATCH v10 5/6] ixgbe: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 ++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 4ca5747..4c6a8e1 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   eth_dev->tx_pkt_prep = &ixgbe_prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..e229cf5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 2ce8234..031414c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -70,6 +70,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -87,6 +88,9 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -905,6 +909,56 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/*
+ *
  *  RX functions
  *
  **/
@@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
&& (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+   dev->t

[dpdk-dev] [PATCH v10 4/6] i40e: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5af0e43..dab0d48 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &i40e_eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..7f6d3d8 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*
+ *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) are considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prep = NULL;
} else {

[dpdk-dev] [PATCH v10 3/6] fm10k: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
 #define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c804436..dffb6d1 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prep = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 32cc7ff..5fc4d5a 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v10 2/6] e1000: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7cf5f0c..17b45cb 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &eth_em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;
+   eth_dev->tx_pkt_prep = (eth_tx_prep_t)&eth_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..5bd3c99 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   

[dpdk-dev] [PATCH v10 1/6] ethdev: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Added functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for the tx offloads set in the mbuf
  of a packet, such as flag completeness. In the current implementation
  this function is called optionally, when RTE_LIBRTE_ETHDEV_DEBUG is
  enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix the pseudo-header checksum for TSO and non-TSO tcp/udp packets
before hardware tx checksum offload:
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   computed and set,
 - for TSO the IP payload length is not included (see the sketch below).
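For the IPv4/TCP case, the core of the rte_phdr_cksum_fix() behaviour
described above can be pictured with the minimal sketch below. It is
illustrative only: the helper name is made up, error handling is
dropped, and the real code lives in rte_net.h.

/* Sketch only: pseudo-header checksum fix for an IPv4/TCP packet. */
static inline void
phdr_cksum_fix_sketch(struct rte_mbuf *m)
{
	struct ipv4_hdr *ipv4_hdr = rte_pktmbuf_mtod_offset(m,
			struct ipv4_hdr *, m->l2_len);
	struct tcp_hdr *tcp_hdr = (struct tcp_hdr *)
			((char *)ipv4_hdr + m->l3_len);

	/* rte_ipv4_phdr_cksum() checks PKT_TX_TCP_SEG in ol_flags and,
	 * for TSO, leaves the IP payload length out of the sum. */
	tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
}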

PERFORMANCE TESTS
-

This feature was tested with modified csum engine from test-pmd.

The packet checksum preparation was moved from application to Tx
preparation step placed before burst.

We may expect some overhead caused by:
1) using an additional callback before burst,
2) rescanning the burst,
3) additional condition checking (packet validation),
4) worse optimization (e.g. packet data access, etc.)

We tested it using the ixgbe Tx preparation implementation with some
parts disabled to have comparable information about the impact of
different parts of the implementation.

IMPACT:

1) For an unimplemented Tx preparation callback the performance impact
   is negligible,
2) For packet condition checks without checksum modifications (nb_segs,
   available offloads, etc.) the result is 14626628/14252168 (~2.62%
   drop),
3) For full support in the ixgbe driver (point 2 + packet checksum
   initialization) the result is 14060924/13588094 (~3.48% drop)

Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |   96 +
 lib/librte_mbuf/rte_mbuf.h|   64 +++
 lib/librte_net/rte_net.h  |   85 
 4 files changed, 246 insertions(+)

diff --git a/config/common_base b/config/common_base
index c7fd3db..619284b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 38641e8..c4a8ccd 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2816,6 +2825,93 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ *

[dpdk-dev] [PATCH v10 0/6] add Tx preparation

2016-10-24 Thread Tomasz Kulasek
alization procedure to also include outer
   checksum offloads,
 - some minor formatting fixes and optimizations

v2 changes:
 - rte_eth_tx_prep() returns the number of packets when the device
   doesn't support the tx_prep functionality (see the sketch below),
 - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing tx_prep to be turned
   off
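That fallback can be pictured with this simplified sketch of the
inline wrapper (see patch 1/6 for the actual definition; with
CONFIG_RTE_ETHDEV_TX_PREP=n the body presumably reduces to
"return nb_pkts;"):

static inline uint16_t
rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];

	/* Devices without a prepare callback accept the whole burst. */
	if (dev->tx_pkt_prep == NULL)
		return nb_pkts;

	return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
			tx_pkts, nb_pkts);
}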

Tomasz Kulasek (6):
  ethdev: add Tx preparation
  e1000: add Tx preparation
  fm10k: add Tx preparation
  i40e: add Tx preparation
  ixgbe: add Tx preparation
  testpmd: use Tx preparation in csum engine

 app/test-pmd/csumonly.c  |   36 ++
 config/common_base   |1 +
 drivers/net/e1000/e1000_ethdev.h |   11 +
 drivers/net/e1000/em_ethdev.c|5 +-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 ++
 drivers/net/e1000/igb_rxtx.c |   52 -
 drivers/net/fm10k/fm10k.h|6 +++
 drivers/net/fm10k/fm10k_ethdev.c |5 ++
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +++-
 drivers/net/i40e/i40e_ethdev.c   |3 ++
 drivers/net/i40e/i40e_rxtx.c |   72 +++-
 drivers/net/i40e/i40e_rxtx.h |8 
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 ++-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 +
 lib/librte_ether/rte_ethdev.h|   96 ++
 lib/librte_mbuf/rte_mbuf.h   |   64 +
 lib/librte_net/rte_net.h |   85 +
 20 files changed, 584 insertions(+), 30 deletions(-)

-- 
1.7.9.5



[dpdk-dev] [PATCH v9 6/6] testpmd: use Tx preparation in csum engine

2016-10-24 Thread Tomasz Kulasek
Removed pseudo header calculation for udp/tcp/tso packets from
application and used Tx preparation API for packet preparation and
verification.

Adding an additional step to the csum engine costs about 3-4% of
performance on my setup with the ixgbe driver. It's caused mostly by
the need to re-access and modify packet data.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/csumonly.c |   36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..6f33ae9 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));

 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-   if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-   else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
/* do not recalculate udp cksum if it was 0 */
if (udp_hdr->dgram_cksum != 0) {
udp_hdr->dgram_cksum = 0;
-   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
-   } else {
+   else
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
info->ethertype);
-   }
}
} else if (info->l4_proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
tcp_hdr->cksum = 0;
-   if (tso_segsz) {
+   if (tso_segsz)
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+   else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
ol_flags |= PKT_TX_TCP_CKSUM;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else {
+   else
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
info->ethertype);
-   }
} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
sctp_hdr->cksum = 0;
@@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+   uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
printf("\n");
}
}
-   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+   nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst,
+   nb_rx);
+   if (nb_prep != nb_rx)
+   printf("Preparing packet burst to transmit failed: %s\n",
+   rte_strerror(rte_errno));
+
+   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep);
/*
 * Retry if necessary
 */
-- 
1.7.9.5



[dpdk-dev] [PATCH v9 5/6] ixgbe: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 ++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 4ca5747..4c6a8e1 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   eth_dev->tx_pkt_prep = &ixgbe_prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..e229cf5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 2ce8234..031414c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -70,6 +70,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -87,6 +88,9 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -905,6 +909,56 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/*
+ *
  *  RX functions
  *
  **/
@@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
&& (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+   dev->t

[dpdk-dev] [PATCH v9 4/6] i40e: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5af0e43..dab0d48 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &i40e_eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..7f6d3d8 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*
+ *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) are considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prep = NULL;
} else {

[dpdk-dev] [PATCH v9 3/6] fm10k: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
 #define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c804436..dffb6d1 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prep = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 32cc7ff..5fc4d5a 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v9 2/6] e1000: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7cf5f0c..17b45cb 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &eth_em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;
+   eth_dev->tx_pkt_prep = (eth_tx_prep_t)&eth_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..5bd3c99 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   

[dpdk-dev] [PATCH v9 1/6] ethdev: add Tx preparation

2016-10-24 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Added functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for the tx offloads set in the mbuf
  of a packet, such as flag completeness. In the current implementation
  this function is called optionally, when RTE_LIBRTE_ETHDEV_DEBUG is
  enabled; a sketch of such a check is shown after this list.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix the pseudo-header checksum for TSO and non-TSO tcp/udp packets
before hardware tx checksum offload:
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   computed and set,
 - for TSO the IP payload length is not included.
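The flag-completeness check mentioned above for
rte_validate_tx_offload() can be sketched as follows. This is
illustrative only, not the exact rte_mbuf.h code; the helper name is
made up:

/* Sketch only: validate that tx offload flags form a complete set. */
static inline int
validate_tx_offload_sketch(const struct rte_mbuf *m)
{
	uint64_t ol_flags = m->ol_flags;

	/* An L4 checksum request needs an L3 type flag to go with it. */
	if ((ol_flags & PKT_TX_L4_MASK) &&
			!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
		return -EINVAL;

	/* TSO needs a segment size and, for IPv4, IP checksum offload. */
	if ((ol_flags & PKT_TX_TCP_SEG) &&
			(m->tso_segsz == 0 ||
			 ((ol_flags & PKT_TX_IPV4) &&
			  !(ol_flags & PKT_TX_IP_CKSUM))))
		return -EINVAL;

	return 0;
}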

PERFORMANCE TESTS
-

This feature was tested with modified csum engine from test-pmd.

The packet checksum preparation was moved from application to Tx
preparation step placed before burst.

We may expect some overhead caused by:
1) using an additional callback before burst,
2) rescanning the burst,
3) additional condition checking (packet validation),
4) worse optimization (e.g. packet data access, etc.)

We tested it using the ixgbe Tx preparation implementation with some
parts disabled to have comparable information about the impact of
different parts of the implementation.

IMPACT:

1) For an unimplemented Tx preparation callback the performance impact
   is negligible,
2) For packet condition checks without checksum modifications (nb_segs,
   available offloads, etc.) the result is 14626628/14252168 (~2.62%
   drop),
3) For full support in the ixgbe driver (point 2 + packet checksum
   initialization) the result is 14060924/13588094 (~3.48% drop)

Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |   97 +
 lib/librte_mbuf/rte_mbuf.h|   64 +++
 lib/librte_net/rte_net.h  |   85 
 4 files changed, 247 insertions(+)

diff --git a/config/common_base b/config/common_base
index c7fd3db..619284b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 38641e8..d548d48 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2816,6 +2825,94 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ *

[dpdk-dev] [PATCH v9 0/6] add Tx preparation

2016-10-24 Thread Tomasz Kulasek

From 35b09a978d244092337b6f46fd1309f8c733bb6b Mon Sep 17 00:00:00 2001
From: Tomasz Kulasek <tomaszx.kula...@intel.com>
Date: Fri, 14 Oct 2016 16:10:35 +0200
Subject: [PATCH v6 0/6] add Tx preparation

As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose
different requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
---

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and now it is left to
   the application.

2) Different hardware may have different requirements for TX offloads;
   a different subset may be supported, and so on.

3) Some parameters (e.g. the number of segments in the ixgbe driver)
   may hang the device. These parameters may vary between devices.

   For example, i40e HW allows 8 fragments per packet, but that is
   after TSO segmentation, while ixgbe has a 38-fragment pre-TSO limit.

4) Fields in the packet may require different initialization (e.g.
   pseudo-header checksum precalculation, sometimes done differently
   depending on the packet type, and so on). Currently the application
   needs to take care of this.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst
   allows a packet burst to be prepared in a form acceptable to the
   specific device.

6) Some additional checks may be done in debug mode, keeping the
   tx_burst implementation clean.


PROPOSAL:
-

To help users deal with all these varieties, we propose to:

1) Introduce the rte_eth_tx_prep() function to do the preparations
   necessary for a packet burst to be safely transmitted on a device
   with the desired HW offloads (set/reset checksum fields according
   to the hardware requirements) and to check HW constraints (number
   of segments per packet, etc).

   Since the limitations and requirements may differ between devices,
   this requires extending the rte_eth_dev structure with a new
   function pointer, "tx_pkt_prep", which can be implemented in the
   driver to prepare and verify packets, in a device-specific way,
   before burst; this should prevent the application from sending
   malformed packets.

2) New fields will also be introduced in rte_eth_desc_lim:
   nb_seg_max and nb_mtu_seg_max, providing information about the
   maximum number of segments in TSO and non-TSO packets acceptable
   to the device.

   This information helps the application avoid creating malformed
   or malicious packets; see the sketch below for one way to use it.
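A minimal sketch of such a check, assuming only the desc_lim fields
added by patch 1/6 (the helper name is made up):

/* Sketch only: check one packet against the per-port segment limits. */
static int
pkt_fits_seg_limits(uint8_t port_id, const struct rte_mbuf *m)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);

	if (m->ol_flags & PKT_TX_TCP_SEG)
		return m->nb_segs <= dev_info.tx_desc_lim.nb_seg_max;

	return m->nb_segs <= dev_info.tx_desc_lim.nb_mtu_seg_max;
}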


APPLICATION (CASE OF USE):
--

1) The application should initialize the burst of packets to send and
   set the required tx offload flags and fields, such as l2_len,
   l3_len, l4_len, and tso_segsz.

2) The application passes the burst to rte_eth_tx_prep to check the
   conditions required to send the packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send the valid packets
   and/or to restore the invalid ones if the function fails.

e.g.

for (i = 0; i < nb_pkts; i++) {

/* initialize or process packet */

bufs[i]->tso_segsz = 800;
bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
| PKT_TX_IP_CKSUM;
bufs[i]->l2_len = sizeof(struct ether_hdr);
bufs[i]->l3_len = sizeof(struct ipv4_hdr);
bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
printf("tx_prep failed\n");

/* nb_prep here indicates the first invalid packet. rte_eth_tx_prep
 * can be used on the remaining packets to find further invalid ones.
 */

}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */


v9 changes:
 - fixed headers structure fragmentation check
 - moved fragmentation check into rte_validate_tx_offload()

v8 changes:
 - mbuf argument in rte_validate_tx_offload declared as const

v7 changes:
 - comments reworded/added
 - changed errno values returned from Tx prep API
 - added check in rte_phdr_cksum_fix if headers are in the first
   data segment and can be safely modified
 - moved rte_validate_tx_offload to rte_mbuf
 - moved rte_phdr_cksum_fix to rte_net.h
 - removed rte_pkt.h new file as useless

v6 changes:
- added performance impact test results to the patch description

v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to default behavior (NULL) for simple/vector path
   in fm10k, i40e and ixgbe drivers to increase performance, when
   Tx offloads are no

[dpdk-dev] [PATCH v8 6/6] testpmd: use Tx preparation in csum engine

2016-10-21 Thread Tomasz Kulasek
Removed pseudo header calculation for udp/tcp/tso packets from
application and used Tx preparation API for packet preparation and
verification.

Adding an additional step to the csum engine costs about 3-4% of
performance on my setup with the ixgbe driver. It's caused mostly by
the need to re-access and modify packet data.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/csumonly.c |   36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..6f33ae9 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));

 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-   if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-   else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
/* do not recalculate udp cksum if it was 0 */
if (udp_hdr->dgram_cksum != 0) {
udp_hdr->dgram_cksum = 0;
-   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
-   } else {
+   else
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
info->ethertype);
-   }
}
} else if (info->l4_proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
tcp_hdr->cksum = 0;
-   if (tso_segsz) {
+   if (tso_segsz)
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+   else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
ol_flags |= PKT_TX_TCP_CKSUM;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else {
+   else
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
info->ethertype);
-   }
} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
sctp_hdr->cksum = 0;
@@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+   uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
printf("\n");
}
}
-   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+   nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst,
+   nb_rx);
+   if (nb_prep != nb_rx)
+   printf("Preparing packet burst to transmit failed: %s\n",
+   rte_strerror(rte_errno));
+
+   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep);
/*
 * Retry if necessary
 */
-- 
1.7.9.5



[dpdk-dev] [PATCH v8 5/6] ixgbe: add Tx preparation

2016-10-21 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 ++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 4ca5747..4c6a8e1 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   eth_dev->tx_pkt_prep = &ixgbe_prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..e229cf5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 2ce8234..031414c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -70,6 +70,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -87,6 +88,9 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -905,6 +909,56 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/*
+ *
  *  RX functions
  *
  **/
@@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
&& (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+   dev->t

[dpdk-dev] [PATCH v8 4/6] i40e: add Tx preparation

2016-10-21 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5af0e43..dab0d48 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &i40e_eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..7f6d3d8 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*********************************************************************
+ *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only the condition m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) is considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prep = NULL;
} else {

[dpdk-dev] [PATCH v8 3/6] fm10k: add Tx preparation

2016-10-21 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
 #define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c804436..dffb6d1 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prep = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 32cc7ff..5fc4d5a 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v8 2/6] e1000: add Tx preparation

2016-10-21 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7cf5f0c..17b45cb 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &eth_em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;
+   eth_dev->tx_pkt_prep = (eth_tx_prep_t)&eth_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..5bd3c99 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ end_of_tx:

 /*********************************************************************
  *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   

[dpdk-dev] [PATCH v8 1/6] ethdev: add Tx preparation

2016-10-21 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Added functions:

int rte_validate_tx_offload(const struct rte_mbuf *m)
to validate general requirements for the tx offloads set in the mbuf of a
  packet, such as flag completeness. In the current implementation this
  function is called optionally, when RTE_LIBRTE_ETHDEV_DEBUG is enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
before hardware tx checksum offload.
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   calculated and set.
 - for TSO the IP payload length is not included.
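
To illustrate how the two helpers compose, here is a minimal sketch of an
application-side wrapper (not part of the patch; the helper name
prep_one_mbuf is hypothetical, the two called functions are the ones
introduced above):

#include <rte_mbuf.h>   /* rte_validate_tx_offload() (this patch) */
#include <rte_net.h>    /* rte_phdr_cksum_fix() (this patch) */

/* Hypothetical helper: sanity-check the tx offload request of one mbuf
 * and write the pseudo-header checksum the hardware expects. Returns 0
 * on success, a negative error code otherwise. Assumes ol_flags,
 * l2_len and l3_len are already filled in.
 */
static int
prep_one_mbuf(struct rte_mbuf *m)
{
	int ret;

	/* Optional flag-completeness check; the tx_prep callbacks call
	 * it only when RTE_LIBRTE_ETHDEV_DEBUG is enabled. */
	ret = rte_validate_tx_offload(m);
	if (ret != 0)
		return ret;

	/* Full pseudo-header checksum for plain tcp/udp; for TSO the
	 * IP payload length is left out, as described above. */
	return rte_phdr_cksum_fix(m);
}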

PERFORMANCE TESTS
-

This feature was tested with modified csum engine from test-pmd.

The packet checksum preparation was moved from the application to the Tx
preparation step placed before the burst.

We may expect some overhead costs caused by:
1) using additional callback before burst,
2) rescanning burst,
3) additional condition checking (packet validation),
4) worse optimization (e.g. packet data access, etc.)

We tested it using the ixgbe Tx preparation implementation with some parts
disabled, to have comparable information about the impact of different
parts of the implementation.

IMPACT:

1) For an unimplemented Tx preparation callback the performance impact is
   negligible,
2) The packet condition check without checksum modifications (nb_segs,
   available offloads, etc.) gives 14626628/14252168 (~2.62% drop),
3) Full support in the ixgbe driver (point 2 plus packet checksum
   initialization) gives 14060924/13588094 (~3.48% drop)

Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |   97 +
 lib/librte_mbuf/rte_mbuf.h|   56 
 lib/librte_net/rte_net.h  |   90 ++
 4 files changed, 244 insertions(+)

diff --git a/config/common_base b/config/common_base
index c7fd3db..619284b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 38641e8..d548d48 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2816,6 +2825,94 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets 

[dpdk-dev] [PATCH v8 0/6] add Tx preparation

2016-10-21 Thread Tomasz Kulasek
From 35b09a978d244092337b6f46fd1309f8c733bb6b Mon Sep 17 00:00:00 2001
From: Tomasz Kulasek <tomaszx.kula...@intel.com>
Date: Fri, 14 Oct 2016 16:10:35 +0200
Subject: [PATCH v6 0/6] add Tx preparation

As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose different 
requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
---

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and now, it's an
   application issue.

2) Different hardware may have different requirements for TX offloads,
   a different subset may be supported, and so on.

3) Some parameters (e.g. number of segments in ixgbe driver) may hang the
   device. These parameters may vary for different devices.

   For example i40e HW allows 8 fragments per packet, but that is after
   TSO segmentation. While ixgbe has a 38-fragment pre-TSO limit.

4) Fields in a packet may require different initialization (e.g.
   pseudo-header checksum precalculation, sometimes done in a different
   way depending on packet type, and so on). Currently the application
   needs to take care of it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst lets
   the application prepare a packet burst in a form acceptable to the
   specific device.

6) Some additional checks may be done in debug mode keeping tx_burst
   implementation clean.


PROPOSAL:
-

To help the user deal with all these varieties, we propose to:

1) Introduce rte_eth_tx_prep() function to do necessary preparations of
   packet burst to be safely transmitted on device for desired HW
   offloads (set/reset checksum field according to the hardware
   requirements) and check HW constraints (number of segments per
   packet, etc).

   Since the limitations and requirements may differ between devices, this
   requires extending the rte_eth_dev structure with a new function
   pointer, "tx_pkt_prep", which can be implemented in the driver to
   prepare and verify packets, in a device-specific way, before the burst.
   This should prevent the application from sending malformed packets.

2) Also, new fields will be introduced in rte_eth_desc_lim:
   nb_seg_max and nb_mtu_seg_max, providing information about the max
   number of segments in TSO and non-TSO packets acceptable to the device.

   This information is useful for the application to avoid creating (or to
   limit) malicious packets; see the sketch below.
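
For instance, an application can read these limits at init time and cap
its own segment counts accordingly (a minimal sketch, assuming the PMD
fills the new fields as proposed here; error handling omitted):

#include <stdio.h>
#include <rte_ethdev.h>

/* Sketch: report the proposed per-device Tx segment limits. */
static void
print_tx_seg_limits(uint8_t port_id)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);
	printf("port %u: nb_seg_max=%u, nb_mtu_seg_max=%u\n",
		port_id,
		dev_info.tx_desc_lim.nb_seg_max,
		dev_info.tx_desc_lim.nb_mtu_seg_max);
}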


APPLICATION (CASE OF USE):
--

1) The application should initialize the burst of packets to send, and set
   the required tx offload flags and fields, like l2_len, l3_len, l4_len,
   and tso_segsz

2) The application passes the burst to rte_eth_tx_prep to check the
   conditions required to send the packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send the valid packets
   and/or to restore the invalid ones if the function fails.

e.g.

for (i = 0; i < nb_pkts; i++) {

	/* initialize or process packet */

	bufs[i]->tso_segsz = 800;
	bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
			| PKT_TX_IP_CKSUM;
	bufs[i]->l2_len = sizeof(struct ether_hdr);
	bufs[i]->l3_len = sizeof(struct ipv4_hdr);
	bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
	printf("tx_prep failed\n");

	/* nb_prep indicates the first invalid packet here. rte_eth_tx_prep
	 * can be used on the remaining packets to find further invalid
	 * ones.
	 */
}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */
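
To make the "re-run on the remaining packets" remark above concrete, one
possible driving loop looks as follows (a minimal sketch, not part of the
patch; prep_burst_drop_bad is a hypothetical helper, and dropping rejected
packets is assumed to be an acceptable policy):

#include <string.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Prepare a whole burst, freeing each packet that rte_eth_tx_prep()
 * rejects and retrying on the remainder. Returns the number of packets
 * left in bufs[], all of them prepared. */
static uint16_t
prep_burst_drop_bad(uint8_t port, uint16_t queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	uint16_t done = 0;

	while (done < nb_pkts) {
		done += rte_eth_tx_prep(port, queue, &bufs[done],
				nb_pkts - done);
		if (done == nb_pkts)
			break;
		/* bufs[done] is the first invalid packet (rte_errno
		 * tells why; ignored here): free it, close the gap
		 * and retry from this index. */
		rte_pktmbuf_free(bufs[done]);
		memmove(&bufs[done], &bufs[done + 1],
			(size_t)(nb_pkts - done - 1) * sizeof(*bufs));
		nb_pkts--;
	}
	return nb_pkts;
}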

v8 changes:
 - mbuf argument in rte_validate_tx_offload declared as const

v7 changes:
 - comments reworded/added
 - changed errno values returned from Tx prep API
 - added check in rte_phdr_cksum_fix if headers are in the first
   data segment and can be safely modified
 - moved rte_validate_tx_offload to rte_mbuf
 - moved rte_phdr_cksum_fix to rte_net.h
 - removed rte_pkt.h new file as useless

v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to default behavior (NULL) for simple/vector path
   in fm10k, i40e and ixgbe drivers to increase performance, when
   Tx offloads are not intentionally available

v3 changes:
 - reworked csum testpmd engine instead of adding a new one,
 - fixed checksum initialization procedure to also include outer
   checksum offloads,
 - some

[dpdk-dev] [PATCH v7 6/6] testpmd: use Tx preparation in csum engine

2016-10-21 Thread Tomasz Kulasek
Removed pseudo header calculation for udp/tcp/tso packets from
application and used Tx preparation API for packet preparation and
verification.

Adding an additional step to the csum engine costs about 3-4% of
performance drop, on my setup with the ixgbe driver. It's caused mostly by
the need to re-access and modify packet data.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/csumonly.c |   36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..6f33ae9 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));

 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-   if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-   else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
/* do not recalculate udp cksum if it was 0 */
if (udp_hdr->dgram_cksum != 0) {
udp_hdr->dgram_cksum = 0;
-   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
-   } else {
+   else
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
info->ethertype);
-   }
}
} else if (info->l4_proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
tcp_hdr->cksum = 0;
-   if (tso_segsz) {
+   if (tso_segsz)
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+   else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
ol_flags |= PKT_TX_TCP_CKSUM;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else {
+   else
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
info->ethertype);
-   }
} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
sctp_hdr->cksum = 0;
@@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+   uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
printf("\n");
}
}
-   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+   nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst,
+   nb_rx);
+   if (nb_prep != nb_rx)
+   printf("Preparing packet burst to transmit failed: %s\n",
+   rte_strerror(rte_errno));
+
+   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep);
/*
 * Retry if necessary
 */
-- 
1.7.9.5



[dpdk-dev] [PATCH v7 4/6] i40e: add Tx preparation

2016-10-21 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5af0e43..dab0d48 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &i40e_eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..7f6d3d8 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*********************************************************************
+ *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only the condition m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) is considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prep = NULL;
} else {

[dpdk-dev] [PATCH v7 3/6] fm10k: add Tx preparation

2016-10-21 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
 #define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c804436..dffb6d1 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prep = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 32cc7ff..5fc4d5a 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v7 2/6] e1000: add Tx preparation

2016-10-21 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7cf5f0c..17b45cb 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &eth_em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;
+   eth_dev->tx_pkt_prep = (eth_tx_prep_t)&eth_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..5bd3c99 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ end_of_tx:

 /*********************************************************************
  *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -ENOTSUP;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   

[dpdk-dev] [PATCH v7 1/6] ethdev: add Tx preparation

2016-10-21 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Added functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for the tx offloads set in the mbuf of a
  packet, such as flag completeness. In the current implementation this
  function is called optionally, when RTE_LIBRTE_ETHDEV_DEBUG is enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
before hardware tx checksum offload.
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   calculated and set.
 - for TSO the IP payload length is not included.

PERFORMANCE TESTS
-

This feature was tested with modified csum engine from test-pmd.

The packet checksum preparation was moved from the application to the Tx
preparation step placed before the burst.

We may expect some overhead costs caused by:
1) using additional callback before burst,
2) rescanning burst,
3) additional condition checking (packet validation),
4) worse optimization (e.g. packet data access, etc.)

We tested it using the ixgbe Tx preparation implementation with some parts
disabled, to have comparable information about the impact of different
parts of the implementation.

IMPACT:

1) For an unimplemented Tx preparation callback the performance impact is
   negligible,
2) The packet condition check without checksum modifications (nb_segs,
   available offloads, etc.) gives 14626628/14252168 (~2.62% drop),
3) Full support in the ixgbe driver (point 2 plus packet checksum
   initialization) gives 14060924/13588094 (~3.48% drop)

Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |   97 +
 lib/librte_mbuf/rte_mbuf.h|   57 
 lib/librte_net/rte_net.h  |   90 ++
 4 files changed, 245 insertions(+)

diff --git a/config/common_base b/config/common_base
index c7fd3db..619284b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 38641e8..d548d48 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2816,6 +2825,94 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ *

[dpdk-dev] [PATCH v7 0/6] add Tx preparation

2016-10-21 Thread Tomasz Kulasek
From 35b09a978d244092337b6f46fd1309f8c733bb6b Mon Sep 17 00:00:00 2001
From: Tomasz Kulasek <tomaszx.kula...@intel.com>
Date: Fri, 14 Oct 2016 16:10:35 +0200
Subject: [PATCH v6 0/6] add Tx preparation

As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose different 
requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
---

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and now, it's an
   application issue.

2) Different hardware may have different requirements for TX offloads,
   a different subset may be supported, and so on.

3) Some parameters (e.g. number of segments in ixgbe driver) may hang the
   device. These parameters may vary for different devices.

   For example i40e HW allows 8 fragments per packet, but that is after
   TSO segmentation. While ixgbe has a 38-fragment pre-TSO limit.

4) Fields in a packet may require different initialization (e.g.
   pseudo-header checksum precalculation, sometimes done in a different
   way depending on packet type, and so on). Currently the application
   needs to take care of it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst lets
   the application prepare a packet burst in a form acceptable to the
   specific device.

6) Some additional checks may be done in debug mode keeping tx_burst
   implementation clean.


PROPOSAL:
-

To help the user deal with all these varieties, we propose to:

1) Introduce rte_eth_tx_prep() function to do necessary preparations of
   packet burst to be safely transmitted on device for desired HW
   offloads (set/reset checksum field according to the hardware
   requirements) and check HW constraints (number of segments per
   packet, etc).

   Since the limitations and requirements may differ between devices, this
   requires extending the rte_eth_dev structure with a new function
   pointer, "tx_pkt_prep", which can be implemented in the driver to
   prepare and verify packets, in a device-specific way, before the burst.
   This should prevent the application from sending malformed packets.

2) Also, new fields will be introduced in rte_eth_desc_lim:
   nb_seg_max and nb_mtu_seg_max, providing information about the max
   number of segments in TSO and non-TSO packets acceptable to the device.

   This information is useful for the application to avoid creating (or to
   limit) malicious packets.


APPLICATION (CASE OF USE):
--

1) The application should initialize the burst of packets to send, and set
   the required tx offload flags and fields, like l2_len, l3_len, l4_len,
   and tso_segsz

2) The application passes the burst to rte_eth_tx_prep to check the
   conditions required to send the packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send the valid packets
   and/or to restore the invalid ones if the function fails.

e.g.

for (i = 0; i < nb_pkts; i++) {

	/* initialize or process packet */

	bufs[i]->tso_segsz = 800;
	bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
			| PKT_TX_IP_CKSUM;
	bufs[i]->l2_len = sizeof(struct ether_hdr);
	bufs[i]->l3_len = sizeof(struct ipv4_hdr);
	bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
	printf("tx_prep failed\n");

	/* nb_prep indicates the first invalid packet here. rte_eth_tx_prep
	 * can be used on the remaining packets to find further invalid
	 * ones.
	 */
}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */


v7 changes:
 - comments reworded/added
 - changed errno values returned from Tx prep API
 - added check in rte_phdr_cksum_fix if headers are in the first
   data segment and can be safely modified
 - moved rte_validate_tx_offload to rte_mbuf
 - moved rte_phdr_cksum_fix to rte_net.h
 - removed rte_pkt.h new file as useless

v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to default behavior (NULL) for simple/vector path
   in fm10k, i40e and ixgbe drivers to increase performance, when
   Tx offloads are not intentionally available

v3 changes:
 - reworked csum testpmd engine instead of adding a new one,
 - fixed checksum initialization procedure to also include outer
   checksum offloads,
 - some minor formatting and optimization fixes

v2 changes:
 - rte_eth_tx_prep() return

[dpdk-dev] [PATCH v6 6/6] testpmd: use Tx preparation in csum engine

2016-10-14 Thread Tomasz Kulasek
Removed pseudo header calculation for udp/tcp/tso packets from
application and used Tx preparation API for packet preparation and
verification.

Adding an additional step to the csum engine costs about 3-4% of
performance drop, on my setup with the ixgbe driver. It's caused mostly by
the need to re-access and modify packet data.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/csumonly.c |   36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..6f33ae9 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));

 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-   if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-   else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
/* do not recalculate udp cksum if it was 0 */
if (udp_hdr->dgram_cksum != 0) {
udp_hdr->dgram_cksum = 0;
-   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
-   } else {
+   else
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
info->ethertype);
-   }
}
} else if (info->l4_proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
tcp_hdr->cksum = 0;
-   if (tso_segsz) {
+   if (tso_segsz)
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+   else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
ol_flags |= PKT_TX_TCP_CKSUM;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else {
+   else
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
info->ethertype);
-   }
} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
sctp_hdr->cksum = 0;
@@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+   uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
printf("\n");
}
}
-   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+   nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst,
+   nb_rx);
+   if (nb_prep != nb_rx)
+   printf("Preparing packet burst to transmit failed: %s\n",
+   rte_strerror(rte_errno));
+
+   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep);
/*
 * Retry if necessary
 */
-- 
1.7.9.5



[dpdk-dev] [PATCH v6 5/6] ixgbe: add Tx preparation

2016-10-14 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 ++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 4ca5747..4c6a8e1 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   eth_dev->tx_pkt_prep = &ixgbe_prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..e229cf5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 2ce8234..83db18f 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -70,6 +70,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -87,6 +88,9 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -905,6 +909,56 @@ end_of_tx:

 /*********************************************************************
  *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/*********************************************************************
+ *
  *  RX functions
  *
  **********************************************************************/
@@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
&& (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+   dev->t

[dpdk-dev] [PATCH v6 4/6] i40e: add Tx preparation

2016-10-14 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5af0e43..dab0d48 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &i40e_eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..3e2c428 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*********************************************************************
+ *
+ *  TX prep functions
+ *
+ **********************************************************************/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only the condition m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -1;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) is considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prep = NULL;
} else {

[dpdk-dev] [PATCH v6 3/6] fm10k: add Tx preparation

2016-10-14 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
 #define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte alignment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c804436..dffb6d1 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prep = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 32cc7ff..7ca28c0 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v6 2/6] e1000: add Tx preparation

2016-10-14 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 7cf5f0c..17b45cb 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &eth_em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;
+   eth_dev->tx_pkt_prep = (eth_tx_prep_t)&eth_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..3af2f69 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   

[dpdk-dev] [PATCH v6 1/6] ethdev: add Tx preparation

2016-10-14 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Created `rte_pkt.h` header with commonly used functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for tx offloads in a packet, such as
flag completeness. In the current implementation this function is
called optionally, when RTE_LIBRTE_ETHDEV_DEBUG is enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix the pseudo-header checksum for TSO and non-TSO tcp/udp packets
before the hardware tx checksum offload.
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   computed and set.
 - for TSO the IP payload length is not included.
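
For illustration only, a minimal sketch of that distinction for a
single-segment IPv4/TCP mbuf (the helper name is made up here; the
real rte_pkt.h code also handles IPv6, UDP and header validation):

#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_tcp.h>

static int
phdr_cksum_fix_sketch(struct rte_mbuf *m)
{
	/* Locate the headers via the offsets the application filled in. */
	struct ipv4_hdr *ip = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
			m->l2_len);
	struct tcp_hdr *tcp = (struct tcp_hdr *)((char *)ip + m->l3_len);

	/* rte_ipv4_phdr_cksum() includes the IP payload length for plain
	 * PKT_TX_TCP_CKSUM, but leaves it out when PKT_TX_TCP_SEG (TSO)
	 * is set in ol_flags. */
	tcp->cksum = rte_ipv4_phdr_cksum(ip, m->ol_flags);
	return 0;
}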


PERFORMANCE TESTS
-

This feature was tested with a modified csum engine from test-pmd.

The packet checksum preparation was moved from the application to the
Tx preparation step placed before the burst.

We may expect some overhead caused by:
1) using an additional callback before the burst,
2) rescanning the burst,
3) additional condition checking (packet validation),
4) less effective optimization (e.g. packet data access, etc.)

We tested it using the ixgbe Tx preparation implementation with some
parts disabled to have comparable information about the impact of
different parts of the implementation.

IMPACT:

1) For an unimplemented Tx preparation callback the performance impact
   is negligible,
2) For the packet condition check without checksum modifications
   (nb_segs, available offloads, etc.) it is 14626628/14252168 pps
   (~2.62% drop),
3) For full support in the ixgbe driver (point 2 plus packet checksum
   initialization) it is 14060924/13588094 pps (~3.48% drop)


Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |   85 +
 lib/librte_mbuf/rte_mbuf.h|9 +++
 lib/librte_net/Makefile   |3 +-
 lib/librte_net/rte_pkt.h  |  137 +
 5 files changed, 234 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_net/rte_pkt.h

diff --git a/config/common_base b/config/common_base
index c7fd3db..619284b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 38641e8..a10ed9c 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2816,6 +2825,82 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.

[dpdk-dev] [PATCH v6 0/6] add Tx preparation

2016-10-14 Thread Tomasz Kulasek
As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose different 
requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
---

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and today it is left to
   the application.

2) Different hardware may have different requirements for TX offloads;
   a different subset may be supported, and so on.

3) Some parameters (e.g. the number of segments in the ixgbe driver)
   may hang the device. These parameters may vary across devices.

   For example i40e HW allows 8 fragments per packet, but that is after
   TSO segmentation, while ixgbe has a 38-fragment pre-TSO limit.

4) Fields in the packet may require different initialization (e.g.
   pseudo-header checksum precalculation, sometimes done differently
   depending on the packet type, and so on). Today the application
   needs to take care of it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst
   lets the application prepare the packet burst in a form acceptable
   to the specific device.

6) Some additional checks may be done in debug mode while keeping the
   tx_burst implementation clean.


PROPOSAL:
-

To help users deal with all these varieties, we propose to:

1) Introduce an rte_eth_tx_prep() function to do the necessary
   preparation of a packet burst so it can be safely transmitted on the
   device with the desired HW offloads (set/reset checksum fields
   according to the hardware requirements) and to check HW constraints
   (number of segments per packet, etc.).

   While the limitations and requirements may differ between devices,
   this requires extending the rte_eth_dev structure with a new
   function pointer, "tx_pkt_prep", which can be implemented in the
   driver to prepare and verify packets in a device-specific way before
   the burst, preventing the application from sending malformed packets.

2) Also, new fields will be introduced in rte_eth_desc_lim:
   nb_seg_max and nb_mtu_seg_max, providing information about the
   maximum number of segments in TSO and non-TSO packets accepted by
   the device.

   This information is useful for the application to avoid creating
   packets the device would treat as malicious; a sketch of such a
   check follows below.
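
Assuming the PMD fills in the new fields, such a check could look like
this (seg_limit_ok is a made-up helper, not part of the proposed API):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static int
seg_limit_ok(uint8_t port_id, const struct rte_mbuf *m)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);

	/* Non-TSO packets must respect the per-MTU segment limit. */
	return m->nb_segs <= dev_info.tx_desc_lim.nb_mtu_seg_max;
}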


APPLICATION (CASE OF USE):
--

1) The application should initialize the burst of packets to send and
   set the required tx offload flags and fields, like l2_len, l3_len,
   l4_len, and tso_segsz.

2) The application passes the burst to rte_eth_tx_prep to check the
   conditions required to send packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send the valid packets
   and/or to restore the invalid ones if the function fails.

e.g.

for (i = 0; i < nb_pkts; i++) {

/* initialize or process packet */

bufs[i]->tso_segsz = 800;
bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
| PKT_TX_IP_CKSUM;
bufs[i]->l2_len = sizeof(struct ether_hdr);
bufs[i]->l3_len = sizeof(struct ipv4_hdr);
bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
printf("tx_prep failed\n");

/* nb_prep here indicates the first invalid packet. rte_eth_tx_prep
 * can be used on the remaining packets to find further invalid ones.
 */

}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */


v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to the default behavior (NULL) for the
   simple/vector paths in the fm10k, i40e and ixgbe drivers to increase
   performance; Tx offloads are intentionally not available on those paths

v3 changes:
 - reworked the csum testpmd engine instead of adding a new one,
 - fixed the checksum initialization procedure to also include outer
   checksum offloads,
 - some minor formatting and optimizations

v2 changes:
 - rte_eth_tx_prep() returns the number of packets when the device
   doesn't support tx_prep functionality,
 - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing tx_prep to be turned off


Tomasz Kulasek (6):
  ethdev: add Tx preparation
  e1000: add Tx preparation
  fm10k: add Tx preparation
  i40e: add Tx preparation
  ixgbe: add Tx preparation
  testpmd: use Tx preparation in csum engine

 app/test-pmd/csumonly.c  |   36 --
 config/common_base   |1 +
 drivers/net/e1000/e1000_ethdev.h |   11 +++
 drivers/net/e1000/em_ethdev.c|5 +-

[dpdk-dev] [PATCH v5 6/6] testpmd: use Tx preparation in csum engine

2016-10-13 Thread Tomasz Kulasek
Removed the pseudo-header calculation for udp/tcp/tso packets from the
application and used the Tx preparation API for packet preparation and
verification.

Adding an additional step to the csum engine costs about 3-4% of
performance on my setup with the ixgbe driver. It's caused mostly by
the need to re-access and modify packet data.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/csumonly.c |   36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index f9e65b6..3354b3d 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));

 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-   if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-   else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -372,32 +363,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
/* do not recalculate udp cksum if it was 0 */
if (udp_hdr->dgram_cksum != 0) {
udp_hdr->dgram_cksum = 0;
-   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+   if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
ol_flags |= PKT_TX_UDP_CKSUM;
-   udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-   info->ethertype, ol_flags);
-   } else {
+   else
udp_hdr->dgram_cksum =
get_udptcp_checksum(l3_hdr, udp_hdr,
info->ethertype);
-   }
}
} else if (info->l4_proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
tcp_hdr->cksum = 0;
-   if (tso_segsz) {
+   if (tso_segsz)
ol_flags |= PKT_TX_TCP_SEG;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+   else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
ol_flags |= PKT_TX_TCP_CKSUM;
-   tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-   ol_flags);
-   } else {
+   else
tcp_hdr->cksum =
get_udptcp_checksum(l3_hdr, tcp_hdr,
info->ethertype);
-   }
} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
sctp_hdr->cksum = 0;
@@ -650,6 +633,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
uint16_t nb_rx;
uint16_t nb_tx;
+   uint16_t nb_prep;
uint16_t i;
uint64_t rx_ol_flags, tx_ol_flags;
uint16_t testpmd_ol_flags;
@@ -855,7 +839,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
printf("\n");
}
}
-   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+   nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst,
+   nb_rx);
+   if (nb_prep != nb_rx)
+   printf("Preparing packet burst to transmit failed: %s\n",
+   rte_strerror(rte_errno));
+
+   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep);
/*
 * Retry if necessary
 */
-- 
1.7.9.5



[dpdk-dev] [PATCH v5 5/6] ixgbe: add Tx preparation

2016-10-13 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |5 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   58 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 ++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 6b3d4fa..1d740f0 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -515,6 +515,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1101,6 +1103,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   eth_dev->tx_pkt_prep = &ixgbe_prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..e229cf5 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 8b99282..dd4f5ba 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -70,6 +70,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -87,6 +88,9 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
+
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -905,6 +909,56 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/*
+ *
  *  RX functions
  *
  **/
@@ -2280,6 +2334,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct 
ixgbe_tx_queue *txq)
if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
&& (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
+   dev->t

[dpdk-dev] [PATCH v5 4/6] i40e: add Tx preparation

2016-10-13 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   72 +++-
 drivers/net/i40e/i40e_rxtx.h   |8 +
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index d0640b9..c6d61c0 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &i40e_eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 2cb2e30..499217d 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,17 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
+#define I40E_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1968,6 +1981,61 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*
+ *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) are considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
 /*
  * Find the VSI the queue belongs to. 'queue_idx' is the queue index
  * application used, which assume having sequential ones. But from driver's
@@ -3318,9 +3386,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
}
+   dev->tx_pkt_prep = NULL;
} else {

[dpdk-dev] [PATCH v5 3/6] fm10k: add Tx preparation

2016-10-13 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|6 +
 drivers/net/fm10k/fm10k_ethdev.c |5 
 drivers/net/fm10k/fm10k_rxtx.c   |   50 +-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..c6fed21 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
 #define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte alignment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 372564b..b9060c7 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = NULL;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prep = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 5b2d04b..76b5705 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
+#define FM10K_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -583,3 +593,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
-- 
1.7.9.5



[dpdk-dev] [PATCH v5 2/6] e1000: add Tx preparation

2016-10-13 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/e1000/e1000_ethdev.h |   11 
 drivers/net/e1000/em_ethdev.c|5 +++-
 drivers/net/e1000/em_rxtx.c  |   48 ++-
 drivers/net/e1000/igb_ethdev.c   |4 +++
 drivers/net/e1000/igb_rxtx.c |   52 +-
 5 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6c25c8d..bd0f277 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -138,6 +138,11 @@
 #define E1000_MISC_VEC_ID   RTE_INTR_VEC_ZERO_OFFSET
 #define E1000_RX_VEC_START  RTE_INTR_VEC_RXTX_OFFSET

+#define IGB_TX_MAX_SEG UINT8_MAX
+#define IGB_TX_MAX_MTU_SEG UINT8_MAX
+#define EM_TX_MAX_SEG  UINT8_MAX
+#define EM_TX_MAX_MTU_SEG  UINT8_MAX
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
@@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

@@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev);
 uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);

diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index f767e1c..6d2c5fc 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &eth_em_ops;
eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;
+   eth_dev->tx_pkt_prep = (eth_tx_prep_t)&eth_em_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = E1000_MAX_RING_DESC,
.nb_min = E1000_MIN_RING_DESC,
.nb_align = EM_TXD_ALIGN,
+   .nb_seg_max = EM_TX_MAX_SEG,
+   .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..3af2f69 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 

 #include "e1000_logs.h"
@@ -77,6 +78,14 @@

 #define E1000_RXDCTL_GRAN  0x0100 /* RXDCTL Granularity */

+#define E1000_TX_OFFLOAD_MASK ( \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_VLAN_PKT)
+
+#define E1000_TX_OFFLOAD_NOTSUP_MASK \
+   (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK)
+
 /**
  * Structure associated with each descriptor of the RX ring of a RX queue.
  */
@@ -618,6 +627,43 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   

[dpdk-dev] [PATCH v5 1/6] ethdev: add Tx preparation

2016-10-13 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Created `rte_pkt.h` header with commonly used functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for tx offloads in a packet, such as
flag completeness. In the current implementation this function is
called optionally, when RTE_LIBRTE_ETHDEV_DEBUG is enabled (a sketch
follows below).

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix the pseudo-header checksum for TSO and non-TSO tcp/udp packets
before the hardware tx checksum offload.
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   computed and set.
 - for TSO the IP payload length is not included.
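
As a rough illustration of the kind of consistency checks such a
validation function can make (the helper name and the exact rules
below are assumptions, not the rte_pkt.h code):

#include <errno.h>
#include <rte_mbuf.h>

static int
validate_tx_offload_sketch(const struct rte_mbuf *m)
{
	uint64_t ol_flags = m->ol_flags;

	/* An L4 checksum offload needs l2_len/l3_len to locate the
	 * L4 header. */
	if ((ol_flags & PKT_TX_L4_MASK) &&
			(m->l2_len == 0 || m->l3_len == 0))
		return -EINVAL;

	/* TSO additionally needs the L4 header length and an MSS. */
	if ((ol_flags & PKT_TX_TCP_SEG) &&
			(m->l4_len == 0 || m->tso_segsz == 0))
		return -EINVAL;

	return 0;
}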

Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |   85 +
 lib/librte_mbuf/rte_mbuf.h|9 +++
 lib/librte_net/Makefile   |3 +-
 lib/librte_net/rte_pkt.h  |  139 +
 5 files changed, 236 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_net/rte_pkt.h

diff --git a/config/common_base b/config/common_base
index f5d2eff..0af6481 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 0a32ebb..db5dc93 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -699,6 +700,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2816,6 +2825,82 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.
+ * The *nb_pkts* parameter is the number of packets to be prepared which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
+ * For each packet to send, the rte_eth_tx_prep() function performs
+ * the following operations:
+ *
+ * - Check if the packet meets the device's requirements for tx offloads.
+ *
+ * - Check limitations on the number of segments.
+ *
+ * - Check additional requirements when debug is enabled.
+ *
+ * - Update and/or reset required checksums when tx offload is set for packet.
+ *
+ * The rte_eth_tx_prep() function returns the number of packets ready to be
+ * sent. A return value equal to *nb_pkts* means that

[dpdk-dev] [PATCH v5 0/6] add Tx preparation

2016-10-13 Thread Tomasz Kulasek
As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose different 
requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
---

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and today it is left to
   the application.

2) Different hardware may have different requirements for TX offloads;
   a different subset may be supported, and so on.

3) Some parameters (e.g. the number of segments in the ixgbe driver)
   may hang the device. These parameters may vary across devices.

   For example i40e HW allows 8 fragments per packet, but that is after
   TSO segmentation, while ixgbe has a 38-fragment pre-TSO limit.

4) Fields in the packet may require different initialization (e.g.
   pseudo-header checksum precalculation, sometimes done differently
   depending on the packet type, and so on). Today the application
   needs to take care of it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst
   lets the application prepare the packet burst in a form acceptable
   to the specific device.

6) Some additional checks may be done in debug mode while keeping the
   tx_burst implementation clean.


PROPOSAL:
-

To help users deal with all these varieties, we propose to:

1) Introduce an rte_eth_tx_prep() function to do the necessary
   preparation of a packet burst so it can be safely transmitted on the
   device with the desired HW offloads (set/reset checksum fields
   according to the hardware requirements) and to check HW constraints
   (number of segments per packet, etc.).

   While the limitations and requirements may differ between devices,
   this requires extending the rte_eth_dev structure with a new
   function pointer, "tx_pkt_prep", which can be implemented in the
   driver to prepare and verify packets in a device-specific way before
   the burst, preventing the application from sending malformed packets.

2) Also, new fields will be introduced in rte_eth_desc_lim:
   nb_seg_max and nb_mtu_seg_max, providing information about the
   maximum number of segments in TSO and non-TSO packets accepted by
   the device.

   This information is useful for the application to avoid creating
   packets the device would treat as malicious.


APPLICATION (CASE OF USE):
--

1) The application should initialize the burst of packets to send and
   set the required tx offload flags and fields, like l2_len, l3_len,
   l4_len, and tso_segsz.

2) The application passes the burst to rte_eth_tx_prep to check the
   conditions required to send packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send the valid packets
   and/or to restore the invalid ones if the function fails.

e.g.

for (i = 0; i < nb_pkts; i++) {

/* initialize or process packet */

bufs[i]->tso_segsz = 800;
bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
| PKT_TX_IP_CKSUM;
bufs[i]->l2_len = sizeof(struct ether_hdr);
bufs[i]->l3_len = sizeof(struct ipv4_hdr);
bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
printf("tx_prep failed\n");

/* nb_prep here indicates the first invalid packet. rte_eth_tx_prep
 * can be used on the remaining packets to find further invalid ones.
 */

}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */


v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to the default behavior (NULL) for the
   simple/vector paths in the fm10k, i40e and ixgbe drivers to increase
   performance; Tx offloads are intentionally not available on those paths

v3 changes:
 - reworked the csum testpmd engine instead of adding a new one,
 - fixed the checksum initialization procedure to also include outer
   checksum offloads,
 - some minor formatting and optimizations

v2 changes:
 - rte_eth_tx_prep() returns the number of packets when the device
   doesn't support tx_prep functionality,
 - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing tx_prep to be turned off


Tomasz Kulasek (6):
  ethdev: add Tx preparation
  e1000: add Tx preparation
  fm10k: add Tx preparation
  i40e: add Tx preparation
  ixgbe: add Tx preparation
  csum: fixup

 app/test-pmd/csumonly.c  |   36 --
 config/common_base   |1 +
 drivers/net/e1000/e1000_ethdev.h |   11 +++
 drivers/net/e1000/em_ethdev.c|5 +-
 drivers/net/e1000/em_rxtx.c

[dpdk-dev] [PATCH v2 6/6] testpmd: add txprep engine

2016-09-12 Thread Tomasz Kulasek
This patch adds txprep engine to the testpmd application.

The txprep engine is intended to verify the Tx preparation
functionality implemented in the PMD driver.

It's based on the default "io" engine with the following changes:
 - Tx HW offloads are reset in the incoming packets,
 - the burst is passed to the Tx preparation function before the tx burst,
 - added "txsplit" and "tso" functionality for outgoing packets.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/Makefile  |3 +-
 app/test-pmd/testpmd.c |3 +
 app/test-pmd/testpmd.h |4 +-
 app/test-pmd/txprep.c  |  412 
 4 files changed, 420 insertions(+), 2 deletions(-)
 create mode 100644 app/test-pmd/txprep.c

diff --git a/app/test-pmd/Makefile b/app/test-pmd/Makefile
index 2a0b5a5..3f9ad1c 100644
--- a/app/test-pmd/Makefile
+++ b/app/test-pmd/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,7 @@ SRCS-y += parameters.c
 SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline.c
 SRCS-y += config.c
 SRCS-y += iofwd.c
+SRCS-$(CONFIG_RTE_ETHDEV_TX_PREP) += txprep.c
 SRCS-y += macfwd.c
 SRCS-y += macswap.c
 SRCS-y += flowgen.c
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 1428974..9b6c475 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -152,6 +152,9 @@ struct fwd_engine * fwd_engines[] = {
&rx_only_engine,
&tx_only_engine,
&csum_fwd_engine,
+#ifdef RTE_ETHDEV_TX_PREP
+   &txprep_fwd_engine,
+#endif
&icmp_echo_engine,
 #ifdef RTE_LIBRTE_IEEE1588
&ieee1588_fwd_engine,
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 2b281cc..f800846 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -239,7 +239,9 @@ extern struct fwd_engine icmp_echo_engine;
 #ifdef RTE_LIBRTE_IEEE1588
 extern struct fwd_engine ieee1588_fwd_engine;
 #endif
-
+#ifdef RTE_ETHDEV_TX_PREP
+extern struct fwd_engine txprep_fwd_engine;
+#endif
 extern struct fwd_engine * fwd_engines[]; /**< NULL terminated array. */

 /**
diff --git a/app/test-pmd/txprep.c b/app/test-pmd/txprep.c
new file mode 100644
index 000..688927e
--- /dev/null
+++ b/app/test-pmd/txprep.c
@@ -0,0 +1,412 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "testpmd.h"
+
+/* We cannot use rte_cpu_to_be_16() on a constant in a switch/case */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define _htons(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8)))
+#else
+#define _htons(x) (x)
+#endif
+
+/*
+ * Helper function.
+ * Performs actual copying.
+ * Returns number of segments in the destination mbuf on success,
+ * or negative error code on failure.
+ */
+s

[dpdk-dev] [PATCH v2 5/6] ixgbe: add Tx preparation

2016-09-12 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |8 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   83 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 +
 4 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index fb618ef..1509979 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -515,6 +515,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1101,6 +1103,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   eth_dev->tx_pkt_prep = &ixgbe_prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..09d96de 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,12 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
+uint16_t ixgbe_prep_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 8a306b0..87defa0 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -71,6 +71,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -906,6 +907,84 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if ((m->ol_flags & PKT_TX_OFFLOAD_MASK) !=
+   (m->ol_flags & IXGBE_TX_OFFLOAD_MASK)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/* ixgbe simple path as well as vector TX doesn't support tx offloads */
+uint16_t
+ixgbe_prep_pkts_simple(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i;
+   struct rte_mbuf *m;
+   uint64_t ol_flags;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /* simple tx path doesn't support multi-segments */
+   if (m->nb_segs != 1) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   /* For simple path (simple and vector) no tx offloads a

[dpdk-dev] [PATCH v2 4/6] i40e: add Tx preparation

2016-09-12 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   98 +++-
 drivers/net/i40e/i40e_rxtx.h   |   10 
 3 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index d0aeb70..52a2abb 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -948,6 +948,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &i40e_eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2614,6 +2615,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 554d167..1cd34f7 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,14 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1930,6 +1940,90 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*
+ *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+i40e_prep_pkts(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -1;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) are considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if ((ol_flags & PKT_TX_OFFLOAD_MASK) !=
+   (ol_flags & I40E_TX_OFFLOAD_MASK)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
+/* i40e simple path doesn't support tx offloads */
+uint16_t
+i40e_prep_pkts_simple(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /* For simple path (simple and vector) no tx offloads are supported */
+   if (m->nb_segs &

[dpdk-dev] [PATCH v2 3/6] fm10k: add Tx preparation

2016-09-12 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|9 +
 drivers/net/fm10k/fm10k_ethdev.c |5 +++
 drivers/net/fm10k/fm10k_rxtx.c   |   77 +-
 3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..83d2bfb 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
#define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,12 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
+uint16_t fm10k_prep_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 01f4a72..c9f450e 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1441,6 +1441,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2749,8 +2751,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = fm10k_prep_pkts_simple;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2829,6 +2833,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prep = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 5b2d04b..a87b06f 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,12 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -583,3 +590,71 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if ((m->ol_flags & PKT_TX_TCP_SEG) &&
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if ((m->ol_flags & PKT_TX_OFFLOAD_MASK) !=
+   (m->ol_flags & FM10K_TX_OFFLOAD_MASK)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+

[dpdk-dev] [PATCH v2 1/6] ethdev: add Tx preparation

2016-09-12 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Created `rte_pkt.h` header with common used functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for tx offload set in a packet, such as
flag completeness. In the current implementation this function is called
optionally, when RTE_LIBRTE_ETHDEV_DEBUG is enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
before hardware tx checksum offload.
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   computed and set.
 - for TSO the IP payload length is not included.
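
For reference, the TCP-over-IPv4 case of such a fix can be sketched with
the existing rte_ip.h helper, as below. This is a simplified illustration
of the idea only, not the actual rte_pkt.h code; error handling and the
IPv6/UDP branches are omitted:

#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_mbuf.h>

static int
phdr_cksum_fix_sketch(struct rte_mbuf *m)
{
	struct ipv4_hdr *ipv4_hdr;
	struct tcp_hdr *tcp_hdr;

	if (!(m->ol_flags & PKT_TX_IPV4))
		return 0;	/* IPv6 and non-IP cases omitted in this sketch */

	if (((m->ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||
			(m->ol_flags & PKT_TX_TCP_SEG)) {
		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
				m->l2_len);
		tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + m->l3_len);
		/* rte_ipv4_phdr_cksum() leaves the IP payload length out of
		 * the sum when PKT_TX_TCP_SEG is set, as TSO requires */
		tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
	}
	return 0;
}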

Signed-off-by: Tomasz Kulasek 
---
 config/common_base|1 +
 lib/librte_ether/rte_ethdev.h |   85 ++
 lib/librte_mbuf/rte_mbuf.h|8 +++
 lib/librte_net/Makefile   |2 +-
 lib/librte_net/rte_pkt.h  |  132 +
 5 files changed, 227 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_net/rte_pkt.h

diff --git a/config/common_base b/config/common_base
index 7830535..7ada9e0 100644
--- a/config/common_base
+++ b/config/common_base
@@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_TX_PREP=y

 #
 # Support NIC bypass logic
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index b0fe033..4fa674d 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -696,6 +697,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1181,6 +1184,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1626,6 +1634,7 @@ enum rte_eth_dev_type {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2833,6 +2842,82 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.
+ * The *nb_pkts* parameter is the number of packets to be prepared which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
+ * For each packet to send, the rte_eth_tx_prep() function performs
+ * the following operations:
+ *
+ * - Check if the packet meets the device's requirements for tx offloads.
+ *
+ * - Check limitations about number of segments.
+ *
+ * - Check additional requirements when debug is enabled.
+ *
+ * - Update and/or reset required checksums when tx offload is set for packet.
+ *
+ * The rte_eth_tx_prep() function returns the number of packets ready to be
+ * sent. A return value equal to *nb_pkts* means that all packets are valid and
+ * ready to be sent.
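
On failure, the driver implementations in this series store a (negative)
error code for the first rejected packet in rte_errno, so a caller can
report it roughly as follows (a hedged sketch; port_id, queue_id, pkts
and nb_pkts are assumed to be set up by the application):

#include <stdio.h>
#include <string.h>
#include <rte_errno.h>

uint16_t nb_prep = rte_eth_tx_prep(port_id, queue_id, pkts, nb_pkts);

if (nb_prep < nb_pkts)
	/* pkts[nb_prep] is the first rejected packet; the drivers in
	 * this series set a negative errno, hence the negation */
	printf("tx_prep: packet %u rejected: %s\n",
			nb_prep, strerror(-rte_errno));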

[dpdk-dev] [PATCH v2 0/6] add Tx preparation

2016-09-12 Thread Tomasz Kulasek
As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose
different requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
---

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and currently it is left
   to the application.

2) Different hardware may have different requirements for TX offloads;
   a different subset may be supported, and so on.

3) Some parameters (e.g. number of segments in the ixgbe driver) may
   hang the device. These parameters may vary between devices.

   For example, i40e HW allows 8 fragments per packet, but only after
   TSO segmentation, while ixgbe has a 38-fragment pre-TSO limit.

4) Fields in the packet may require different initialization (e.g.
   pseudo-header checksum precalculation, sometimes done in a different
   way depending on packet type, and so on). Currently the application
   needs to take care of it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst
   lets the application prepare a packet burst in a form acceptable to
   the specific device.

6) Some additional checks may be done in debug mode, keeping the
   tx_burst implementation clean.


PROPOSAL:
-

To help users deal with all these varieties, we propose to:

1) Introduce rte_eth_tx_prep() function to do necessary preparations of
   packet burst to be safely transmitted on device for desired HW
   offloads (set/reset checksum field according to the hardware
   requirements) and check HW constraints (number of segments per
   packet, etc).

   Since the limitations and requirements may differ between devices,
   this requires extending the rte_eth_dev structure with a new
   function pointer "tx_pkt_prep", which can be implemented in the
   driver to prepare and verify packets, in a device-specific way,
   before burst; this should prevent the application from sending
   malformed packets.

2) Also new fields will be introduced in rte_eth_desc_lim:
   nb_seg_max and nb_mtu_seg_max, providing information about the max
   number of segments in TSO and non-TSO packets acceptable to the
   device.

   This information helps the application limit, or avoid creating,
   packets the device would reject as malicious; see the query sketch
   right below.
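
A minimal query sketch (illustrative only; port_id and the mbuf m are
assumed to exist in the application, and nb_mtu_seg_max is the field
proposed above):

struct rte_eth_dev_info dev_info;

rte_eth_dev_info_get(port_id, &dev_info);
/* e.g. cap the number of chained mbufs for a non-TSO packet */
if (m->nb_segs > dev_info.tx_desc_lim.nb_mtu_seg_max)
	/* split or drop the packet before the tx burst */;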


APPLICATION (CASE OF USE):
--

1) The application should initialize the burst of packets to send,
   setting the required tx offload flags and fields such as l2_len,
   l3_len, l4_len, and tso_segsz.

2) The application passes the burst to rte_eth_tx_prep to check the
   conditions required to send the packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send the valid packets
   and/or to restore the invalid ones if the function fails.

e.g.

for (i = 0; i < nb_pkts; i++) {

/* initialize or process packet */

bufs[i]->tso_segsz = 800;
bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
| PKT_TX_IP_CKSUM;
bufs[i]->l2_len = sizeof(struct ether_hdr);
bufs[i]->l3_len = sizeof(struct ipv4_hdr);
bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
printf("tx_prep failed\n");

/* nb_prep here indicates the first invalid packet. rte_eth_tx_prep
 * can be used on the remaining packets to find further invalid ones;
 * see the sketch after this example.
 */

}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */


v2 changes:
 - rte_eth_tx_prep() returns the number of packets when the device
   doesn't support the tx_prep functionality,
 - introduced CONFIG_RTE_ETHDEV_TX_PREP, allowing tx_prep to be turned
   off

Tomasz Kulasek (6):
  ethdev: add Tx preparation
  e1000: add Tx preparation
  fm10k: add Tx preparation
  i40e: add Tx preparation
  ixgbe: add Tx preparation
  testpmd: add txprep engine

 app/test-pmd/Makefile|3 +-
 app/test-pmd/testpmd.c   |3 +
 app/test-pmd/testpmd.h   |4 +-
 app/test-pmd/txprep.c|  412 ++
 config/common_base   |1 +
 drivers/net/e1000/e1000_ethdev.h |   11 +
 drivers/net/e1000/em_ethdev.c|5 +-
 drivers/net/e1000/em_rxtx.c  |   46 -
 drivers/net/e1000/igb_ethdev.c   |4 +
 drivers/net/e1000/igb_rxtx.c |   50 -
 drivers/net/fm10k/fm10k.h|9 +
 drivers/net/fm10k/fm10k_ethdev.c |5 +
 drivers/net/fm10k/fm10k_rxtx.c   |   77 ++-
 drivers/net/i40e/i40e_ethdev.c   |3 +
 drivers/net/i40e/i40e_rxtx.c |   98 -
 drivers/net/i40e/i40e_rxtx.h |   10 +

[dpdk-dev] [PATCH 6/6] testpmd: add txprep engine

2016-08-26 Thread Tomasz Kulasek
This patch adds txprep engine to the testpmd application.

Txprep engine is intended to verify Tx preparation functionality
implemented in pmd driver.

It's based on the default "io" engine with the following changes:
 - Tx HW offloads are reset in incoming packet,
 - burst is passed to the Tx preparation function before tx burst,
 - added "txsplit" and "tso" functionality for outgoing packets.

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/Makefile  |3 +-
 app/test-pmd/testpmd.c |1 +
 app/test-pmd/testpmd.h |1 +
 app/test-pmd/txprep.c  |  412 
 4 files changed, 416 insertions(+), 1 deletion(-)
 create mode 100644 app/test-pmd/txprep.c

diff --git a/app/test-pmd/Makefile b/app/test-pmd/Makefile
index 2a0b5a5..7540244 100644
--- a/app/test-pmd/Makefile
+++ b/app/test-pmd/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,7 @@ SRCS-y += parameters.c
 SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline.c
 SRCS-y += config.c
 SRCS-y += iofwd.c
+SRCS-y += txprep.c
 SRCS-y += macfwd.c
 SRCS-y += macswap.c
 SRCS-y += flowgen.c
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 1428974..5858a33 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -152,6 +152,7 @@ struct fwd_engine * fwd_engines[] = {
&rx_only_engine,
&tx_only_engine,
&csum_fwd_engine,
+   &txprep_fwd_engine,
&icmp_echo_engine,
 #ifdef RTE_LIBRTE_IEEE1588
&ieee1588_fwd_engine,
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 2b281cc..0af9557 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -239,6 +239,7 @@ extern struct fwd_engine icmp_echo_engine;
 #ifdef RTE_LIBRTE_IEEE1588
 extern struct fwd_engine ieee1588_fwd_engine;
 #endif
+extern struct fwd_engine txprep_fwd_engine;

 extern struct fwd_engine * fwd_engines[]; /**< NULL terminated array. */

diff --git a/app/test-pmd/txprep.c b/app/test-pmd/txprep.c
new file mode 100644
index 0000000..688927e
--- /dev/null
+++ b/app/test-pmd/txprep.c
@@ -0,0 +1,412 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "testpmd.h"
+
+/* We cannot use rte_cpu_to_be_16() on a constant in a switch/case */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define _htons(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8)))
+#else
+#define _htons(x) (x)
+#endif
+
+/*
+ * Helper function.
+ * Performs actual copying.
+ * Returns number of segments in the destination mbuf on success,
+ * or negative error code on failure.
+ */
+static int
+mbuf_copy_split(const struct rte_mbuf *ms, struct rte_mbuf *md[],
+   uint16_t seglen[],

[dpdk-dev] [PATCH 5/6] ixgbe: add Tx preparation

2016-08-26 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++
 drivers/net/ixgbe/ixgbe_ethdev.h |8 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   83 +-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 +
 4 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index fb618ef..1509979 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -515,6 +515,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = {
.nb_max = IXGBE_MAX_RING_DESC,
.nb_min = IXGBE_MIN_RING_DESC,
.nb_align = IXGBE_TXD_ALIGN,
+   .nb_seg_max = IXGBE_TX_MAX_SEG,
+   .nb_mtu_seg_max = IXGBE_TX_MAX_SEG,
 };

 static const struct eth_dev_ops ixgbe_eth_dev_ops = {
@@ -1101,6 +1103,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   eth_dev->tx_pkt_prep = &ixgbe_prep_pkts;

/*
 * For secondary processes, we don't initialise any further as primary
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index 4ff6338..09d96de 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -396,6 +396,12 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
+uint16_t ixgbe_prep_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 8a306b0..87defa0 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
@@ -71,6 +71,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -906,6 +907,84 @@ end_of_tx:

 /*
  *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+   struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   /**
+* Check if packet meets requirements for number of segments
+*
+* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO
+*/
+
+   if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if ((m->ol_flags & PKT_TX_OFFLOAD_MASK) !=
+   (m->ol_flags & IXGBE_TX_OFFLOAD_MASK)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+
+   return i;
+}
+
+/* ixgbe simple path as well as vector TX doesn't support tx offloads */
+uint16_t
+ixgbe_prep_pkts_simple(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i;
+   struct rte_mbuf *m;
+   uint64_t ol_flags;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /* simple tx path doesn't support multi-segments */
+   if (m->nb_segs != 1) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   /* For simple path (simple and vector) no tx offloads are supported */

[dpdk-dev] [PATCH 4/6] i40e: add Tx preparation

2016-08-26 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/i40e/i40e_ethdev.c |3 ++
 drivers/net/i40e/i40e_rxtx.c   |   98 +++-
 drivers/net/i40e/i40e_rxtx.h   |   10 
 3 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index d0aeb70..52a2abb 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -948,6 +948,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &i40e_eth_dev_ops;
dev->rx_pkt_burst = i40e_recv_pkts;
dev->tx_pkt_burst = i40e_xmit_pkts;
+   dev->tx_pkt_prep = i40e_prep_pkts;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
@@ -2614,6 +2615,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
.nb_max = I40E_MAX_RING_DESC,
.nb_min = I40E_MIN_RING_DESC,
.nb_align = I40E_ALIGN_RING_DESC,
+   .nb_seg_max = I40E_TX_MAX_SEG,
+   .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG,
};

if (pf->flags & I40E_FLAG_VMDQ) {
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 554d167..1cd34f7 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include "i40e_logs.h"
 #include "base/i40e_prototype.h"
@@ -79,6 +81,14 @@
PKT_TX_TCP_SEG | \
PKT_TX_OUTER_IP_CKSUM)

+#define I40E_TX_OFFLOAD_MASK (  \
+   PKT_TX_IP_CKSUM |   \
+   PKT_TX_L4_MASK |\
+   PKT_TX_OUTER_IP_CKSUM | \
+   PKT_TX_TCP_SEG |\
+   PKT_TX_QINQ_PKT |   \
+   PKT_TX_VLAN_PKT)
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -1930,6 +1940,90 @@ i40e_xmit_pkts_simple(void *tx_queue,
return nb_tx;
 }

+/*
+ *
+ *  TX prep functions
+ *
+ **/
+uint16_t
+i40e_prep_pkts(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
+   /**
+* m->nb_segs is uint8_t, so m->nb_segs is always less than
+* I40E_TX_MAX_SEG.
+* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG.
+*/
+   if (!(ol_flags & PKT_TX_TCP_SEG)) {
+   if (m->nb_segs > I40E_TX_MAX_MTU_SEG) {
+   rte_errno = -1;
+   return i;
+   }
+   } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) ||
+   (m->tso_segsz > I40E_MAX_TSO_MSS)) {
+   /* MSS outside the range (256B - 9674B) are considered malicious */
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+   if ((ol_flags & PKT_TX_OFFLOAD_MASK) !=
+   (ol_flags & I40E_TX_OFFLOAD_MASK)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+   ret = rte_validate_tx_offload(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+#endif
+   ret = rte_phdr_cksum_fix(m);
+   if (ret != 0) {
+   rte_errno = ret;
+   return i;
+   }
+   }
+   return i;
+}
+
+/* i40e simple path doesn't support tx offloads */
+uint16_t
+i40e_prep_pkts_simple(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i;
+   uint64_t ol_flags;
+   struct rte_mbuf *m;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+   ol_flags = m->ol_flags;
+
/* For simple path (simple and vector) no tx offloads are supported */
+   if (m->nb_segs &

[dpdk-dev] [PATCH 3/6] fm10k: add Tx preparation

2016-08-26 Thread Tomasz Kulasek
Signed-off-by: Tomasz Kulasek 
---
 drivers/net/fm10k/fm10k.h|9 
 drivers/net/fm10k/fm10k_ethdev.c |5 +++
 drivers/net/fm10k/fm10k_rxtx.c   |   87 +-
 3 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 05aa1a2..83d2bfb 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -69,6 +69,9 @@
 #define FM10K_MAX_RX_DESC  (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc))
#define FM10K_MAX_TX_DESC  (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc))

+#define FM10K_TX_MAX_SEG UINT8_MAX
+#define FM10K_TX_MAX_MTU_SEG UINT8_MAX
+
 /*
  * byte aligment for HW RX data buffer
  * Datasheet requires RX buffer addresses shall either be 512-byte aligned or
@@ -356,6 +359,12 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset);
 uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
+uint16_t fm10k_prep_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
+
 int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);
 int fm10k_rx_vec_condition_check(struct rte_eth_dev *);
 void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq);
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 01f4a72..c9f450e 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1441,6 +1441,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
.nb_max = FM10K_MAX_TX_DESC,
.nb_min = FM10K_MIN_TX_DESC,
.nb_align = FM10K_MULT_TX_DESC,
+   .nb_seg_max = FM10K_TX_MAX_SEG,
+   .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG,
};

dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
@@ -2749,8 +2751,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev)
fm10k_txq_vec_setup(txq);
}
dev->tx_pkt_burst = fm10k_xmit_pkts_vec;
+   dev->tx_pkt_prep = fm10k_prep_pkts_simple;
} else {
dev->tx_pkt_burst = fm10k_xmit_pkts;
+   dev->tx_pkt_prep = fm10k_prep_pkts;
PMD_INIT_LOG(DEBUG, "Use regular Tx func");
}
 }
@@ -2829,6 +2833,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
dev->dev_ops = &fm10k_eth_dev_ops;
dev->rx_pkt_burst = &fm10k_recv_pkts;
dev->tx_pkt_burst = &fm10k_xmit_pkts;
+   dev->tx_pkt_prep = &fm10k_prep_pkts;

/* only initialize in the primary process */
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 5b2d04b..72b1931 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@

 #include 
 #include 
+#include 
 #include "fm10k.h"
 #include "base/fm10k_type.h"

@@ -65,6 +66,12 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 }
 #endif

+#define FM10K_TX_OFFLOAD_MASK (  \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)
+
 /* @note: When this function is changed, make corresponding change to
  * fm10k_dev_supported_ptypes_get()
  */
@@ -583,3 +590,81 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

return count;
 }
+
+uint16_t
+fm10k_prep_pkts(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts)
+{
+   int i, ret;
+   struct rte_mbuf *m;
+   struct fm10k_tx_queue *q = tx_queue;
+
+   for (i = 0; i < nb_pkts; i++) {
+   m = tx_pkts[i];
+
+   if (m->ol_flags & PKT_TX_TCP_SEG) {
+   uint8_t hdrlen = m->outer_l2_len + m->outer_l3_len + m->l2_len +
+   m->l3_len + m->l4_len;
+
+   if (q->hw_ring[q->next_free].flags & FM10K_TXD_FLAG_FTAG)
+   hdrlen += sizeof(struct fm10k_ftag);
+
+   if ((hdrlen < FM10K_TSO_MIN_HEADERLEN) ||
+   (hdrlen > FM10K_TSO_MAX_HEADERLEN) ||
+   (m->tso_segsz < FM10K_TSO_MINMSS)) {
+   rte_errno = -EINVAL;
+   return i;
+   }
+   }
+
+   if ((m->ol_flags & PKT_TX_OFFLOAD_MASK) !=
+

[dpdk-dev] [PATCH 1/6] ethdev: add Tx preparation

2016-08-26 Thread Tomasz Kulasek
Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

uint16_t nb_seg_max;
/**< Max number of segments per whole packet. */

uint16_t nb_mtu_seg_max;
/**< Max number of segments per one MTU */

Created `rte_pkt.h` header with common used functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
to validate general requirements for tx offload set in a packet, such as
flag completeness. In the current implementation this function is called
optionally, when RTE_LIBRTE_ETHDEV_DEBUG is enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
before hardware tx checksum offload.
 - for non-TSO tcp/udp packets the full pseudo-header checksum is
   computed and set.
 - for TSO the IP payload length is not included.

Signed-off-by: Tomasz Kulasek 
---
 lib/librte_ether/rte_ethdev.h |   74 +++
 lib/librte_mbuf/rte_mbuf.h|8 +++
 lib/librte_net/Makefile   |2 +-
 lib/librte_net/rte_pkt.h  |  132 +
 4 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_net/rte_pkt.h

diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index b0fe033..02569ca 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -696,6 +697,8 @@ struct rte_eth_desc_lim {
uint16_t nb_max;   /**< Max allowed number of descriptors. */
uint16_t nb_min;   /**< Min allowed number of descriptors. */
uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+   uint16_t nb_seg_max; /**< Max number of segments per whole packet. */
+   uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };

 /**
@@ -1181,6 +1184,12 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */

+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+  struct rte_mbuf **tx_pkts,
+  uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet
+   device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
   struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1626,6 +1635,7 @@ enum rte_eth_dev_type {
 struct rte_eth_dev {
eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+   eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
struct rte_eth_dev_data *data;  /**< Pointer to device data */
const struct eth_driver *driver;/**< Driver for this device */
const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2833,6 +2843,70 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }

+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.
+ * The *nb_pkts* parameter is the number of packets to be prepared which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
+ * For each packet to send, the rte_eth_tx_prep() function performs
+ * the following operations:
+ *
+ * - Check if the packet meets the device's requirements for tx offloads.
+ *
+ * - Check limitations about number of segments.
+ *
+ * - Check additional requirements when debug is enabled.
+ *
+ * - Update and/or reset required checksums when tx offload is set for packet.
+ *
+ * The rte_eth_tx_prep() function returns the number of packets ready to be
+ * sent. A return value equal to *nb_pkts* means that all packets are valid and
+ * ready to be sent.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_pkts
+ *   The address of an array of *nb_p

[dpdk-dev] [PATCH 0/6] add Tx preparation

2016-08-26 Thread Tomasz Kulasek
As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose
different requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
---

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and currently it is left
   to the application.

2) Different hardware may have different requirements for TX offloads;
   a different subset may be supported, and so on.

3) Some parameters (e.g. number of segments in the ixgbe driver) may
   hang the device. These parameters may vary between devices.

   For example, i40e HW allows 8 fragments per packet, but only after
   TSO segmentation, while ixgbe has a 38-fragment pre-TSO limit.

4) Fields in the packet may require different initialization (e.g.
   pseudo-header checksum precalculation, sometimes done in a different
   way depending on packet type, and so on). Currently the application
   needs to take care of it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst
   lets the application prepare a packet burst in a form acceptable to
   the specific device.

6) Some additional checks may be done in debug mode, keeping the
   tx_burst implementation clean.


PROPOSAL:
-

To help users deal with all these varieties, we propose to:

1) Introduce rte_eth_tx_prep() function to do necessary preparations of
   packet burst to be safely transmitted on device for desired HW
   offloads (set/reset checksum field according to the hardware
   requirements) and check HW constraints (number of segments per
   packet, etc).

   Since the limitations and requirements may differ between devices,
   this requires extending the rte_eth_dev structure with a new
   function pointer "tx_pkt_prep", which can be implemented in the
   driver to prepare and verify packets, in a device-specific way,
   before burst; this should prevent the application from sending
   malformed packets.

2) Also new fields will be introduced in rte_eth_desc_lim:
   nb_seg_max and nb_mtu_seg_max, providing information about the max
   number of segments in TSO and non-TSO packets acceptable to the
   device.

   This information helps the application limit, or avoid creating,
   packets the device would reject as malicious.


APPLICATION (CASE OF USE):
--

1) The application should initialize the burst of packets to send,
   setting the required tx offload flags and fields such as l2_len,
   l3_len, l4_len, and tso_segsz.

2) The application passes the burst to rte_eth_tx_prep to check the
   conditions required to send the packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send the valid packets
   and/or to restore the invalid ones if the function fails.

e.g.

for (i = 0; i < nb_pkts; i++) {

/* initialize or process packet */

bufs[i]->tso_segsz = 800;
bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
| PKT_TX_IP_CKSUM;
bufs[i]->l2_len = sizeof(struct ether_hdr);
bufs[i]->l3_len = sizeof(struct ipv4_hdr);
bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
printf("tx_prep failed\n");

/* nb_prep here indicates the first invalid packet. rte_eth_tx_prep
 * can be used on the remaining packets to find further invalid ones.
 */

}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

    /* Free any unsent packets. */


Tomasz Kulasek (6):
  ethdev: add Tx preparation
  e1000: add Tx preparation
  fm10k: add Tx preparation
  i40e: add Tx preparation
  ixgbe: add Tx preparation
  testpmd: add txprep engine

 app/test-pmd/Makefile|3 +-
 app/test-pmd/testpmd.c   |1 +
 app/test-pmd/testpmd.h   |1 +
 app/test-pmd/txprep.c|  412 ++
 drivers/net/e1000/e1000_ethdev.h |   11 +
 drivers/net/e1000/em_ethdev.c|5 +-
 drivers/net/e1000/em_rxtx.c  |   46 -
 drivers/net/e1000/igb_ethdev.c   |4 +
 drivers/net/e1000/igb_rxtx.c |   50 -
 drivers/net/fm10k/fm10k.h|9 +
 drivers/net/fm10k/fm10k_ethdev.c |5 +
 drivers/net/fm10k/fm10k_rxtx.c   |   87 +++-
 drivers/net/i40e/i40e_ethdev.c   |3 +
 drivers/net/i40e/i40e_rxtx.c |   98 -
 drivers/net/i40e/i40e_rxtx.h |   10 +
 drivers/net/ixgbe/ixgbe_ethdev.c |3 +
 drivers/net/ixgbe/ixgbe_ethdev.h |8 +-
 drivers/net/ixgbe/ixgbe_rxtx.c   |   83 +++-
 drivers/net/ixgbe/ixgbe_rxtx.h   |2 +
 lib/librte_ether/rte_ethdev.h|   74 +++

[dpdk-dev] [PATCH v2] doc: announce ABI change for rte_eth_dev structure

2016-07-21 Thread Tomasz Kulasek
This is an ABI deprecation notice for DPDK 16.11 in librte_ether about
changes in rte_eth_dev and rte_eth_desc_lim structures.

As discussed in that thread:

http://dpdk.org/ml/archives/dev/2015-September/023603.html

Different NIC models depending on HW offload requested might impose
different requirements on packets to be TX-ed in terms of:

 - Max number of fragments per packet allowed
 - Max number of fragments per TSO segments
 - The way pseudo-header checksum should be pre-calculated
 - L3/L4 header fields filling
 - etc.


MOTIVATION:
---

1) Some work cannot (and should not) be done in rte_eth_tx_burst.
   However, this work is sometimes required, and currently it is left
   to the application.

2) Different hardware may have different requirements for TX offloads;
   a different subset may be supported, and so on.

3) Some parameters (e.g. number of segments in the ixgbe driver) may
   hang the device. These parameters may vary between devices.

   For example, i40e HW allows 8 fragments per packet, but only after
   TSO segmentation, while ixgbe has a 38-fragment pre-TSO limit.

4) Fields in the packet may require different initialization (e.g.
   pseudo-header checksum precalculation, sometimes done in a different
   way depending on packet type, and so on). Currently the application
   needs to take care of it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst
   lets the application prepare a packet burst in a form acceptable to
   the specific device.

6) Some additional checks may be done in debug mode, keeping the
   tx_burst implementation clean.


PROPOSAL:
-

To help users deal with all these varieties, we propose to:

1. Introduce rte_eth_tx_prep() function to do necessary preparations of
   packet burst to be safely transmitted on device for desired HW
   offloads (set/reset checksum field according to the hardware
   requirements) and check HW constraints (number of segments per
   packet, etc).

   Since the limitations and requirements may differ between devices,
   this requires extending the rte_eth_dev structure with a new
   function pointer "tx_pkt_prep", which can be implemented in the
   driver to prepare and verify packets, in a device-specific way,
   before burst; this should prevent the application from sending
   malformed packets.

2. Also new fields will be introduced in rte_eth_desc_lim:
   nb_seg_max and nb_mtu_seg_max, providing information about the max
   number of segments in TSO and non-TSO packets acceptable to the
   device.

   This information helps the application limit, or avoid creating,
   packets the device would reject as malicious.


APPLICATION (CASE OF USE):
--

1) The application should initialize the burst of packets to send,
   setting the required tx offload flags and fields such as l2_len,
   l3_len, l4_len, and tso_segsz.

2) The application passes the burst to rte_eth_tx_prep to check the
   conditions required to send the packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send the valid packets
   and/or to restore the invalid ones if the function fails.

eg.

for (i = 0; i < nb_pkts; i++) {

/* initialize or process packet */

bufs[i]->tso_segsz = 800;
bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
| PKT_TX_IP_CKSUM;
bufs[i]->l2_len = sizeof(struct ether_hdr);
bufs[i]->l3_len = sizeof(struct ipv4_hdr);
bufs[i]->l4_len = sizeof(struct tcp_hdr);
}

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

if (nb_prep < nb_pkts) {
printf("tx_prep failed\n");

/* drop or restore invalid packets */

}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */



Signed-off-by: Tomasz Kulasek 
---
 doc/guides/rel_notes/deprecation.rst |7 +++
 1 file changed, 7 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index f502f86..485aacb 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -41,3 +41,10 @@ Deprecation Notices
 * The mempool functions for single/multi producer/consumer are deprecated and
   will be removed in 16.11.
   It is replaced by rte_mempool_generic_get/put functions.
+
+* In 16.11 ABI changes are planned: the ``rte_eth_dev`` structure will be
+  extended with new function pointer ``tx_pkt_prep`` allowing verification
+  and processing of packet burst to meet HW specific requirements before
+  transmit. Also new fields will be added to the ``rte_eth_desc_lim`` structure:
+  ``nb_seg_max`` and ``nb_mtu_seg_max`` providing information about the limit
+  on the number of segments transmitted by the device for TSO/non-TSO packets.
-- 
1.7.9.5



[dpdk-dev] [PATCH] doc: announce ABI change for rte_eth_dev structure

2016-07-20 Thread Tomasz Kulasek
This is an ABI deprecation notice for DPDK 16.11 in librte_ether about
changes in rte_eth_dev and rte_eth_desc_lim structures.

In 16.11, we plan to introduce rte_eth_tx_prep() function to do
necessary preparations of packet burst to be safely transmitted on
device for desired HW offloads (set/reset checksum field according to
the hardware requirements) and check HW constraints (number of segments
per packet, etc).

Since the limitations and requirements may differ between devices, this
requires extending the rte_eth_dev structure with a new function pointer
"tx_pkt_prep" which can be implemented in the driver to prepare and
verify packets, in a device-specific way, before burst; this should
prevent the application from sending malformed packets.

Also new fields will be introduced in rte_eth_desc_lim: nb_seg_max and
nb_mtu_seg_max, providing information about the max number of segments
in TSO and non-TSO packets acceptable to the device.

Signed-off-by: Tomasz Kulasek 
---
 doc/guides/rel_notes/deprecation.rst |7 +++
 1 file changed, 7 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index f502f86..485aacb 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -41,3 +41,10 @@ Deprecation Notices
 * The mempool functions for single/multi producer/consumer are deprecated and
   will be removed in 16.11.
   It is replaced by rte_mempool_generic_get/put functions.
+
+* In 16.11 ABI changes are planned: the ``rte_eth_dev`` structure will be
+  extended with new function pointer ``tx_pkt_prep`` allowing verification
+  and processing of packet burst to meet HW specific requirements before
+  transmit. Also new fields will be added to the ``rte_eth_desc_lim`` structure:
+  ``nb_seg_max`` and ``nb_mtu_seg_max`` providing information about the limit
+  on the number of segments transmitted by the device for TSO/non-TSO packets.
-- 
1.7.9.5



[dpdk-dev] [PATCH] app/test: fix array overflow warning with gcc 4.5

2016-06-09 Thread Tomasz Kulasek
DPDK/app/test/test_cryptodev.c: In function 'create_snow3g_cipher_operation_oop.clone.15':
DPDK/x86_64-native-linuxapp-gcc/include/rte_memcpy.h:796:14:
error: array subscript is above array bounds.

In test_cryptodev.c:
2429:   rte_memcpy(sym_op->cipher.iv.data, iv, iv_len);

When iv_len is declared as 'unsigned int', rte_memcpy evaluates code for
buffer sizes bigger than 255; since the 'iv' array is only 64 bytes
long, this causes an 'above array bounds' warning in gcc 4.5 and breaks
compilation.

Using uint8_t as the size of the copied block prevents rte_memcpy from
evaluating the code for lengths bigger than 255, which was causing the
problem.
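
The effect can be illustrated with a small sketch (an illustration of
the compile-time dead-code idea only, not the actual rte_memcpy
internals):

static inline void
copy_iv_sketch(uint8_t *dst, const uint8_t *src, uint8_t n)
{
	unsigned int len = n;	/* value range is provably <= 255 */

	if (len > 255) {
		/* bulk-copy path whose fixed offsets tripped the gcc 4.5
		 * array-bounds analysis; dead code for a uint8_t length */
	}
	while (len--)
		*dst++ = *src++;
}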

The root of this issue and solution is the same as for commit 2c007ea10616
("app/test: fix array overflow warning with gcc 4.5")

Fixes: 9727af14b032 ("app/test: add out-of-place symmetric crypto operations")

Signed-off-by: Tomasz Kulasek 
---
 app/test/test_cryptodev.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 45e6daa..6621573 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -2392,7 +2392,7 @@ create_snow3g_cipher_operation(const uint8_t *iv, const unsigned iv_len,
 }

 static int
-create_snow3g_cipher_operation_oop(const uint8_t *iv, const unsigned iv_len,
+create_snow3g_cipher_operation_oop(const uint8_t *iv, const uint8_t iv_len,
const unsigned cipher_len,
const unsigned cipher_offset)
 {
-- 
1.7.9.5



[dpdk-dev] [PATCH v2] examples/performance-thread: fix size of destination port ids in l3fwd-thread

2016-04-29 Thread Tomasz Kulasek
After extending the IPv4 next hop in the lpm library, the size of the
dst_port array was changed from 16 to 32 bits in the l3fwd-thread
example, without modifying the rest of the path, which was written for
a 16-bit value.

This patch uses a similar approach to the fix in commit 8353a36a9b4b
("examples/l3fwd: fix size of destination port ids"), restoring the
16-bit size for destination port ids and doing the necessary conversion
from 32 to 16 bits after lpm_lookupx4.

Fixes: dc81ebbacaeb ("lpm: extend IPv4 next hop field")

Signed-off-by: Tomasz Kulasek 
---
v2:
 - split into two patches

 examples/performance-thread/l3fwd-thread/main.c |   20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
index 4ad16ea..c008d6a 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -1307,7 +1307,7 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
  * to BAD_PORT value.
  */
 static inline __attribute__((always_inline)) void
-rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint32_t *dp, uint32_t ptype)
+rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
 {
uint8_t ihl;

@@ -1326,7 +1326,7 @@ rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint32_t *dp, uint32_t ptype)
 }

 #else
-#definerfc1812_process(mb, dp) do { } while (0)
+#definerfc1812_process(mb, dp, ptype)  do { } while (0)
 #endif /* DO_RFC_1812_CHECKS */
 #endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */

@@ -1364,7 +1364,7 @@ get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint8_t portid)
 }

 static inline void
-process_packet(struct rte_mbuf *pkt, uint32_t *dst_port, uint8_t portid)
+process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint8_t portid)
 {
struct ether_hdr *eth_hdr;
struct ipv4_hdr *ipv4_hdr;
@@ -1431,9 +1431,9 @@ processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
 static inline void
 processx4_step2(__m128i dip,
uint32_t ipv4_flag,
-   uint32_t portid,
+   uint8_t portid,
struct rte_mbuf *pkt[FWDSTEP],
-   uint32_t dprt[FWDSTEP])
+   uint16_t dprt[FWDSTEP])
 {
rte_xmm_t dst;
const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
@@ -1445,7 +1445,11 @@ processx4_step2(__m128i dip,
/* if all 4 packets are IPV4. */
if (likely(ipv4_flag)) {
rte_lpm_lookupx4(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dip,
-   dprt, portid);
+   dst.u32, portid);
+
+   /* get rid of unused upper 16 bit for each dport. */
+   dst.x = _mm_packs_epi32(dst.x, dst.x);
+   *(uint64_t *)dprt = dst.u64[0];
} else {
dst.x = dip;
dprt[0] = get_dst_port(pkt[0], dst.u32[0], portid);
@@ -1460,7 +1464,7 @@ processx4_step2(__m128i dip,
  * Perform RFC1812 checks and updates for IPV4 packets.
  */
 static inline void
-processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint32_t dst_port[FWDSTEP])
+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
 {
__m128i te[FWDSTEP];
__m128i ve[FWDSTEP];
@@ -1679,7 +1683,7 @@ process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], 
int nb_rx,
int32_t k;
uint16_t dlp;
uint16_t *lp;
-   uint32_t dst_port[MAX_PKT_BURST];
+   uint16_t dst_port[MAX_PKT_BURST];
__m128i dip[MAX_PKT_BURST / FWDSTEP];
uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
uint16_t pnum[MAX_PKT_BURST + 1];
-- 
1.7.9.5



[dpdk-dev] [PATCH] examples/performance-thread: fix segfault with gcc 5.x

2016-04-26 Thread Tomasz Kulasek
It seems that gcc >= 5.x with -O2/-O3 optimization breaks the packet
grouping algorithm in the l3fwd-thread application, causing a segfault.

When the last packet pointer "lp" and the "pnum->u64" buffer point to the
same memory, high optimization levels can cause unpredictable results:
the assignment of the precalculated group sizes may interfere with the
initialization of a new group size when lp points to a value inside the
current group that should not be changed.

With gcc >= 5.x and optimization enabled we cannot be sure which
assignment will be done first, so the group size can be computed
incorrectly, causing a segfault.

This patch eliminates the overlap between the assignment of the initial
group size (lp[0] = 1) and the precalculated group sizes when
gptbl[v].idx < 4.
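
The hazard can be reduced to a few lines (a hypothetical sketch, not the
actual l3fwd-thread code; FWDSTEP and the union mirror the shapes used by
port_groupx4):

    #include <stdint.h>

    #define FWDSTEP 4

    union grp {
            uint64_t u64;               /* overlays u16[0..3] */
            uint16_t u16[FWDSTEP + 1];  /* u16[FWDSTEP] is outside u64 */
    };

    /* BROKEN ordering: lp[0] is a 16-bit store through a plain pointer,
     * and type-based alias analysis lets gcc assume it cannot overlap
     * the 64-bit store to g->u64, so the two stores may be reordered
     * even though they do overlap when idx < 4. */
    static uint16_t *
    update_group_broken(union grp *g, uint16_t *lp, uint64_t pnum, int idx)
    {
            lp = g->u16 + idx;
            lp[0] = 1;
            g->u64 = pnum;
            return lp;
    }

    /* FIXED ordering, as in the patch: write the precalculated sizes
     * first, then the sentinel slot that the u64 overlay never covers,
     * and retarget lp only afterwards. */
    static uint16_t *
    update_group_fixed(union grp *g, uint16_t *lp, uint64_t pnum, int idx)
    {
            g->u64 = pnum;
            g->u16[FWDSTEP] = 1;
            lp = g->u16 + idx;
            return lp;
    }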

Fixes: d48415e1fee3 ("examples/performance-thread: add l3fwd-thread app")

Signed-off-by: Tomasz Kulasek 
---
 examples/performance-thread/l3fwd-thread/main.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/performance-thread/l3fwd-thread/main.c 
b/examples/performance-thread/l3fwd-thread/main.c
index 15c0a4d..3417fd5 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -1658,9 +1658,9 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, 
__m128i dp1, __m128i dp2)

/* if dest port value has changed. */
if (v != GRPMSK) {
-   lp = pnum->u16 + gptbl[v].idx;
-   lp[0] = 1;
pnum->u64 = gptbl[v].pnum;
+   pnum->u16[FWDSTEP] = 1;
+   lp = pnum->u16 + gptbl[v].idx;
}

return lp;
-- 
1.7.9.5



[dpdk-dev] [PATCH v2] ixgbe: fix bad shift operation in ixgbe_set_pool_rx/tx

2016-04-22 Thread Tomasz Kulasek
Fix issue reported by Coverity.

Coverity ID 13193: Bad bit shift operation (BAD_SHIFT)
large_shift: In expression 1 << pool, left shifting by more than 31 bits
has undefined behavior. The shift amount, pool, is at least 32.

This patch reworks the register address selection logic and the mask
computation to make them more readable and to avoid overflowing a 32-bit
value by shifting it beyond its width for pool > 31.
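
The reworked selection logic is small enough to show in full (a
simplified sketch; bank[] and the plain return codes stand in for the
IXGBE register pair and -EINVAL):

    #include <stdint.h>

    /* "1u << pool" is undefined for pool >= 32 on a 32-bit value, so
     * select the second register of the pair and shift by (pool - 32)
     * instead. */
    static int
    set_pool_bit(uint32_t bank[2], uint16_t pool, int on)
    {
            uint32_t idx, val;

            if (pool >= 64)
                    return -1;      /* invalid pool id */

            if (pool >= 32) {
                    idx = 1;
                    val = 1u << (pool - 32);
            } else {
                    idx = 0;
                    val = 1u << pool;
            }

            if (on)
                    bank[idx] |= val;
            else
                    bank[idx] &= ~val;

            return 0;
    }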

Fixes: fe3a45fd4104 ("ixgbe: add VMDq support")

Signed-off-by: Tomasz Kulasek 
---
v2:
 - joined two patches for same issue in tx/rx
 - added pool parameter checking for invalid value
 - reworked register selection logic and mask shift in more explicit way

 drivers/net/ixgbe/ixgbe_ethdev.c |   28 
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 3f1ebc1..eed2662 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -4399,9 +4399,19 @@ ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t 
pool, uint8_t on)
if (ixgbe_vmdq_mode_check(hw) < 0)
return -ENOTSUP;

-   addr = IXGBE_VFRE(pool >= ETH_64_POOLS/2);
+   if (pool >= ETH_64_POOLS)
+   return -EINVAL;
+
+   /* for pool >= 32, set bit in PFVFRE[1], otherwise PFVFRE[0] */
+   if (pool >= 32) {
+   addr = IXGBE_VFRE(1);
+   val = bit1 << (pool - 32);
+   } else {
+   addr = IXGBE_VFRE(0);
+   val = bit1 << pool;
+   }
+
reg = IXGBE_READ_REG(hw, addr);
-   val = bit1 << pool;

if (on)
reg |= val;
@@ -4426,9 +4436,19 @@ ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t 
pool, uint8_t on)
if (ixgbe_vmdq_mode_check(hw) < 0)
return -ENOTSUP;

-   addr = IXGBE_VFTE(pool >= ETH_64_POOLS/2);
+   if (pool >= ETH_64_POOLS)
+   return -EINVAL;
+
+   /* for pool >= 32, set bit in PFVFTE[1], otherwise PFVFTE[0] */
+   if (pool >= 32) {
+   addr = IXGBE_VFTE(1);
+   val = bit1 << (pool - 32);
+   } else {
+   addr = IXGBE_VFTE(0);
+   val = bit1 << pool;
+   }
+
reg = IXGBE_READ_REG(hw, addr);
-   val = bit1 << pool;

if (on)
reg |= val;
-- 
1.7.9.5



[dpdk-dev] [PATCH] examples/performance-thread: fix size of destination port ids in l3fwd-thread

2016-04-20 Thread Tomasz Kulasek
After extending the IPv4 next hop in the lpm library, the size of the
dst_port array was changed from 16 to 32 bits in the l3fwd-thread
example, without modifying the rest of the path, which was written for
16-bit values.

This patch uses a similar approach to the fix in commit 8353a36a9b4b
("examples/l3fwd: fix size of destination port ids"), restoring the
16-bit size for destination port ids and doing the necessary conversion
from 32 to 16 bits after lpm_lookupx4.

It also fixes wrong logic in get_dst_port() on the lpm path, introduced
in the same patch, which caused an unpredictable return value when the
ipv4/ipv6 lookup succeeded.
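
The corrected lookup logic boils down to the following (a condensed
sketch of the intent; lpm4_lookup/lpm6_lookup are hypothetical stand-ins
for rte_lpm_lookup/rte_lpm6_lookup):

    #include <stdint.h>

    extern int lpm4_lookup(uint32_t *next_hop);
    extern int lpm6_lookup(uint32_t *next_hop);

    /* Return the looked-up next hop only when the lookup succeeds
     * (returns 0); on a miss or for non-IP traffic fall back to the
     * input port id instead of returning an uninitialized value. */
    static inline uint16_t
    get_dst_port_sketch(int is_ipv4, int is_ipv6, uint8_t portid)
    {
            uint32_t next_hop;

            if (is_ipv4)
                    return (uint16_t)(lpm4_lookup(&next_hop) == 0 ?
                                    next_hop : portid);
            if (is_ipv6)
                    return (uint16_t)(lpm6_lookup(&next_hop) == 0 ?
                                    next_hop : portid);

            return portid;
    }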

Fixes: dc81ebbacaeb ("lpm: extend IPv4 next hop field")

Signed-off-by: Tomasz Kulasek 
---
 examples/performance-thread/l3fwd-thread/main.c |   45 ---
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/examples/performance-thread/l3fwd-thread/main.c 
b/examples/performance-thread/l3fwd-thread/main.c
index 15c0a4d..6588c0c 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -1307,7 +1307,7 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
  * to BAD_PORT value.
  */
 static inline __attribute__((always_inline)) void
-rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint32_t *dp, uint32_t ptype)
+rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
 {
uint8_t ihl;

@@ -1326,7 +1326,7 @@ rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint32_t *dp, 
uint32_t ptype)
 }

 #else
-#define rfc1812_process(mb, dp) do { } while (0)
+#define rfc1812_process(mb, dp, ptype)  do { } while (0)
 #endif /* DO_RFC_1812_CHECKS */
 #endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */

@@ -1343,28 +1343,27 @@ get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, 
uint8_t portid)
struct ether_hdr *eth_hdr;

if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
-   if 
(rte_lpm_lookup(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct,
-   dst_ipv4, &next_hop_ipv4) != 0) {
-   next_hop_ipv4 = portid;
-   return next_hop_ipv4;
-   }
+   return (uint16_t) ((rte_lpm_lookup(
+   RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, 
dst_ipv4,
+   &next_hop_ipv4) == 0) ? next_hop_ipv4 : portid);
+
} else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
+
eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
-   if 
(rte_lpm6_lookup(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
-   ipv6_hdr->dst_addr, &next_hop_ipv6) != 0) {
-   next_hop_ipv6 = portid;
-   return next_hop_ipv6;
-   }
-   } else {
-   next_hop_ipv4 = portid;
-   return next_hop_ipv4;
+
+   return (uint16_t) ((rte_lpm6_lookup(
+   RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
+   ipv6_hdr->dst_addr, &next_hop_ipv6) == 0) ? 
next_hop_ipv6 :
+   portid);
+
}

+   return portid;
 }

 static inline void
-process_packet(struct rte_mbuf *pkt, uint32_t *dst_port, uint8_t portid)
+process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint8_t portid)
 {
struct ether_hdr *eth_hdr;
struct ipv4_hdr *ipv4_hdr;
@@ -1431,9 +1430,9 @@ processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
 static inline void
 processx4_step2(__m128i dip,
uint32_t ipv4_flag,
-   uint32_t portid,
+   uint8_t portid,
struct rte_mbuf *pkt[FWDSTEP],
-   uint32_t dprt[FWDSTEP])
+   uint16_t dprt[FWDSTEP])
 {
rte_xmm_t dst;
const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
@@ -1445,7 +1444,11 @@ processx4_step2(__m128i dip,
/* if all 4 packets are IPV4. */
if (likely(ipv4_flag)) {
rte_lpm_lookupx4(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, 
dip,
-   dprt, portid);
+   dst.u32, portid);
+
+   /* get rid of unused upper 16 bit for each dport. */
+   dst.x = _mm_packs_epi32(dst.x, dst.x);
+   *(uint64_t *)dprt = dst.u64[0];
} else {
dst.x = dip;
dprt[0] = get_dst_port(pkt[0], dst.u32[0], portid);
@@ -1460,7 +1463,7 @@ processx4_step2(__m128i dip,
  * Perform RFC1812 checks and updates for IPV4 packets.
  */
 static inline void
-processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint32_t dst_port[FWDSTEP])
+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
 {
__m128i te[FWDSTEP];
__m128i ve[FWDSTEP];
@@ -1679,7 +1682,7 @@ process_burst(struct rte_mbuf *p

[dpdk-dev] [PATCH] ixgbe: fix bad shift operation in ixgbe_set_pool_tx

2016-04-15 Thread Tomasz Kulasek
CID 13190 (#1 of 1): Bad bit shift operation (BAD_SHIFT)
large_shift: In expression 1 << pool, left shifting by more than 31 bits
has undefined behavior. The shift amount, pool, is at least 32.

This patch limits the mask shift so that it stays within the 32-bit
PFVFTE[1] register for pool > 31.

Fixes: fe3a45fd4104 ("ixgbe: add VMDq support")

Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index f676a64..6ae82e7 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -4428,7 +4428,7 @@ ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, 
uint8_t on)

addr = IXGBE_VFTE(pool >= ETH_64_POOLS/2);
reg = IXGBE_READ_REG(hw, addr);
-   val = bit1 << pool;
+   val = bit1 << (pool & 0x01F);

if (on)
reg |= val;
-- 
1.7.9.5



[dpdk-dev] [PATCH] ixgbe: fix bad shift operation in ixgbe_set_pool_rx

2016-04-15 Thread Tomasz Kulasek
CID 13193 (#1 of 1): Bad bit shift operation (BAD_SHIFT)
large_shift: In expression 1 << pool, left shifting by more than 31 bits
has undefined behavior. The shift amount, pool, is at least 32.

This patch limits the mask shift so that it stays within the 32-bit
PFVFRE[1] register for pool > 31.

Fixes: fe3a45fd4104 ("ixgbe: add VMDq support")

Signed-off-by: Tomasz Kulasek 
---
 drivers/net/ixgbe/ixgbe_ethdev.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 3f1ebc1..f676a64 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -4401,7 +4401,7 @@ ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, 
uint8_t on)

addr = IXGBE_VFRE(pool >= ETH_64_POOLS/2);
reg = IXGBE_READ_REG(hw, addr);
-   val = bit1 << pool;
+   val = bit1 << (pool & 0x01F);

if (on)
reg |= val;
-- 
1.7.9.5



[dpdk-dev] [PATCH] app/testpmd: fix strcat can overrun fixed-size string

2016-04-11 Thread Tomasz Kulasek
CID 13307 (#1 of 1): Copy into fixed size buffer (STRING_OVERFLOW)
fixed_size_dest: You might overrun the 128 byte fixed-size string fwd_modes
by copying fwd_eng->fwd_mode_name without checking the length.
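
The bounded pattern used by the fix, in isolation (a minimal sketch;
append_token is a hypothetical helper, not testpmd code):

    #include <string.h>

    #define FWD_MODES_LEN 128

    /* Append src without overrunning the fixed-size buffer: the bound
     * passed to strncat() must leave room for the terminating NUL that
     * strncat() always writes. */
    static void
    append_token(char dst[FWD_MODES_LEN], const char *src)
    {
            strncat(dst, src, FWD_MODES_LEN - strlen(dst) - 1);
    }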

Fixes: 769ce6b17835 ("app/testpmd: list forwarding engines")

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/config.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index b1bbec6..fff2d96 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1673,8 +1673,10 @@ list_pkt_forwarding_modes(void)

if (strlen (fwd_modes) == 0) {
while ((fwd_eng = fwd_engines[i++]) != NULL) {
-   strcat(fwd_modes, fwd_eng->fwd_mode_name);
-   strcat(fwd_modes, separator);
+   strncat(fwd_modes, fwd_eng->fwd_mode_name, 
sizeof(fwd_modes) -
+   strlen(fwd_modes) - 1);
+   strncat(fwd_modes, separator, sizeof(fwd_modes) - 
strlen(fwd_modes)
+   - 1);
}
fwd_modes[strlen(fwd_modes) - strlen(separator)] = '\0';
}
-- 
1.7.9.5



[dpdk-dev] [PATCH] examples: fix not all queues drained in l3fwd-*

2016-04-07 Thread Tomasz Kulasek
In l3fwd-acl and l3fwd-power not all tx ports were included in the
tx_port_id array, which is used to periodically drain only the available
ports. This caused some packets to remain in the buffer when the
application stopped receiving packets or when the burst size was small.
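
The corrected bookkeeping registers each port an lcore transmits on and
drains exactly that set (a condensed sketch of the pattern; the structs
and send_burst() stand in for the examples' types):

    #include <stdint.h>

    #define MAX_PORTS 32

    struct mbuf_table_sketch { uint16_t len; /* + pending mbufs */ };

    struct lcore_conf_sketch {
            uint16_t n_tx_port;
            uint16_t tx_port_id[MAX_PORTS];
            struct mbuf_table_sketch tx_mbufs[MAX_PORTS];
    };

    extern void send_burst(struct lcore_conf_sketch *q, uint16_t n,
                    uint8_t port);

    /* init: record the port in this lcore's tx port list */
    static void
    register_tx_port(struct lcore_conf_sketch *qconf, uint8_t portid)
    {
            qconf->tx_port_id[qconf->n_tx_port] = portid;
            qconf->n_tx_port++;
    }

    /* periodic drain: walk every registered tx port, not just the rx
     * ports, so no buffered packets are left behind */
    static void
    drain_tx_ports(struct lcore_conf_sketch *qconf)
    {
            uint16_t i;

            for (i = 0; i < qconf->n_tx_port; i++) {
                    uint8_t portid = (uint8_t)qconf->tx_port_id[i];

                    if (qconf->tx_mbufs[portid].len == 0)
                            continue;
                    send_burst(qconf, qconf->tx_mbufs[portid].len, portid);
                    qconf->tx_mbufs[portid].len = 0;
            }
    }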

Fixes: e2366e74e029 ("examples: use buffered Tx")

Signed-off-by: Tomasz Kulasek 
---
 examples/l3fwd-acl/main.c   |6 +-
 examples/l3fwd-power/main.c |6 +-
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
index 3a895b7..55ee337 100644
--- a/examples/l3fwd-acl/main.c
+++ b/examples/l3fwd-acl/main.c
@@ -1893,7 +1893,6 @@ main(int argc, char **argv)
unsigned lcore_id;
uint32_t n_tx_queue, nb_lcores;
uint8_t portid, nb_rx_queue, queue, socketid;
-   uint8_t nb_tx_port;

/* init EAL */
ret = rte_eal_init(argc, argv);
@@ -1926,7 +1925,6 @@ main(int argc, char **argv)
rte_exit(EXIT_FAILURE, "app_acl_init failed\n");

nb_lcores = rte_lcore_count();
-   nb_tx_port = 0;

/* initialize all ports */
for (portid = 0; portid < nb_ports; portid++) {
@@ -2008,12 +2006,10 @@ main(int argc, char **argv)
qconf->tx_queue_id[portid] = queueid;
queueid++;

-   qconf->n_tx_port = nb_tx_port;
qconf->tx_port_id[qconf->n_tx_port] = portid;
+   qconf->n_tx_port++;
}
printf("\n");
-
-   nb_tx_port++;
}

for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index e7ebe30..2af7731 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -1546,7 +1546,6 @@ main(int argc, char **argv)
uint32_t n_tx_queue, nb_lcores;
uint32_t dev_rxq_num, dev_txq_num;
uint8_t portid, nb_rx_queue, queue, socketid;
-   uint8_t nb_tx_port;

/* catch SIGINT and restore cpufreq governor to ondemand */
signal(SIGINT, signal_exit_now);
@@ -1582,7 +1581,6 @@ main(int argc, char **argv)
rte_exit(EXIT_FAILURE, "check_port_config failed\n");

nb_lcores = rte_lcore_count();
-   nb_tx_port = 0;

/* initialize all ports */
for (portid = 0; portid < nb_ports; portid++) {
@@ -1675,12 +1673,10 @@ main(int argc, char **argv)
qconf->tx_queue_id[portid] = queueid;
queueid++;

-   qconf->n_tx_port = nb_tx_port;
qconf->tx_port_id[qconf->n_tx_port] = portid;
+   qconf->n_tx_port++;
}
printf("\n");
-
-   nb_tx_port++;
}

for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-- 
1.7.9.5



[dpdk-dev] [PATCH v2] app/test: fix array subscript is above array bounds for gcc 4.5

2016-04-07 Thread Tomasz Kulasek
cc1: warnings being treated as errors
DPDK/app/test/test_cryptodev.c: In function 'test_snow3g_encrypted_authenti
cation.clone.3':
DPDK/x86_64-ivshmem-linuxapp-gcc/include/rte_memcpy.h:796:14: error: array
subscript is above array bounds
compilation terminated due to -Wfatal-errors.


ROOT OF PROBLEM:


In lines like:

rte_memcpy(sym_op->cipher.iv.data, iv, iv_len);

when "iv" is 64 bytes long array, and "iv_len" is "unsigned int",
compiler tries to evaluate also a code for array size larger than 255 bytes
long and reports error "array subscript is above array bounds" in line:

rte_memcpy.h:796

rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128);

caused by evaluation to:

rte_mov128((uint8_t *)sym_op->cipher.iv.data + 128, (const uint8_t *)iv
+ 128);

where "iv" is 64 bytes long buffer and "iv + 128" point out of it, gcc 4.5.


SOLUTION:
-

Using uint8_t as the size of the copied block prevents rte_memcpy from
evaluating the code for lengths bigger than 255, which caused the problem.
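
Reduced to its essence, the fix relies on value-range analysis (a
hypothetical sketch; small_copy is not DPDK code):

    #include <stdint.h>
    #include <string.h>

    /* With "const uint8_t len" the compiler knows len <= 255, so the
     * unrolled >255-byte paths of an rte_memcpy-style inline copier are
     * provably dead and never checked against a 64-byte source array. */
    static void
    small_copy(uint8_t *dst, const uint8_t *src, const uint8_t len)
    {
            memcpy(dst, src, len);  /* stand-in for rte_memcpy() */
    }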

v2 changes:
 - added fixline

Fixes: 8bdf665fe6c0 ("app/test: add SNOW 3G")

Signed-off-by: Tomasz Kulasek 
---
 app/test/test_cryptodev.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 0c26804..8e8da98 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -2362,10 +2362,10 @@ create_snow3g_hash_operation(const uint8_t *auth_tag,
 static int
 create_snow3g_cipher_hash_operation(const uint8_t *auth_tag,
const unsigned auth_tag_len,
-   const uint8_t *aad, const unsigned aad_len,
+   const uint8_t *aad, const uint8_t aad_len,
unsigned data_pad_len,
enum rte_crypto_auth_operation op,
-   const uint8_t *iv, const unsigned iv_len,
+   const uint8_t *iv, const uint8_t iv_len,
const unsigned cipher_len, const unsigned cipher_offset,
const unsigned auth_len, const unsigned auth_offset)
 {
@@ -2460,8 +2460,8 @@ create_snow3g_cipher_hash_operation(const uint8_t 
*auth_tag,

 static int
 create_snow3g_auth_cipher_operation(const unsigned auth_tag_len,
-   const uint8_t *iv, const unsigned iv_len,
-   const uint8_t *aad, const unsigned aad_len,
+   const uint8_t *iv, const uint8_t iv_len,
+   const uint8_t *aad, const uint8_t aad_len,
unsigned data_pad_len,
const unsigned cipher_len, const unsigned cipher_offset,
const unsigned auth_len, const unsigned auth_offset)
-- 
1.7.9.5



[dpdk-dev] [PATCH] app/test: fix array subscript is above array bounds for gcc 4.5

2016-04-07 Thread Tomasz Kulasek
cc1: warnings being treated as errors
DPDK/app/test/test_cryptodev.c: In function 'test_snow3g_encrypted_authenti
cation.clone.3':
DPDK/x86_64-ivshmem-linuxapp-gcc/include/rte_memcpy.h:796:14: error: array
subscript is above array bounds
compilation terminated due to -Wfatal-errors.


ROOT OF PROBLEM:


In lines like:

rte_memcpy(sym_op->cipher.iv.data, iv, iv_len);

when "iv" is 64 bytes long array, and "iv_len" is "unsigned int",
compiler tries to evaluate also a code for array size larger than 255 bytes
long and reports error "array subscript is above array bounds" in line:

rte_memcpy.h:796

rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128);

caused by evaluation to:

rte_mov128((uint8_t *)sym_op->cipher.iv.data + 128, (const uint8_t *)iv
+ 128);

where "iv" is 64 bytes long buffer and "iv + 128" point out of it, gcc 4.5.


SOLUTION:
-

Using uint8_t as the size of the copied block prevents rte_memcpy from
evaluating the code for sizes bigger than 255, which caused the problem.


Signed-off-by: Tomasz Kulasek 
---
 app/test/test_cryptodev.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 0c26804..8e8da98 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -2362,10 +2362,10 @@ create_snow3g_hash_operation(const uint8_t *auth_tag,
 static int
 create_snow3g_cipher_hash_operation(const uint8_t *auth_tag,
const unsigned auth_tag_len,
-   const uint8_t *aad, const unsigned aad_len,
+   const uint8_t *aad, const uint8_t aad_len,
unsigned data_pad_len,
enum rte_crypto_auth_operation op,
-   const uint8_t *iv, const unsigned iv_len,
+   const uint8_t *iv, const uint8_t iv_len,
const unsigned cipher_len, const unsigned cipher_offset,
const unsigned auth_len, const unsigned auth_offset)
 {
@@ -2460,8 +2460,8 @@ create_snow3g_cipher_hash_operation(const uint8_t 
*auth_tag,

 static int
 create_snow3g_auth_cipher_operation(const unsigned auth_tag_len,
-   const uint8_t *iv, const unsigned iv_len,
-   const uint8_t *aad, const unsigned aad_len,
+   const uint8_t *iv, const uint8_t iv_len,
+   const uint8_t *aad, const uint8_t aad_len,
unsigned data_pad_len,
const unsigned cipher_len, const unsigned cipher_offset,
const unsigned auth_len, const unsigned auth_offset)
-- 
1.7.9.5



[dpdk-dev] [PATCH] examples/l3fwd: fix segfault with gcc 5.x

2016-04-04 Thread Tomasz Kulasek
It seems that gcc >= 5.x with -O2/-O3 optimization breaks the packet
grouping algorithm.

When the last packet pointer "lp" and the "pnum->u64" buffer point to the
same memory, high optimization levels can cause unpredictable results:
the assignment of the precalculated group sizes may interfere with the
initialization of a new group size when lp points to a value inside the
current group that should not be changed.

With gcc >= 5.x and optimization enabled we cannot be sure which
assignment will be done first, so the group size can be computed
incorrectly.

This patch eliminates the overlap between the assignment of the initial
group size (lp[0] = 1) and the precalculated group sizes when
gptbl[v].idx < 4.

Fixes: 94c54b4158d5 ("examples/l3fwd: rework exact-match")

Signed-off-by: Tomasz Kulasek 
---
 examples/l3fwd/l3fwd_sse.h |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
index f9cf50a..1afa1f0 100644
--- a/examples/l3fwd/l3fwd_sse.h
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -283,9 +283,9 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, 
__m128i dp1, __m128i dp2)

/* if dest port value has changed. */
if (v != GRPMSK) {
-   lp = pnum->u16 + gptbl[v].idx;
-   lp[0] = 1;
pnum->u64 = gptbl[v].pnum;
+   pnum->u16[FWDSTEP] = 1;
+   lp = pnum->u16 + gptbl[v].idx;
}

return lp;
-- 
1.7.9.5



[dpdk-dev] [PATCH] examples/l3fwd: fix some packets lost when stops receiving

2016-03-31 Thread Tomasz Kulasek
Not all tx ports were included in the tx_port_id array, which is used to
periodically drain only the available ports. This caused some packets to
remain in the buffer when the application stopped receiving packets.

Fixes: 52c97adc1f0f ("examples/l3fwd: fix exact match performance")

Signed-off-by: Tomasz Kulasek 
---
 examples/l3fwd/main.c |6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index e10dab9..92fda3b 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -836,7 +836,6 @@ main(int argc, char **argv)
unsigned lcore_id;
uint32_t n_tx_queue, nb_lcores;
uint8_t portid, nb_rx_queue, queue, socketid;
-   uint8_t nb_tx_port;

/* init EAL */
ret = rte_eal_init(argc, argv);
@@ -876,7 +875,6 @@ main(int argc, char **argv)
rte_exit(EXIT_FAILURE, "check_port_config failed\n");

nb_lcores = rte_lcore_count();
-   nb_tx_port = 0;

/* Setup function pointers for lookup method. */
setup_l3fwd_lookup_tables();
@@ -954,12 +952,10 @@ main(int argc, char **argv)
qconf->tx_queue_id[portid] = queueid;
queueid++;

-   qconf->n_tx_port = nb_tx_port;
qconf->tx_port_id[qconf->n_tx_port] = portid;
+   qconf->n_tx_port++;
}
printf("\n");
-
-   nb_tx_port++;
}

for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-- 
1.7.9.5



[dpdk-dev] [PATCH] testpmd: fix enumerated type mixed with another type

2016-03-22 Thread Tomasz Kulasek
This patch fixes error #188: enumerated type mixed with another type,
reported when a uint32_t is cast to an enum type by icc.

Fixes: 05f1b9c82ec2 ("app/testpmd: add commands for L2 tunnel config")

Signed-off-by: Tomasz Kulasek 
---
 app/test-pmd/cmdline.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 9d52b8c..c34d4c1 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -9723,14 +9723,14 @@ cmdline_parse_token_num_t 
cmd_config_l2_tunnel_eth_type_eth_type_val =
(struct cmd_config_l2_tunnel_eth_type_result,
 eth_type_val, UINT16);

-static uint32_t
+static enum rte_eth_tunnel_type
 str2fdir_l2_tunnel_type(char *string)
 {
uint32_t i = 0;

static const struct {
char str[32];
-   uint32_t type;
+   enum rte_eth_tunnel_type type;
} l2_tunnel_type_str[] = {
{"E-tag", RTE_L2_TUNNEL_TYPE_E_TAG},
};
-- 
2.5.5



[dpdk-dev] [PATCH v6] examples/l3fwd: em path performance fix

2016-03-18 Thread Tomasz Kulasek
It seems that for most use cases the previous hash_multi_lookup provides
better performance; moreover, a sequential lookup can cause a significant
performance drop.

This patch makes the previously optional hash_multi_lookup method the
default. It also provides some minor optimizations, such as draining
queues only on the tx ports actually used.
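
As a reminder of what the multi-lookup path does, a burst of eight flows
can be classified with one bulk call instead of eight sequential lookups
(a minimal sketch assuming the rte_hash_lookup_bulk API, for which
rte_hash_lookup_multi is the older name; port_table stands in for
l3fwd's out-if table):

    #include <stdint.h>
    #include <rte_hash.h>

    static void
    lookup_x8(const struct rte_hash *h, const void *keys[8],
                    const uint16_t *port_table, uint16_t fallback_port,
                    uint16_t dst[8])
    {
            int32_t pos[8];
            int i;

            /* one bulk call amortizes hash computation over the burst */
            rte_hash_lookup_bulk(h, keys, 8, pos);

            for (i = 0; i < 8; i++)
                    dst[i] = (pos[i] < 0) ?
                                    fallback_port : port_table[pos[i]];
    }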


This patch should be applied after Maciej Czekaj's patch "l3fwd: Fix
compilation with HASH_MULTI_LOOKUP"


v6 changes:
 - use RTE_MACHINE_CPUFLAG_NEON instead of __ARM_NEON for ARM Neon
   detection

v5 changes:
 - removed debug information, patch cleanup

v4 changes:
 - rebased to be applicable after the patch "l3fwd: Fix compilation with
   HASH_MULTI_LOOKUP" by Maciej Czekaj

v3 changes:
 - "lpm: extend IPv4 next hop field" patch extends dst_port table from
   uint16_t to uint32_t omiting previously disabled l3fwd_em_hlm_sse.h,
   what causes incompatible pointer type error after turning on this header

v2 changes:
 - fixed a copy-paste error causing some packets not to be classified
   correctly in the hash_multi_lookup implementation when the burst size
   is not divisible by 8

Fixes: 94c54b4158d5 ("examples/l3fwd: rework exact-match")
Fixes: dc81ebbacaeb ("lpm: extend IPv4 next hop field")
Fixes: 64d3955de1de ("examples/l3fwd: fix ARM build")

Reported-by: Qian Xu 
Signed-off-by: Tomasz Kulasek 
---
 examples/l3fwd/l3fwd.h|6 ++
 examples/l3fwd/l3fwd_em.c |8 
 examples/l3fwd/l3fwd_em_hlm_sse.h |   28 ++--
 examples/l3fwd/l3fwd_em_sse.h |9 +
 examples/l3fwd/l3fwd_lpm.c|4 ++--
 examples/l3fwd/main.c |7 +++
 6 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index 7dcc7e5..726e8cc 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -40,6 +40,10 @@

 #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1

+#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON)
+#define NO_HASH_MULTI_LOOKUP 1
+#endif
+
 #define MAX_PKT_BURST 32
 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

@@ -86,6 +90,8 @@ struct lcore_rx_queue {
 struct lcore_conf {
uint16_t n_rx_queue;
struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+   uint16_t n_tx_port;
+   uint16_t tx_port_id[RTE_MAX_ETHPORTS];
uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
void *ipv4_lookup_struct;
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 0adf8f4..50f09e5 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -250,7 +250,7 @@ em_mask_key(void *key, xmm_t mask)

return _mm_and_si128(data, mask);
 }
-#elif defined(__ARM_NEON)
+#elif defined(RTE_MACHINE_CPUFLAG_NEON)
 static inline xmm_t
 em_mask_key(void *key, xmm_t mask)
 {
@@ -320,7 +320,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, void 
*lookup_struct)
  * buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1.
  */
 #if defined(__SSE4_1__)
-#ifndef HASH_MULTI_LOOKUP
+#if defined(NO_HASH_MULTI_LOOKUP)
 #include "l3fwd_em_sse.h"
 #else
 #include "l3fwd_em_hlm_sse.h"
@@ -568,8 +568,8 @@ em_main_loop(__attribute__((unused)) void *dummy)
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {

-   for (i = 0; i < qconf->n_rx_queue; i++) {
-   portid = qconf->rx_queue_list[i].port_id;
+   for (i = 0; i < qconf->n_tx_port; ++i) {
+   portid = qconf->tx_port_id[i];
if (qconf->tx_mbufs[portid].len == 0)
continue;
send_burst(qconf,
diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h 
b/examples/l3fwd/l3fwd_em_hlm_sse.h
index 891ae2e..7faf04a 100644
--- a/examples/l3fwd/l3fwd_em_hlm_sse.h
+++ b/examples/l3fwd/l3fwd_em_hlm_sse.h
@@ -34,17 +34,9 @@
 #ifndef __L3FWD_EM_HLM_SSE_H__
 #define __L3FWD_EM_HLM_SSE_H__

-/**
- * @file
- * This is an optional implementation of packet classification in Exact-Match
- * path using rte_hash_lookup_multi method from previous implementation.
- * While sequential classification seems to be faster, it's disabled by default
- * and can be enabled with HASH_LOOKUP_MULTI global define in compilation time.
- */
-
 #include "l3fwd_sse.h"

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
uint8_t portid, uint32_t dst_port[8])
 {
@@ -168,7 +160,7 @@ get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
 }

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_por

[dpdk-dev] [PATCH v5] examples/l3fwd: em path performance fix

2016-03-18 Thread Tomasz Kulasek
It seems that for most use cases the previous hash_multi_lookup provides
better performance; moreover, a sequential lookup can cause a significant
performance drop.

This patch makes the previously optional hash_multi_lookup method the
default. It also provides some minor optimizations, such as draining
queues only on the tx ports actually used.


This patch should be applied after Maciej Czekaj's patch "l3fwd: Fix
compilation with HASH_MULTI_LOOKUP"


v5 changes:
 - removed debug information, patch cleanup

v4 changes:
 - rebased to be applicable after the patch "l3fwd: Fix compilation with
   HASH_MULTI_LOOKUP" by Maciej Czekaj

v3 changes:
 - "lpm: extend IPv4 next hop field" patch extends dst_port table from
   uint16_t to uint32_t omiting previously disabled l3fwd_em_hlm_sse.h,
   what causes incompatible pointer type error after turning on this header

v2 changes:
 - fixed a copy-paste error causing some packets not to be classified
   correctly in the hash_multi_lookup implementation when the burst size
   is not divisible by 8

Fixes: 94c54b4158d5 ("examples/l3fwd: rework exact-match")
Fixes: dc81ebbacaeb ("lpm: extend IPv4 next hop field")
Fixes: 64d3955de1de ("examples/l3fwd: fix ARM build")

Reported-by: Qian Xu 
Signed-off-by: Tomasz Kulasek 
---
 examples/l3fwd/l3fwd.h|6 ++
 examples/l3fwd/l3fwd_em.c |6 +++---
 examples/l3fwd/l3fwd_em_hlm_sse.h |   28 ++--
 examples/l3fwd/l3fwd_em_sse.h |9 +
 examples/l3fwd/l3fwd_lpm.c|4 ++--
 examples/l3fwd/main.c |7 +++
 6 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index 7dcc7e5..1b148e7 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -40,6 +40,10 @@

 #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1

+#if !defined(NO_HASH_MULTI_LOOKUP) && defined(__ARM_NEON)
+#define NO_HASH_MULTI_LOOKUP 1
+#endif
+
 #define MAX_PKT_BURST 32
 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

@@ -86,6 +90,8 @@ struct lcore_rx_queue {
 struct lcore_conf {
uint16_t n_rx_queue;
struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+   uint16_t n_tx_port;
+   uint16_t tx_port_id[RTE_MAX_ETHPORTS];
uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
void *ipv4_lookup_struct;
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 0adf8f4..5a2e7ff 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -320,7 +320,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, void 
*lookup_struct)
  * buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1.
  */
 #if defined(__SSE4_1__)
-#ifndef HASH_MULTI_LOOKUP
+#if defined(NO_HASH_MULTI_LOOKUP)
 #include "l3fwd_em_sse.h"
 #else
 #include "l3fwd_em_hlm_sse.h"
@@ -568,8 +568,8 @@ em_main_loop(__attribute__((unused)) void *dummy)
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {

-   for (i = 0; i < qconf->n_rx_queue; i++) {
-   portid = qconf->rx_queue_list[i].port_id;
+   for (i = 0; i < qconf->n_tx_port; ++i) {
+   portid = qconf->tx_port_id[i];
if (qconf->tx_mbufs[portid].len == 0)
continue;
send_burst(qconf,
diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h 
b/examples/l3fwd/l3fwd_em_hlm_sse.h
index 891ae2e..7faf04a 100644
--- a/examples/l3fwd/l3fwd_em_hlm_sse.h
+++ b/examples/l3fwd/l3fwd_em_hlm_sse.h
@@ -34,17 +34,9 @@
 #ifndef __L3FWD_EM_HLM_SSE_H__
 #define __L3FWD_EM_HLM_SSE_H__

-/**
- * @file
- * This is an optional implementation of packet classification in Exact-Match
- * path using rte_hash_lookup_multi method from previous implementation.
- * While sequential classification seems to be faster, it's disabled by default
- * and can be enabled with HASH_LOOKUP_MULTI global define in compilation time.
- */
-
 #include "l3fwd_sse.h"

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
uint8_t portid, uint32_t dst_port[8])
 {
@@ -168,7 +160,7 @@ get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
 }

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
uint8_t portid, uint32_t dst_port[8])
 {
@@ -322,17 +314,17 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf 
**pkts_burst,

} else {
dst_port[j]   = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   

[dpdk-dev] [PATCH v4] examples/l3fwd: em path performance fix

2016-03-18 Thread Tomasz Kulasek
It seems that for most use cases the previous hash_multi_lookup provides
better performance; moreover, a sequential lookup can cause a significant
performance drop.

This patch makes the previously optional hash_multi_lookup method the
default. It also provides some minor optimizations, such as draining
queues only on the tx ports actually used.


This patch should be applied after Maciej Czekaj's patch "l3fwd: Fix
compilation with HASH_MULTI_LOOKUP"


v4 changes:
 - rebased to be applicable after the patch "l3fwd: Fix compilation with
   HASH_MULTI_LOOKUP" by Maciej Czekaj

v3 changes:
 - "lpm: extend IPv4 next hop field" patch extends dst_port table from
   uint16_t to uint32_t omiting previously disabled l3fwd_em_hlm_sse.h,
   what causes incompatible pointer type error after turning on this header

v2 changes:
 - fixed a copy-paste error causing some packets not to be classified
   correctly in the hash_multi_lookup implementation when the burst size
   is not divisible by 8

Fixes: 94c54b4158d5 ("examples/l3fwd: rework exact-match")

Reported-by: Qian Xu 
Signed-off-by: Tomasz Kulasek 
---
 examples/l3fwd/l3fwd.h|8 
 examples/l3fwd/l3fwd_em.c |6 +++---
 examples/l3fwd/l3fwd_em_hlm_sse.h |   28 ++--
 examples/l3fwd/l3fwd_em_sse.h |9 +
 examples/l3fwd/l3fwd_lpm.c|4 ++--
 examples/l3fwd/main.c |7 +++
 6 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index 7dcc7e5..69dcc17 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -40,6 +40,12 @@

 #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1

+#define __ARM_NEON 1
+
+#if !defined(NO_HASH_MULTI_LOOKUP) && defined(__ARM_NEON)
+#define NO_HASH_MULTI_LOOKUP 1
+#endif
+
 #define MAX_PKT_BURST 32
 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

@@ -86,6 +92,8 @@ struct lcore_rx_queue {
 struct lcore_conf {
uint16_t n_rx_queue;
struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+   uint16_t n_tx_port;
+   uint16_t tx_port_id[RTE_MAX_ETHPORTS];
uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
void *ipv4_lookup_struct;
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 0adf8f4..5a2e7ff 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -320,7 +320,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, void 
*lookup_struct)
  * buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1.
  */
 #if defined(__SSE4_1__)
-#ifndef HASH_MULTI_LOOKUP
+#if defined(NO_HASH_MULTI_LOOKUP)
 #include "l3fwd_em_sse.h"
 #else
 #include "l3fwd_em_hlm_sse.h"
@@ -568,8 +568,8 @@ em_main_loop(__attribute__((unused)) void *dummy)
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {

-   for (i = 0; i < qconf->n_rx_queue; i++) {
-   portid = qconf->rx_queue_list[i].port_id;
+   for (i = 0; i < qconf->n_tx_port; ++i) {
+   portid = qconf->tx_port_id[i];
if (qconf->tx_mbufs[portid].len == 0)
continue;
send_burst(qconf,
diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h 
b/examples/l3fwd/l3fwd_em_hlm_sse.h
index 891ae2e..7faf04a 100644
--- a/examples/l3fwd/l3fwd_em_hlm_sse.h
+++ b/examples/l3fwd/l3fwd_em_hlm_sse.h
@@ -34,17 +34,9 @@
 #ifndef __L3FWD_EM_HLM_SSE_H__
 #define __L3FWD_EM_HLM_SSE_H__

-/**
- * @file
- * This is an optional implementation of packet classification in Exact-Match
- * path using rte_hash_lookup_multi method from previous implementation.
- * While sequential classification seems to be faster, it's disabled by default
- * and can be enabled with HASH_LOOKUP_MULTI global define in compilation time.
- */
-
 #include "l3fwd_sse.h"

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
uint8_t portid, uint32_t dst_port[8])
 {
@@ -168,7 +160,7 @@ get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
 }

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
uint8_t portid, uint32_t dst_port[8])
 {
@@ -322,17 +314,17 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf 
**pkts_burst,

} else {
dst_port[j]   = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-  

[dpdk-dev] [PATCH v3] examples/l3fwd: em path performance fix

2016-03-11 Thread Tomasz Kulasek
It seems that for most use cases the previous hash_multi_lookup provides
better performance; moreover, a sequential lookup can cause a significant
performance drop.

This patch makes the previously optional hash_multi_lookup method the
default. It also provides some minor optimizations, such as draining
queues only on the tx ports actually used.

v3 changes:
 - "lpm: extend IPv4 next hop field" patch extends dst_port table from
   uint16_t to uint32_t omiting previously disabled l3fwd_em_hlm_sse.h,
   what causes incompatible pointer type error after turning on this header

v2 changes:
 - fixed a copy-paste error causing some packets not to be classified
   correctly in the hash_multi_lookup implementation when the burst size
   is not divisible by 8

Fixes: 94c54b4158d5 ("examples/l3fwd: rework exact-match")
Fixes: dc81ebbacaeb ("lpm: extend IPv4 next hop field")

Reported-by: Qian Xu 
Signed-off-by: Tomasz Kulasek 
---
 examples/l3fwd/l3fwd.h|2 ++
 examples/l3fwd/l3fwd_em.c |6 +++---
 examples/l3fwd/l3fwd_em_hlm_sse.h |   34 +-
 examples/l3fwd/l3fwd_em_sse.h |9 +
 examples/l3fwd/l3fwd_lpm.c|4 ++--
 examples/l3fwd/main.c |7 +++
 6 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index da6d369..207a60a 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -84,6 +84,8 @@ struct lcore_rx_queue {
 struct lcore_conf {
uint16_t n_rx_queue;
struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+   uint16_t n_tx_port;
+   uint16_t tx_port_id[RTE_MAX_ETHPORTS];
uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
void *ipv4_lookup_struct;
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index f6a65d8..c8c781d 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -305,7 +305,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, void 
*lookup_struct)
  * buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1.
  */
 #if defined(__SSE4_1__)
-#ifndef HASH_MULTI_LOOKUP
+#ifdef NO_HASH_MULTI_LOOKUP
 #include "l3fwd_em_sse.h"
 #else
 #include "l3fwd_em_hlm_sse.h"
@@ -552,8 +552,8 @@ em_main_loop(__attribute__((unused)) void *dummy)
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {

-   for (i = 0; i < qconf->n_rx_queue; i++) {
-   portid = qconf->rx_queue_list[i].port_id;
+   for (i = 0; i < qconf->n_tx_port; ++i) {
+   portid = qconf->tx_port_id[i];
if (qconf->tx_mbufs[portid].len == 0)
continue;
send_burst(qconf,
diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h 
b/examples/l3fwd/l3fwd_em_hlm_sse.h
index d3388da..328dcb8 100644
--- a/examples/l3fwd/l3fwd_em_hlm_sse.h
+++ b/examples/l3fwd/l3fwd_em_hlm_sse.h
@@ -34,19 +34,11 @@
 #ifndef __L3FWD_EM_HLM_SSE_H__
 #define __L3FWD_EM_HLM_SSE_H__

-/**
- * @file
- * This is an optional implementation of packet classification in Exact-Match
- * path using rte_hash_lookup_multi method from previous implementation.
- * While sequential classification seems to be faster, it's disabled by default
- * and can be enabled with HASH_LOOKUP_MULTI global define in compilation time.
- */
-
 #include "l3fwd_sse.h"

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
-   uint8_t portid, uint16_t dst_port[8])
+   uint8_t portid, uint32_t dst_port[8])
 {
int32_t ret[8];
union ipv4_5tuple_host key[8];
@@ -168,9 +160,9 @@ get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
 }

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
-   uint8_t portid, uint16_t dst_port[8])
+   uint8_t portid, uint32_t dst_port[8])
 {
int32_t ret[8];
union ipv6_5tuple_host key[8];
@@ -292,7 +284,7 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf 
**pkts_burst,
uint8_t portid, struct lcore_conf *qconf)
 {
int32_t j;
-   uint16_t dst_port[MAX_PKT_BURST];
+   uint32_t dst_port[MAX_PKT_BURST];

/*
 * Send nb_rx - nb_rx%8 packets
@@ -322,17 +314,17 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf 
**pkts_burst,

} else {
dst_port[j]   = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+2]

[dpdk-dev] [PATCH v4 2/2] examples: rework to use buffered tx

2016-03-10 Thread Tomasz Kulasek
The internal buffering of packets for TX in sample apps is no longer
needed, so this patchset also replaces this code with calls to the new
rte_eth_tx_buffer* APIs in:

* l2fwd-jobstats
* l2fwd-keepalive
* l2fwd
* l3fwd-acl
* l3fwd-power
* link_status_interrupt
* client_server_mp
* l2fwd_fork
* packet_ordering
* qos_meter

v3 changes
 - updated due to the change of callback name

v2 changes
 - rework synced with tx buffer API changes

Signed-off-by: Tomasz Kulasek 
Acked-by: Konstantin Ananyev 
---
 examples/l2fwd-jobstats/main.c |  104 +++--
 examples/l2fwd-keepalive/main.c|  100 ++--
 examples/l2fwd/main.c  |  104 +++--
 examples/l3fwd-acl/main.c  |   92 ++-
 examples/l3fwd-power/main.c|   89 ++
 examples/link_status_interrupt/main.c  |  107 +++--
 .../client_server_mp/mp_client/client.c|  101 +---
 examples/multi_process/l2fwd_fork/main.c   |   97 +++-
 examples/packet_ordering/main.c|  122 ++--
 examples/qos_meter/main.c  |   61 +++---
 10 files changed, 436 insertions(+), 541 deletions(-)

diff --git a/examples/l2fwd-jobstats/main.c b/examples/l2fwd-jobstats/main.c
index 6da60e0..d1e9bf7 100644
--- a/examples/l2fwd-jobstats/main.c
+++ b/examples/l2fwd-jobstats/main.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -97,18 +98,12 @@ static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];

 static unsigned int l2fwd_rx_queue_per_lcore = 1;

-struct mbuf_table {
-   uint64_t next_flush_time;
-   unsigned len;
-   struct rte_mbuf *mbufs[MAX_PKT_BURST];
-};
-
 #define MAX_RX_QUEUE_PER_LCORE 16
 #define MAX_TX_QUEUE_PER_PORT 16
 struct lcore_queue_conf {
unsigned n_rx_port;
unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
-   struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+   uint64_t next_flush_time[RTE_MAX_ETHPORTS];

struct rte_timer rx_timers[MAX_RX_QUEUE_PER_LCORE];
struct rte_jobstats port_fwd_jobs[MAX_RX_QUEUE_PER_LCORE];
@@ -123,6 +118,8 @@ struct lcore_queue_conf {
 } __rte_cache_aligned;
 struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];

+struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
 static const struct rte_eth_conf port_conf = {
.rxmode = {
.split_hdr_size = 0,
@@ -373,59 +370,14 @@ show_stats_cb(__rte_unused void *param)
rte_eal_alarm_set(timer_period * US_PER_S, show_stats_cb, NULL);
 }

-/* Send the burst of packets on an output interface */
-static void
-l2fwd_send_burst(struct lcore_queue_conf *qconf, uint8_t port)
-{
-   struct mbuf_table *m_table;
-   uint16_t ret;
-   uint16_t queueid = 0;
-   uint16_t n;
-
-   m_table = &qconf->tx_mbufs[port];
-   n = m_table->len;
-
-   m_table->next_flush_time = rte_get_timer_cycles() + drain_tsc;
-   m_table->len = 0;
-
-   ret = rte_eth_tx_burst(port, queueid, m_table->mbufs, n);
-
-   port_statistics[port].tx += ret;
-   if (unlikely(ret < n)) {
-   port_statistics[port].dropped += (n - ret);
-   do {
-   rte_pktmbuf_free(m_table->mbufs[ret]);
-   } while (++ret < n);
-   }
-}
-
-/* Enqueue packets for TX and prepare them to be sent */
-static int
-l2fwd_send_packet(struct rte_mbuf *m, uint8_t port)
-{
-   const unsigned lcore_id = rte_lcore_id();
-   struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
-   struct mbuf_table *m_table = &qconf->tx_mbufs[port];
-   uint16_t len = qconf->tx_mbufs[port].len;
-
-   m_table->mbufs[len] = m;
-
-   len++;
-   m_table->len = len;
-
-   /* Enough pkts to be sent. */
-   if (unlikely(len == MAX_PKT_BURST))
-   l2fwd_send_burst(qconf, port);
-
-   return 0;
-}
-
 static void
 l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
 {
struct ether_hdr *eth;
void *tmp;
+   int sent;
unsigned dst_port;
+   struct rte_eth_dev_tx_buffer *buffer;

dst_port = l2fwd_dst_ports[portid];
eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
@@ -437,7 +389,10 @@ l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
/* src addr */
ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr);

-   l2fwd_send_packet(m, (uint8_t) dst_port);
+   buffer = tx_buffer[dst_port];
+   sent = rte_eth_tx_buffer(dst_port, 0, buffer, m);
+   if (sent)
+

[dpdk-dev] [PATCH v4 1/2] ethdev: add buffered tx api

2016-03-10 Thread Tomasz Kulasek
Many sample apps include internal buffering for single-packet-at-a-time
operation. Since this is such a common paradigm, this functionality is
better suited to being implemented in the ethdev API.

The new APIs in the ethdev library are:
* rte_eth_tx_buffer_init - initialize buffer
* rte_eth_tx_buffer - buffer up a single packet for future transmission
* rte_eth_tx_buffer_flush - flush any unsent buffered packets
* rte_eth_tx_buffer_set_err_callback - set up a callback to be called in
  case transmitting a buffered burst fails. By default, we just free the
  unsent packets.

As well as these, additional reference callbacks are provided, which
free the packets:

* rte_eth_tx_buffer_drop_callback - silently drop packets (default
  behavior)
* rte_eth_tx_buffer_count_callback - drop and update user-provided counter
  to track the number of dropped packets

v4 changes:
 - added comments
 - changed names of the error callback and the user data
 - changed order of function names in map file

v3 changes:
 - the error counter was removed from the tx buffer structure; the default
   behavior is now to silently drop unsent packets
 - some names in the tx buffer structure were changed to be more descriptive
 - two default callbacks are provided: rte_eth_tx_buffer_drop_callback and
   rte_eth_tx_buffer_count_callback

Signed-off-by: Tomasz Kulasek 
Acked-by: Konstantin Ananyev 
---
 lib/librte_ether/rte_ethdev.c  |   46 +++
 lib/librte_ether/rte_ethdev.h  |  206 +++-
 lib/librte_ether/rte_ether_version.map |   10 ++
 3 files changed, 261 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 5c2b416..98587e1 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1271,6 +1271,52 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t 
tx_queue_id,
 }

 void
+rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent,
+   void *userdata __rte_unused)
+{
+   unsigned i;
+
+   for (i = 0; i < unsent; i++)
+   rte_pktmbuf_free(pkts[i]);
+}
+
+void
+rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent,
+   void *userdata)
+{
+   uint64_t *count = userdata;
+   unsigned i;
+
+   for (i = 0; i < unsent; i++)
+   rte_pktmbuf_free(pkts[i]);
+
+   *count += unsent;
+}
+
+int
+rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer,
+   buffer_tx_error_fn cbfn, void *userdata)
+{
+   buffer->error_callback = cbfn;
+   buffer->error_userdata = userdata;
+   return 0;
+}
+
+int
+rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size)
+{
+   if (buffer == NULL)
+   return -EINVAL;
+
+   buffer->size = size;
+   if (buffer->error_callback == NULL)
+   rte_eth_tx_buffer_set_err_callback(buffer,
+   rte_eth_tx_buffer_drop_callback, NULL);
+
+   return 0;
+}
+
+void
 rte_eth_promiscuous_enable(uint8_t port_id)
 {
struct rte_eth_dev *dev;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index d53e362..2062d6c 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -2655,6 +2655,210 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, 
nb_pkts);
 }

+typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
+   void *userdata);
+
+/**
+ * Structure used to buffer packets for future TX
+ * Used by APIs rte_eth_tx_buffer and rte_eth_tx_buffer_flush
+ */
+struct rte_eth_dev_tx_buffer {
+   buffer_tx_error_fn error_callback;
+   void *error_userdata;
+   uint16_t size;   /**< Size of buffer for buffered tx */
+   uint16_t length; /**< Number of packets in the array */
+   struct rte_mbuf *pkts[];
+   /**< Pending packets to be sent on explicit flush or when full */
+};
+
+/**
+ * Calculate the size of the tx buffer.
+ *
+ * @param sz
+ *   Number of stored packets.
+ */
+#define RTE_ETH_TX_BUFFER_SIZE(sz) \
+   (sizeof(struct rte_eth_dev_tx_buffer) + (sz) * sizeof(struct rte_mbuf 
*))
+
+/**
+ * Initialize default values for buffered transmitting
+ *
+ * @param buffer
+ *   Tx buffer to be initialized.
+ * @param size
+ *   Buffer size
+ * @return
+ *   0 if no error
+ */
+int
+rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size);
+
+/**
+ * Send any packets queued up for transmission on a port and HW queue
+ *
+ * This causes an explicit flus

[dpdk-dev] [PATCH v4 0/2] add support for buffered tx to ethdev

2016-03-10 Thread Tomasz Kulasek
Many sample apps include internal buffering for single-packet-at-a-time
operation. Since this is such a common paradigm, this functionality is
better suited to being implemented in the ethdev API.

The new APIs in the ethdev library are:
* rte_eth_tx_buffer_init - initialize buffer
* rte_eth_tx_buffer - buffer up a single packet for future transmission
* rte_eth_tx_buffer_flush - flush any unsent buffered packets
* rte_eth_tx_buffer_set_err_callback - set up a callback to be called in
  case transmitting a buffered burst fails. By default, we just free the
  unsent packets.

As well as these, additional reference callbacks are provided, which
free the packets:

* rte_eth_tx_buffer_drop_callback - silently drop packets (default
  behavior)
* rte_eth_tx_buffer_count_callback - drop and update user-provided counter
  to track the number of dropped packets

Based on feedback from the mailing list that buffer management facilities
in the user application are preferable to API simplicity, we decided to
move the internal buffer table, as well as the callback functions and
user data, from rte_eth_dev/rte_eth_dev_data to the application space.
This prevents ABI breakage and gives more flexibility in buffer
management, such as allocation, dynamic size changes, and reusing buffers
on many ports or after a failure.


The following steps illustrate how tx buffers can be used in an application:

1) Initialization

a) Allocate memory for a buffer

   struct rte_eth_dev_tx_buffer *buffer = rte_zmalloc_socket("tx_buffer",
   RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, socket_id);

   The RTE_ETH_TX_BUFFER_SIZE(size) macro computes the memory required
   to store "size" packets in the buffer.

b) Initialize allocated memory and set up default values. Threshold level
   must be lower than or equal to the MAX_PKT_BURST from 1a)

   rte_eth_tx_buffer_init(buffer, threshold);


c) Set error callback (optional)

   rte_eth_tx_buffer_set_err_callback(buffer, callback_fn, userdata);


2) Store packet "pkt" in buffer and send them all to the queue_id on
   port_id when number of packets reaches threshold level set up in 1b)

   rte_eth_tx_buffer(port_id, queue_id, buffer, pkt);


3) Send all stored packets to the queue_id on port_id

   rte_eth_tx_buffer_flush(port_id, queue_id, buffer);


4) Flush buffer and free memory

   rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
   ...
   rte_free(buffer);
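
Putting the steps together, a minimal end-to-end use of the API could
look like this (error handling trimmed; port_id, queue_id and socket_id
are assumed to be set up elsewhere):

    #include <rte_ethdev.h>
    #include <rte_malloc.h>

    #define MAX_PKT_BURST 32

    static uint64_t drop_count;

    static struct rte_eth_dev_tx_buffer *
    setup_tx_buffer(int socket_id)
    {
            struct rte_eth_dev_tx_buffer *buffer;

            buffer = rte_zmalloc_socket("tx_buffer",
                            RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
                            socket_id);
            if (buffer == NULL)
                    return NULL;

            rte_eth_tx_buffer_init(buffer, MAX_PKT_BURST);

            /* optional: count drops instead of silently freeing */
            rte_eth_tx_buffer_set_err_callback(buffer,
                            rte_eth_tx_buffer_count_callback, &drop_count);
            return buffer;
    }

    /* fast path: buffer one packet; a full buffer is flushed
     * automatically, a partially filled one by the periodic
     * rte_eth_tx_buffer_flush() call in the main loop */
    static void
    forward_pkt(uint8_t port_id, uint16_t queue_id,
                    struct rte_eth_dev_tx_buffer *buffer,
                    struct rte_mbuf *pkt)
    {
            rte_eth_tx_buffer(port_id, queue_id, buffer, pkt);
    }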

v4 changes:
 - added comments
 - changed names of the error callback and the user data
 - changed order of function names in map file

v3 changes:
 - the error counter was removed from the tx buffer structure; the default
   behavior is now to silently drop unsent packets
 - some names in the tx buffer structure were changed to be more descriptive
 - two default callbacks are provided: rte_eth_tx_buffer_drop_callback and
   rte_eth_tx_buffer_count_callback

v2 changes:
 - reworked to use new buffer model
 - buffer data and callbacks are removed from rte_eth_dev/rte_eth_dev_data,
   so this patch doesn't break the ABI anymore
 - introduced RTE_ETH_TX_BUFFER macro and rte_eth_tx_buffer_init
 - buffers are not attached to a port-queue pair
 - buffers can be allocated dynamically while the application is running
 - the buffer size can be changed without a port restart

Tomasz Kulasek (2):
  ethdev: add buffered tx api
  examples: rework to use buffered tx

 examples/l2fwd-jobstats/main.c |  104 --
 examples/l2fwd-keepalive/main.c|  100 --
 examples/l2fwd/main.c  |  104 --
 examples/l3fwd-acl/main.c  |   92 -
 examples/l3fwd-power/main.c|   89 -
 examples/link_status_interrupt/main.c  |  107 --
 .../client_server_mp/mp_client/client.c|  101 ++
 examples/multi_process/l2fwd_fork/main.c   |   97 -
 examples/packet_ordering/main.c|  122 
 examples/qos_meter/main.c  |   61 ++
 lib/librte_ether/rte_ethdev.c  |   46 +
 lib/librte_ether/rte_ethdev.h  |  206 +++-
 lib/librte_ether/rte_ether_version.map |   10 +
 13 files changed, 697 insertions(+), 542 deletions(-)

-- 
1.7.9.5



[dpdk-dev] [PATCH v3 2/2] examples: rework to use buffered tx

2016-03-10 Thread Tomasz Kulasek
The internal buffering of packets for TX in sample apps is no longer
needed, so this patchset also replaces this code with calls to the new
rte_eth_tx_buffer* APIs in:

* l2fwd-jobstats
* l2fwd-keepalive
* l2fwd
* l3fwd-acl
* l3fwd-power
* link_status_interrupt
* client_server_mp
* l2fwd_fork
* packet_ordering
* qos_meter

v3 changes
 - updated due to the change of callback name

v2 changes
 - rework synced with tx buffer API changes

Signed-off-by: Tomasz Kulasek 
---
 examples/l2fwd-jobstats/main.c |  104 +++--
 examples/l2fwd-keepalive/main.c|  100 ++--
 examples/l2fwd/main.c  |  104 +++--
 examples/l3fwd-acl/main.c  |   92 ++-
 examples/l3fwd-power/main.c|   89 ++
 examples/link_status_interrupt/main.c  |  107 +++--
 .../client_server_mp/mp_client/client.c|  101 +---
 examples/multi_process/l2fwd_fork/main.c   |   97 +++-
 examples/packet_ordering/main.c|  122 ++--
 examples/qos_meter/main.c  |   61 +++---
 10 files changed, 436 insertions(+), 541 deletions(-)

diff --git a/examples/l2fwd-jobstats/main.c b/examples/l2fwd-jobstats/main.c
index 6da60e0..d1e9bf7 100644
--- a/examples/l2fwd-jobstats/main.c
+++ b/examples/l2fwd-jobstats/main.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -97,18 +98,12 @@ static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];

 static unsigned int l2fwd_rx_queue_per_lcore = 1;

-struct mbuf_table {
-   uint64_t next_flush_time;
-   unsigned len;
-   struct rte_mbuf *mbufs[MAX_PKT_BURST];
-};
-
 #define MAX_RX_QUEUE_PER_LCORE 16
 #define MAX_TX_QUEUE_PER_PORT 16
 struct lcore_queue_conf {
unsigned n_rx_port;
unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
-   struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+   uint64_t next_flush_time[RTE_MAX_ETHPORTS];

struct rte_timer rx_timers[MAX_RX_QUEUE_PER_LCORE];
struct rte_jobstats port_fwd_jobs[MAX_RX_QUEUE_PER_LCORE];
@@ -123,6 +118,8 @@ struct lcore_queue_conf {
 } __rte_cache_aligned;
 struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];

+struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
 static const struct rte_eth_conf port_conf = {
.rxmode = {
.split_hdr_size = 0,
@@ -373,59 +370,14 @@ show_stats_cb(__rte_unused void *param)
rte_eal_alarm_set(timer_period * US_PER_S, show_stats_cb, NULL);
 }

-/* Send the burst of packets on an output interface */
-static void
-l2fwd_send_burst(struct lcore_queue_conf *qconf, uint8_t port)
-{
-   struct mbuf_table *m_table;
-   uint16_t ret;
-   uint16_t queueid = 0;
-   uint16_t n;
-
-   m_table = &qconf->tx_mbufs[port];
-   n = m_table->len;
-
-   m_table->next_flush_time = rte_get_timer_cycles() + drain_tsc;
-   m_table->len = 0;
-
-   ret = rte_eth_tx_burst(port, queueid, m_table->mbufs, n);
-
-   port_statistics[port].tx += ret;
-   if (unlikely(ret < n)) {
-   port_statistics[port].dropped += (n - ret);
-   do {
-   rte_pktmbuf_free(m_table->mbufs[ret]);
-   } while (++ret < n);
-   }
-}
-
-/* Enqueue packets for TX and prepare them to be sent */
-static int
-l2fwd_send_packet(struct rte_mbuf *m, uint8_t port)
-{
-   const unsigned lcore_id = rte_lcore_id();
-   struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
-   struct mbuf_table *m_table = &qconf->tx_mbufs[port];
-   uint16_t len = qconf->tx_mbufs[port].len;
-
-   m_table->mbufs[len] = m;
-
-   len++;
-   m_table->len = len;
-
-   /* Enough pkts to be sent. */
-   if (unlikely(len == MAX_PKT_BURST))
-   l2fwd_send_burst(qconf, port);
-
-   return 0;
-}
-
 static void
 l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
 {
struct ether_hdr *eth;
void *tmp;
+   int sent;
unsigned dst_port;
+   struct rte_eth_dev_tx_buffer *buffer;

dst_port = l2fwd_dst_ports[portid];
eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
@@ -437,7 +389,10 @@ l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
/* src addr */
ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr);

-   l2fwd_send_packet(m, (uint8_t) dst_port);
+   buffer = tx_buffer[dst_port];
+   sent = rte_eth_tx_buffer(dst_port, 0, buffer, m);
+   if (sent)
+   port_statistics[dst_port].tx += sent;

[dpdk-dev] [PATCH v3 1/2] ethdev: add buffered tx api

2016-03-10 Thread Tomasz Kulasek
Many sample apps include internal buffering for single-packet-at-a-time
operation. Since this is such a common paradigm, this functionality is
better suited to being implemented in the ethdev API.

The new APIs in the ethdev library are:
* rte_eth_tx_buffer_init - initialize buffer
* rte_eth_tx_buffer - buffer up a single packet for future transmission
* rte_eth_tx_buffer_flush - flush any unsent buffered packets
* rte_eth_tx_buffer_set_err_callback - set up a callback to be called in
  case transmitting a buffered burst fails. By default, we just free the
  unsent packets.

As well as these, two additional reference callbacks are provided, which
free the packets:

* rte_eth_tx_buffer_drop_callback - silently drop packets (default
  behavior)
* rte_eth_tx_buffer_count_callback - drop and update user-provided counter
  to track the number of dropped packets
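
As a rough illustration (not part of this patch), switching from the
default drop behavior to counted drops only takes registering the count
callback; "tx_drops" here is a hypothetical application-side counter:

   static uint64_t tx_drops;

   /* after rte_eth_tx_buffer_init(), replace the default drop callback */
   rte_eth_tx_buffer_set_err_callback(buffer,
           rte_eth_tx_buffer_count_callback, &tx_drops);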

v3 changes:
 - error counter removed from the tx buffer structure; the default behavior
   is now a silent drop of unsent packets
 - some names were changed in the tx buffer structure to be more descriptive
 - two default callbacks are provided: rte_eth_tx_buffer_drop_callback and
   rte_eth_tx_buffer_count_callback

Signed-off-by: Tomasz Kulasek 
---
 lib/librte_ether/rte_ethdev.c  |   46 +++
 lib/librte_ether/rte_ethdev.h  |  205 +++-
 lib/librte_ether/rte_ether_version.map |   10 ++
 3 files changed, 260 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 5c2b416..b682af4 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1271,6 +1271,52 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t 
tx_queue_id,
 }

 void
+rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent,
+   void *userdata __rte_unused)
+{
+   unsigned i;
+
+   for (i = 0; i < unsent; i++)
+   rte_pktmbuf_free(pkts[i]);
+}
+
+void
+rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent,
+   void *userdata)
+{
+   uint64_t *count = userdata;
+   unsigned i;
+
+   for (i = 0; i < unsent; i++)
+   rte_pktmbuf_free(pkts[i]);
+
+   *count += unsent;
+}
+
+int
+rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer,
+   buffer_tx_error_fn cbfn, void *userdata)
+{
+   buffer->callback = cbfn;
+   buffer->userdata = userdata;
+   return 0;
+}
+
+int
+rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size)
+{
+   if (buffer == NULL)
+   return -EINVAL;
+
+   buffer->size = size;
+   if (buffer->callback == NULL)
+   rte_eth_tx_buffer_set_err_callback(buffer,
+   rte_eth_tx_buffer_drop_callback, NULL);
+
+   return 0;
+}
+
+void
 rte_eth_promiscuous_enable(uint8_t port_id)
 {
struct rte_eth_dev *dev;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index d53e362..c457728 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -2655,6 +2655,209 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, 
nb_pkts);
 }

+typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
+   void *userdata);
+
+/**
+ * Structure used to buffer packets for future TX
+ * Used by APIs rte_eth_tx_buffer and rte_eth_tx_buffer_flush
+ */
+struct rte_eth_dev_tx_buffer {
+   buffer_tx_error_fn callback;
+   void *userdata;
+   uint16_t size;   /**< Size of buffer for buffered tx */
+   uint16_t length; /**< Number of packets currently in the array */
+   struct rte_mbuf *pkts[];
+};
+
+/**
+ * Calculate the size of the tx buffer.
+ *
+ * @param sz
+ *   Number of stored packets.
+ */
+#define RTE_ETH_TX_BUFFER_SIZE(sz) \
+   (sizeof(struct rte_eth_dev_tx_buffer) + (sz) * sizeof(struct rte_mbuf 
*))
+
+/**
+ * Initialize default values for buffered transmitting
+ *
+ * @param buffer
+ *   Tx buffer to be initialized.
+ * @param size
+ *   Buffer size
+ * @return
+ *   0 if no error
+ */
+int
+rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size);
+
+/**
+ * Send any packets queued up for transmission on a port and HW queue
+ *
+ * This causes an explicit flush of packets previously buffered via the
+ * rte_eth_tx_buffer() function. It returns the number of packets successfully
+ * sent to the NIC, and calls the error callback for any unsent packets. Unless
+ * explicitly set up otherwise, the default callback simply frees the unsent
+ * packets back to the owning mempool.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.

[dpdk-dev] [PATCH v3 0/2] add support for buffered tx to ethdev

2016-03-10 Thread Tomasz Kulasek
Many sample apps include internal buffering for single-packet-at-a-time
operation. Since this is such a common paradigm, this functionality is
better suited to being implemented in the ethdev API.

The new APIs in the ethdev library are:
* rte_eth_tx_buffer_init - initialize buffer
* rte_eth_tx_buffer - buffer up a single packet for future transmission
* rte_eth_tx_buffer_flush - flush any unsent buffered packets
* rte_eth_tx_buffer_set_err_callback - set up a callback to be called in
  case transmitting a buffered burst fails. By default, we just free the
  unsent packets.

As well as these, two additional reference callbacks are provided, which
free the packets:

* rte_eth_tx_buffer_drop_callback - silently drop packets (default
  behavior)
* rte_eth_tx_buffer_count_callback - drop and update user-provided counter
  to track the number of dropped packets

Based on feedback from the mailing list that buffer management facilities
in the user application are preferable to API simplicity, we decided to
move the internal buffer table, as well as the callback functions and user
data, from rte_eth_dev/rte_eth_dev_data to the application space.
This prevents ABI breakage and gives more flexibility in buffer
management, such as allocation, dynamic size change, and reuse of buffers
on many ports or after a failure.


The following steps illustrate how tx buffers can be used in application:

1) Initialization

a) Allocate memory for a buffer

   struct rte_eth_dev_tx_buffer *buffer = rte_zmalloc_socket("tx_buffer",
   RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, socket_id);

   RTE_ETH_TX_BUFFER_SIZE(size) macro computes memory required to store
   "size" packets in buffer.

b) Initialize allocated memory and set up default values. Threshold level
   must be lower than or equal to the MAX_PKT_BURST from 1a)

   rte_eth_tx_buffer_init(buffer, threshold);


c) Set error callback (optional)

   rte_eth_tx_buffer_set_err_callback(buffer, callback_fn, userdata);


2) Store packet "pkt" in the buffer; all buffered packets are sent to the
   queue_id on port_id when the number of packets reaches the threshold
   level set up in 1b)

   rte_eth_tx_buffer(port_id, queue_id, buffer, pkt);


3) Send all stored packets to the queue_id on port_id

   rte_eth_tx_buffer_flush(port_id, queue_id, buffer);


4) Flush buffer and free memory

   rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
   ...
   rte_free(buffer);
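
Putting the steps together, a minimal sketch of a forwarding loop built on
this API could look as below; port_id, queue_id, socket_id and
MAX_PKT_BURST are assumed to be provided by the application:

   struct rte_eth_dev_tx_buffer *buffer;
   struct rte_mbuf *pkts[MAX_PKT_BURST];
   uint16_t nb_rx, i;

   buffer = rte_zmalloc_socket("tx_buffer",
           RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, socket_id);
   if (buffer == NULL)
           rte_exit(EXIT_FAILURE, "cannot allocate tx buffer\n");
   rte_eth_tx_buffer_init(buffer, MAX_PKT_BURST);

   for (;;) {
           nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, MAX_PKT_BURST);
           for (i = 0; i < nb_rx; i++)
                   rte_eth_tx_buffer(port_id, queue_id, buffer, pkts[i]);
           if (nb_rx == 0)
                   /* idle: push out anything still buffered */
                   rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
   }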

v3 changes:
 - error counter removed from the tx buffer structure; the default behavior
   is now a silent drop of unsent packets
 - some names were changed in the tx buffer structure to be more descriptive
 - two default callbacks are provided: rte_eth_tx_buffer_drop_callback and
   rte_eth_tx_buffer_count_callback

v2 changes:
 - reworked to use new buffer model
 - buffer data and callbacks are removed from rte_eth_dev/rte_eth_dev_data,
   so this patch doesn't break the ABI anymore
 - introduced the RTE_ETH_TX_BUFFER_SIZE macro and rte_eth_tx_buffer_init
 - buffers are not attached to the port-queue
 - buffers can be allocated dynamically during application work
 - size of buffer can be changed without port restart

Tomasz Kulasek (2):
  ethdev: add buffered tx api
  examples: rework to use buffered tx

 examples/l2fwd-jobstats/main.c |  104 --
 examples/l2fwd-keepalive/main.c|  100 --
 examples/l2fwd/main.c  |  104 --
 examples/l3fwd-acl/main.c  |   92 -
 examples/l3fwd-power/main.c|   89 -
 examples/link_status_interrupt/main.c  |  107 --
 .../client_server_mp/mp_client/client.c|  101 ++
 examples/multi_process/l2fwd_fork/main.c   |   97 -
 examples/packet_ordering/main.c|  122 
 examples/qos_meter/main.c  |   61 ++
 lib/librte_ether/rte_ethdev.c  |   46 +
 lib/librte_ether/rte_ethdev.h  |  205 +++-
 lib/librte_ether/rte_ether_version.map |   10 +
 13 files changed, 696 insertions(+), 542 deletions(-)

-- 
1.7.9.5



[dpdk-dev] [PATCH v2] examples/l3fwd: em path performance fix

2016-03-08 Thread Tomasz Kulasek
It seems that for most use cases the previous hash_multi_lookup provides
better performance; moreover, the sequential lookup can cause a significant
performance drop.

This patch makes the previously optional hash_multi_lookup method the
default. It also provides some minor optimizations, such as draining
queues only on the tx ports actually in use.

v2 changes:
 - fixed a copy-paste error in the hash_multi_lookup implementation that
   caused some packets to be classified incorrectly when the burst size
   is not divisible by 8

Fixes: 94c54b4158d5 ("examples/l3fwd: rework exact-match")

Reported-by: Qian Xu 
Signed-off-by: Tomasz Kulasek 
---
 examples/l3fwd/l3fwd.h|2 ++
 examples/l3fwd/l3fwd_em.c |6 +++---
 examples/l3fwd/l3fwd_em_hlm_sse.h |   28 ++--
 examples/l3fwd/l3fwd_em_sse.h |9 +
 examples/l3fwd/l3fwd_lpm.c|4 ++--
 examples/l3fwd/main.c |7 +++
 6 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index da6d369..207a60a 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -84,6 +84,8 @@ struct lcore_rx_queue {
 struct lcore_conf {
uint16_t n_rx_queue;
struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+   uint16_t n_tx_port;
+   uint16_t tx_port_id[RTE_MAX_ETHPORTS];
uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
void *ipv4_lookup_struct;
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index f6a65d8..c8c781d 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -305,7 +305,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, void 
*lookup_struct)
  * buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1.
  */
 #if defined(__SSE4_1__)
-#ifndef HASH_MULTI_LOOKUP
+#ifdef NO_HASH_MULTI_LOOKUP
 #include "l3fwd_em_sse.h"
 #else
 #include "l3fwd_em_hlm_sse.h"
@@ -552,8 +552,8 @@ em_main_loop(__attribute__((unused)) void *dummy)
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {

-   for (i = 0; i < qconf->n_rx_queue; i++) {
-   portid = qconf->rx_queue_list[i].port_id;
+   for (i = 0; i < qconf->n_tx_port; ++i) {
+   portid = qconf->tx_port_id[i];
if (qconf->tx_mbufs[portid].len == 0)
continue;
send_burst(qconf,
diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h 
b/examples/l3fwd/l3fwd_em_hlm_sse.h
index d3388da..f0cf421 100644
--- a/examples/l3fwd/l3fwd_em_hlm_sse.h
+++ b/examples/l3fwd/l3fwd_em_hlm_sse.h
@@ -34,17 +34,9 @@
 #ifndef __L3FWD_EM_HLM_SSE_H__
 #define __L3FWD_EM_HLM_SSE_H__

-/**
- * @file
- * This is an optional implementation of packet classification in Exact-Match
- * path using rte_hash_lookup_multi method from previous implementation.
- * While sequential classification seems to be faster, it's disabled by default
- * and can be enabled with HASH_LOOKUP_MULTI global define in compilation time.
- */
-
 #include "l3fwd_sse.h"

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
uint8_t portid, uint16_t dst_port[8])
 {
@@ -168,7 +160,7 @@ get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
 }

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
uint8_t portid, uint16_t dst_port[8])
 {
@@ -322,17 +314,17 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf 
**pkts_burst,

} else {
dst_port[j]   = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+3] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+4] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+5] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+6] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
-   dst_port[j+7] = em_get_dst_port(qconf, pkts_burst[j], 
portid);
+   dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j+1], 
portid);
+   dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j+2], 
portid);
+   dst_port[j+3] = em_get_dst_port(qconf, pkts_burst[j+3], 
portid);
+   dst_port[j+4] = em_get_dst_port(qconf, pkts_burst[j+4], 
portid);
+   dst_port[j+5] = em_get_dst_port(qconf, pkts_burst[j+5], portid);
+   dst_port[j+6] = em_get_dst_port(qconf, pkts_burst[j+6], portid);
+   dst_port[j+7] = em_get_dst_port(qconf, pkts_burst[j+7], portid);

[dpdk-dev] [PATCH] examples/l3fwd: em path performance fix

2016-03-03 Thread Tomasz Kulasek
It seems that for most use cases the previous hash_multi_lookup provides
better performance; moreover, the sequential lookup can cause a significant
performance drop.

This patch makes the previously optional hash_multi_lookup method the
default. It also provides some minor optimizations, such as draining
queues only on the tx ports actually in use.

Fixes: 94c54b4158d5 ("examples/l3fwd: rework exact-match")

Reported-by: Qian Xu 
Signed-off-by: Tomasz Kulasek 
---
 examples/l3fwd/l3fwd.h|2 ++
 examples/l3fwd/l3fwd_em.c |6 +++---
 examples/l3fwd/l3fwd_em_hlm_sse.h |   12 ++--
 examples/l3fwd/l3fwd_em_sse.h |9 +
 examples/l3fwd/l3fwd_lpm.c|4 ++--
 examples/l3fwd/main.c |7 +++
 6 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index da6d369..207a60a 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -84,6 +84,8 @@ struct lcore_rx_queue {
 struct lcore_conf {
uint16_t n_rx_queue;
struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+   uint16_t n_tx_port;
+   uint16_t tx_port_id[RTE_MAX_ETHPORTS];
uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
void *ipv4_lookup_struct;
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index f6a65d8..c8c781d 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -305,7 +305,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, void 
*lookup_struct)
  * buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1.
  */
 #if defined(__SSE4_1__)
-#ifndef HASH_MULTI_LOOKUP
+#ifdef NO_HASH_MULTI_LOOKUP
 #include "l3fwd_em_sse.h"
 #else
 #include "l3fwd_em_hlm_sse.h"
@@ -552,8 +552,8 @@ em_main_loop(__attribute__((unused)) void *dummy)
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {

-   for (i = 0; i < qconf->n_rx_queue; i++) {
-   portid = qconf->rx_queue_list[i].port_id;
+   for (i = 0; i < qconf->n_tx_port; ++i) {
+   portid = qconf->tx_port_id[i];
if (qconf->tx_mbufs[portid].len == 0)
continue;
send_burst(qconf,
diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h 
b/examples/l3fwd/l3fwd_em_hlm_sse.h
index d3388da..517815a 100644
--- a/examples/l3fwd/l3fwd_em_hlm_sse.h
+++ b/examples/l3fwd/l3fwd_em_hlm_sse.h
@@ -34,17 +34,9 @@
 #ifndef __L3FWD_EM_HLM_SSE_H__
 #define __L3FWD_EM_HLM_SSE_H__

-/**
- * @file
- * This is an optional implementation of packet classification in Exact-Match
- * path using rte_hash_lookup_multi method from previous implementation.
- * While sequential classification seems to be faster, it's disabled by default
- * and can be enabled with HASH_LOOKUP_MULTI global define in compilation time.
- */
-
 #include "l3fwd_sse.h"

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
uint8_t portid, uint16_t dst_port[8])
 {
@@ -168,7 +160,7 @@ get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
 }

-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
uint8_t portid, uint16_t dst_port[8])
 {
diff --git a/examples/l3fwd/l3fwd_em_sse.h b/examples/l3fwd/l3fwd_em_sse.h
index 4c6d14f..7f10af4 100644
--- a/examples/l3fwd/l3fwd_em_sse.h
+++ b/examples/l3fwd/l3fwd_em_sse.h
@@ -34,6 +34,15 @@
 #ifndef __L3FWD_EM_SSE_H__
 #define __L3FWD_EM_SSE_H__

+/**
+ * @file
+ * This is an optional implementation of packet classification in the
+ * Exact-Match path, using a sequential packet classification method.
+ * While hash multi lookup seems to provide better performance, this path
+ * is disabled by default and can be enabled with the NO_HASH_MULTI_LOOKUP
+ * global define at compilation time.
+ */
+
 #include "l3fwd_sse.h"

 static inline __attribute__((always_inline)) uint16_t
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index e0ed3c4..8df762d 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -158,8 +158,8 @@ lpm_main_loop(__attribute__((unused)) void *dummy)
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {

-   for (i = 0; i < qconf->n_rx_queue; i++) {
-   portid = qconf->rx_queue_list[i].port_id;
+   for (i = 0; i < qconf->n_tx_port; ++i) {
+   portid = qconf->tx_port_id[i];
if (qconf->tx_mbufs[portid].len == 0)
continue;

[dpdk-dev] [PATCH v3] examples/l3fwd: exact-match rework

2016-02-29 Thread Tomasz Kulasek
Current implementation of Exact-Match uses a different execution path than
LPM. Unifying them allows reusing a big part of the LPM code and slightly
increases the performance of Exact-Match.

Main changes:
* Packet classification stage is separated from the rest of path for both
  LPM and EM.
* Packet processing, modifying and transmit part is the same for LPM and EM
  and mostly based on the current LPM implementation.
* Shared code is moved to the common file "l3fwd_sse.h".
* Since sequential packet classification in the EM path seems to be faster
  than the multi hash lookup used before, it is used by default. The old
  implementation is moved to the file l3fwd_em_hlm_sse.h and can be enabled
  with the HASH_MULTI_LOOKUP global define at compilation time.

This patch depends on Ravi Kerur's "Modify and modularize l3fwd code" and
should be applied after it.
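
For reference, the resulting compile-time path selection in l3fwd_em.c
looks roughly like this (sequential classification is the default, and
defining HASH_MULTI_LOOKUP switches back to the multi-lookup path):

   #if defined(__SSE4_1__)
   #ifndef HASH_MULTI_LOOKUP
   #include "l3fwd_em_sse.h"       /* sequential classification (default) */
   #else
   #include "l3fwd_em_hlm_sse.h"   /* rte_hash_lookup_multi based path */
   #endif
   #endif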

Changes in v3:
 - fixed error: unused function 'l3fwd_em_simple_forward'. This function is
   used only in l3fwd_em_no_opt_send_packets; after that was moved to the
   new header file l3fwd_em.h in Ravi's patch, this one had to move there
   too.

Changes in v2:
 - patch rebase to be applicable on top of "Modify and modularize l3fwd
   code" v3

Signed-off-by: Tomasz Kulasek 
Acked-by: Konstantin Ananyev 
---
 examples/l3fwd/l3fwd.h|8 +
 examples/l3fwd/l3fwd_em.c |   80 +-
 examples/l3fwd/l3fwd_em.h |   68 +
 examples/l3fwd/l3fwd_em_hlm_sse.h |  341 +
 examples/l3fwd/l3fwd_em_sse.h |  447 +++-
 examples/l3fwd/l3fwd_lpm.c|   15 +-
 examples/l3fwd/l3fwd_lpm_sse.h|  507 -
 examples/l3fwd/l3fwd_sse.h|  501 
 8 files changed, 1011 insertions(+), 956 deletions(-)
 create mode 100644 examples/l3fwd/l3fwd_em_hlm_sse.h
 create mode 100644 examples/l3fwd/l3fwd_sse.h

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index f450269..da6d369 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -53,6 +53,14 @@
 /* Configure how many packets ahead to prefetch, when reading packets */
 #define PREFETCH_OFFSET  3

+/* Used to mark destination port as 'invalid'. */
+#define BAD_PORT ((uint16_t)-1)
+
+#define FWDSTEP 4
+
+/* replace first 12B of the ethernet header. */
+#define MASK_ETH 0x3f
+
 /* Hash parameters. */
 #ifdef RTE_ARCH_X86_64
 /* default to 4 million hash entries (approx) */
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index ace06cf..f6a65d8 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -300,81 +300,17 @@ em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, 
void *lookup_struct)
return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
 }

-static inline __attribute__((always_inline)) void
-l3fwd_em_simple_forward(struct rte_mbuf *m, uint8_t portid,
-   struct lcore_conf *qconf)
-{
-   struct ether_hdr *eth_hdr;
-   struct ipv4_hdr *ipv4_hdr;
-   uint8_t dst_port;
-
-   eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
-
-   if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
-   /* Handle IPv4 headers.*/
-   ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
-  sizeof(struct ether_hdr));
-
-#ifdef DO_RFC_1812_CHECKS
-   /* Check to make sure the packet is valid (RFC1812) */
-   if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
-   rte_pktmbuf_free(m);
-   return;
-   }
-#endif
-dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid,
-   qconf->ipv4_lookup_struct);
-
-   if (dst_port >= RTE_MAX_ETHPORTS ||
-   (enabled_port_mask & 1 << dst_port) == 0)
-   dst_port = portid;
-
-#ifdef DO_RFC_1812_CHECKS
-   /* Update time to live and header checksum */
-   --(ipv4_hdr->time_to_live);
-   ++(ipv4_hdr->hdr_checksum);
-#endif
-   /* dst addr */
-   *(uint64_t *)_hdr->d_addr = dest_eth_addr[dst_port];
-
-   /* src addr */
-   ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
-
-   send_single_packet(qconf, m, dst_port);
-   } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
-   /* Handle IPv6 headers.*/
-   struct ipv6_hdr *ipv6_hdr;
-
-   ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
-  sizeof(struct ether_hdr));
-
-   dst_port = em_get_ipv6_dst_port(ipv6_hdr, portid,
-   qconf->ipv6_lookup_struct);
-
-   if (dst_port >= RTE_MAX_ETHPORTS ||
-   (enabled_port_mask & 1 << dst_port) == 0)
-   dst_port = portid;

[dpdk-dev] [PATCH v2 2/2] examples: rework to use buffered tx api

2016-02-24 Thread Tomasz Kulasek
The internal buffering of packets for TX in sample apps is no longer
needed, so this patchset also replaces this code with calls to the new
rte_eth_tx_buffer* APIs in:

* l2fwd-jobstats
* l2fwd-keepalive
* l2fwd
* l3fwd-acl
* l3fwd-power
* link_status_interrupt
* client_server_mp
* l2fwd_fork
* packet_ordering
* qos_meter

v2 changes
 - rework synced with tx buffer API changes

Signed-off-by: Tomasz Kulasek 
---
 examples/l2fwd-jobstats/main.c |  104 +++--
 examples/l2fwd-keepalive/main.c|  100 ++--
 examples/l2fwd/main.c  |  104 +++--
 examples/l3fwd-acl/main.c  |   92 ++-
 examples/l3fwd-power/main.c|   89 ++
 examples/link_status_interrupt/main.c  |  107 +++--
 .../client_server_mp/mp_client/client.c|  101 +---
 examples/multi_process/l2fwd_fork/main.c   |   97 +++-
 examples/packet_ordering/main.c|  122 ++--
 examples/qos_meter/main.c  |   61 +++---
 10 files changed, 436 insertions(+), 541 deletions(-)

diff --git a/examples/l2fwd-jobstats/main.c b/examples/l2fwd-jobstats/main.c
index 7b59f4e..f159168 100644
--- a/examples/l2fwd-jobstats/main.c
+++ b/examples/l2fwd-jobstats/main.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -97,18 +98,12 @@ static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];

 static unsigned int l2fwd_rx_queue_per_lcore = 1;

-struct mbuf_table {
-   uint64_t next_flush_time;
-   unsigned len;
-   struct rte_mbuf *mbufs[MAX_PKT_BURST];
-};
-
 #define MAX_RX_QUEUE_PER_LCORE 16
 #define MAX_TX_QUEUE_PER_PORT 16
 struct lcore_queue_conf {
unsigned n_rx_port;
unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
-   struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+   uint64_t next_flush_time[RTE_MAX_ETHPORTS];

struct rte_timer rx_timers[MAX_RX_QUEUE_PER_LCORE];
struct rte_jobstats port_fwd_jobs[MAX_RX_QUEUE_PER_LCORE];
@@ -123,6 +118,8 @@ struct lcore_queue_conf {
 } __rte_cache_aligned;
 struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];

+struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
 static const struct rte_eth_conf port_conf = {
.rxmode = {
.split_hdr_size = 0,
@@ -373,59 +370,14 @@ show_stats_cb(__rte_unused void *param)
rte_eal_alarm_set(timer_period * US_PER_S, show_stats_cb, NULL);
 }

-/* Send the burst of packets on an output interface */
-static void
-l2fwd_send_burst(struct lcore_queue_conf *qconf, uint8_t port)
-{
-   struct mbuf_table *m_table;
-   uint16_t ret;
-   uint16_t queueid = 0;
-   uint16_t n;
-
-   m_table = &qconf->tx_mbufs[port];
-   n = m_table->len;
-
-   m_table->next_flush_time = rte_get_timer_cycles() + drain_tsc;
-   m_table->len = 0;
-
-   ret = rte_eth_tx_burst(port, queueid, m_table->mbufs, n);
-
-   port_statistics[port].tx += ret;
-   if (unlikely(ret < n)) {
-   port_statistics[port].dropped += (n - ret);
-   do {
-   rte_pktmbuf_free(m_table->mbufs[ret]);
-   } while (++ret < n);
-   }
-}
-
-/* Enqueue packets for TX and prepare them to be sent */
-static int
-l2fwd_send_packet(struct rte_mbuf *m, uint8_t port)
-{
-   const unsigned lcore_id = rte_lcore_id();
-   struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
-   struct mbuf_table *m_table = &qconf->tx_mbufs[port];
-   uint16_t len = qconf->tx_mbufs[port].len;
-
-   m_table->mbufs[len] = m;
-
-   len++;
-   m_table->len = len;
-
-   /* Enough pkts to be sent. */
-   if (unlikely(len == MAX_PKT_BURST))
-   l2fwd_send_burst(qconf, port);
-
-   return 0;
-}
-
 static void
 l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
 {
struct ether_hdr *eth;
void *tmp;
+   int sent;
unsigned dst_port;
+   struct rte_eth_dev_tx_buffer *buffer;

dst_port = l2fwd_dst_ports[portid];
eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
@@ -437,7 +389,10 @@ l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
/* src addr */
ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr);

-   l2fwd_send_packet(m, (uint8_t) dst_port);
+   buffer = tx_buffer[dst_port];
+   sent = rte_eth_tx_buffer(dst_port, 0, buffer, m);
+   if (sent)
+   port_statistics[dst_port].tx += sent;
 }

 static void
@@ -511,8 +466,10

[dpdk-dev] [PATCH v2 1/2] ethdev: add buffered tx api

2016-02-24 Thread Tomasz Kulasek
Many sample apps include internal buffering for single-packet-at-a-time
operation. Since this is such a common paradigm, this functionality is
better suited to being implemented in the ethdev API.

The new APIs in the ethdev library are:
* rte_eth_tx_buffer_init - initialize buffer
* rte_eth_tx_buffer - buffer up a single packet for future transmission
* rte_eth_tx_buffer_flush - flush any unsent buffered packets
* rte_eth_tx_buffer_set_err_callback - set up a callback to be called in
  case transmitting a buffered burst fails. By default, we just free the
  unsent packets.

As well as these, an additional reference callback is provided, which
frees the packets (as the default callback does) and updates a
user-provided counter, so that the number of dropped packets can be
tracked.
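
Since in this v2 the default callback counts drops into the buffer's own
errors field (see rte_eth_tx_buffer_init in the diff below), an
application can read it back after flushing; a small sketch, with
port_id/queue_id assumed from the application (PRIu64 needs <inttypes.h>):

   uint16_t nb_sent = rte_eth_tx_buffer_flush(port_id, queue_id, buffer);

   printf("flushed %u pkts, %" PRIu64 " dropped so far\n",
           nb_sent, buffer->errors);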

v2 changes:
 - reworked to use new buffer model
 - buffer data and callbacks are removed from rte_eth_dev/rte_eth_dev_data,
   so this patch doesn't break the ABI anymore
 - introduced the RTE_ETH_TX_BUFFER_SIZE macro and rte_eth_tx_buffer_init
 - buffers are not attached to the port-queue
 - buffers can be allocated dynamically during application work
 - size of buffer can be changed without port restart

Signed-off-by: Tomasz Kulasek 
---
 lib/librte_ether/rte_ethdev.c  |   36 +++
 lib/librte_ether/rte_ethdev.h  |  182 +++-
 lib/librte_ether/rte_ether_version.map |9 ++
 3 files changed, 226 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 756b234..b8ab747 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1307,6 +1307,42 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t 
tx_queue_id,
 }

 void
+rte_eth_count_unsent_packet_callback(struct rte_mbuf **pkts, uint16_t unsent,
+   void *userdata)
+{
+   uint64_t *count = userdata;
+   unsigned i;
+
+   for (i = 0; i < unsent; i++)
+   rte_pktmbuf_free(pkts[i]);
+
+   *count += unsent;
+}
+
+int
+rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer,
+   buffer_tx_error_fn cbfn, void *userdata)
+{
+   buffer->cbfn = cbfn;
+   buffer->userdata = userdata;
+   return 0;
+}
+
+int
+rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size)
+{
+   if (buffer == NULL)
+   return -EINVAL;
+
+   buffer->size = size;
+   if (buffer->cbfn == NULL)
+   rte_eth_tx_buffer_set_err_callback(buffer,
+   rte_eth_count_unsent_packet_callback, (void 
*)&buffer->errors);
+
+   return 0;
+}
+
+void
 rte_eth_promiscuous_enable(uint8_t port_id)
 {
struct rte_eth_dev *dev;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 16da821..b0d4932 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -2655,6 +2655,186 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, 
nb_pkts);
 }

+typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
+   void *userdata);
+
+/**
+ * Structure used to buffer packets for future TX
+ * Used by APIs rte_eth_tx_buffer and rte_eth_tx_buffer_flush
+ */
+struct rte_eth_dev_tx_buffer {
+   unsigned nb_pkts;
+   uint64_t errors;
+   /**< Total number of queued packets dropped on send. */
+   buffer_tx_error_fn cbfn;
+   void *userdata;
+   uint16_t size;   /**< Size of buffer for buffered tx */
+   struct rte_mbuf *pkts[];
+};
+
+/**
+ * Calculate the size of the tx buffer.
+ *
+ * @param sz
+ *   Number of stored packets.
+ */
+#define RTE_ETH_TX_BUFFER_SIZE(sz) \
+   (sizeof(struct rte_eth_dev_tx_buffer) + (sz) * sizeof(struct rte_mbuf 
*))
+
+/**
+ * Initialize default values for buffered transmitting
+ *
+ * @param buffer
+ *   Tx buffer to be initialized.
+ * @param size
+ *   Buffer size
+ * @return
+ *   0 if no error
+ */
+int
+rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size);
+
+/**
+ * Send any packets queued up for transmission on a port and HW queue
+ *
+ * This causes an explicit flush of packets previously buffered via the
+ * rte_eth_tx_buffer() function. It returns the number of packets successfully
+ * sent to the NIC, and calls the error callback for any unsent packets. Unless
+ * explicitly set up otherwise, the default callback simply frees the unsent
+ * packets back to the owning mempool.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.

[dpdk-dev] [PATCH v2 0/2] add support for buffered tx to ethdev

2016-02-24 Thread Tomasz Kulasek
Many sample apps include internal buffering for single-packet-at-a-time
operation. Since this is such a common paradigm, this functionality is
better suited to being implemented in the ethdev API.

The new APIs in the ethdev library are:
* rte_eth_tx_buffer_init - initialize buffer
* rte_eth_tx_buffer - buffer up a single packet for future transmission
* rte_eth_tx_buffer_flush - flush any unsent buffered packets
* rte_eth_tx_buffer_set_err_callback - set up a callback to be called in
  case transmitting a buffered burst fails. By default, we just free the
  unsent packets.

As well as these, an additional reference callback is provided, which
frees the packets (as the default callback does) and updates a
user-provided counter, so that the number of dropped packets can be
tracked.

Based on feedback from the mailing list that buffer management facilities
in the user application are preferable to API simplicity, we decided to
move the internal buffer table, as well as the callback functions and user
data, from rte_eth_dev/rte_eth_dev_data to the application space.
This prevents ABI breakage and gives more flexibility in buffer
management, such as allocation, dynamic size change, and reuse of buffers
on many ports or after a failure.


The following steps illustrate how tx buffers can be used in application:

1) Initialization

a) Allocate memory for a buffer

   struct rte_eth_dev_tx_buffer *buffer = rte_zmalloc_socket("tx_buffer",
   RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, socket_id);

   RTE_ETH_TX_BUFFER_SIZE(size) macro computes memory required to store
   "size" packets in buffer.

b) Initialize allocated memory and set up default values. Threshold level
   must be lower than or equal to the MAX_PKT_BURST from 1a)

   rte_eth_tx_buffer_init(buffer, threshold);


c) Set error callback (optional)

   rte_eth_tx_buffer_set_err_callback(buffer, callback_fn, userdata);


2) Store packet "pkt" in the buffer; all buffered packets are sent to the
   queue_id on port_id when the number of packets reaches the threshold
   level set up in 1b)

   rte_eth_tx_buffer(port_id, queue_id, buffer, pkt);


3) Send all stored packets to the queue_id on port_id

   rte_eth_tx_buffer_flush(port_id, queue_id, buffer);


4) Flush buffer and free memory

   rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
   ...
   rte_free(buffer);


v2 changes:
 - reworked to use new buffer model
 - buffer data and callbacks are removed from rte_eth_dev/rte_eth_dev_data,
   so this patch doesn't break the ABI anymore
 - introduced the RTE_ETH_TX_BUFFER_SIZE macro and rte_eth_tx_buffer_init
 - buffers are not attached to the port-queue
 - buffers can be allocated dynamically during application work
 - size of buffer can be changed without port restart


Tomasz Kulasek (2):
  ethdev: add buffered tx api
  examples: rework to use buffered tx

 examples/l2fwd-jobstats/main.c |  104 +--
 examples/l2fwd-keepalive/main.c|  100 ---
 examples/l2fwd/main.c  |  104 +--
 examples/l3fwd-acl/main.c  |   92 --
 examples/l3fwd-power/main.c|   89 --
 examples/link_status_interrupt/main.c  |  107 +---
 .../client_server_mp/mp_client/client.c|  101 ++-
 examples/multi_process/l2fwd_fork/main.c   |   97 +--
 examples/packet_ordering/main.c|  122 +
 examples/qos_meter/main.c  |   61 ++-
 lib/librte_ether/rte_ethdev.c  |   36 
 lib/librte_ether/rte_ethdev.h  |  182 +++-
 lib/librte_ether/rte_ether_version.map |9 +
 13 files changed, 662 insertions(+), 542 deletions(-)

-- 
1.7.9.5


