[dpdk-dev] DPDK VF and I40E

2015-04-20 Thread Shankar Krishnamurthy
DPDK Experts,

When I run the I40E host PF driver with the DPDK VF driver in a VM, I see an issue where 
packets are received in bunches of 4. See below.

But the same problem does not show up when run with DPDK PF testpmd with DPDK 
VF in VM. 

Since we would like the regular I40E driver to work with the DPDK VF, could someone 
please point to the code change that was made in the DPDK PF (testpmd) to make this 
work? 

thanks,
shanar.

Inside VM:
./examples/l2fwd/l2fwd/x86_64-native-linuxapp-gcc/app/l2fwd -c 0xb -n 4 -- -p 
0x3
...
Port statistics 
Statistics for port 0 --
Packets sent:4
Packets received:0
Packets dropped: 0
Statistics for port 1 --
Packets sent:0
Packets received:4
Packets dropped: 0
Aggregate statistics ===
Total packets sent:  4
Total packets received:  4
Total packets dropped:   0



[dpdk-dev] [PATCH 0/7] Hyper-V Poll Mode Driver

2015-04-20 Thread Stephen Hemminger
This is a revised version of earlier patches to support Hyper-V poll mode
driver. The main changes were to rebase onto DPDK 2.0 with a few other
style cleanups to keep checkpatch at bay.

Stephen Hemminger (7):
  ether: add function to query for link state interrupt
  pmd: change drivers initialization for pci
  hv: add basic vmbus support
  hv: uio driver
  hv: poll mode driver
  hv: enable driver in common config
  hv: add kernel patch

 config/common_linuxapp |9 +
 lib/Makefile   |1 +
 lib/librte_eal/common/Makefile |2 +-
 lib/librte_eal/common/eal_common_options.c |5 +
 lib/librte_eal/common/eal_internal_cfg.h   |1 +
 lib/librte_eal/common/eal_options.h|2 +
 lib/librte_eal/common/eal_private.h|   10 +
 lib/librte_eal/linuxapp/Makefile   |3 +
 lib/librte_eal/linuxapp/eal/Makefile   |3 +
 lib/librte_eal/linuxapp/eal/eal.c  |   11 +
 lib/librte_eal/linuxapp/hv_uio/Makefile|   57 +
 lib/librte_eal/linuxapp/hv_uio/hv_uio.c|  551 +++
 lib/librte_eal/linuxapp/hv_uio/hyperv_net.h|  907 +++
 .../linuxapp/hv_uio/vmbus-get-pages.patch  |   55 +
 lib/librte_ether/rte_ethdev.c  |  142 +-
 lib/librte_ether/rte_ethdev.h  |   27 +-
 lib/librte_pmd_e1000/em_ethdev.c   |2 +-
 lib/librte_pmd_e1000/igb_ethdev.c  |4 +-
 lib/librte_pmd_enic/enic_ethdev.c  |2 +-
 lib/librte_pmd_fm10k/fm10k_ethdev.c|2 +-
 lib/librte_pmd_hyperv/Makefile |   28 +
 lib/librte_pmd_hyperv/hyperv.h |  169 ++
 lib/librte_pmd_hyperv/hyperv_drv.c | 1653 
 lib/librte_pmd_hyperv/hyperv_drv.h |  558 +++
 lib/librte_pmd_hyperv/hyperv_ethdev.c  |  332 
 lib/librte_pmd_hyperv/hyperv_logs.h|   69 +
 lib/librte_pmd_hyperv/hyperv_rxtx.c|  403 +
 lib/librte_pmd_hyperv/hyperv_rxtx.h|   35 +
 lib/librte_pmd_i40e/i40e_ethdev.c  |2 +-
 lib/librte_pmd_i40e/i40e_ethdev_vf.c   |2 +-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c|4 +-
 lib/librte_pmd_virtio/virtio_ethdev.c  |2 +-
 lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c|2 +-
 mk/rte.app.mk  |4 +
 34 files changed, 5039 insertions(+), 20 deletions(-)
 create mode 100644 lib/librte_eal/linuxapp/hv_uio/Makefile
 create mode 100644 lib/librte_eal/linuxapp/hv_uio/hv_uio.c
 create mode 100644 lib/librte_eal/linuxapp/hv_uio/hyperv_net.h
 create mode 100644 lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch
 create mode 100644 lib/librte_pmd_hyperv/Makefile
 create mode 100644 lib/librte_pmd_hyperv/hyperv.h
 create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.c
 create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.h
 create mode 100644 lib/librte_pmd_hyperv/hyperv_ethdev.c
 create mode 100644 lib/librte_pmd_hyperv/hyperv_logs.h
 create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.c
 create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.h

-- 
2.1.4



[dpdk-dev] [PATCH 1/7] ether: add function to query for link state interrupt

2015-04-20 Thread Stephen Hemminger
From: Stephen Hemminger 

Allow application to query whether link state will work.
This is also part of abstracting dependency on PCI.

Signed-off-by: Stephen Hemminger 
---
 lib/librte_ether/rte_ethdev.c | 14 ++
 lib/librte_ether/rte_ethdev.h | 12 
 2 files changed, 26 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index e20cca5..9577d17 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1340,6 +1340,20 @@ rte_eth_dev_start(uint8_t port_id)
return 0;
 }

+int
+rte_eth_has_link_state(uint8_t port_id)
+{
+   struct rte_eth_dev *dev;
+
+   if (port_id >= nb_ports) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+   return 0;
+   }
+   dev = &rte_eth_devices[port_id];
+
+   return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
+}
+
 void
 rte_eth_dev_stop(uint8_t port_id)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 4648290..991023b 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -2064,6 +2064,18 @@ extern void rte_eth_link_get_nowait(uint8_t port_id,
struct rte_eth_link *link);

 /**
+ * Test whether device supports link state interrupt mode.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (1) if link state interrupt is supported
+ *   - (0) if link state interrupt is not supported
+ */
+extern int
+rte_eth_has_link_state(uint8_t port_id);
+
+/**
  * Retrieve the general I/O statistics of an Ethernet device.
  *
  * @param port_id
-- 
2.1.4



[dpdk-dev] [PATCH 2/7] pmd: change drivers initialization for pci

2015-04-20 Thread Stephen Hemminger
From: Stephen Hemminger 

The change to generic ether device structure to support multiple
bus types requires a change to all existing PMD but only in the
initialization (and the change is backwards compatible).

Signed-off-by: Stephen Hemminger 
---
 lib/librte_pmd_e1000/em_ethdev.c| 2 +-
 lib/librte_pmd_e1000/igb_ethdev.c   | 4 ++--
 lib/librte_pmd_enic/enic_ethdev.c   | 2 +-
 lib/librte_pmd_fm10k/fm10k_ethdev.c | 2 +-
 lib/librte_pmd_i40e/i40e_ethdev.c   | 2 +-
 lib/librte_pmd_i40e/i40e_ethdev_vf.c| 2 +-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 4 ++--
 lib/librte_pmd_virtio/virtio_ethdev.c   | 2 +-
 lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c | 2 +-
 9 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/lib/librte_pmd_e1000/em_ethdev.c b/lib/librte_pmd_e1000/em_ethdev.c
index 82e0b7a..e57530e 100644
--- a/lib/librte_pmd_e1000/em_ethdev.c
+++ b/lib/librte_pmd_e1000/em_ethdev.c
@@ -281,7 +281,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
 }

 static struct eth_driver rte_em_pmd = {
-   {
+   .pci_drv = {
.name = "rte_em_pmd",
.id_table = pci_id_em_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
diff --git a/lib/librte_pmd_e1000/igb_ethdev.c 
b/lib/librte_pmd_e1000/igb_ethdev.c
index e2b7cf3..67273b0 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -680,7 +680,7 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
 }

 static struct eth_driver rte_igb_pmd = {
-   {
+   .pci_drv = {
.name = "rte_igb_pmd",
.id_table = pci_id_igb_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
@@ -693,7 +693,7 @@ static struct eth_driver rte_igb_pmd = {
  * virtual function driver struct
  */
 static struct eth_driver rte_igbvf_pmd = {
-   {
+   .pci_drv = {
.name = "rte_igbvf_pmd",
.id_table = pci_id_igbvf_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_enic/enic_ethdev.c 
b/lib/librte_pmd_enic/enic_ethdev.c
index 63a594d..dbef5c6 100644
--- a/lib/librte_pmd_enic/enic_ethdev.c
+++ b/lib/librte_pmd_enic/enic_ethdev.c
@@ -609,7 +609,7 @@ static int eth_enicpmd_dev_init(struct rte_eth_dev *eth_dev)
 }

 static struct eth_driver rte_enic_pmd = {
-   {
+   .pci_drv = {
.name = "rte_enic_pmd",
.id_table = pci_id_enic_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_fm10k/fm10k_ethdev.c 
b/lib/librte_pmd_fm10k/fm10k_ethdev.c
index 1a96cf2..ed6aaa6 100644
--- a/lib/librte_pmd_fm10k/fm10k_ethdev.c
+++ b/lib/librte_pmd_fm10k/fm10k_ethdev.c
@@ -1843,7 +1843,7 @@ static struct rte_pci_id pci_id_fm10k_map[] = {
 };

 static struct eth_driver rte_pmd_fm10k = {
-   {
+   .pci_drv = {
.name = "rte_pmd_fm10k",
.id_table = pci_id_fm10k_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index dc44764..ba13d68 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -265,7 +265,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
 };

 static struct eth_driver rte_i40e_pmd = {
-   {
+   .pci_drv = {
.name = "rte_i40e_pmd",
.id_table = pci_id_i40e_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
diff --git a/lib/librte_pmd_i40e/i40e_ethdev_vf.c 
b/lib/librte_pmd_i40e/i40e_ethdev_vf.c
index 4581c5b..0186fbd 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev_vf.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev_vf.c
@@ -1201,7 +1201,7 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
  * virtual function driver struct
  */
 static struct eth_driver rte_i40evf_pmd = {
-   {
+   .pci_drv = {
.name = "rte_i40evf_pmd",
.id_table = pci_id_i40evf_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c 
b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index 1b3b4b5..757ae96 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -1087,7 +1087,7 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
 }

 static struct eth_driver rte_ixgbe_pmd = {
-   {
+   .pci_drv = {
.name = "rte_ixgbe_pmd",
.id_table = pci_id_ixgbe_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
@@ -1100,7 +1100,7 @@ static struct eth_driver rte_ixgbe_pmd = {
  * virtual function driver struct
  */
 static struct eth_driver rte_ixgbevf_pmd = {
-   {
+   .pci_drv = {
.name = "rte_ixgbevf_pmd",
.id_table = pci_id_ixgbevf_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c 

[dpdk-dev] [PATCH 3/7] hv: add basic vmbus support

2015-04-20 Thread Stephen Hemminger
The hyper-v device driver forces the base EAL code to change
to support multiple bus types. This is done by changing the pci_device
in ether driver to a generic union.

As much as possible this is done in a backwards source compatible
way. It will break ABI for device drivers.

Signed-off-by: Stephen Hemminger 
---
 lib/librte_eal/common/Makefile |   2 +-
 lib/librte_eal/common/eal_common_options.c |   5 ++
 lib/librte_eal/common/eal_internal_cfg.h   |   1 +
 lib/librte_eal/common/eal_options.h|   2 +
 lib/librte_eal/common/eal_private.h|  10 +++
 lib/librte_eal/linuxapp/eal/Makefile   |   3 +
 lib/librte_eal/linuxapp/eal/eal.c  |  11 +++
 lib/librte_ether/rte_ethdev.c  | 128 +++--
 lib/librte_ether/rte_ethdev.h  |  15 +++-
 9 files changed, 168 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 3ea3bbf..202485e 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk

 INC := rte_branch_prediction.h rte_common.h
 INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
 INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
 INC += rte_rwlock.h rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
diff --git a/lib/librte_eal/common/eal_common_options.c 
b/lib/librte_eal/common/eal_common_options.c
index 8fcb1ab..76a3394 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -80,6 +80,7 @@ eal_long_options[] = {
{OPT_NO_HPET,   0, NULL, OPT_NO_HPET_NUM  },
{OPT_NO_HUGE,   0, NULL, OPT_NO_HUGE_NUM  },
{OPT_NO_PCI,0, NULL, OPT_NO_PCI_NUM   },
+   {OPT_NO_VMBUS,  0, NULL, OPT_NO_VMBUS_NUM },
{OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM},
{OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM},
{OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM},
@@ -726,6 +727,10 @@ eal_parse_common_option(int opt, const char *optarg,
conf->no_pci = 1;
break;

+   case OPT_NO_VMBUS_NUM:
+   conf->no_vmbus = 1;
+   break;
+
case OPT_NO_HPET_NUM:
conf->no_hpet = 1;
break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h 
b/lib/librte_eal/common/eal_internal_cfg.h
index e2ecb0d..0e7de34 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -66,6 +66,7 @@ struct internal_config {
volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
volatile unsigned xen_dom0_support; /**< support app running on Xen 
Dom0*/
volatile unsigned no_pci; /**< true to disable PCI */
+   volatile unsigned no_vmbus;   /**< true to disable VMBUS */
volatile unsigned no_hpet;/**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping

* instead of native TSC */
diff --git a/lib/librte_eal/common/eal_options.h 
b/lib/librte_eal/common/eal_options.h
index f6714d9..54f03dc 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -67,6 +67,8 @@ enum {
OPT_NO_HUGE_NUM,
 #define OPT_NO_PCI"no-pci"
OPT_NO_PCI_NUM,
+#define OPT_NO_VMBUS  "no-vmbus"
+   OPT_NO_VMBUS_NUM,
 #define OPT_NO_SHCONF "no-shconf"
OPT_NO_SHCONF_NUM,
 #define OPT_SOCKET_MEM"socket-mem"
diff --git a/lib/librte_eal/common/eal_private.h 
b/lib/librte_eal/common/eal_private.h
index 4acf5a0..039e9f3 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -180,6 +180,16 @@ int rte_eal_pci_close_one_driver(struct rte_pci_driver *dr,
struct rte_pci_device *dev);

 /**
+ * VMBUS related functions and structures
+ */
+int rte_eal_vmbus_init(void);
+
+struct rte_vmbus_driver;
+struct rte_vmbus_device;
+
+int rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+   struct rte_vmbus_device *dev);
+/**
  * Init tail queues for non-EAL library structures. This is to allow
  * the rings, mempools, etc. lists to be shared among multiple processes
  *
diff --git a/lib/librte_eal/linuxapp/eal/Makefile 
b/lib/librte_eal/linuxapp/eal/Makefile
index 01f7b70..acd5127 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -74,6 +74,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
 ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
 

[dpdk-dev] [PATCH 4/7] hv: uio driver

2015-04-20 Thread Stephen Hemminger
From: Stephen Hemminger 

Add new UIO driver in kernel to support DPDK Poll Mode Driver.

Signed-off-by: Stas Egorov 
Signed-off-by: Stephen Hemminger 
---
 lib/librte_eal/linuxapp/Makefile|   3 +
 lib/librte_eal/linuxapp/hv_uio/Makefile |  57 ++
 lib/librte_eal/linuxapp/hv_uio/hv_uio.c | 551 +
 lib/librte_eal/linuxapp/hv_uio/hyperv_net.h | 907 
 4 files changed, 1518 insertions(+)
 create mode 100644 lib/librte_eal/linuxapp/hv_uio/Makefile
 create mode 100644 lib/librte_eal/linuxapp/hv_uio/hv_uio.c
 create mode 100644 lib/librte_eal/linuxapp/hv_uio/hyperv_net.h

diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 8fcfdf6..a28d289 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -41,5 +41,8 @@ endif
 ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
 DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_dom0
 endif
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += hv_uio
+endif

 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/hv_uio/Makefile 
b/lib/librte_eal/linuxapp/hv_uio/Makefile
new file mode 100644
index 000..2ed7771
--- /dev/null
+++ b/lib/librte_eal/linuxapp/hv_uio/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = hv_uio
+MODULE_PATH = drivers/net/hv_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+ifeq ($(CONFIG_RTE_LIBRTE_HV_DEBUG),y)
+MODULE_CFLAGS += -DDBG
+endif
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := hv_uio.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/hv_uio/hv_uio.c 
b/lib/librte_eal/linuxapp/hv_uio/hv_uio.c
new file mode 100644
index 000..294b0fd
--- /dev/null
+++ b/lib/librte_eal/linuxapp/hv_uio/hv_uio.c
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "hyperv_net.h"
+
+#define HV_DEVICE_ADD   0
+#define HV_DEVICE_REMOVE 1
+#define HV_RING_SIZE512
+
+static uint mtu = ETH_DATA_LEN;
+/*
+ * List of resources to be mapped to uspace
+ * can be extended up to MAX_UIO_MAPS(5) items
+ */
+enum {
+   TXRX_RING_MAP,
+   INT_PAGE_MAP,
+   MON_PAGE_MAP,
+   RECV_BUF_MAP
+};
+
+struct hyperv_private_data {

[dpdk-dev] [PATCH 5/7] hv: poll mode driver

2015-04-20 Thread Stephen Hemminger
From: Stephen Hemminger 

This is a new Poll Mode driver for using the Hyper-V virtual network
interface.

Signed-off-by: Stas Egorov 
Signed-off-by: Stephen Hemminger 
---
 lib/Makefile  |1 +
 lib/librte_pmd_hyperv/Makefile|   28 +
 lib/librte_pmd_hyperv/hyperv.h|  169 
 lib/librte_pmd_hyperv/hyperv_drv.c| 1653 +
 lib/librte_pmd_hyperv/hyperv_drv.h|  558 +++
 lib/librte_pmd_hyperv/hyperv_ethdev.c |  332 +++
 lib/librte_pmd_hyperv/hyperv_logs.h   |   69 ++
 lib/librte_pmd_hyperv/hyperv_rxtx.c   |  403 
 lib/librte_pmd_hyperv/hyperv_rxtx.h   |   35 +
 mk/rte.app.mk |4 +
 10 files changed, 3252 insertions(+)
 create mode 100644 lib/librte_pmd_hyperv/Makefile
 create mode 100644 lib/librte_pmd_hyperv/hyperv.h
 create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.c
 create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.h
 create mode 100644 lib/librte_pmd_hyperv/hyperv_ethdev.c
 create mode 100644 lib/librte_pmd_hyperv/hyperv_logs.h
 create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.c
 create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.h

diff --git a/lib/Makefile b/lib/Makefile
index d94355d..6c1daf2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,6 +47,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
 DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += librte_pmd_fm10k
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += librte_pmd_mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += librte_pmd_enic
+DIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += librte_pmd_hyperv
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
diff --git a/lib/librte_pmd_hyperv/Makefile b/lib/librte_pmd_hyperv/Makefile
new file mode 100644
index 000..4ba08c8
--- /dev/null
+++ b/lib/librte_pmd_hyperv/Makefile
@@ -0,0 +1,28 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+#   All rights reserved.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_hyperv.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_drv.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_hyperv/hyperv.h b/lib/librte_pmd_hyperv/hyperv.h
new file mode 100644
index 000..5f66d8a
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv.h
@@ -0,0 +1,169 @@
+/*-
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _HYPERV_H_
+#define _HYPERV_H_
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "hyperv_logs.h"
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE  (1 << PAGE_SHIFT)
+
+/*
+ * Tunable ethdev params
+ */
+#define HV_MIN_RX_BUF_SIZE 1024
+#define HV_MAX_RX_PKT_LEN  4096
+#define HV_MAX_MAC_ADDRS   1
+#define HV_MAX_RX_QUEUES   1
+#define HV_MAX_TX_QUEUES   1
+#define HV_MAX_PKT_BURST   32
+#define HV_MAX_LINK_REQ10
+
+/*
+ * List of resources mapped from kspace
+ * need to be the same as defined in hv_uio.c
+ */
+enum {
+   TXRX_RING_MAP,
+   INT_PAGE_MAP,
+   MON_PAGE_MAP,
+   RECV_BUF_MAP
+};
+
+/*
+ * Statistics
+ */
+struct hv_stats {
+   uint64_t opkts;
+   uint64_t obytes;
+   uint64_t oerrors;
+
+   uint64_t ipkts;
+   uint64_t ibytes;
+   uint64_t ierrors;
+   uint64_t rx_nombuf;
+};
+
+struct hv_data;
+struct netvsc_packet;
+struct rndis_msg;
+typedef void (*receive_callback_t)(struct hv_data *hv, struct rndis_msg *msg,
+   struct netvsc_packet *pkt);
+
+/*
+ * Main driver structure
+ */
+struct hv_data {
+   int vmbus_device;
+   uint8_t monitor_bit;
+   uint8_t monitor_group;
+   uint8_t kernel_initialized;
+   int uio_fd;
+   /* Flag indicates channel state. If closed, RX/TX shouldn't work 
further */
+   uint8_t closed;
+   /* Flag indicates whether HALT rndis request was received by host */
+   uint8_t hlt_req_sent;
+   /* Flag indicates pending state for HALT request */
+   uint8_t hlt_req_pending;
+   /* Counter for RNDIS requests */
+   uint32_t new_request_id;
+   /* State of RNDIS device */
+   uint8_t rndis_dev_state;
+   /* Number of transmitted packets but not completed yet by Hyper-V */
+   int num_outstanding_sends;
+   /* Max pkt len to fit in rx mbufs */
+   uint32_t max_rx_pkt_len;
+
+   uint8_t jumbo_frame_support;
+
+   struct 

[dpdk-dev] [PATCH 6/7] hv: enable driver in common config

2015-04-20 Thread Stephen Hemminger
From: Stephen Hemminger 

Add hyperv driver config to enable it.

Signed-off-by: Stephen Hemminger 
---
 config/common_linuxapp | 9 +
 1 file changed, 9 insertions(+)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0078dc9..58cc352 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -234,6 +234,15 @@ CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n
 CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_DRIVER=n

 #
+# Compile burst-mode Hyperv PMD driver
+#
+CONFIG_RTE_LIBRTE_HV_PMD=y
+CONFIG_RTE_LIBRTE_HV_DEBUG=n
+CONFIG_RTE_LIBRTE_HV_DEBUG_INIT=n
+CONFIG_RTE_LIBRTE_HV_DEBUG_RX=n
+CONFIG_RTE_LIBRTE_HV_DEBUG_TX=n
+
+#
 # Compile example software rings based PMD
 #
 CONFIG_RTE_LIBRTE_PMD_RING=y
-- 
2.1.4



[dpdk-dev] [PATCH 7/7] hv: add kernel patch

2015-04-20 Thread Stephen Hemminger
From: Stephen Hemminger 

For users who are not running the latest kernel, include the kernel patch
for them to apply.

Signed-off-by: Stephen Hemminger 
---
 .../linuxapp/hv_uio/vmbus-get-pages.patch  | 55 ++
 1 file changed, 55 insertions(+)
 create mode 100644 lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch

diff --git a/lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch 
b/lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch
new file mode 100644
index 000..ae27fbd
--- /dev/null
+++ b/lib/librte_eal/linuxapp/hv_uio/vmbus-get-pages.patch
@@ -0,0 +1,55 @@
+hyper-v: allow access to vmbus from userspace driver
+
+This is patch from  to allow access to hyper-v vmbus from UIO driver.
+
+Signed-off-by: Stas Egorov 
+Signed-off-by: Stephen Hemminger 
+
+---
+v2 - simplify and rename to vmbus_get_monitor_pages
+
+ drivers/hv/connection.c |   20 +---
+ include/linux/hyperv.h  |3 +++
+ 2 files changed, 20 insertions(+), 3 deletions(-)
+
+--- a/drivers/hv/connection.c  2015-02-03 10:58:51.751752450 -0800
++++ b/drivers/hv/connection.c  2015-02-04 14:59:51.636194383 -0800
+@@ -64,6 +64,15 @@ static __u32 vmbus_get_next_version(__u3
+   }
+ }
+
++void vmbus_get_monitor_pages(unsigned long *int_page,
++   unsigned long monitor_pages[2])
++{
++  *int_page = (unsigned long)vmbus_connection.int_page;
++  monitor_pages[0] = (unsigned long)vmbus_connection.monitor_pages[0];
++  monitor_pages[1] = (unsigned long)vmbus_connection.monitor_pages[1];
++}
++EXPORT_SYMBOL_GPL(vmbus_get_monitor_pages);
++
+ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
+   __u32 version)
+ {
+@@ -347,10 +356,7 @@ static void process_chn_event(u32 relid)
+   else
+   bytes_to_read = 0;
+   } while (read_state && (bytes_to_read != 0));
+-  } else {
+-  pr_err("no channel callback for relid - %u\n", relid);
+   }
+-
+ }
+
+ /*
+--- a/include/linux/hyperv.h   2015-02-03 10:58:51.751752450 -0800
++++ b/include/linux/hyperv.h   2015-02-04 15:00:26.388355012 -0800
+@@ -868,6 +868,9 @@ extern int vmbus_recvpacket_raw(struct v
+
+ extern void vmbus_ontimer(unsigned long data);
+
++extern void vmbus_get_monitor_pages(unsigned long *int_page,
++  unsigned long monitor_pages[2]);
++
+ /* Base driver object */
+ struct hv_driver {
+   const char *name;
-- 
2.1.4



[dpdk-dev] [PATCH 0/4] rte_ethdev: cleanups

2015-04-20 Thread Thomas Monjalon
2015-04-09 14:29, Stephen Hemminger:
> A bunch of small (almost trivial) patches to fix style and other
> issues in the base Ethernet driver interface code.
> 
> Stephen Hemminger (4):
>   rte_ethdev: remove extra inline
>   rte_ethdev: whitespace cleanup
>   rte_ethdev: make tables const
>   rte_ethdev: remove unnecessary paren on return

Series applied except "whitespace cleanup" which was refused.
Thanks


[dpdk-dev] [RFC PATCH 0/4] pktdev

2015-04-20 Thread Marc Sune


On 17/04/15 21:50, Wiles, Keith wrote:
> Hi Marc and Bruce,

Hi Keith, Bruce,

>
> On 4/17/15, 1:49 PM, "Marc Sune"  wrote:
>
>>
>> On 17/04/15 17:16, Bruce Richardson wrote:
>>> Hi all,
>>>
>>> to continue this discussion a bit more, here is my, slightly different,
>>> slant
>>> on what a pktdev abstraction may look like.
>>>
>>> The primary objective I had in mind when drafting this is to provide the
>>> minimal abstraction that can be *easily* used as a common device
>>> abstraction for
>>> existing (and future) device types to be passed to dataplane code. The
>>> patchset
>>> demonstrates this by defining a minimal interface for pktdev - since I
>>> firmly
>>> believe the interface should be as small as possible - and then showing
>>> how that
>>> common interface can be used to unify rings and ethdevs under a common
>>> API for the
>>> datapath. I believe any attempt to unify things much beyond this to the
>>> control
>>> plane or setup phase is not worth doing - at least not initially - as at
>>> init time the code always needs to be aware of the underlying resource
>>> type in
>>> order to configure it properly for dataplane use.
>>>
>>> The overall objective I look to achieve is illustrated by the final
>>> patch in
>>> the series, which is a sample app where the same code is used for all
>>> cores,
>>> irrespective of the underlying device type.
>>>
>>> To get to that point, patch 1 defines the minimal API - just RX and TX.
>>> The .c
>>> file in the library is empty for simplicity, though I would see some
>>> functionality moving there when/if it makes sense e.g. the callback
>>> support
>>> from ethdev, as is done in Keith's patchset.
>>> Patch 2 then makes very minimal changes to ethdev to allow ethdevs to
>>> be used
>>> as pktdevs, and to make use of the pktdev functions when appropriate
>>> Patch 3 was, for me, the key test for this implementation - how hard
>>> was it to
>>> make an rte_ring usable as a pktdev too. Two single-line functions for
>>> RX/TX
>>> and a separate "converter" function proved to be all that was necessary
>>> here -
>>> and I believe simpler solutions may be possible too, as the extra
>>> structures
>>> allocated on conversion could be merged into the rte_ring structure
>>> itself and
>>> initialized on ring creation if we prefer that option. It is
>>> hoped/presumed that
>>> wrapping other structures, such as KNI, may prove to be just as easily
>>> done.
>>> [Not attempted yet - left as an exercise for the reader :-)].
>>>
>>> Now, in terms of pktdev vs ethdev, there is nothing in this proposal
>>> that
>>> cannot also be done using ethdev AFAIK. However, pktdev as outlined here
>>> should make the process far easier than trying to create a full PMD for
>>> something.
>>> All NIC specific functions, including things like stop/start, are
>>> stripped out,
>>> as they don't make sense for an rte_ring or other software objects.
>>> Also, the other thing this provides is that we can move away from just
>>> using
>>> port ids. Instead in the same way as we now reference
>>> rings/mempools/KNIs etc
>>> via pointer, we can do the same with ethernet ports as pktdevs on the
>>> data path.
>>> There was discussion previously on moving beyond 8-bit port ids. If we
>>> look to
>>> use ethdev as a common abstraction, I feel that change will soon have
>>> to be made
>>> causing a large amount of code churn.
>> Hi Richard,
>>
>> First, thank you both for taking the time to look at this. I did not
>> reply to Keith because you, Richard, summarized most of my concerns.
>>
>> I had a brief look to this second proposal. It is more aligned to what I
>> had in mind. But still I feel it is slightly too complicated. I don't
>> like much the necessary (in your approach) MACRO-like pkt_dev_data
>> struct. It is also slightly inconvenient that the user has to do:
>>
>> +struct rte_pkt_dev *in = rte_eth_get_dev(0);
>>
>> +struct rte_pkt_dev *out = rte_ring_get_dev(
>> +rte_ring_create(name, 4096, rte_socket_id(), 
>> 0));
>>
>>
>>
>> What about something like (~pseudo-code):
>>
>> rte_pkt_dev_data.h:
>>
>> enum rte_pkt_dev_type{
>>  RTE_PKT_DEV_ETH,
>>  RTE_PKT_DEV_RING,
>>  RTE_PKT_DEV_KNI,
>>  //Keep adding as more PMDs are supported
>> };
>>
>>
>> //This struct may be redundant if there is nothing more
>> struct rte_pkt_dev_data{
>>  enum rte_pkt_dev_type;
>>  //Placeholder, maybe we need more...
>> };
>>
>> //Make RX/TX pktdev APIs more readable, but not really needed
>> typedef void pkt_dev_t;
>>
>> (In all PMDs and e.g. KNI and RINGs):
>>
>>   struct rte_eth_dev {
>>  struct rte_pkt_dev_data pkt_dev;//
>> <++
>>  eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
>>  eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function.
>> */
>>  struct rte_eth_dev_data *data;  /**< Pointer to device 

[dpdk-dev] [PATCH RFC 00/10] Add a VXLAN sample

2015-04-20 Thread Liu, Jijiang

Any comments on this RFC patch set ?

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Jijiang Liu
> Sent: Thursday, April 16, 2015 11:56 AM
> To: dev at dpdk.org; Gilmore, Walter E; Long, Thomas
> Subject: [dpdk-dev] [PATCH RFC 00/10] Add a VXLAN sample
> 
> This VXLAN example simulates a VXLAN Tunnel endpoint(VTEP) termination in
> DPDK, which is used to demonstrate the offload and filtering capabilities of 
> i40
> NIC for VXLAN packet.
> 
> And this example uses the basic virtio devices management function from vHost
> example, and it uses us-Vhost interface and tunnel filtering mechanism to 
> direct
> the traffic to/from a specific VM.
> 
> In addition, this sample is also designed to show how tunneling protocols can 
> be
> handled. For the vHost interface, we do not need to support zero copy/inter VM
> packet transfer etc. The approach that we take would be of benefit to you in
> that we put a pluggable structure in place so that the application could be 
> easily
> extended to support a new tunneling protocol.
> 
> The software framework is as follows:
> 
>|---|   |---|
>| VM-1(VNI:100) |   |  VM-2(VNI:200)|
>| |--| |--| |   | |--| |--| |
>| |vport0| |vport1| |   | |vport0| |vport1| |
>|-|--|-|--|-|   |-|--|-|--|-|  Guests
>   \   /
>  |-\---/|
>  | us-vHost interface   |
>  |  |-||--| |
>  | decap| | TEP|  | encap   |   DPDK App
>  |  |-||--| |
>  ||||
>  ||||
>   ||
> |-||---|
> |tunnel filter|| IP/L4 Tx csum |
> |IP/L4 csum   || TSO   |
> |packet type  ||   |   NIC
> |CRC strip||   |
> |-||---|
>   ||
>   ||
>   ||
>   /---\
>   VXLAN Tunnel
> 
> The sample will support the followings:
> 1> Tunneling packet recognition.
> 
> 2> The port of UDP tunneling is configurable
> 
> 3> Directing of incoming traffic to the correct queue based on the tunnel 
> filter
> type such as inner MAC address and VNI.
>   The VNI will be assigned from a static internal table based on the us-
> vhost device ID. Each device will receive a unique device ID. The inner MAC 
> will
> be learned from the first packet transmitted from a device.
> 
> 4> Decapsulation of Rx VXLAN traffic. This is a software only
> operation (will use HW header split instead later)
> 
> 5> Encapsulation of Tx VXLAN traffic. This is a software only operation
> 
> 6> Tx outer IP, inner IP and L4 checksum offload
> 
> 7> TSO support for tunneling packet
> 
> Limitations:
> 1. No ARP support
> 2. There are some duplicated source codes because of using the basic virtio
> device management function from vhost sample, but consider that current vhost
> sample is quite complicated and huge enough, and I think we shall have a
> separate sample for tunneling packet processing.
> 3. Currently, only the i40e NIC is supported in the sample, but other types of
> NICs also can be supported later if those NICs are able to support tunneling
> packet filter.
> 
> 
> Jijiang Liu (10):
>   create VXLAN sample framework using virtio device management function
>   add basic VXLAN structures
>   add VXLAN operation APIs
>   support overlay operations
>   Add encapsulation and decapsulation function
>   add udp port configuration
>   add filter type configuration
>   add tx checksum offload configuration
>   add TSO offload configuration
>   add encapsulation and decapsulation flags
> 
>  examples/Makefile  |1 +
>  examples/tep_termination/Makefile  |   58 ++
>  examples/tep_termination/main.c| 1117
> 
>  examples/tep_termination/main.h|  113 
>  examples/tep_termination/vxlan.c   |  242 +++
>  examples/tep_termination/vxlan.h   |   81 +++
>  examples/tep_termination/vxlan_setup.c |  453 +
>  examples/tep_termination/vxlan_setup.h |   76 +++
>  8 files changed, 2141 insertions(+), 0 deletions(-)  create mode 100644
> examples/tep_termination/Makefile  create mode 100644
> examples/tep_termination/main.c  create mode 100644
> examples/tep_termination/main.h  create mode 100644
> examples/tep_termination/vxlan.c  create mode 100644
> examples/tep_termination/vxlan.h  create mode 100644
> examples/tep_termination/vxlan_setup.c
>  create mode 100644 examples/tep_termination/vxlan_setup.h
> 
> --
> 1.7.7.6



[dpdk-dev] [PATCH 00/18] i40e base driver update

2015-04-20 Thread Helin Zhang
To support firmware version 'FVL3E', i40e base driver should
be updated. It mainly includes the base driver update which
contains additional enhancements, fixes, changes for future
use, and so on, together with necessary modifications in
i40e Poll Mode Driver. The details are listed as follows.

Helin Zhang (18):
  i40e: copyright update
  i40e: disable setting phy configuration
  i40e: adjustment of register definitions and relevant
  i40e: support of CEE DCBX on recent firmware versions
  i40e: rework of 'i40e_hmc_get_object_va'
  i40e: support of Fortpark device IDs and mac types
  i40e: rename 'err' to 'perrno'
  i40e: support NVM read on Fortpark, with minor enhancements
  i40e: adminq enhancements
  i40e: support of firmware build number
  i40e: support of building both PF and VF driver together
  i40e: enhancements of AQ commands and common interfaces
  i40e: replacement of 'i40e_debug_read_register()'
  i40e: add new interfaces of AQ commands and relevant
  i40e: support of Fortpark FPGA
  i40e: add more virtual channel operations
  i40e: support of structure and command length check
  i40e: Minor enhancements in i40e_type.h

 lib/librte_pmd_i40e/Makefile   |5 +-
 lib/librte_pmd_i40e/i40e/i40e_adminq.c |   59 +-
 lib/librte_pmd_i40e/i40e/i40e_adminq.h |   16 +-
 lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h |  233 +++-
 lib/librte_pmd_i40e/i40e/i40e_alloc.h  |2 +-
 lib/librte_pmd_i40e/i40e/i40e_common.c |  839 ++--
 lib/librte_pmd_i40e/i40e/i40e_dcb.c|  263 +++-
 lib/librte_pmd_i40e/i40e/i40e_dcb.h|   22 +-
 lib/librte_pmd_i40e/i40e/i40e_diag.c   |2 +-
 lib/librte_pmd_i40e/i40e/i40e_diag.h   |2 +-
 lib/librte_pmd_i40e/i40e/i40e_hmc.c|2 +-
 lib/librte_pmd_i40e/i40e/i40e_hmc.h|2 +-
 lib/librte_pmd_i40e/i40e/i40e_lan_hmc.c|   34 +-
 lib/librte_pmd_i40e/i40e/i40e_lan_hmc.h|7 +-
 lib/librte_pmd_i40e/i40e/i40e_nvm.c|  559 ++--
 lib/librte_pmd_i40e/i40e/i40e_osdep.h  |   64 +-
 lib/librte_pmd_i40e/i40e/i40e_prototype.h  |   49 +-
 lib/librte_pmd_i40e/i40e/i40e_register.h   | 1983 +++-
 lib/librte_pmd_i40e/i40e/i40e_status.h |2 +-
 lib/librte_pmd_i40e/i40e/i40e_type.h   |  123 +-
 lib/librte_pmd_i40e/i40e/i40e_virtchnl.h   |   63 +-
 lib/librte_pmd_i40e/i40e_ethdev.c  |   31 +-
 lib/librte_pmd_i40e/i40e_ethdev.h  |2 +-
 lib/librte_pmd_i40e/i40e_ethdev_vf.c   |2 +-
 lib/librte_pmd_i40e/i40e_fdir.c|2 +-
 lib/librte_pmd_i40e/i40e_logs.h|2 +-
 lib/librte_pmd_i40e/i40e_pf.c  |2 +-
 lib/librte_pmd_i40e/i40e_pf.h  |2 +-
 lib/librte_pmd_i40e/i40e_rxtx.c|2 +-
 lib/librte_pmd_i40e/i40e_rxtx.h|2 +-
 30 files changed, 3934 insertions(+), 444 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH 01/18] i40e: copyright update

2015-04-20 Thread Helin Zhang
Copyright is updated.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/Makefile   | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_adminq.c | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_adminq.h | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_alloc.h  | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_common.c | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_dcb.c| 2 +-
 lib/librte_pmd_i40e/i40e/i40e_dcb.h| 2 +-
 lib/librte_pmd_i40e/i40e/i40e_diag.c   | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_diag.h   | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_hmc.c| 2 +-
 lib/librte_pmd_i40e/i40e/i40e_hmc.h| 2 +-
 lib/librte_pmd_i40e/i40e/i40e_lan_hmc.c| 2 +-
 lib/librte_pmd_i40e/i40e/i40e_lan_hmc.h| 2 +-
 lib/librte_pmd_i40e/i40e/i40e_nvm.c| 2 +-
 lib/librte_pmd_i40e/i40e/i40e_osdep.h  | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_prototype.h  | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_register.h   | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_status.h | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_type.h   | 2 +-
 lib/librte_pmd_i40e/i40e/i40e_virtchnl.h   | 2 +-
 lib/librte_pmd_i40e/i40e_ethdev.c  | 2 +-
 lib/librte_pmd_i40e/i40e_ethdev.h  | 2 +-
 lib/librte_pmd_i40e/i40e_ethdev_vf.c   | 2 +-
 lib/librte_pmd_i40e/i40e_fdir.c| 2 +-
 lib/librte_pmd_i40e/i40e_logs.h| 2 +-
 lib/librte_pmd_i40e/i40e_pf.c  | 2 +-
 lib/librte_pmd_i40e/i40e_pf.h  | 2 +-
 lib/librte_pmd_i40e/i40e_rxtx.c| 2 +-
 lib/librte_pmd_i40e/i40e_rxtx.h| 2 +-
 30 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/lib/librte_pmd_i40e/Makefile b/lib/librte_pmd_i40e/Makefile
index 64bab16..86be3f7 100644
--- a/lib/librte_pmd_i40e/Makefile
+++ b/lib/librte_pmd_i40e/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq.c 
b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
index e098ed6..e8e762f 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
@@ -1,6 +1,6 @@
 
/***

-Copyright (c) 2013 - 2014, Intel Corporation
+Copyright (c) 2013 - 2015, Intel Corporation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq.h 
b/lib/librte_pmd_i40e/i40e/i40e_adminq.h
index ea611bd..a8c6afe 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq.h
@@ -1,6 +1,6 @@
 
/***

-Copyright (c) 2013 - 2014, Intel Corporation
+Copyright (c) 2013 - 2015, Intel Corporation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h 
b/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
index 5ea9b7d..0fe9d1c 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
@@ -1,6 +1,6 @@
 
/***

-Copyright (c) 2013 - 2014, Intel Corporation
+Copyright (c) 2013 - 2015, Intel Corporation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
diff --git a/lib/librte_pmd_i40e/i40e/i40e_alloc.h 
b/lib/librte_pmd_i40e/i40e/i40e_alloc.h
index 6e81cd5..38c2f65 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_alloc.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_alloc.h
@@ -1,6 +1,6 @@
 
/***

-Copyright (c) 2013 - 2014, Intel Corporation
+Copyright (c) 2013 - 2015, Intel Corporation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
diff --git a/lib/librte_pmd_i40e/i40e/i40e_common.c 
b/lib/librte_pmd_i40e/i40e/i40e_common.c
index ffaa777..23f14c1 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_common.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_common.c
@@ -1,6 +1,6 @@
 
/***

-Copyright (c) 2013 - 2014, Intel Corporation
+Copyright (c) 2013 - 2015, Intel Corporation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
diff --git a/lib/librte_pmd_i40e/i40e/i40e_dcb.c 
b/lib/librte_pmd_i40e/i40e/i40e_dcb.c
index d067028..b04ed56 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_dcb.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_dcb.c
@@ -1,6 +1,6 @@
 
/***

-Copyright (c) 2013 - 2014, Intel Corporation
+Copyright (c) 2013 - 2015, Intel Corporation
 All rights reserved.


[dpdk-dev] [PATCH 02/18] i40e: disable setting phy configuration

2015-04-20 Thread Helin Zhang
There was a known link issue on 40G ports on NVM version (FVL3E),
when setting the phy configuration. As a workaround, setting the phy
configuration should be disabled. The impact is that the link cannot
be forcibly configured, which doesn't affect any feature functions.
The workaround can be removed when a formal fix is ready later.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e_ethdev.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index df8686f..9fc2ee7 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -791,6 +791,10 @@ i40e_phy_conf_link(struct i40e_hw *hw, uint8_t abilities, 
uint8_t force_speed)
I40E_LINK_SPEED_100MB;
int ret = -ENOTSUP;

+   /* Skip it on 40G interfaces, as a workaround for the link issue */
+   if (i40e_is_40G_device(hw->device_id))
+   return I40E_SUCCESS;
+
status = i40e_aq_get_phy_capabilities(hw, false, false, &phy_ab,
  NULL);
if (status)
-- 
1.8.1.4



[dpdk-dev] [PATCH 05/18] i40e: rework of 'i40e_hmc_get_object_va'

2015-04-20 Thread Helin Zhang
To simplify the usage of invoking 'i40e_hmc_get_object_va', input
parameters of it were reworked. In addition, minor code style fixes
were added as well.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_lan_hmc.c | 32 +---
 lib/librte_pmd_i40e/i40e/i40e_lan_hmc.h |  5 +
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_lan_hmc.c 
b/lib/librte_pmd_i40e/i40e/i40e_lan_hmc.c
index 7e65efb..fab56aa 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_lan_hmc.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_lan_hmc.c
@@ -863,7 +863,7 @@ static void i40e_write_dword(u8 *hmc_bits,
if (ce_info->width < 32)
mask = ((u32)1 << ce_info->width) - 1;
else
-   mask = 0xFFFFFFFF;
+   mask = ~(u32)0;

/* don't swizzle the bits until after the mask because the mask bits
 * will be in a different bit position on big endian machines
@@ -915,7 +915,7 @@ static void i40e_write_qword(u8 *hmc_bits,
if (ce_info->width < 64)
mask = ((u64)1 << ce_info->width) - 1;
else
-   mask = 0xFFFFFFFFFFFFFFFFUL;
+   mask = ~(u64)0;

/* don't swizzle the bits until after the mask because the mask bits
 * will be in a different bit position on big endian machines
@@ -1045,7 +1045,7 @@ static void i40e_read_dword(u8 *hmc_bits,
if (ce_info->width < 32)
mask = ((u32)1 << ce_info->width) - 1;
else
-   mask = 0xFFFFFFFF;
+   mask = ~(u32)0;

/* shift to correct alignment */
mask <<= shift_width;
@@ -1098,7 +1098,7 @@ static void i40e_read_qword(u8 *hmc_bits,
if (ce_info->width < 64)
mask = ((u64)1 << ce_info->width) - 1;
else
-   mask = 0xFFFFFFFFFFFFFFFFUL;
+   mask = ~(u64)0;

/* shift to correct alignment */
mask <<= shift_width;
@@ -1217,7 +1217,7 @@ static enum i40e_status_code i40e_set_hmc_context(u8 
*context_bytes,

 /**
  * i40e_hmc_get_object_va - retrieves an object's virtual address
- * @hmc_info: pointer to i40e_hmc_info struct
+ * @hw: pointer to the hw structure
  * @object_base: pointer to u64 to get the va
  * @rsrc_type: the hmc resource type
  * @obj_idx: hmc object index
@@ -1225,13 +1225,13 @@ static enum i40e_status_code i40e_set_hmc_context(u8 
*context_bytes,
  * This function retrieves the object's virtual address from the object
  * base pointer.  This function is used for LAN Queue contexts.
  **/
-STATIC
-enum i40e_status_code i40e_hmc_get_object_va(struct i40e_hmc_info *hmc_info,
+enum i40e_status_code i40e_hmc_get_object_va(struct i40e_hw *hw,
u8 **object_base,
enum i40e_hmc_lan_rsrc_type rsrc_type,
u32 obj_idx)
 {
u32 obj_offset_in_sd, obj_offset_in_pd;
+   struct i40e_hmc_info *hmc_info = &hw->hmc;
struct i40e_hmc_sd_entry *sd_entry;
struct i40e_hmc_pd_entry *pd_entry;
u32 pd_idx, pd_lmt, rel_pd_idx;
@@ -1303,8 +1303,7 @@ enum i40e_status_code 
i40e_get_lan_tx_queue_context(struct i40e_hw *hw,
enum i40e_status_code err;
u8 *context_bytes;

-   err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
-I40E_HMC_LAN_TX, queue);
+   err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_TX, 
queue);
if (err < 0)
return err;

@@ -1323,8 +1322,7 @@ enum i40e_status_code 
i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
enum i40e_status_code err;
u8 *context_bytes;

-   err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
-I40E_HMC_LAN_TX, queue);
+   err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_TX, 
queue);
if (err < 0)
return err;

@@ -1344,8 +1342,7 @@ enum i40e_status_code 
i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
enum i40e_status_code err;
u8 *context_bytes;

-   err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
-I40E_HMC_LAN_TX, queue);
+   err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_TX, 
queue);
if (err < 0)
return err;

@@ -1366,8 +1363,7 @@ enum i40e_status_code 
i40e_get_lan_rx_queue_context(struct i40e_hw *hw,
enum i40e_status_code err;
u8 *context_bytes;

-   err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
-I40E_HMC_LAN_RX, queue);
+   err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_RX, 
queue);
if (err < 0)
return err;

@@ -1386,8 +1382,7 @@ enum i40e_status_code 
i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
enum i40e_status_code err;
u8 *context_bytes;

-   err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
-

[dpdk-dev] [PATCH 07/18] i40e: rename 'err' to 'perrno'

2015-04-20 Thread Helin Zhang
To be consistent with original base driver, the variable name of
'err' should be renamed to 'perrno'.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_nvm.c | 118 ++--
 1 file changed, 59 insertions(+), 59 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_nvm.c 
b/lib/librte_pmd_i40e/i40e/i40e_nvm.c
index 73b8997..2b70508 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_nvm.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_nvm.c
@@ -481,25 +481,25 @@ i40e_validate_nvm_checksum_exit:

 STATIC enum i40e_status_code i40e_nvmupd_state_init(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
-   u8 *bytes, int *err);
+   u8 *bytes, int *perrno);
 STATIC enum i40e_status_code i40e_nvmupd_state_reading(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
-   u8 *bytes, int *err);
+   u8 *bytes, int *perrno);
 STATIC enum i40e_status_code i40e_nvmupd_state_writing(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
-   u8 *bytes, int *err);
+   u8 *bytes, int *perrno);
 STATIC enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
-   int *err);
+   int *perrno);
 STATIC enum i40e_status_code i40e_nvmupd_nvm_erase(struct i40e_hw *hw,
   struct i40e_nvm_access *cmd,
-  int *err);
+  int *perrno);
 STATIC enum i40e_status_code i40e_nvmupd_nvm_write(struct i40e_hw *hw,
   struct i40e_nvm_access *cmd,
-  u8 *bytes, int *err);
+  u8 *bytes, int *perrno);
 STATIC enum i40e_status_code i40e_nvmupd_nvm_read(struct i40e_hw *hw,
  struct i40e_nvm_access *cmd,
- u8 *bytes, int *err);
+ u8 *bytes, int *perrno);
 STATIC inline u8 i40e_nvmupd_get_module(u32 val)
 {
return (u8)(val & I40E_NVM_MOD_PNT_MASK);
@@ -514,38 +514,38 @@ STATIC inline u8 i40e_nvmupd_get_transaction(u32 val)
  * @hw: pointer to hardware structure
  * @cmd: pointer to nvm update command
  * @bytes: pointer to the data buffer
- * @err: pointer to return error code
+ * @perrno: pointer to return error code
  *
  * Dispatches command depending on what update state is current
  **/
 enum i40e_status_code i40e_nvmupd_command(struct i40e_hw *hw,
  struct i40e_nvm_access *cmd,
- u8 *bytes, int *err)
+ u8 *bytes, int *perrno)
 {
enum i40e_status_code status;

DEBUGFUNC("i40e_nvmupd_command");

/* assume success */
-   *err = 0;
+   *perrno = 0;

switch (hw->nvmupd_state) {
case I40E_NVMUPD_STATE_INIT:
-   status = i40e_nvmupd_state_init(hw, cmd, bytes, err);
+   status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno);
break;

case I40E_NVMUPD_STATE_READING:
-   status = i40e_nvmupd_state_reading(hw, cmd, bytes, err);
+   status = i40e_nvmupd_state_reading(hw, cmd, bytes, perrno);
break;

case I40E_NVMUPD_STATE_WRITING:
-   status = i40e_nvmupd_state_writing(hw, cmd, bytes, err);
+   status = i40e_nvmupd_state_writing(hw, cmd, bytes, perrno);
break;

default:
/* invalid state, should never happen */
status = I40E_NOT_SUPPORTED;
-   *err = -ESRCH;
+   *perrno = -ESRCH;
break;
}
return status;
@@ -556,29 +556,29 @@ enum i40e_status_code i40e_nvmupd_command(struct i40e_hw 
*hw,
  * @hw: pointer to hardware structure
  * @cmd: pointer to nvm update command buffer
  * @bytes: pointer to the data buffer
- * @err: pointer to return error code
+ * @perrno: pointer to return error code
  *
  * Process legitimate commands of the Init state and conditionally set next
  * state. Reject all other commands.
  **/
 STATIC enum i40e_status_code i40e_nvmupd_state_init(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
- 

[dpdk-dev] [PATCH 08/18] i40e: support NVM read on Fortpark, with minor enhancements

2015-04-20 Thread Helin Zhang
NVM read on Fortpark should be done via AQ command, while others
via SRCTL register. So it should check the MAC type first, and then
use different functions to do NVM read. Other minor changes were
also added for the enhancements of NVM write, checksum calculation.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/Makefile   |   1 +
 lib/librte_pmd_i40e/i40e/i40e_adminq.h |   9 +-
 lib/librte_pmd_i40e/i40e/i40e_nvm.c| 461 +++--
 3 files changed, 387 insertions(+), 84 deletions(-)

diff --git a/lib/librte_pmd_i40e/Makefile b/lib/librte_pmd_i40e/Makefile
index 86be3f7..a921bf1 100644
--- a/lib/librte_pmd_i40e/Makefile
+++ b/lib/librte_pmd_i40e/Makefile
@@ -68,6 +68,7 @@ CFLAGS_BASE_DRIVER += -Wno-missing-field-initializers
 CFLAGS_BASE_DRIVER += -Wno-pointer-to-int-cast
 CFLAGS_BASE_DRIVER += -Wno-format-nonliteral
 CFLAGS_BASE_DRIVER += -Wno-format-security
+CFLAGS_BASE_DRIVER += -Wno-unused-variable

 ifeq ($(shell test $(GCC_VERSION) -ge 44 && echo 1), 1)
 CFLAGS_BASE_DRIVER += -Wno-unused-but-set-variable
diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq.h 
b/lib/librte_pmd_i40e/i40e/i40e_adminq.h
index a8c6afe..d8c1fb6 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq.h
@@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define _I40E_ADMINQ_H_

 #include "i40e_osdep.h"
+#include "i40e_status.h"
 #include "i40e_adminq_cmd.h"

 #define I40E_ADMINQ_DESC(R, i)   \
@@ -116,7 +117,7 @@ struct i40e_adminq_info {
  * i40e_aq_rc_to_posix - convert errors to user-land codes
  * aq_rc: AdminQ error code to convert
  **/
-STATIC inline int i40e_aq_rc_to_posix(u16 aq_rc)
+STATIC inline int i40e_aq_rc_to_posix(int aq_ret, u16 aq_rc)
 {
int aq_to_posix[] = {
0,   /* I40E_AQ_RC_OK */
@@ -144,6 +145,12 @@ STATIC inline int i40e_aq_rc_to_posix(u16 aq_rc)
-EFBIG,  /* I40E_AQ_RC_EFBIG */
};

+   /* aq_rc is invalid if AQ timed out */
+   if (aq_ret == I40E_ERR_ADMIN_QUEUE_TIMEOUT)
+   return -EAGAIN;
+
+   if (aq_rc >= (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0])))
+   return -ERANGE;
return aq_to_posix[aq_rc];
 }

diff --git a/lib/librte_pmd_i40e/i40e/i40e_nvm.c 
b/lib/librte_pmd_i40e/i40e/i40e_nvm.c
index 2b70508..2f0ed7c 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_nvm.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_nvm.c
@@ -33,6 +33,18 @@ POSSIBILITY OF SUCH DAMAGE.

 #include "i40e_prototype.h"

+enum i40e_status_code i40e_read_nvm_word_srctl(struct i40e_hw *hw, u16 offset,
+  u16 *data);
+enum i40e_status_code i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
+   u16 *data);
+enum i40e_status_code i40e_read_nvm_buffer_srctl(struct i40e_hw *hw, u16 
offset,
+u16 *words, u16 *data);
+enum i40e_status_code i40e_read_nvm_buffer_aq(struct i40e_hw *hw, u16 offset,
+ u16 *words, u16 *data);
+enum i40e_status_code i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+  u32 offset, u16 words, void *data,
+  bool last_command);
+
 /**
  * i40e_init_nvm_ops - Initialize NVM function pointers
  * @hw: pointer to the HW structure
@@ -70,7 +82,7 @@ enum i40e_status_code i40e_init_nvm(struct i40e_hw *hw)
} else { /* Blank programming mode */
nvm->blank_nvm_mode = true;
ret_code = I40E_ERR_NVM_BLANK_MODE;
-   DEBUGOUT("NVM init error: unsupported blank mode.\n");
+   i40e_debug(hw, I40E_DEBUG_NVM, "NVM init error: unsupported 
blank mode.\n");
}

return ret_code;
@@ -89,7 +101,7 @@ enum i40e_status_code i40e_acquire_nvm(struct i40e_hw *hw,
 {
enum i40e_status_code ret_code = I40E_SUCCESS;
u64 gtime, timeout;
-   u64 time = 0;
+   u64 time_left = 0;

DEBUGFUNC("i40e_acquire_nvm");

@@ -97,40 +109,39 @@ enum i40e_status_code i40e_acquire_nvm(struct i40e_hw *hw,
goto i40e_i40e_acquire_nvm_exit;

ret_code = i40e_aq_request_resource(hw, I40E_NVM_RESOURCE_ID, access,
-   0, &time, NULL);
+   0, &time_left, NULL);
/* Reading the Global Device Timer */
gtime = rd32(hw, I40E_GLVFGEN_TIMER);

/* Store the timeout */
-   hw->nvm.hw_semaphore_timeout = I40E_MS_TO_GTIME(time) + gtime;
+   hw->nvm.hw_semaphore_timeout = I40E_MS_TO_GTIME(time_left) + gtime;

-   if (ret_code != I40E_SUCCESS) {
-   /* Set the polling timeout */
-   if (time > I40E_MAX_NVM_TIMEOUT)
-   timeout = I40E_MS_TO_GTIME(I40E_MAX_NVM_TIMEOUT)
- + gtime;
-   else
-   timeout = 

[dpdk-dev] [PATCH 09/18] i40e: adminq enhancements

2015-04-20 Thread Helin Zhang
To be more extendable, 'i40e_is_vf()' was introduced to check if
the MAC type is VF. Also other minor enhancements were added for
'i40e_init_adminq()'.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_adminq.c | 31 ++-
 lib/librte_pmd_i40e/i40e/i40e_type.h   |  7 ++-
 2 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq.c 
b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
index e8e762f..42df290 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
@@ -58,7 +58,7 @@ STATIC INLINE bool i40e_is_nvm_update_op(struct i40e_aq_desc 
*desc)
 STATIC void i40e_adminq_init_regs(struct i40e_hw *hw)
 {
/* set head and tail registers in our local struct */
-   if (hw->mac.type == I40E_MAC_VF) {
+   if (i40e_is_vf(hw)) {
hw->aq.asq.tail = I40E_VF_ATQT1;
hw->aq.asq.head = I40E_VF_ATQH1;
hw->aq.asq.len  = I40E_VF_ATQLEN1;
@@ -563,7 +563,6 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
u16 eetrack_lo, eetrack_hi;
int retry = 0;
 #endif
-
/* verify input for valid configuration */
if ((hw->aq.num_arq_entries == 0) ||
(hw->aq.num_asq_entries == 0) ||
@@ -594,6 +593,11 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
goto init_adminq_free_asq;

 #ifndef VF_DRIVER
+#ifdef INTEGRATED_VF
+   /* VF has no need of firmware */
+   if (i40e_is_vf(hw))
+   goto init_adminq_exit;
+#endif
/* There are some cases where the firmware may not be quite ready
 * for AdminQ operations, so we retry the AdminQ setup a few times
 * if we see timeouts in this first AQ call.
@@ -615,7 +619,8 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
goto init_adminq_free_arq;

/* get the NVM version info */
-   i40e_read_nvm_word(hw, I40E_SR_NVM_IMAGE_VERSION, >nvm.version);
+   i40e_read_nvm_word(hw, I40E_SR_NVM_DEV_STARTER_VERSION,
+  >nvm.version);
i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_LO, _lo);
i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_HI, _hi);
hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
@@ -627,7 +632,8 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)

/* pre-emptive resource lock release */
i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
-   hw->aq.nvm_busy = false;
+   hw->aq.nvm_release_on_done = false;
+   hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;

ret_code = i40e_aq_set_hmc_resource_profile(hw,
I40E_HMC_PROFILE_DEFAULT,
@@ -772,14 +778,6 @@ enum i40e_status_code i40e_asq_send_command(struct i40e_hw 
*hw,
goto asq_send_command_exit;
}

-#ifndef VF_DRIVER
-   if (i40e_is_nvm_update_op(desc) && hw->aq.nvm_busy) {
-   i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: NVM busy.\n");
-   status = I40E_ERR_NVM;
-   goto asq_send_command_exit;
-   }
-
-#endif
details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
if (cmd_details) {
i40e_memcpy(details,
@@ -931,11 +929,6 @@ enum i40e_status_code i40e_asq_send_command(struct i40e_hw 
*hw,
status = I40E_ERR_ADMIN_QUEUE_TIMEOUT;
}

-#ifndef VF_DRIVER
-   if (!status && i40e_is_nvm_update_op(desc))
-   hw->aq.nvm_busy = true;
-
-#endif /* VF_DRIVER */
 asq_send_command_error:
i40e_release_spinlock(>aq.asq_spinlock);
 asq_send_command_exit:
@@ -989,9 +982,6 @@ enum i40e_status_code i40e_clean_arq_element(struct i40e_hw 
*hw,
ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK);
if (ntu == ntc) {
/* nothing to do - shouldn't need to update ring's values */
-   i40e_debug(hw,
-  I40E_DEBUG_AQ_MESSAGE,
-  "AQRX: Queue is empty.\n");
ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK;
goto clean_arq_element_out;
}
@@ -1055,7 +1045,6 @@ clean_arq_element_out:

 #ifndef VF_DRIVER
if (i40e_is_nvm_update_op(>desc)) {
-   hw->aq.nvm_busy = false;
if (hw->aq.nvm_release_on_done) {
i40e_release_nvm(hw);
hw->aq.nvm_release_on_done = false;
diff --git a/lib/librte_pmd_i40e/i40e/i40e_type.h 
b/lib/librte_pmd_i40e/i40e/i40e_type.h
index 1565095..1eb243c 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_type.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_type.h
@@ -576,6 +576,11 @@ struct i40e_hw {
u32 debug_mask;
 };

+static inline bool i40e_is_vf(struct i40e_hw *hw)
+{
+   return (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_FPK_VF);
+}
+
 struct i40e_driver_version {
u8 major_version;
u8 minor_version;

[dpdk-dev] [PATCH 10/18] i40e: support of firmware build number

2015-04-20 Thread Helin Zhang
Getting and checking of the firmware build number was added, as the
firmware supports it.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_adminq.c| 1 +
 lib/librte_pmd_i40e/i40e/i40e_adminq.h| 5 +++--
 lib/librte_pmd_i40e/i40e/i40e_common.c| 4 
 lib/librte_pmd_i40e/i40e/i40e_prototype.h | 1 +
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq.c 
b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
index 42df290..91b3568 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
@@ -606,6 +606,7 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
ret_code = i40e_aq_get_firmware_version(hw,
>aq.fw_maj_ver,
>aq.fw_min_ver,
+   >aq.fw_build,
>aq.api_maj_ver,
>aq.api_min_ver,
NULL);
diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq.h 
b/lib/librte_pmd_i40e/i40e/i40e_adminq.h
index d8c1fb6..23a8e8d 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq.h
@@ -100,9 +100,9 @@ struct i40e_adminq_info {
u16 asq_buf_size;   /* send queue buffer size */
u16 fw_maj_ver; /* firmware major version */
u16 fw_min_ver; /* firmware minor version */
+   u32 fw_build;   /* firmware build number */
u16 api_maj_ver;/* api major version */
u16 api_min_ver;/* api minor version */
-   bool nvm_busy;
bool nvm_release_on_done;

struct i40e_spinlock asq_spinlock; /* Send queue spinlock */
@@ -156,7 +156,8 @@ STATIC inline int i40e_aq_rc_to_posix(int aq_ret, u16 aq_rc)

 /* general information */
 #define I40E_AQ_LARGE_BUF  512
-#define I40E_ASQ_CMD_TIMEOUT   100  /* msecs */
+#define I40E_ASQ_CMD_TIMEOUT   250  /* msecs */
+#define I40E_ASQ_CMD_TIMEOUT_FPGA  4  /* msecs */

 void i40e_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
   u16 opcode);
diff --git a/lib/librte_pmd_i40e/i40e/i40e_common.c 
b/lib/librte_pmd_i40e/i40e/i40e_common.c
index 2256de9..3dd8f04 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_common.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_common.c
@@ -1926,6 +1926,7 @@ enum i40e_status_code i40e_aq_get_switch_config(struct 
i40e_hw *hw,
  * @hw: pointer to the hw struct
  * @fw_major_version: firmware major version
  * @fw_minor_version: firmware minor version
+ * @fw_build: firmware build number
  * @api_major_version: major queue version
  * @api_minor_version: minor queue version
  * @cmd_details: pointer to command details structure or NULL
@@ -1934,6 +1935,7 @@ enum i40e_status_code i40e_aq_get_switch_config(struct 
i40e_hw *hw,
  **/
 enum i40e_status_code i40e_aq_get_firmware_version(struct i40e_hw *hw,
u16 *fw_major_version, u16 *fw_minor_version,
+   u32 *fw_build,
u16 *api_major_version, u16 *api_minor_version,
struct i40e_asq_cmd_details *cmd_details)
 {
@@ -1951,6 +1953,8 @@ enum i40e_status_code i40e_aq_get_firmware_version(struct 
i40e_hw *hw,
*fw_major_version = LE16_TO_CPU(resp->fw_major);
if (fw_minor_version != NULL)
*fw_minor_version = LE16_TO_CPU(resp->fw_minor);
+   if (fw_build != NULL)
+   *fw_build = LE32_TO_CPU(resp->fw_build);
if (api_major_version != NULL)
*api_major_version = LE16_TO_CPU(resp->api_major);
if (api_minor_version != NULL)
diff --git a/lib/librte_pmd_i40e/i40e/i40e_prototype.h 
b/lib/librte_pmd_i40e/i40e/i40e_prototype.h
index db63f97..79f4e38 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_prototype.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_prototype.h
@@ -86,6 +86,7 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink);

 enum i40e_status_code i40e_aq_get_firmware_version(struct i40e_hw *hw,
u16 *fw_major_version, u16 *fw_minor_version,
+   u32 *fw_build,
u16 *api_major_version, u16 *api_minor_version,
struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_debug_write_register(struct i40e_hw *hw,
-- 
1.8.1.4



[dpdk-dev] [PATCH 11/18] i40e: support of building both PF and VF driver together

2015-04-20 Thread Helin Zhang
The macros PF_DRIVER, VF_DRIVER and INTEGRATED_VF were defined to
support building both the PF and VF drivers together. PF_DRIVER needs
to be defined if a build is for the PF only, while VF_DRIVER needs to
be defined if it is for the VF only. PF_DRIVER, VF_DRIVER and
INTEGRATED_VF are all needed for building the PF and VF drivers together.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/Makefile  |  2 +-
 lib/librte_pmd_i40e/i40e/i40e_adminq.c| 14 +++---
 lib/librte_pmd_i40e/i40e/i40e_common.c|  6 --
 lib/librte_pmd_i40e/i40e/i40e_prototype.h |  4 ++--
 4 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/lib/librte_pmd_i40e/Makefile b/lib/librte_pmd_i40e/Makefile
index a921bf1..22f0716 100644
--- a/lib/librte_pmd_i40e/Makefile
+++ b/lib/librte_pmd_i40e/Makefile
@@ -37,7 +37,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_i40e.a

 CFLAGS += -O3
-CFLAGS += $(WERROR_FLAGS)
+CFLAGS += $(WERROR_FLAGS) -DPF_DRIVER -DVF_DRIVER -DINTEGRATED_VF

 EXPORT_MAP := rte_pmd_i40e_version.map

diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq.c 
b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
index 91b3568..8f9e870 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
@@ -37,7 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "i40e_adminq.h"
 #include "i40e_prototype.h"

-#ifndef VF_DRIVER
+#ifdef PF_DRIVER
 /**
  * i40e_is_nvm_update_op - return true if this is an NVM update operation
  * @desc: API request descriptor
@@ -48,7 +48,7 @@ STATIC INLINE bool i40e_is_nvm_update_op(struct i40e_aq_desc 
*desc)
desc->opcode == CPU_TO_LE16(i40e_aqc_opc_nvm_update));
 }

-#endif /* VF_DRIVER */
+#endif /* PF_DRIVER */
 /**
  *  i40e_adminq_init_regs - Initialize AdminQ registers
  *  @hw: pointer to the hardware structure
@@ -559,7 +559,7 @@ enum i40e_status_code i40e_shutdown_arq(struct i40e_hw *hw)
 enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
 {
enum i40e_status_code ret_code;
-#ifndef VF_DRIVER
+#ifdef PF_DRIVER
u16 eetrack_lo, eetrack_hi;
int retry = 0;
 #endif
@@ -592,7 +592,7 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
if (ret_code != I40E_SUCCESS)
goto init_adminq_free_asq;

-#ifndef VF_DRIVER
+#ifdef PF_DRIVER
 #ifdef INTEGRATED_VF
/* VF has no need of firmware */
if (i40e_is_vf(hw))
@@ -640,13 +640,13 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
I40E_HMC_PROFILE_DEFAULT,
0,
NULL);
+#endif /* PF_DRIVER */
ret_code = I40E_SUCCESS;

-#endif /* VF_DRIVER */
/* success! */
goto init_adminq_exit;

-#ifndef VF_DRIVER
+#ifdef PF_DRIVER
 init_adminq_free_arq:
i40e_shutdown_arq(hw);
 #endif
@@ -1044,7 +1044,7 @@ clean_arq_element_out:
*pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc);
i40e_release_spinlock(>aq.arq_spinlock);

-#ifndef VF_DRIVER
+#ifdef PF_DRIVER
if (i40e_is_nvm_update_op(>desc)) {
if (hw->aq.nvm_release_on_done) {
i40e_release_nvm(hw);
diff --git a/lib/librte_pmd_i40e/i40e/i40e_common.c 
b/lib/librte_pmd_i40e/i40e/i40e_common.c
index 3dd8f04..7a322f1 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_common.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_common.c
@@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
  * This function sets the mac type of the adapter based on the
  * vendor ID and device ID stored in the hw structure.
  **/
-#ifdef VF_DRIVER
+#if defined(INTEGRATED_VF) || defined(VF_DRIVER)
 enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw)
 #else
 STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw)
@@ -564,7 +564,7 @@ struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
I40E_PTT_UNUSED_ENTRY(255)
 };

-#ifndef VF_DRIVER
+#ifdef PF_DRIVER

 /**
  * i40e_init_shared_code - Initialize the shared code
@@ -4771,6 +4771,8 @@ enum i40e_status_code 
i40e_aq_configure_partition_bw(struct i40e_hw *hw,

return status;
 }
+#endif /* PF_DRIVER */
+#ifdef VF_DRIVER

 /**
  * i40e_aq_send_msg_to_pf
diff --git a/lib/librte_pmd_i40e/i40e/i40e_prototype.h 
b/lib/librte_pmd_i40e/i40e/i40e_prototype.h
index 79f4e38..d143183 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_prototype.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_prototype.h
@@ -77,7 +77,7 @@ void i40e_resume_aq(struct i40e_hw *hw);
 bool i40e_check_asq_alive(struct i40e_hw *hw);
 enum i40e_status_code i40e_aq_queue_shutdown(struct i40e_hw *hw, bool 
unloading);

-#ifndef VF_DRIVER
+#ifdef PF_DRIVER

 u32 i40e_led_get(struct i40e_hw *hw);
 void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink);
@@ -401,7 +401,7 @@ enum i40e_status_code i40e_nvmupd_command(struct i40e_hw 
*hw,
  struct i40e_nvm_access *cmd,
  u8 *bytes, int *);
 void 

[dpdk-dev] [PATCH 12/18] i40e: enhancements of AQ commands and common interfaces

2015-04-20 Thread Helin Zhang
'LE16_TO_CPU' was added in 'i40e_debug_aq()' to support both big-endian
and little-endian hosts correctly. A fix for determining the PF number
was added to 'i40e_init_shared_code()', which solves the issue of
PF pass-through. Enhancements were also added to 'i40e_led_get()'
and 'i40e_led_set()'. 'i40e_update_link_info()' was removed and
replaced by direct use of 'i40e_aq_get_link_info()'.
'i40e_aq_debug_read_register()' was added for reading internal
registers for debugging purposes.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_common.c| 322 +++---
 lib/librte_pmd_i40e/i40e/i40e_prototype.h |  11 +-
 lib/librte_pmd_i40e/i40e/i40e_type.h  |   6 +
 3 files changed, 215 insertions(+), 124 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_common.c 
b/lib/librte_pmd_i40e/i40e/i40e_common.c
index 7a322f1..9f8d720 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_common.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_common.c
@@ -115,47 +115,51 @@ void i40e_debug_aq(struct i40e_hw *hw, enum 
i40e_debug_mask mask, void *desc,
 {
struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc;
u16 len = LE16_TO_CPU(aq_desc->datalen);
-   u8 *aq_buffer = (u8 *)buffer;
-   u32 data[4];
-   u32 i = 0;
+   u8 *buf = (u8 *)buffer;
+   u16 i = 0;

if ((!(mask & hw->debug_mask)) || (desc == NULL))
return;

i40e_debug(hw, mask,
   "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 
0x%04X\n",
-  aq_desc->opcode, aq_desc->flags, aq_desc->datalen,
-  aq_desc->retval);
+  LE16_TO_CPU(aq_desc->opcode),
+  LE16_TO_CPU(aq_desc->flags),
+  LE16_TO_CPU(aq_desc->datalen),
+  LE16_TO_CPU(aq_desc->retval));
i40e_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
-  aq_desc->cookie_high, aq_desc->cookie_low);
+  LE32_TO_CPU(aq_desc->cookie_high),
+  LE32_TO_CPU(aq_desc->cookie_low));
i40e_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
-  aq_desc->params.internal.param0,
-  aq_desc->params.internal.param1);
+  LE32_TO_CPU(aq_desc->params.internal.param0),
+  LE32_TO_CPU(aq_desc->params.internal.param1));
i40e_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
-  aq_desc->params.external.addr_high,
-  aq_desc->params.external.addr_low);
+  LE32_TO_CPU(aq_desc->params.external.addr_high),
+  LE32_TO_CPU(aq_desc->params.external.addr_low));

if ((buffer != NULL) && (aq_desc->datalen != 0)) {
-   i40e_memset(data, 0, sizeof(data), I40E_NONDMA_MEM);
i40e_debug(hw, mask, "AQ CMD Buffer:\n");
if (buf_len < len)
len = buf_len;
-   for (i = 0; i < len; i++) {
-   data[((i % 16) / 4)] |=
-   ((u32)aq_buffer[i]) << (8 * (i % 4));
-   if ((i % 16) == 15) {
-   i40e_debug(hw, mask,
-  "\t0x%04X  %08X %08X %08X %08X\n",
-  i - 15, data[0], data[1], data[2],
-  data[3]);
-   i40e_memset(data, 0, sizeof(data),
-   I40E_NONDMA_MEM);
-   }
+   /* write the full 16-byte chunks */
+   for (i = 0; i < (len - 16); i += 16)
+   i40e_debug(hw, mask,
+  "\t0x%04X  %02X %02X %02X %02X %02X %02X 
%02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
+  i, buf[i], buf[i+1], buf[i+2], buf[i+3],
+  buf[i+4], buf[i+5], buf[i+6], buf[i+7],
+  buf[i+8], buf[i+9], buf[i+10], buf[i+11],
+  buf[i+12], buf[i+13], buf[i+14], buf[i+15]);
+   /* write whatever's left over without overrunning the buffer */
+   if (i < len) {
+   char d_buf[80];
+   int j = 0;
+
+   memset(d_buf, 0, sizeof(d_buf));
+   j += sprintf(d_buf, "\t0x%04X ", i);
+   while (i < len)
+   j += sprintf(_buf[j], " %02X", buf[i++]);
+   i40e_debug(hw, mask, "%s\n", d_buf);
}
-   if ((i % 16) != 0)
-   i40e_debug(hw, mask, "\t0x%04X  %08X %08X %08X %08X\n",
-  i - (i % 16), data[0], data[1], data[2],
-  data[3]);
}
 }

@@ -564,6 +568,30 @@ struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
I40E_PTT_UNUSED_ENTRY(255)
 };

+
+/**

[dpdk-dev] [PATCH 13/18] i40e: replacement of 'i40e_debug_read_register()'

2015-04-20 Thread Helin Zhang
As the base driver provides 'i40e_aq_debug_read_register()', the
functionally equivalent 'i40e_debug_read_register()' can be replaced by it.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e_ethdev.c | 22 ++
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index 8832609..5bc2504 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -5623,25 +5623,6 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype)
return flowtype_table[pctype];
 }

-static int
-i40e_debug_read_register(struct i40e_hw *hw, uint32_t addr, uint64_t *val)
-{
-   struct i40e_aq_desc desc;
-   enum i40e_status_code status;
-
-   i40e_fill_default_direct_cmd_desc(, i40e_aqc_opc_debug_read_reg);
-   desc.params.internal.param1 = rte_cpu_to_le_32(addr);
-   status = i40e_asq_send_command(hw, , NULL, 0, NULL);
-   if (status < 0)
-   return status;
-
-   *val = ((uint64_t)(rte_le_to_cpu_32(desc.params.internal.param2)) <<
-   (CHAR_BIT * sizeof(uint32_t))) +
-   rte_le_to_cpu_32(desc.params.internal.param3);
-
-   return status;
-}
-
 /*
  * On X710, performance number is far from the expectation on recent firmware
  * versions; on XL710, performance number is also far from the expectation on
@@ -5692,7 +5673,8 @@ i40e_configure_registers(struct i40e_hw *hw)
I40E_GL_SWR_PM_UP_THR_EF_VALUE;
}

-   ret = i40e_debug_read_register(hw, reg_table[i].addr, );
+   ret = i40e_aq_debug_read_register(hw, reg_table[i].addr,
+   , NULL);
if (ret < 0) {
PMD_DRV_LOG(ERR, "Failed to read from 0x%"PRIx32,
reg_table[i].addr);
-- 
1.8.1.4



[dpdk-dev] [PATCH 04/18] i40e: support of CEE DCBX on recent firmware versions

2015-04-20 Thread Helin Zhang
Recent firmware versions support CEE DCBX, while old firmware
versions support IEEE DCBX only. So the firmware version needs to be
checked when getting the DCB configuration. To support CEE DCBX,
the AQ commands 'i40e_aq_get_cee_dcb_config' and
'i40e_aq_start_stop_dcbx' were implemented.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h |  75 -
 lib/librte_pmd_i40e/i40e/i40e_common.c |  58 +++
 lib/librte_pmd_i40e/i40e/i40e_dcb.c| 261 -
 lib/librte_pmd_i40e/i40e/i40e_dcb.h|  20 +++
 lib/librte_pmd_i40e/i40e/i40e_prototype.h  |   8 +
 lib/librte_pmd_i40e/i40e/i40e_type.h   |  53 --
 6 files changed, 452 insertions(+), 23 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h 
b/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
index 0fe9d1c..a2c4394 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
@@ -262,6 +262,9 @@ enum i40e_admin_queue_opc {
i40e_aqc_opc_lldp_delete_tlv= 0x0A04,
i40e_aqc_opc_lldp_stop  = 0x0A05,
i40e_aqc_opc_lldp_start = 0x0A06,
+   i40e_aqc_opc_get_cee_dcb_cfg= 0x0A07,
+   i40e_aqc_opc_lldp_set_local_mib = 0x0A08,
+   i40e_aqc_opc_lldp_stop_start_spec_agent = 0x0A09,

/* Tunnel commands */
i40e_aqc_opc_add_udp_tunnel = 0x0B00,
@@ -1994,10 +1997,78 @@ struct i40e_aqc_lldp_start {

 I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_start);

-/* Apply MIB changes (0x0A07)
- * uses the generic struc as it contains no data
+/* Get CEE DCBX Oper Config (0x0A07)
+ * uses the generic descriptor struct
+ * returns below as indirect response
  */

+#define I40E_AQC_CEE_APP_FCOE_SHIFT0x0
+#define I40E_AQC_CEE_APP_FCOE_MASK (0x7 << I40E_AQC_CEE_APP_FCOE_SHIFT)
+#define I40E_AQC_CEE_APP_ISCSI_SHIFT   0x3
+#define I40E_AQC_CEE_APP_ISCSI_MASK(0x7 << I40E_AQC_CEE_APP_ISCSI_SHIFT)
+#define I40E_AQC_CEE_APP_FIP_SHIFT 0x8
+#define I40E_AQC_CEE_APP_FIP_MASK  (0x7 << I40E_AQC_CEE_APP_FIP_SHIFT)
+#define I40E_AQC_CEE_PG_STATUS_SHIFT   0x0
+#define I40E_AQC_CEE_PG_STATUS_MASK(0x7 << I40E_AQC_CEE_PG_STATUS_SHIFT)
+#define I40E_AQC_CEE_PFC_STATUS_SHIFT  0x3
+#define I40E_AQC_CEE_PFC_STATUS_MASK   (0x7 << I40E_AQC_CEE_PFC_STATUS_SHIFT)
+#define I40E_AQC_CEE_APP_STATUS_SHIFT  0x8
+#define I40E_AQC_CEE_APP_STATUS_MASK   (0x7 << I40E_AQC_CEE_APP_STATUS_SHIFT)
+struct i40e_aqc_get_cee_dcb_cfg_v1_resp {
+   u8  reserved1;
+   u8  oper_num_tc;
+   u8  oper_prio_tc[4];
+   u8  reserved2;
+   u8  oper_tc_bw[8];
+   u8  oper_pfc_en;
+   u8  reserved3;
+   __le16  oper_app_prio;
+   u8  reserved4;
+   __le16  tlv_status;
+};
+
+I40E_CHECK_STRUCT_LEN(0x18, i40e_aqc_get_cee_dcb_cfg_v1_resp);
+
+struct i40e_aqc_get_cee_dcb_cfg_resp {
+   u8  oper_num_tc;
+   u8  oper_prio_tc[4];
+   u8  oper_tc_bw[8];
+   u8  oper_pfc_en;
+   __le16  oper_app_prio;
+   __le32  tlv_status;
+   u8  reserved[12];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_cee_dcb_cfg_resp);
+
+/* Set Local LLDP MIB (indirect 0x0A08)
+ * Used to replace the local MIB of a given LLDP agent. e.g. DCBx
+ */
+struct i40e_aqc_lldp_set_local_mib {
+#define SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT   0
+#define SET_LOCAL_MIB_AC_TYPE_DCBX_MASK(1 << 
SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT)
+   u8  type;
+   u8  reserved0;
+   __le16  length;
+   u8  reserved1[4];
+   __le32  address_high;
+   __le32  address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_set_local_mib);
+
+/* Stop/Start LLDP Agent (direct 0x0A09)
+ * Used for stopping/starting specific LLDP agent. e.g. DCBx
+ */
+struct i40e_aqc_lldp_stop_start_specific_agent {
+#define I40E_AQC_START_SPECIFIC_AGENT_SHIFT0
+#define I40E_AQC_START_SPECIFIC_AGENT_MASK (1 << 
I40E_AQC_START_SPECIFIC_AGENT_SHIFT)
+   u8  command;
+   u8  reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop_start_specific_agent);
+
 /* Add Udp Tunnel command and completion (direct 0x0B00) */
 struct i40e_aqc_add_udp_tunnel {
__le16  udp_port;
diff --git a/lib/librte_pmd_i40e/i40e/i40e_common.c 
b/lib/librte_pmd_i40e/i40e/i40e_common.c
index 23f14c1..e7b70c9 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_common.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_common.c
@@ -3173,6 +3173,64 @@ enum i40e_status_code i40e_aq_start_lldp(struct i40e_hw 
*hw,
 }

 /**
+ * i40e_aq_get_cee_dcb_config
+ * @hw: pointer to the hw struct
+ * @buff: response buffer that stores CEE operational configuration
+ * @buff_size: size of the buffer passed
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get CEE DCBX mode operational configuration from firmware
+ **/
+enum i40e_status_code i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
+   void *buff, u16 buff_size,
+   

[dpdk-dev] [PATCH 06/18] i40e: support of Fortpark device IDs and mac types

2015-04-20 Thread Helin Zhang
Support for Fortpark device IDs and MAC types, including FPGA,
was added.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_common.c | 17 +
 lib/librte_pmd_i40e/i40e/i40e_type.h   | 16 
 2 files changed, 33 insertions(+)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_common.c 
b/lib/librte_pmd_i40e/i40e/i40e_common.c
index e7b70c9..2256de9 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_common.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_common.c
@@ -36,6 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "i40e_prototype.h"
 #include "i40e_virtchnl.h"

+
 /**
  * i40e_set_mac_type - Sets MAC type
  * @hw: pointer to the HW structure
@@ -55,6 +56,8 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw 
*hw)

if (hw->vendor_id == I40E_INTEL_VENDOR_ID) {
switch (hw->device_id) {
+   case I40E_DEV_ID_BVL_I40Q:
+   case I40E_DEV_ID_FPGA_A:
case I40E_DEV_ID_SFP_XL710:
case I40E_DEV_ID_QEMU:
case I40E_DEV_ID_KX_A:
@@ -66,6 +69,20 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct 
i40e_hw *hw)
case I40E_DEV_ID_10G_BASE_T:
hw->mac.type = I40E_MAC_XL710;
break;
+   case I40E_DEV_ID_FPK_LBG_NS:
+   case I40E_DEV_ID_FPK_LBG_SD:
+   case I40E_DEV_ID_FPK_FPGA:
+   hw->mac.type = I40E_MAC_FPK;
+   break;
+   case I40E_DEV_ID_FPK_VF_LBG_NS:
+   case I40E_DEV_ID_FPK_VF_LBG_SD:
+   case I40E_DEV_ID_FPK_VF_HV_LBG_NS:
+   case I40E_DEV_ID_FPK_VF_HV_LBG_SD:
+   case I40E_DEV_ID_FPK_FPGA_VF:
+   case I40E_DEV_ID_FPK_FPGA_VF_HV:
+   hw->mac.type = I40E_MAC_FPK_VF;
+   break;
+   case I40E_DEV_ID_BVL_I40Q_VF:
case I40E_DEV_ID_VF:
case I40E_DEV_ID_VF_HV:
hw->mac.type = I40E_MAC_VF;
diff --git a/lib/librte_pmd_i40e/i40e/i40e_type.h 
b/lib/librte_pmd_i40e/i40e/i40e_type.h
index 78b6a85..1565095 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_type.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_type.h
@@ -63,6 +63,20 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_DEV_ID_10G_BASE_T 0x1586
 #define I40E_DEV_ID_VF 0x154C
 #define I40E_DEV_ID_VF_HV  0x1571
+#define I40E_DEV_ID_FPK_LBG_NS 0x374C
+#define I40E_DEV_ID_FPK_LBG_SD 0x37CC
+#define I40E_DEV_ID_FPK_VF_LBG_NS  0x374D
+#define I40E_DEV_ID_FPK_VF_LBG_SD  0x37CD
+#define I40E_DEV_ID_FPK_VF_HV_LBG_NS   0x3759
+#define I40E_DEV_ID_FPK_VF_HV_LBG_SD   0x37D9
+#define I40E_DEV_ID_FPK_FPGA   0xF0A2
+#define I40E_DEV_ID_FPK_FPGA_VF0xF0A3
+#define I40E_DEV_ID_FPK_FPGA_VF_HV 0xF0A4
+#define I40E_DEV_ID_BVL_I40Q   0xFAFA
+#define I40E_DEV_ID_BVL_I40Q_VF0xFAFB
+/* the A0 support needs the FPGA defines for now */
+#define I40E_DEV_ID_FPGA_A 0x154B
+#define I40E_DEV_ID_FPGA_A_VF  0x154C

 #define i40e_is_40G_device(d)  ((d) == I40E_DEV_ID_QSFP_A  || \
 (d) == I40E_DEV_ID_QSFP_B  || \
@@ -194,6 +208,8 @@ enum i40e_mac_type {
I40E_MAC_X710,
I40E_MAC_XL710,
I40E_MAC_VF,
+   I40E_MAC_FPK,
+   I40E_MAC_FPK_VF,
I40E_MAC_GENERIC,
 };

-- 
1.8.1.4



[dpdk-dev] [PATCH 03/18] i40e: adjustment of register definitions and relevant

2015-04-20 Thread Helin Zhang
More register definitions and their relevant masks are added
publicly. Also, a few useless macros are deleted.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_register.h | 1981 +-
 lib/librte_pmd_i40e/i40e_ethdev.c|3 -
 2 files changed, 1955 insertions(+), 29 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_register.h 
b/lib/librte_pmd_i40e/i40e/i40e_register.h
index 888c3c3..f116b86 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_register.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_register.h
@@ -318,6 +318,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_PRTDCB_RUP2TC_UP6TC_MASK  I40E_MASK(0x7, 
I40E_PRTDCB_RUP2TC_UP6TC_SHIFT)
 #define I40E_PRTDCB_RUP2TC_UP7TC_SHIFT 21
 #define I40E_PRTDCB_RUP2TC_UP7TC_MASK  I40E_MASK(0x7, 
I40E_PRTDCB_RUP2TC_UP7TC_SHIFT)
+#define I40E_PRTDCB_RUPTQ(_i)  (0x00122400 + ((_i) * 32)) /* _i=0...7 
*/ /* Reset: CORER */
+#define I40E_PRTDCB_RUPTQ_MAX_INDEX7
+#define I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT 0
+#define I40E_PRTDCB_RUPTQ_RXQNUM_MASK  I40E_MASK(0x3FFF, 
I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT)
 #define I40E_PRTDCB_TC2PFC  0x001C0980 /* Reset: CORER */
 #define I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT 0
 #define I40E_PRTDCB_TC2PFC_TC2PFC_MASK  I40E_MASK(0xFF, 
I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT)
@@ -429,6 +433,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_MASK   I40E_MASK(0x1, 
I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT)
 #define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT 20
 #define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_MASK  I40E_MASK(0x3F, 
I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_BIT_MAP_SHIFT  26
+#define I40E_GLGEN_GPIO_CTL_PRT_BIT_MAP_MASK   I40E_MASK(0xF, 
I40E_GLGEN_GPIO_CTL_PRT_BIT_MAP_SHIFT)
 #define I40E_GLGEN_GPIO_SET 0x00088184 /* Reset: POR */
 #define I40E_GLGEN_GPIO_SET_GPIO_INDX_SHIFT 0
 #define I40E_GLGEN_GPIO_SET_GPIO_INDX_MASK  I40E_MASK(0x1F, 
I40E_GLGEN_GPIO_SET_GPIO_INDX_SHIFT)
@@ -492,7 +498,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_GLGEN_MDIO_CTRL_CONTMDC_SHIFT  17
 #define I40E_GLGEN_MDIO_CTRL_CONTMDC_MASK   I40E_MASK(0x1, 
I40E_GLGEN_MDIO_CTRL_CONTMDC_SHIFT)
 #define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_SHIFT 18
-#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_MASK  I40E_MASK(0x3FFF, 
I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_SHIFT)
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_MASK  I40E_MASK(0x7FF, 
I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_SHIFT)
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD0_SHIFT 29
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD0_MASK  I40E_MASK(0x7, 
I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD0_SHIFT)
 #define I40E_GLGEN_MDIO_I2C_SEL(_i)(0x000881C0 + ((_i) * 4)) 
/* _i=0...3 */ /* Reset: POR */
 #define I40E_GLGEN_MDIO_I2C_SEL_MAX_INDEX  3
 #define I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_SHIFT 0
@@ -556,9 +564,6 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_GLGEN_RSTCTL_GRSTDEL_MASK  I40E_MASK(0x3F, 
I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT)
 #define I40E_GLGEN_RSTCTL_ECC_RST_ENA_SHIFT 8
 #define I40E_GLGEN_RSTCTL_ECC_RST_ENA_MASK  I40E_MASK(0x1, 
I40E_GLGEN_RSTCTL_ECC_RST_ENA_SHIFT)
-#define I40E_GLGEN_RSTENA_EMP   0x000B818C /* Reset: POR */
-#define I40E_GLGEN_RSTENA_EMP_EMP_RST_ENA_SHIFT 0
-#define I40E_GLGEN_RSTENA_EMP_EMP_RST_ENA_MASK  I40E_MASK(0x1, 
I40E_GLGEN_RSTENA_EMP_EMP_RST_ENA_SHIFT)
 #define I40E_GLGEN_RTRIG  0x000B8190 /* Reset: CORER */
 #define I40E_GLGEN_RTRIG_CORER_SHIFT  0
 #define I40E_GLGEN_RTRIG_CORER_MASK   I40E_MASK(0x1, 
I40E_GLGEN_RTRIG_CORER_SHIFT)
@@ -1074,7 +1079,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_PFINT_RATEN_INTERVAL_MASK   I40E_MASK(0x3F, 
I40E_PFINT_RATEN_INTERVAL_SHIFT)
 #define I40E_PFINT_RATEN_INTRL_ENA_SHIFT 6
 #define I40E_PFINT_RATEN_INTRL_ENA_MASK  I40E_MASK(0x1, 
I40E_PFINT_RATEN_INTRL_ENA_SHIFT)
-#define I40E_PFINT_STAT_CTL0  0x00038400 /* Reset: PFR */
+#define I40E_PFINT_STAT_CTL0  0x00038400 /* Reset: CORER */
 #define I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT 2
 #define I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_MASK  I40E_MASK(0x3, 
I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT)
 #define I40E_QINT_RQCTL(_Q)  (0x0003A000 + ((_Q) * 4)) /* 
_i=0...1535 */ /* Reset: CORER */
@@ -1179,7 +1184,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_VFINT_ITRN_MAX_INDEX  2
 #define I40E_VFINT_ITRN_INTERVAL_SHIFT 0
 #define I40E_VFINT_ITRN_INTERVAL_MASK  I40E_MASK(0xFFF, 
I40E_VFINT_ITRN_INTERVAL_SHIFT)
-#define I40E_VFINT_STAT_CTL0(_VF) (0x0002A000 + ((_VF) * 4)) 
/* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFINT_STAT_CTL0(_VF) (0x0002A000 + ((_VF) * 4)) 
/* _i=0...127 */ /* Reset: CORER */
 #define I40E_VFINT_STAT_CTL0_MAX_INDEX127
 #define I40E_VFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT 2
 #define I40E_VFINT_STAT_CTL0_OTHER_ITR_INDX_MASK  I40E_MASK(0x3, 
I40E_VFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT)
@@ -1811,9 +1816,6 @@ POSSIBILITY OF SUCH DAMAGE.
 #define 

[dpdk-dev] [PATCH 14/18] i40e: add new interfaces of AQ commands and relevant

2015-04-20 Thread Helin Zhang
New interfaces were added for AQ commands. They are as follows.
- i40e_aq_get_rss_lut()
- i40e_aq_set_rss_lut()
- i40e_aq_get_rss_key()
- i40e_aq_set_rss_key()
- i40e_aq_read_nvm_config()
- i40e_aq_write_nvm_config()
- i40e_aq_set_lldp_mib()
- i40e_read_pba_string()

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h |  57 -
 lib/librte_pmd_i40e/i40e/i40e_common.c | 331 +
 lib/librte_pmd_i40e/i40e/i40e_prototype.h  |  24 +++
 lib/librte_pmd_i40e/i40e/i40e_type.h   |   1 +
 4 files changed, 409 insertions(+), 4 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h 
b/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
index a2c4394..1cb935c 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
@@ -269,6 +269,10 @@ enum i40e_admin_queue_opc {
/* Tunnel commands */
i40e_aqc_opc_add_udp_tunnel = 0x0B00,
i40e_aqc_opc_del_udp_tunnel = 0x0B01,
+   i40e_aqc_opc_set_rss_key= 0x0B02,
+   i40e_aqc_opc_set_rss_lut= 0x0B03,
+   i40e_aqc_opc_get_rss_key= 0x0B04,
+   i40e_aqc_opc_get_rss_lut= 0x0B05,
i40e_aqc_opc_tunnel_key_structure   = 0x0B10,

/* Async Events */
@@ -1797,12 +1801,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_update);
 /* NVM Config Read (indirect 0x0704) */
 struct i40e_aqc_nvm_config_read {
__le16  cmd_flags;
-#define ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK  1
-#define ANVM_READ_SINGLE_FEATURE   0
-#define ANVM_READ_MULTIPLE_FEATURES1
+#define I40E_AQ_ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK  1
+#define I40E_AQ_ANVM_READ_SINGLE_FEATURE   0
+#define I40E_AQ_ANVM_READ_MULTIPLE_FEATURES1
__le16  element_count;
__le16  element_id; /* Feature/field ID */
-   u8  reserved[2];
+   __le16  element_id_msw; /* MSWord of field ID */
__le32  address_high;
__le32  address_low;
 };
@@ -1820,6 +1824,11 @@ struct i40e_aqc_nvm_config_write {

 I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write);

+/* Used for 0x0704 as well as for 0x0705 commands */
+#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT1
+#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_MASK (1 << 
I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT)
+#define I40E_AQ_ANVM_FEATURE   0
+#define I40E_AQ_ANVM_IMMEDIATE_FIELD   (1 << 
FEATURE_OR_IMMEDIATE_SHIFT)
 struct i40e_aqc_nvm_config_data_feature {
__le16 feature_id;
__le16 instance_id;
@@ -2113,6 +2122,46 @@ struct i40e_aqc_del_udp_tunnel_completion {

 I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion);

+struct i40e_aqc_get_set_rss_key {
+#define I40E_AQC_SET_RSS_KEY_VSI_VALID (0x1 << 15)
+#define I40E_AQC_SET_RSS_KEY_VSI_SEID_SHIFT0
+#define I40E_AQC_SET_RSS_KEY_VSI_SEID_MASK (0x3FF << \
+   I40E_AQC_SET_RSS_KEY_VSI_SEID_SHIFT)
+   __le16  vsi_seid;
+   u8  reserved[6];
+   __le32  addr_high;
+   __le32  addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_key);
+
+struct i40e_aqc_get_set_rss_key_data {
+   u8 standard_rss_key[0x28];
+   u8 extended_hash_key[0xc];
+};
+
+I40E_CHECK_STRUCT_LEN(0x34, i40e_aqc_get_set_rss_key_data);
+
+struct  i40e_aqc_get_set_rss_lut {
+#define I40E_AQC_SET_RSS_LUT_VSI_VALID (0x1 << 15)
+#define I40E_AQC_SET_RSS_LUT_VSI_SEID_SHIFT0
+#define I40E_AQC_SET_RSS_LUT_VSI_SEID_MASK (0x3FF << \
+   I40E_AQC_SET_RSS_LUT_VSI_SEID_SHIFT)
+   __le16  vsi_seid;
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT  0
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK   (0x1 << \
+   I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT)
+
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI0
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF 1
+   __le16  flags;
+   u8  reserved[4];
+   __le32  addr_high;
+   __le32  addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_lut);
+
 /* tunnel key structure 0x0B10 */

 struct i40e_aqc_tunnel_key_structure {
diff --git a/lib/librte_pmd_i40e/i40e/i40e_common.c 
b/lib/librte_pmd_i40e/i40e/i40e_common.c
index 9f8d720..b6ca288 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_common.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_common.c
@@ -203,6 +203,168 @@ enum i40e_status_code i40e_aq_queue_shutdown(struct 
i40e_hw *hw,
return status;
 }

+/**
+ * i40e_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @seid: vsi number
+ * @pf_lut: for PF table set true, for VSI table set false
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ * @set: set true to set the table, false to get the table
+ *
+ * Internal function to get or set RSS look up table
+ **/
+STATIC enum i40e_status_code i40e_aq_get_set_rss_lut(struct i40e_hw *hw,
+ 

[dpdk-dev] [PATCH 15/18] i40e: support of Fortpark FPGA

2015-04-20 Thread Helin Zhang
To support Fortpark FPGA devices, all device IDs and MAC types of
FPGA were added. Also, special configurations and processings for
FPGA were added.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_adminq.c|  11 +++
 lib/librte_pmd_i40e/i40e/i40e_common.c| 123 +-
 lib/librte_pmd_i40e/i40e/i40e_osdep.h |  62 +++
 lib/librte_pmd_i40e/i40e/i40e_prototype.h |   1 +
 lib/librte_pmd_i40e/i40e/i40e_type.h  |   1 +
 5 files changed, 197 insertions(+), 1 deletion(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq.c 
b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
index 8f9e870..0fd8ca9 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq.c
@@ -582,6 +582,14 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
/* setup ASQ command write back timeout */
hw->aq.asq_cmd_timeout = I40E_ASQ_CMD_TIMEOUT;

+   /* emulator and FPGA needs a longer write back timeout
+* (exclude FVL FPGA/FVL Blank Flash Si DeviceID)
+*/
+   if ((hw->device_id == I40E_DEV_ID_BVL_I40Q) ||
+   (hw->device_id == I40E_DEV_ID_FPK_FPGA) ||
+   (hw->device_id == I40E_DEV_ID_FPK_FPGA_VF))
+   hw->aq.asq_cmd_timeout = I40E_ASQ_CMD_TIMEOUT_FPGA;
+
/* allocate the ASQ */
ret_code = i40e_init_asq(hw);
if (ret_code != I40E_SUCCESS)
@@ -868,6 +876,9 @@ enum i40e_status_code i40e_asq_send_command(struct i40e_hw 
*hw,
i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: desc and buffer:\n");
i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc_on_ring,
  buff, buff_size);
+   /* To increase stability */
+   if (hw->mac.is_fpga)
+   i40e_msec_delay(10);
(hw->aq.asq.next_to_use)++;
if (hw->aq.asq.next_to_use == hw->aq.asq.count)
hw->aq.asq.next_to_use = 0;
diff --git a/lib/librte_pmd_i40e/i40e/i40e_common.c 
b/lib/librte_pmd_i40e/i40e/i40e_common.c
index b6ca288..0ae88b8 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_common.c
+++ b/lib/librte_pmd_i40e/i40e/i40e_common.c
@@ -799,6 +799,15 @@ enum i40e_status_code i40e_init_shared_code(struct i40e_hw 
*hw)
else
hw->pf_id = (u8)(func_rid & 0x7);

+   /* Check if we are working with FPGA */
+   if ((hw->device_id == I40E_DEV_ID_FPGA_A) ||
+   (hw->device_id == I40E_DEV_ID_FPGA_A_VF))
+   hw->mac.is_fpga = true;
+
+   if ((hw->device_id == I40E_DEV_ID_FPK_FPGA) ||
+   (hw->device_id == I40E_DEV_ID_FPK_FPGA_VF))
+   hw->mac.is_fpga = true;
+
status = i40e_init_nvm(hw);
return status;
 }
@@ -875,6 +884,21 @@ enum i40e_status_code i40e_get_mac_addr(struct i40e_hw 
*hw, u8 *mac_addr)

status = i40e_aq_mac_address_read(hw, , , NULL);

+   /* try the obsolete command if we are working with older FPGA or BVL */
+   if ((status != I40E_SUCCESS) &&
+   (hw->aq.asq_last_status == I40E_AQ_RC_ESRCH) &&
+   (hw->mac.is_fpga)) {
+   struct i40e_aq_desc desc;
+   struct i40e_aqc_mng_laa *cmd_resp =
+   (struct i40e_aqc_mng_laa *)
+
+   i40e_fill_default_direct_cmd_desc(, i40e_aqc_opc_mng_laa);
+   status = i40e_asq_send_command(hw, , NULL, 0, NULL);
+
+   memcpy(_addr[0], _resp->sal, 4);
+   memcpy(_addr[4], _resp->sah, 2);
+   }
+
if (flags & I40E_AQC_LAN_ADDR_VALID)
memcpy(mac_addr, _lan_mac, sizeof(addrs.pf_lan_mac));

@@ -1062,6 +1086,21 @@ enum i40e_status_code i40e_pf_reset(struct i40e_hw *hw)
u32 reg = 0;
u32 grst_del;

+   if (hw->mac.is_fpga) {
+   /* turn on Address Translation and protection */
+   reg = rd32(hw, I40E_GLGEN_CSR_DEBUG_C);
+   reg |= I40E_GLGEN_CSR_DEBUG_C_CSR_ADDR_PROT_MASK;
+   wr32(hw, I40E_GLGEN_CSR_DEBUG_C, reg);
+
+   /* NVM work around - force 0x8 into the GLGEN_RSTCTL reset
+* delay
+*/
+   grst_del = rd32(hw, I40E_GLGEN_RSTCTL);
+   grst_del &= ~I40E_GLGEN_RSTCTL_GRSTDEL_MASK;
+   grst_del |= (0x8 << I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT);
+   wr32(hw, I40E_GLGEN_RSTCTL, grst_del);
+   }
+
/* Poll for Global Reset steady state in case of recent GRST.
 * The grst delay value is in 100ms units, and we'll wait a
 * couple counts longer to be sure we don't just miss the end.
@@ -1110,6 +1149,8 @@ enum i40e_status_code i40e_pf_reset(struct i40e_hw *hw)
reg = rd32(hw, I40E_PFGEN_CTRL);
if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK))
break;
+   if (hw->mac.is_fpga)
+   i40e_msec_delay(500);
i40e_msec_delay(1);
}
if (reg & 

[dpdk-dev] [PATCH 16/18] i40e: add more virtual channel operations

2015-04-20 Thread Helin Zhang
Added three virtual channel operations, and assigned values to
each of them. To be consistent with base driver releases, virtual
channel operation of 'I40E_VIRTCHNL_OP_GET_ADDNL_SOL_CONFIG' and
its relevant structure were added.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_virtchnl.h | 61 +---
 1 file changed, 40 insertions(+), 21 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_virtchnl.h 
b/lib/librte_pmd_i40e/i40e/i40e_virtchnl.h
index 58b4862..0fc4dd6 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_virtchnl.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_virtchnl.h
@@ -66,30 +66,32 @@ POSSIBILITY OF SUCH DAMAGE.
  * of the virtchnl_msg structure.
  */
 enum i40e_virtchnl_ops {
-/* VF sends req. to pf for the following
- * ops.
+/* The PF sends status change events to VFs using
+ * the I40E_VIRTCHNL_OP_EVENT opcode.
+ * VFs send requests to the PF using the other ops.
  */
I40E_VIRTCHNL_OP_UNKNOWN = 0,
I40E_VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */
-   I40E_VIRTCHNL_OP_RESET_VF,
-   I40E_VIRTCHNL_OP_GET_VF_RESOURCES,
-   I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE,
-   I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE,
-   I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES,
-   I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP,
-   I40E_VIRTCHNL_OP_ENABLE_QUEUES,
-   I40E_VIRTCHNL_OP_DISABLE_QUEUES,
-   I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS,
-   I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS,
-   I40E_VIRTCHNL_OP_ADD_VLAN,
-   I40E_VIRTCHNL_OP_DEL_VLAN,
-   I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
-   I40E_VIRTCHNL_OP_GET_STATS,
-   I40E_VIRTCHNL_OP_FCOE,
-/* PF sends status change events to vfs using
- * the following op.
- */
-   I40E_VIRTCHNL_OP_EVENT,
+   I40E_VIRTCHNL_OP_RESET_VF = 2,
+   I40E_VIRTCHNL_OP_GET_VF_RESOURCES = 3,
+   I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE = 4,
+   I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE = 5,
+   I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES = 6,
+   I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP = 7,
+   I40E_VIRTCHNL_OP_ENABLE_QUEUES = 8,
+   I40E_VIRTCHNL_OP_DISABLE_QUEUES = 9,
+   I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS = 10,
+   I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS = 11,
+   I40E_VIRTCHNL_OP_ADD_VLAN = 12,
+   I40E_VIRTCHNL_OP_DEL_VLAN = 13,
+   I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14,
+   I40E_VIRTCHNL_OP_GET_STATS = 15,
+   I40E_VIRTCHNL_OP_FCOE = 16,
+   I40E_VIRTCHNL_OP_EVENT = 17,
+   I40E_VIRTCHNL_OP_CONFIG_RSS = 18,
+#ifdef I40E_SOL_VF_SUPPORT
+   I40E_VIRTCHNL_OP_GET_ADDNL_SOL_CONFIG = 19,
+#endif
 };

 /* Virtual channel message descriptor. This overlays the admin queue
@@ -282,6 +284,23 @@ struct i40e_virtchnl_ether_addr_list {
struct i40e_virtchnl_ether_addr list[1];
 };

+#ifdef I40E_SOL_VF_SUPPORT
+/* I40E_VIRTCHNL_OP_GET_ADDNL_SOL_CONFIG
+ * VF sends this message to get the default MTU and list of additional ethernet
+ * addresses it is allowed to use.
+ * PF responds with an indirect message containing
+ * i40e_virtchnl_addnl_solaris_config with zero or more
+ * i40e_virtchnl_ether_addr structures.
+ *
+ * It is expected that this operation will only ever be needed for Solaris VFs
+ * running under a Solaris PF.
+ */
+struct i40e_virtchnl_addnl_solaris_config {
+   u16 default_mtu;
+   struct i40e_virtchnl_ether_addr_list al;
+};
+
+#endif
 /* I40E_VIRTCHNL_OP_ADD_VLAN
  * VF sends this message to add one or more VLAN tag filters for receives.
  * PF adds the filters and returns status.
-- 
1.8.1.4



[dpdk-dev] [PATCH 17/18] i40e: support of structure and command length check

2015-04-20 Thread Helin Zhang
'I40E_CHECK_STRUCT_LEN' and 'I40E_CHECK_CMD_LENGTH' can be used to
check the length validity of structures and commands at compile
time. Those checks were added in i40e_adminq_cmd.h which can report
issues as early as possible.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h | 101 +
 1 file changed, 89 insertions(+), 12 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h 
b/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
index 1cb935c..32f534b 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_adminq_cmd.h
@@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
  */

 #define I40E_FW_API_VERSION_MAJOR  0x0001
-#define I40E_FW_API_VERSION_MINOR  0x0002
+#define I40E_FW_API_VERSION_MINOR  0x0003

 struct i40e_aq_desc {
__le16 flags;
@@ -281,6 +281,8 @@ enum i40e_admin_queue_opc {
/* OEM commands */
i40e_aqc_opc_oem_parameter_change   = 0xFE00,
i40e_aqc_opc_oem_device_status_change   = 0xFE01,
+   i40e_aqc_opc_oem_ocsd_initialize= 0xFE02,
+   i40e_aqc_opc_oem_ocbb_initialize= 0xFE03,

/* debug commands */
i40e_aqc_opc_debug_get_deviceid = 0xFF00,
@@ -289,7 +291,6 @@ enum i40e_admin_queue_opc {
i40e_aqc_opc_debug_write_reg= 0xFF04,
i40e_aqc_opc_debug_modify_reg   = 0xFF07,
i40e_aqc_opc_debug_dump_internals   = 0xFF08,
-   i40e_aqc_opc_debug_modify_internals = 0xFF09,
 };

 /* command structures and indirect data structures */
@@ -423,6 +424,7 @@ struct i40e_aqc_list_capabilities_element_resp {
 #define I40E_AQ_CAP_ID_VSI 0x0017
 #define I40E_AQ_CAP_ID_DCB 0x0018
 #define I40E_AQ_CAP_ID_FCOE0x0021
+#define I40E_AQ_CAP_ID_ISCSI   0x0022
 #define I40E_AQ_CAP_ID_RSS 0x0040
 #define I40E_AQ_CAP_ID_RXQ 0x0041
 #define I40E_AQ_CAP_ID_TXQ 0x0042
@@ -467,8 +469,11 @@ struct i40e_aqc_arp_proxy_data {
__le32  pfpm_proxyfc;
__le32  ip_addr;
u8  mac_addr[6];
+   u8  reserved[2];
 };

+I40E_CHECK_STRUCT_LEN(0x14, i40e_aqc_arp_proxy_data);
+
 /* Set NS Proxy Table Entry Command (indirect 0x0105) */
 struct i40e_aqc_ns_proxy_data {
__le16  table_idx_mac_addr_0;
@@ -494,6 +499,8 @@ struct i40e_aqc_ns_proxy_data {
u8  ipv6_addr_1[16];
 };

+I40E_CHECK_STRUCT_LEN(0x3c, i40e_aqc_ns_proxy_data);
+
 /* Manage LAA Command (0x0106) - obsolete */
 struct i40e_aqc_mng_laa {
__le16  command_flags;
@@ -504,6 +511,8 @@ struct i40e_aqc_mng_laa {
u8  reserved2[6];
 };

+I40E_CHECK_CMD_LENGTH(i40e_aqc_mng_laa);
+
 /* Manage MAC Address Read Command (indirect 0x0107) */
 struct i40e_aqc_mac_address_read {
__le16  command_flags;
@@ -575,6 +584,8 @@ struct i40e_aqc_get_switch_config_header_resp {
u8  reserved[12];
 };

+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_config_header_resp);
+
 struct i40e_aqc_switch_config_element_resp {
u8  element_type;
 #define I40E_AQ_SW_ELEM_TYPE_MAC   1
@@ -600,6 +611,8 @@ struct i40e_aqc_switch_config_element_resp {
__le16  element_info;
 };

+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_config_element_resp);
+
 /* Get Switch Configuration (indirect 0x0200)
  *an array of elements are returned in the response buffer
  *the first in the array is the header, remainder are elements
@@ -609,6 +622,8 @@ struct i40e_aqc_get_switch_config_resp {
struct i40e_aqc_switch_config_element_resp  element[1];
 };

+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_switch_config_resp);
+
 /* Add Statistics (direct 0x0201)
  * Remove Statistics (direct 0x0202)
  */
@@ -674,6 +689,8 @@ struct i40e_aqc_switch_resource_alloc_element_resp {
u8  reserved2[6];
 };

+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
+
 /* Add VSI (indirect 0x0210)
  *this indirect command uses struct i40e_aqc_vsi_properties_data
  *as the indirect buffer (128 bytes)
@@ -820,8 +837,12 @@ struct i40e_aqc_vsi_properties_data {
 I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)
/* queueing option section */
u8  queueing_opt_flags;
+#define I40E_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA  0x04
+#define I40E_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA0x08
 #define I40E_AQ_VSI_QUE_OPT_TCP_ENA0x10
 #define I40E_AQ_VSI_QUE_OPT_FCOE_ENA   0x20
+#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_PF 0x00
+#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI0x40
u8  queueing_opt_reserved[3];
/* scheduler section */
u8  up_enable_bits;
@@ -1105,6 +1126,8 @@ struct i40e_aqc_remove_tag {
u8  reserved[12];
 };

+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_tag);
+
 /* Add multicast E-Tag (direct 0x0257)
  * del multicast E-Tag (direct 0x0258) only uses pv_seid and etag fields
  * and no external data
@@ -1220,7 +1243,7 @@ struct 

[dpdk-dev] [PATCH 18/18] i40e: Minor enhancements in i40e_type.h

2015-04-20 Thread Helin Zhang
To be consistent with base driver release, minor enhancements were
added in i40e_type.h, as follows.
- 'an_enabled' was removed from 'struct i40e_link_status', as it
  was useless.
- 'requested_speeds' was added to 'struct i40e_link_status' for
  future use.
- 'hw_semaphore_wait' was removed from 'struct i40e_nvm_info', as
  it was useless.
- 'enum i40e_rx_desc_status_bits' was reworked for future use.
- 'struct i40e_veb_tc_stats' was newly defined for future use.
- More macros were added for future use.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e/i40e_type.h | 37 +---
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e/i40e_type.h 
b/lib/librte_pmd_i40e/i40e/i40e_type.h
index 634ade2..d3c6d0e 100644
--- a/lib/librte_pmd_i40e/i40e/i40e_type.h
+++ b/lib/librte_pmd_i40e/i40e/i40e_type.h
@@ -266,12 +266,12 @@ struct i40e_link_status {
u8 an_info;
u8 ext_info;
u8 loopback;
-   bool an_enabled;
/* is Link Status Event notification to SW enabled */
bool lse_enable;
u16 max_frame_size;
bool crc_enable;
u8 pacing;
+   u8 requested_speeds;
 };

 struct i40e_phy_info {
@@ -357,8 +357,7 @@ enum i40e_aq_resource_access_type {
 };

 struct i40e_nvm_info {
-   u64 hw_semaphore_timeout; /* 2usec global time (GTIME resolution) */
-   u64 hw_semaphore_wait;/* - || - */
+   u64 hw_semaphore_timeout; /* usec global time (GTIME resolution) */
u32 timeout;  /* [ms] */
u16 sr_size;  /* Shadow RAM size in words */
bool blank_nvm_mode;  /* is NVM empty (no FW present)*/
@@ -528,7 +527,7 @@ struct i40e_hw {
u8 *hw_addr;
void *back;

-   /* function pointer structs */
+   /* subsystem structs */
struct i40e_phy_info phy;
struct i40e_mac_info mac;
struct i40e_bus_info bus;
@@ -689,14 +688,15 @@ enum i40e_rx_desc_status_bits {
I40E_RX_DESC_STATUS_CRCP_SHIFT  = 4,
I40E_RX_DESC_STATUS_TSYNINDX_SHIFT  = 5, /* 2 BITS */
I40E_RX_DESC_STATUS_TSYNVALID_SHIFT = 7,
-   I40E_RX_DESC_STATUS_PIF_SHIFT   = 8,
+   I40E_RX_DESC_STATUS_EXT_UDP_0_SHIFT = 8,
+
I40E_RX_DESC_STATUS_UMBCAST_SHIFT   = 9, /* 2 BITS */
I40E_RX_DESC_STATUS_FLM_SHIFT   = 11,
I40E_RX_DESC_STATUS_FLTSTAT_SHIFT   = 12, /* 2 BITS */
I40E_RX_DESC_STATUS_LPBK_SHIFT  = 14,
I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT = 15,
-   I40E_RX_DESC_STATUS_RESERVED_SHIFT  = 16, /* 2 BITS */
-   I40E_RX_DESC_STATUS_UDP_0_SHIFT = 18,
+   I40E_RX_DESC_STATUS_RESERVED2_SHIFT = 16, /* 2 BITS */
+   I40E_RX_DESC_STATUS_INT_UDP_0_SHIFT = 18,
I40E_RX_DESC_STATUS_LAST /* this entry must be last!!! */
 };

@@ -1077,6 +1077,9 @@ enum i40e_tx_ctx_desc_eipt_offload {
 #define I40E_TXD_CTX_QW0_DECTTL_MASK   (0xFULL << \
 I40E_TXD_CTX_QW0_DECTTL_SHIFT)

+#define I40E_TXD_CTX_QW0_L4T_CS_SHIFT  23
+#define I40E_TXD_CTX_QW0_L4T_CS_MASK   (0x1ULL << \
+I40E_TXD_CTX_QW0_L4T_CS_SHIFT)
 struct i40e_nop_desc {
__le64 rsvd;
__le64 dtype_cmd;
@@ -1178,6 +1181,10 @@ enum i40e_filter_program_desc_pcmd {
 #define I40E_TXD_FLTR_QW1_FD_STATUS_MASK (0x3ULL << \
  I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)

+#define I40E_TXD_FLTR_QW1_ATR_SHIFT(0xEULL + \
+I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_ATR_MASK (0x1ULL << I40E_TXD_FLTR_QW1_ATR_SHIFT)
+
 #define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20
 #define I40E_TXD_FLTR_QW1_CNTINDEX_MASK(0x1FFUL << \
 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT)
@@ -1230,6 +1237,14 @@ struct i40e_eth_stats {
u64 tx_errors;  /* tepc */
 };

+/* Statistics collected per VEB per TC */
+struct i40e_veb_tc_stats {
+   u64 tc_rx_packets[I40E_MAX_TRAFFIC_CLASS];
+   u64 tc_rx_bytes[I40E_MAX_TRAFFIC_CLASS];
+   u64 tc_tx_packets[I40E_MAX_TRAFFIC_CLASS];
+   u64 tc_tx_bytes[I40E_MAX_TRAFFIC_CLASS];
+};
+
 /* Statistics collected by the MAC */
 struct i40e_hw_port_stats {
/* eth stats collected by the port */
@@ -1304,11 +1319,16 @@ struct i40e_hw_port_stats {
 #define I40E_SR_NVM_WAKE_ON_LAN0x19
 #define I40E_SR_ALTERNATE_SAN_MAC_ADDRESS_PTR  0x27
 #define I40E_SR_PERMANENT_SAN_MAC_ADDRESS_PTR  0x28
+#define I40E_SR_NVM_MAP_VERSION0x29
+#define I40E_SR_NVM_IMAGE_VERSION  0x2A
+#define I40E_SR_NVM_STRUCTURE_VERSION  0x2B
 #define I40E_SR_NVM_EETRACK_LO 0x2D
 #define I40E_SR_NVM_EETRACK_HI 0x2E
 #define I40E_SR_VPD_PTR0x2F
 #define I40E_SR_PXE_SETUP_PTR  0x30
 

[dpdk-dev] [RFC PATCH 0/6] DPDK support to bifurcated driver

2015-04-20 Thread Shelton Chia
Hi,
I can receive packets once I mmap all of the PCI memory, not only the rx and tx
descriptors.

2015-04-09 11:43 GMT+08:00 ??? :

> Hi Cunming,
>  I applied the bifurc driver patches and tested them following your example.
> But I can't receive packets with testpmd and l2fwd.
> Kernel stack can receive packets from 10.0.0.2 before "ethtool -N
> XGE4.1 flow-type ip4 src-ip 10.0.0.2 action 12". After "ethtool -N XGE4.1
> flow-type ip4 src-ip 10.0.0.2 action 12", kernel stack can't receive
> packets from 10.0.0.2, but testpmd and l2fwd cannot receive any packets
> either.
>queue 0-11 used by kernel and queue 12 used by bifurc driver.
>How can I make it work?
>
> 2014-11-25 22:11 GMT+08:00 Cunming Liang :
>
>>
>> This is a RFC patch set to support "bifurcated driver" in DPDK.
>>
>>
>> What is "bifurcated driver"?
>> ===
>>
>> The "bifurcated driver" stands for the kernel NIC driver that supports:
>>
>> 1. on-demand rx/tx queue pairs split-off and assignment to user space
>>
>> 2. direct NIC resource(e.g. rx/tx queue registers) access from user space
>>
>> 3. distributing packets to kernel or user space rx queues by
>>NIC's flow director according to the filter rules
>>
>> Here's the kernel patch set to support.
>> http://comments.gmane.org/gmane.linux.network/333615
>>
>>
>> Usage scenario
>> =
>>
>> It's well accepted by industry to use DPDK to process fast path packets in
>> user space in a high performance fashion, meanwhile processing slow path
>> control packets in kernel space is still needed as those packets usually
>> rely on in_kernel TCP/IP stacks and/or socket programming interface.
>>
>> KNI(Kernel NIC Interface) mechanism in DPDK is designed to meet this
>> requirement, with below limitation:
>>
>>   1) Software classifies packets and distributes them to kernel via DPDK
>>  software rings, at the cost of significant CPU cycles and memory
>> bandwidth.
>>
>>   2) Memory copy packets between kernel' socket buffer and mbuf brings
>>  significant negative performance impact to KNI performance.
>>
>> The bifurcated driver provides an alternative approach that not only
>> offloads
>> flow classification and distribution to NIC but also supports packets
>> zero_copy.
>>
>> User can use standard ethtool to add filter rules to the NIC in order to
>> distribute specific flows to the queues only accessed by kernel driver and
>> stack, and add other rules to distribute packets to the queues assigned to
>> user-space.
>>
>> For those rx/tx queue pairs that directly accessed from user space,
>> DPDK takes over the packets rx/tx as well as corresponding DMA operation
>> for high performance packet I/O.
>>
>>
>> What's the impact and change to DPDK
>> ==
>>
>> DPDK usually binds PCIe NIC devices by leveraging the kernel's user space
>> driver
>> mechanism UIO or VFIO to map the entire PCIe I/O space of the NIC to user
>> space.
>> The bifurcated driver PMD talks to a NIC interface using raw socket APIs
>> and
>> only mmap() limited I/O space (e.g. certain 4K pages) for accessing
>> involved
>> rx/tx queue pairs. So the impact and changes mainly comes with below:
>>
>> - netdev
>> DPDK needs to create a af_packet socket and bind it to a bifurcated
>> netdev.
>> The socket fd will be used to request 'queue pairs info',
>> 'split/return queue pairs' and etc. The PCIe device ID, netdev MAC
>> address,
>> numa info are also from the netdev response.
>>
>> - PCIe device scan and driver probe
>> netdev provides the PCIe device ID information. Refer to the device
>> ID,
>> the correct driver should be used. And for such netdev device, the
>> creation
>> of PCIe device is no longer from scan but the on-demand assignment.
>>
>> - PCIe BAR mapping
>> "bifurcated driver" maps several pages for the queue pairs.
>> Other BAR register space maps to a fake page. The BAR mapping goes
>> through
>> mmap on sockfd, which is a little different from what UIO/VFIO does.
>>
>> - PMD
>> The PMD will no longer really initialize and configure the NIC.
>> Instead, it only takes care of the queue pair setup, rx_burst and
>> tx_burst.
>>
>> The patch uses eal '--vdev' parameter to assign netdev iface name and
>> number of
>> queue pairs. Here's an example of how to configure the bifurcated
>> driver and
>> run DPDK testpmd with bifurcated PMD.
>>
>>   1. Set promisc mode
>>   > ifconfig eth0 promisc
>>
>>   2. Turn on fdir
>>   > ethtool -K eth0 ntuple on
>>
>>   3. Setup a flow director rule to distribute packets with source ip
>>  0.0.0.0 to rxq No.0
>>   > ethtool -N eth0  flow-type udp4 src-ip 0.0.0.0 action 0
>>
>>   4. Run testpmd on netdev 'eth0' with 1 queue pair.
>>   > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 \
>>   >  --vdev=rte_bifurc,iface=eth0,qpairs=1 -- \
>>   >  -i --rxfreet=32 --txfreet=32 --txrst=32
>>   Note:
>> iface and qpairs arguments above specify the netdev interface name and
>> 

[dpdk-dev] DCA

2015-04-20 Thread Vlad Zolotarov
Hi,
I would like to ask if there is any reason why DPDK doesn't have support 
for DCA feature?

thanks,
vlad


[dpdk-dev] [RFC PATCH 3/4] add support for a ring to be a pktdev

2015-04-20 Thread Ananyev, Konstantin
Hi Bruce,

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Bruce Richardson
> Sent: Friday, April 17, 2015 4:17 PM
> To: dev at dpdk.org; Wiles, Keith
> Subject: [dpdk-dev] [RFC PATCH 3/4] add support for a ring to be a pktdev
> 
> Add a new public API function, and two internal wrapper functions so we
> can use ring as a pktdev.
> ---
>  lib/librte_ring/rte_ring.c | 41 +
>  lib/librte_ring/rte_ring.h |  3 +++
>  2 files changed, 44 insertions(+)
> 
> diff --git a/lib/librte_ring/Makefile b/lib/librte_ring/Makefile
> index 84ad3d3..bc5dd09 100644
> --- a/lib/librte_ring/Makefile
> +++ b/lib/librte_ring/Makefile
> @@ -47,6 +47,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_ring.c
>  SYMLINK-$(CONFIG_RTE_LIBRTE_RING)-include := rte_ring.h
> 
>  # this lib needs eal and rte_malloc
> -DEPDIRS-$(CONFIG_RTE_LIBRTE_RING) += lib/librte_eal lib/librte_malloc
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_RING) += lib/librte_eal lib/librte_malloc 
> lib/librte_pktdev
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c
> index c9e59d4..424da20 100644
> --- a/lib/librte_ring/rte_ring.c
> +++ b/lib/librte_ring/rte_ring.c
> @@ -86,6 +86,7 @@
>  #include 
>  #include 
>  #include 
> +#include 

I don't think it is a good idea to make rte_ring dependent on rte_pktdev (or 
rte_ethdev).  
If we'd like to have a pkt (or eth) device based on rte_ring, why not create 
librte_pmd_ring
and put all that stuff there?  
Konstantin

> 
>  #include "rte_ring.h"
> 
> @@ -208,6 +208,47 @@ rte_ring_create(const char *name, unsigned count, int 
> socket_id,
>   return r;
>  }
> 
> +static uint16_t
> +dev_rx(void *r, struct rte_mbuf **pkts, uint16_t max_pkts)
> +{
> + return rte_ring_dequeue_burst(r, (void *)pkts, max_pkts);
> +}
> +
> +static uint16_t
> +dev_tx(void *r, struct rte_mbuf **pkts, uint16_t max_pkts)
> +{
> + return rte_ring_enqueue_burst(r, (void *)pkts, max_pkts);
> +}
> +
> +#define rte_ring_dev_data rte_pkt_dev_data
> +
> +struct rte_pkt_dev *
> +rte_ring_get_dev(struct rte_ring *r)
> +{
> + struct ring_as_pktdev {
> + RTE_PKT_DEV_HDR(rte_ring_dev);
> + struct rte_ring_dev_data dev_data;
> + void *ring;
> + } *p;
> + if (r == NULL ||
> + (p = rte_zmalloc(NULL, sizeof(*p), 0)) == NULL)
> + return NULL;
> +
> + p->ring = r;
> + p->rx_pkt_burst = dev_rx;
> + p->tx_pkt_burst = dev_tx;
> + p->data = >dev_data;
> +
> + snprintf(p->dev_data.name, sizeof(p->dev_data.name), "%s", r->name);
> + p->dev_data.nb_rx_queues = 1;
> + p->dev_data.nb_tx_queues = 1;
> + p->dev_data.rx_queues = >ring;
> + p->dev_data.tx_queues = >ring;
> +
> + return (void *)p;
> +}
> +
> +
>  /*
>   * change the high water mark. If *count* is 0, water marking is
>   * disabled
> diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
> index af6..c2f28be 100644
> --- a/lib/librte_ring/rte_ring.h
> +++ b/lib/librte_ring/rte_ring.h
> @@ -301,6 +302,10 @@ int rte_ring_init(struct rte_ring *r, const char *name, 
> unsigned count,
>  struct rte_ring *rte_ring_create(const char *name, unsigned count,
>int socket_id, unsigned flags);
> 
> +struct rte_pkt_dev;
> +
> +struct rte_pkt_dev *rte_ring_get_dev(struct rte_ring *r);
> +
>  /**
>   * Change the high water mark.
>   *
> --
> 2.1.0



[dpdk-dev] [RFC PATCH 0/4] pktdev

2015-04-20 Thread Bruce Richardson
On Mon, Apr 20, 2015 at 08:51:26AM +0200, Marc Sune wrote:
> 
> 
> On 17/04/15 21:50, Wiles, Keith wrote:
> >Hi Marc and Bruce,
> 
> Hi Keith, Bruce,
> 
> >
> >On 4/17/15, 1:49 PM, "Marc Sune"  wrote:
> What I was proposing is to try to add the minimum common shared state in
> order to properly demultiplex the RX/TX call and have a common set of
> abstract calls (the pkt_dev type). In a way, I was proposing to deliberately
> not have a shared struct rte_dev_data because I think the internals of the
> "pkt_dev" can be very different across devices (e.g. queues in kni vs eth
> port vs. crypto?). I treat the pkt_dev as a "black box" that conforms to
> TX/RX API, leaving the developer of that device to define its internal
> structures as it better suits the needs. I only use each of the specific
> device type TX/RX APIs (external to us, pkt_dev library) in rte_pkt_dev.h.
> This also simplifies the refactor required to eventually integrate the
> rte_pkt_dev library and builds it "on top" of the existing APIs.
> 
> The other important difference with both, Bruce and your approach, and mine
> is the use of function pointers for RX/TX. I don't use them, which makes the
> entire abstracted TX/RX (including the final RX/TX routines itself)
> functions be "inlinable".
> 
> Btw, I forgot to add something basic in the previous pseudo-code. The
> different types have to be conditionally compiled according to compiled-in
> DPDK libs:
> 
> rte_pkt_dev.h:
> 
> #include 
> 
> //Eth devices
> #ifdef RTE_LIBRTE_ETHER
> #include 
> #endif
> 
> //KNI
> #ifdef RTE_LIBRTE_KNI
> #include 
> #endif
> 
> //...
> //Include PMD (and non-PMD) TX/RX headers...
> 
>static inline uint16_t
>rte_pkt_tx_burst(pkt_dev_t* dev, uint16_t queue_id,
>  struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>{
> switch (((struct rte_pkt_dev_data*)dev)->type){
> #ifdef RTE_LIBRTE_ETHER
> case  RTE_PKT_DEV_ETH:
> struct rte_eth_dev* eth_dev = (struct rte_eth_dev*)pkt_dev;
> rte_pkt_tx_burst(eth_dev, queue_id, tx_pkts, nb_pkts);
> break;
> #endif
> 
> #ifdef RTE_LIBRTE_KNI
> case RTE_PKT_DEV_KNI:
> //...
> break;
> #endif
> 
> default:
> //Corrupted type or unsupported (without compiled
> support)
> //Ignore or fail(fatal error)?
> break;
> }
>}
> 
>//...

Yes, this is an interesting approach, and with the inlining it could indeed have
less overhead for the ring and kni compared to my suggestion.
There might be a slight overhead for the RX/TX ethdev functions though - 1/2
cycles due to the extra (hopefully predictable) branch in the RX/TX call, since
we always need the indirect function call for the PMDs.

I also like the use of pointers rather than port ids.

Let me think on this a bit more.

/Bruce


[dpdk-dev] DCA

2015-04-20 Thread Bruce Richardson
On Mon, Apr 20, 2015 at 01:07:59PM +0300, Vlad Zolotarov wrote:
> Hi,
> I would like to ask if there is any reason why DPDK doesn't have support for
> DCA feature?
> 
> thanks,
> vlad

With modern platforms with DDIO the data written by the NIC automatically goes
into the cache of the CPU without us needing to use DCA.

/Bruce


[dpdk-dev] [RFC PATCH 1/4] Add example pktdev implementation

2015-04-20 Thread Ananyev, Konstantin


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Bruce Richardson
> Sent: Friday, April 17, 2015 4:17 PM
> To: dev at dpdk.org; Wiles, Keith
> Subject: [dpdk-dev] [RFC PATCH 1/4] Add example pktdev implementation
> 
> This commit demonstrates what a minimal API for all packet handling
> types would look like. It simply provides the necessary parts for
> receiving and transmitting packets, and is based off the ethdev
> implementation.
> ---
>  config/common_bsdapp   |   5 ++
>  config/common_linuxapp |   5 ++
>  lib/Makefile   |   1 +
>  lib/librte_pktdev/Makefile |  56 
>  lib/librte_pktdev/rte_pktdev.c |  35 ++
>  lib/librte_pktdev/rte_pktdev.h | 144 
> +
>  6 files changed, 246 insertions(+)
>  create mode 100644 lib/librte_pktdev/Makefile
>  create mode 100644 lib/librte_pktdev/rte_pktdev.c
>  create mode 100644 lib/librte_pktdev/rte_pktdev.h
> 
> diff --git a/config/common_bsdapp b/config/common_bsdapp
> index 8ff4dc2..d2b932c 100644
> --- a/config/common_bsdapp
> +++ b/config/common_bsdapp
> @@ -132,6 +132,11 @@ CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=y
>  CONFIG_RTE_LIBRTE_KVARGS=y
> 
>  #
> +# Compile generic packet handling device library
> +#
> +CONFIG_RTE_LIBRTE_PKTDEV=y
> +
> +#
>  # Compile generic ethernet library
>  #
>  CONFIG_RTE_LIBRTE_ETHER=y
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 09a58ac..5bda416 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -129,6 +129,11 @@ CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=y
>  CONFIG_RTE_LIBRTE_KVARGS=y
> 
>  #
> +# Compile generic packet handling device library
> +#
> +CONFIG_RTE_LIBRTE_PKTDEV=y
> +
> +#
>  # Compile generic ethernet library
>  #
>  CONFIG_RTE_LIBRTE_ETHER=y
> diff --git a/lib/Makefile b/lib/Makefile
> index d94355d..4db5ee0 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -32,6 +32,7 @@
>  include $(RTE_SDK)/mk/rte.vars.mk
> 
>  DIRS-y += librte_compat
> +DIRS-$(CONFIG_RTE_LIBRTE_PKTDEV) += librte_pktdev
>  DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal
>  DIRS-$(CONFIG_RTE_LIBRTE_MALLOC) += librte_malloc
>  DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring
> diff --git a/lib/librte_pktdev/Makefile b/lib/librte_pktdev/Makefile
> new file mode 100644
> index 000..2d3b3a1
> --- /dev/null
> +++ b/lib/librte_pktdev/Makefile
> @@ -0,0 +1,56 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2015 Intel Corporation. All rights reserved.
> +#   All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +# * Redistributions of source code must retain the above copyright
> +#   notice, this list of conditions and the following disclaimer.
> +# * Redistributions in binary form must reproduce the above copyright
> +#   notice, this list of conditions and the following disclaimer in
> +#   the documentation and/or other materials provided with the
> +#   distribution.
> +# * Neither the name of Intel Corporation nor the names of its
> +#   contributors may be used to endorse or promote products derived
> +#   from this software without specific prior written permission.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = libpktdev.a
> +
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +
> +EXPORT_MAP := rte_pktdev_version.map
> +
> +LIBABIVER := 1
> +
> +SRCS-y += rte_pktdev.c
> +
> +#
> +# Export include files
> +#
> +SYMLINK-y-include += rte_pktdev.h
> +
> +# this lib depends upon no others:
> +DEPDIRS-y +=
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_pktdev/rte_pktdev.c b/lib/librte_pktdev/rte_pktdev.c
> new file mode 100644
> index 000..4c32d86
> --- /dev/null
> +++ b/lib/librte_pktdev/rte_pktdev.c
> @@ -0,0 +1,36 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2015 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary 

[dpdk-dev] [PATCH v3 1/2] mk: fix build with gcc 4.4 and clang

2015-04-20 Thread Thomas Monjalon
> > With GCC 4.4.7 from CentOS 6.5, the following errors arise:
> >
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c: In function 'ixgbe_dev_rx_queue_setup':
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c:2509: error: missing initializer
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c:2509: error: (near initialization for 
> > 'dev_info.driver_name')
> >
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c: In function 'ixgbe_set_rsc':
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c:4072: error: missing initializer
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c:4072: error: (near initialization for 
> > 'dev_info.driver_name')
> >
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c: In function 
> > 'ixgbe_recv_pkts_lro_single_alloc':
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c:1479: error: 'next_rsc_entry' may be used 
> > uninitialized in this function
> > lib/librte_pmd_ixgbe/ixgbe_rxtx.c:1480: error: 'next_rxe' may be used 
> > uninitialized in this function
> >
> > The "missing initializer" warning is a GCC bug which seems fixed in 4.7.
> > The same warning is thrown by clang.
> > The "may be used uninitialized" warning is another GCC bug which seems 
> > fixed in 4.7.
> >
> > Fixes: 8eecb3295aed ("ixgbe: add LRO support")
> >
> > Signed-off-by: Thomas Monjalon 
> 
> Acked-by: Vlad Zolotarov 

Applied, thanks


[dpdk-dev] [PATCH v3 2/2] use simple zero initializers

2015-04-20 Thread Thomas Monjalon
> > To initialize a structure with zeros, one field was explicitly set
> > to avoid "missing initializer" bug with old GCC (e.g. 4.4).
> > This warning is now disabled (commit ) for old versions of GCC,
> > so the workarounds may be removed.
> >
> > These initializers should not be needed for static variables but they
> > are still used to workaround an ICC bug (see commit b2595c4aa92d).
> >
> > There is one remaining exception where {0} initializer doesn't work cleanly,
> > even with recent GCC:
> > lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c:735:9:
> > error: missing braces around initializer [-Werror=missing-braces]
> >struct rte_mbuf mb_def = {0}; /* zeroed mbuf */
> >
> > Tested with gcc-4.4.7 (CentOS), gcc-4.7.2 (Debian), gcc-4.9.2 (Arch),
> > clang-3.6.0 and icc-13.1.1.
> >
> > Signed-off-by: Thomas Monjalon 
> > Tested-by: Thomas Monjalon 
> > Tested-by: John McNamara 
> 
> Acked-by: Vlad Zolotarov 

Applied, thanks


[dpdk-dev] [PATCH 2/2] eal: pci probe and adjust_config should be void

2015-04-20 Thread Thomas Monjalon
2015-04-17 08:35, Stephen Hemminger:
> These functions always returned 0 and therefore should be void.
> 
> Signed-off-by: Stephen Hemminger 
> ---
>  lib/librte_eal/common/eal_common_options.c | 3 +--
>  lib/librte_eal/common/eal_common_pci.c | 7 ++-
>  lib/librte_eal/common/eal_options.h| 2 +-
>  lib/librte_eal/common/include/rte_pci.h| 6 +-
>  lib/librte_eal/linuxapp/eal/eal.c  | 6 ++
>  5 files changed, 7 insertions(+), 17 deletions(-)

Why is bsdapp not changed?
You posted a similar patch a few days ago which modified bsdapp:
http://dpdk.org/dev/patchwork/patch/4306/



[dpdk-dev] [PATCH] pci: make rte_pci_probe void

2015-04-20 Thread Thomas Monjalon
2015-04-14 10:55, Stephen Hemminger:
> Since rte_pci_probe always returns 0 or exits via rte_exit()
> there is no point in having it return a value.
> 
> Just make it void
> 
> Signed-off-by: Stephen Hemminger 

Seems partially superseded by this patch:
http://dpdk.org/dev/patchwork/patch/4347/



[dpdk-dev] [PATCH] enic: set correct port ID in received mbufs

2015-04-20 Thread Thomas Monjalon
> >This field is not supposed to contain the RX queue index. Applications can
> >rely on it to determine the port a given mbuf comes from.
> >
> >Signed-off-by: Adrien Mazarguil 
> 
> Acked-by: Sujith Sankar 

Applied, thanks


[dpdk-dev] [RFC PATCH 0/4] pktdev

2015-04-20 Thread Wiles, Keith


On 4/20/15, 8:19 AM, "Wiles, Keith"  wrote:

>
>
>From: Marc Sune mailto:marc.sune at bisdn.de>>
>Date: Monday, April 20, 2015 at 1:51 AM
>To: Keith Wiles mailto:keith.wiles at intel.com>>,
>"dev at dpdk.org" mailto:dev at 
>dpdk.org>>
>Subject: Re: [dpdk-dev] [RFC PATCH 0/4] pktdev
>
>
>
>On 17/04/15 21:50, Wiles, Keith wrote:
>
>Hi Marc and Bruce,
>
>Hi Keith, Bruce,
>
>
>On 4/17/15, 1:49 PM, "Marc Sune"
> wrote:
>
>
>
>On 17/04/15 17:16, Bruce Richardson wrote:
>
>
>Hi all,
>
>to continue this discussion a bit more, here is my, slightly different,
>slant
>on what a pktdev abstraction may look like.
>
>The primary objective I had in mind when drafting this is to provide the
>minimal abstraction that can be *easily* used as a common device
>abstraction for
>existing (and future) device types to be passed to dataplane code. The
>patchset
>demonstrates this by defining a minimal interface for pktdev - since I
>firmly
>believe the interface should be as small as possible - and then showing
>how that
>common interface can be used to unify rings and ethdevs under a common
>API for the
>datapath. I believe any attempt to unify things much beyond this to the
>control
>plane or setup phase is not worth doing - at least not initially - as at
>init time the code always needs to be aware of the underlying resource
>type in
>order to configure it properly for dataplane use.
>
>The overall objective I look to achieve is illustrated by the final
>patch in
>the series, which is a sample app where the same code is used for all
>cores,
>irrespective of the underlying device type.
>
>To get to that point, patch 1 defines the minimal API - just RX and TX.
>The .c
>file in the library is empty for simplicity, though I would see some
>functionality moving there when/if it makes sense e.g. the callback
>support
>from ethdev, as is done in Keith's patchset.
>Patch 2 then makes very minimal changes to ethdev to allow ethdevs to
>be used
>as pktdevs, and to make use of the pktdev functions when appropriate
>Patch 3 was, for me, the key test for this implementation - how hard
>was it to
>make an rte_ring usable as a pktdev too. Two single-line functions for
>RX/TX
>and a separate "converter" function proved to be all that was necessary
>here -
>and I believe simpler solutions may be possible too, as the extra
>structures
>allocated on conversion could be merged into the rte_ring structure
>itself and
>initialized on ring creation if we prefer that option. It is
>hoped/presumed that
>wrapping other structures, such as KNI, may prove to be just as easily
>done.
>[Not attempted yet - left as an exercise for the reader :-)].
>
>Now, in terms of pktdev vs ethdev, there is nothing in this proposal
>that
>cannot also be done using ethdev AFAIK. However, pktdev as outlined here
>should make the process far easier than trying to create a full PMD for
>something.
>All NIC specific functions, including things like stop/start, are
>stripped out,
>as they don't make sense for an rte_ring or other software objects.
>Also, the other thing this provides is that we can move away from just
>using
>port ids. Instead in the same way as we now reference
>rings/mempools/KNIs etc
>via pointer, we can do the same with ethernet ports as pktdevs on the
>data path.
>There was discussion previously on moving beyond 8-bit port ids. If we
>look to
>use ethdev as a common abstraction, I feel that change will soon have
>to be made
>causing a large amount of code churn.
>
>
>Hi Richard,
>
>First, thank you both for taking the time to look at this. I did not
>reply to Keith because you Richard summarized most of my concerns.
>
>I had a brief look to this second proposal. It is more aligned to what I
>had in mind. But still I feel it is slightly too complicated. I don't
>like much the necessary (in your approach) MACRO-like pkt_dev_data
>struct. It is also slightly inconvenient that the user has to do:
>
>+   struct rte_pkt_dev *in = rte_eth_get_dev(0);
>
>+   struct rte_pkt_dev *out = rte_ring_get_dev(
>+   rte_ring_create(name, 4096,
>rte_socket_id(), 0));
>
>
>
>What about something like (~pseudo-code):
>
>rte_pkt_dev_data.h:
>
>   enum rte_pkt_dev_type{
>RTE_PKT_DEV_ETH,
>RTE_PKT_DEV_RING,
>RTE_PKT_DEV_KNI,
>//Keep adding as more PMDs are supported
>   };
>
>
>   //This struct may be redundant if there is nothing more
>   struct rte_pkt_dev_data{
>enum rte_pkt_dev_type;
>//Placeholder, maybe we need more...
>   };
>
>   //Make RX/TX pktdev APIs more readable, but not really needed
>   typedef void pkt_dev_t;
>
>(In all PMDs and e.g. KNI and RINGs):
>
> struct rte_eth_dev {
>struct rte_pkt_dev_data pkt_dev;//
><++
>eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
>eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function.
>*/
>struct 

[dpdk-dev] [PATCH] enic: migrating to new fdir api

2015-04-20 Thread Thomas Monjalon
2015-04-09 14:59, Sujith Sankar:
> This patch helps enic migrate to the new flow-director API.
> 
> It takes care of the following.
> 1.  The change in fdir_filter structure and stats structure
> 2.  DPDK interface functions in enic_ethdev.c
> 3.  ENIC driver functions that deal with the VIC adapter
> 
> 
> Signed-off-by: Sujith Sankar 

Applied, thanks

Now the old flow director API may be totally removed.



[dpdk-dev] [RFC PATCH] ethdev: remove old flow director API

2015-04-20 Thread Thomas Monjalon
It's time to remove this old API.
It seems some work is still needed to rely only on eth_ctrl API.
At least ixgbe, i40e and testpmd must be fixed.
Jingjing, do you think it's possible to remove all these structures
from rte_ethdev.h?

Thanks

---
 lib/librte_ether/rte_ethdev.c | 260 -
 lib/librte_ether/rte_ethdev.h | 399 --
 lib/librte_pmd_enic/enic_ethdev.c |   1 -
 lib/librte_pmd_mlx4/mlx4.c|   7 -
 4 files changed, 667 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index e20cca5..65173e7 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -2098,266 +2098,6 @@ rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t 
pvid, int on)
 }

 int
-rte_eth_dev_fdir_add_signature_filter(uint8_t port_id,
- struct rte_fdir_filter *fdir_filter,
- uint8_t queue)
-{
-   struct rte_eth_dev *dev;
-
-   if (!rte_eth_dev_is_valid_port(port_id)) {
-   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
-   return (-ENODEV);
-   }
-
-   dev = _eth_devices[port_id];
-
-   if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_SIGNATURE) {
-   PMD_DEBUG_TRACE("port %d: invalid FDIR mode=%u\n",
-   port_id, dev->data->dev_conf.fdir_conf.mode);
-   return (-ENOSYS);
-   }
-
-   if ((fdir_filter->l4type == RTE_FDIR_L4TYPE_SCTP
-|| fdir_filter->l4type == RTE_FDIR_L4TYPE_NONE)
-   && (fdir_filter->port_src || fdir_filter->port_dst)) {
-   PMD_DEBUG_TRACE(" Port are meaningless for SCTP and " \
-   "None l4type, source & destinations ports " \
-   "should be null!\n");
-   return (-EINVAL);
-   }
-
-   FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fdir_add_signature_filter, -ENOTSUP);
-   return (*dev->dev_ops->fdir_add_signature_filter)(dev, fdir_filter,
-   queue);
-}
-
-int
-rte_eth_dev_fdir_update_signature_filter(uint8_t port_id,
-struct rte_fdir_filter *fdir_filter,
-uint8_t queue)
-{
-   struct rte_eth_dev *dev;
-
-   if (!rte_eth_dev_is_valid_port(port_id)) {
-   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
-   return (-ENODEV);
-   }
-
-   dev = _eth_devices[port_id];
-
-   if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_SIGNATURE) {
-   PMD_DEBUG_TRACE("port %d: invalid FDIR mode=%u\n",
-   port_id, dev->data->dev_conf.fdir_conf.mode);
-   return (-ENOSYS);
-   }
-
-   if ((fdir_filter->l4type == RTE_FDIR_L4TYPE_SCTP
-|| fdir_filter->l4type == RTE_FDIR_L4TYPE_NONE)
-   && (fdir_filter->port_src || fdir_filter->port_dst)) {
-   PMD_DEBUG_TRACE(" Port are meaningless for SCTP and " \
-   "None l4type, source & destinations ports " \
-   "should be null!\n");
-   return (-EINVAL);
-   }
-
-   FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fdir_update_signature_filter, 
-ENOTSUP);
-   return (*dev->dev_ops->fdir_update_signature_filter)(dev, fdir_filter,
-   queue);
-
-}
-
-int
-rte_eth_dev_fdir_remove_signature_filter(uint8_t port_id,
-struct rte_fdir_filter *fdir_filter)
-{
-   struct rte_eth_dev *dev;
-
-   if (!rte_eth_dev_is_valid_port(port_id)) {
-   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
-   return (-ENODEV);
-   }
-
-   dev = _eth_devices[port_id];
-
-   if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_SIGNATURE) {
-   PMD_DEBUG_TRACE("port %d: invalid FDIR mode=%u\n",
-   port_id, dev->data->dev_conf.fdir_conf.mode);
-   return (-ENOSYS);
-   }
-
-   if ((fdir_filter->l4type == RTE_FDIR_L4TYPE_SCTP
-|| fdir_filter->l4type == RTE_FDIR_L4TYPE_NONE)
-   && (fdir_filter->port_src || fdir_filter->port_dst)) {
-   PMD_DEBUG_TRACE(" Port are meaningless for SCTP and " \
-   "None l4type source & destinations ports " \
-   "should be null!\n");
-   return (-EINVAL);
-   }
-
-   FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fdir_remove_signature_filter, 
-ENOTSUP);
-   return (*dev->dev_ops->fdir_remove_signature_filter)(dev, fdir_filter);
-}
-
-int
-rte_eth_dev_fdir_get_infos(uint8_t port_id, struct rte_eth_fdir *fdir)
-{
-   struct rte_eth_dev *dev;
-
-   if (!rte_eth_dev_is_valid_port(port_id)) {
-   PMD_DEBUG_TRACE("Invalid 

[dpdk-dev] [RFC PATCH] ethdev: remove old flow director API

2015-04-20 Thread Wu, Jingjing

> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Monday, April 20, 2015 10:12 PM
> To: Wu, Jingjing
> Cc: dev at dpdk.org
> Subject: [RFC PATCH] ethdev: remove old flow director API
> 
> It's time to remove this old API.
> It seems some work is still needed to rely only on eth_ctrl API.
> At least ixgbe, i40e and testpmd must be fixed.
> Jingjing, do you think it's possible to remove all these structures
> from rte_ethdev.h?
> 
[Wu, Jingjing] Yes, I agree.
But a few comments are listed below.
Besides the following changes, some commands also need to be removed in testpmd.
For ixgbe, the code using the old APIs has already been fixed.

> Thanks
> 
> ---
>  lib/librte_ether/rte_ethdev.c | 260 -
>  lib/librte_ether/rte_ethdev.h | 399 
> --
>  lib/librte_pmd_enic/enic_ethdev.c |   1 -
>  lib/librte_pmd_mlx4/mlx4.c|   7 -
>  4 files changed, 667 deletions(-)
> 
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index e20cca5..65173e7 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -2098,266 +2098,6 @@ rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t 
> pvid, int on)
>  }
> 
>  int
> -rte_eth_dev_fdir_add_signature_filter(uint8_t port_id,
> -   struct rte_fdir_filter *fdir_filter,
> -   uint8_t queue)
> -{
> - struct rte_eth_dev *dev;
> -
> - if (!rte_eth_dev_is_valid_port(port_id)) {
> - PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> - return (-ENODEV);
> - }
> -
> - dev = _eth_devices[port_id];
> -
> - if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_SIGNATURE) {
> - PMD_DEBUG_TRACE("port %d: invalid FDIR mode=%u\n",
> - port_id, dev->data->dev_conf.fdir_conf.mode);
> - return (-ENOSYS);
> - }
> -
> - if ((fdir_filter->l4type == RTE_FDIR_L4TYPE_SCTP
> -  || fdir_filter->l4type == RTE_FDIR_L4TYPE_NONE)
> - && (fdir_filter->port_src || fdir_filter->port_dst)) {
> - PMD_DEBUG_TRACE(" Port are meaningless for SCTP and " \
> - "None l4type, source & destinations ports " \
> - "should be null!\n");
> - return (-EINVAL);
> - }
> -
> - FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fdir_add_signature_filter, -ENOTSUP);
> - return (*dev->dev_ops->fdir_add_signature_filter)(dev, fdir_filter,
> - queue);
> -}
> -
> -int
> -rte_eth_dev_fdir_update_signature_filter(uint8_t port_id,
> -  struct rte_fdir_filter *fdir_filter,
> -  uint8_t queue)
> -{
> - struct rte_eth_dev *dev;
> -
> - if (!rte_eth_dev_is_valid_port(port_id)) {
> - PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> - return (-ENODEV);
> - }
> -
> - dev = _eth_devices[port_id];
> -
> - if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_SIGNATURE) {
> - PMD_DEBUG_TRACE("port %d: invalid FDIR mode=%u\n",
> - port_id, dev->data->dev_conf.fdir_conf.mode);
> - return (-ENOSYS);
> - }
> -
> - if ((fdir_filter->l4type == RTE_FDIR_L4TYPE_SCTP
> -  || fdir_filter->l4type == RTE_FDIR_L4TYPE_NONE)
> - && (fdir_filter->port_src || fdir_filter->port_dst)) {
> - PMD_DEBUG_TRACE(" Port are meaningless for SCTP and " \
> - "None l4type, source & destinations ports " \
> - "should be null!\n");
> - return (-EINVAL);
> - }
> -
> - FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fdir_update_signature_filter, 
> -ENOTSUP);
> - return (*dev->dev_ops->fdir_update_signature_filter)(dev, fdir_filter,
> - queue);
> -
> -}
> -
> -int
> -rte_eth_dev_fdir_remove_signature_filter(uint8_t port_id,
> -  struct rte_fdir_filter *fdir_filter)
> -{
> - struct rte_eth_dev *dev;
> -
> - if (!rte_eth_dev_is_valid_port(port_id)) {
> - PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> - return (-ENODEV);
> - }
> -
> - dev = _eth_devices[port_id];
> -
> - if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_SIGNATURE) {
> - PMD_DEBUG_TRACE("port %d: invalid FDIR mode=%u\n",
> - port_id, dev->data->dev_conf.fdir_conf.mode);
> - return (-ENOSYS);
> - }
> -
> - if ((fdir_filter->l4type == RTE_FDIR_L4TYPE_SCTP
> -  || fdir_filter->l4type == RTE_FDIR_L4TYPE_NONE)
> - && (fdir_filter->port_src || fdir_filter->port_dst)) {
> - PMD_DEBUG_TRACE(" Port are meaningless for SCTP and " \
> -  

[dpdk-dev] cost of reading tsc register

2015-04-20 Thread Ravi Kumar Iyer
Hi,
We were doing some code optimizations while running DPDK-based applications, and
chanced upon the rte_rdtsc function [to read the TSC timestamp register value]
consuming CPU cycles on the order of 100 clock cycles, with a delta of up to
40 cycles at times [60-140 cycles].

We are actually building up a cpu intensive application which is also very 
clock cycle sensitive and this is impacting our implementation.

To validate the same using a small/vanilla application we wrote a small code 
and tested on a single core.
Has anyone else faced a similar issue or are we doing something really 
atrocious here.

Below is the pseudo snip of the same:



uint64_t g_tsc_cost[8] __rte_cache_aligned;

void test_tsc_cost()
{
uint8_t i = 0;
for (i = 0; i < 8 ; i++)
{
g_tsc_cost[i] = rte_rdtsc();
  }
}
int
main(int argc, char **argv)
{

int ret;
unsigned lcore_id;

ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_panic("Cannot init EAL\n");

memset(g_tsc_cost,0,64); /* warm the cache */

uint64_t sc = rte_rdtsc(); /* start count */
test_tsc_cost();
uint64_t ec = rte_rdtsc(); /* end count */

printf("\n Total cost = %lu\n",(ec-sc));

uint8_t i = 0;

for (i = 0; i < 8 ; i++)
{
printf("\n g_tsc_cost[%d]=%lu",i,g_tsc_cost[i]);
   /* here the values printed are 60-140 units apart */

}
return 0;
}


Just to compare, on a few bare-metal implementations of non-Intel processors, we
are seeing similar code print values with a delta of 3-4 cycles, and thus
it's becoming a bit difficult to digest as well.  Grateful for any help/guidance
here.

Thanks
ravi




"DISCLAIMER: This message is proprietary to Aricent and is intended solely for 
the use of the individual to whom it is addressed. It may contain privileged or 
confidential information and should not be circulated or used for any purpose 
other than for what it is intended. If you have received this message in error, 
please notify the originator immediately. If you are not the intended 
recipient, you are notified that you are strictly prohibited from using, 
copying, altering, or disclosing the contents of this message. Aricent accepts 
no responsibility for loss or damage arising from the use of the information 
transmitted by this email including damage from virus."


[dpdk-dev] [RFC PATCH 1/4] Add example pktdev implementation

2015-04-20 Thread Bruce Richardson
On Mon, Apr 20, 2015 at 12:26:43PM +0100, Ananyev, Konstantin wrote:
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Bruce Richardson
> > Sent: Friday, April 17, 2015 4:17 PM
> > To: dev at dpdk.org; Wiles, Keith
> > Subject: [dpdk-dev] [RFC PATCH 1/4] Add example pktdev implementation
> > 
> > This commit demonstrates what a minimal API for all packet handling
> > types would look like. It simply provides the necessary parts for
> > receiving and transmiting packets, and is based off the ethdev
> > implementation.
> > ---
> >  config/common_bsdapp   |   5 ++
> >  config/common_linuxapp |   5 ++
> >  lib/Makefile   |   1 +
> >  lib/librte_pktdev/Makefile |  56 
> >  lib/librte_pktdev/rte_pktdev.c |  35 ++
> >  lib/librte_pktdev/rte_pktdev.h | 144 
> > +
> >  6 files changed, 246 insertions(+)
> >  create mode 100644 lib/librte_pktdev/Makefile
> >  create mode 100644 lib/librte_pktdev/rte_pktdev.c
> >  create mode 100644 lib/librte_pktdev/rte_pktdev.h
> > 
> > diff --git a/config/common_bsdapp b/config/common_bsdapp
> > index 8ff4dc2..d2b932c 100644
> > --- a/config/common_bsdapp
> > +++ b/config/common_bsdapp
> > @@ -132,6 +132,11 @@ CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=y
> >  CONFIG_RTE_LIBRTE_KVARGS=y
> > 
> >  #
> > +# Compile generic packet handling device library
> > +#
> > +CONFIG_RTE_LIBRTE_PKTDEV=y
> > +
> > +#
> >  # Compile generic ethernet library
> >  #
> >  CONFIG_RTE_LIBRTE_ETHER=y
> > diff --git a/config/common_linuxapp b/config/common_linuxapp
> > index 09a58ac..5bda416 100644
> > --- a/config/common_linuxapp
> > +++ b/config/common_linuxapp
> > @@ -129,6 +129,11 @@ CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=y
> >  CONFIG_RTE_LIBRTE_KVARGS=y
> > 
> >  #
> > +# Compile generic packet handling device library
> > +#
> > +CONFIG_RTE_LIBRTE_PKTDEV=y
> > +
> > +#
> >  # Compile generic ethernet library
> >  #
> >  CONFIG_RTE_LIBRTE_ETHER=y
> > diff --git a/lib/Makefile b/lib/Makefile
> > index d94355d..4db5ee0 100644
> > --- a/lib/Makefile
> > +++ b/lib/Makefile
> > @@ -32,6 +32,7 @@
> >  include $(RTE_SDK)/mk/rte.vars.mk
> > 
> >  DIRS-y += librte_compat
> > +DIRS-$(CONFIG_RTE_LIBRTE_PKTDEV) += librte_pktdev
> >  DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal
> >  DIRS-$(CONFIG_RTE_LIBRTE_MALLOC) += librte_malloc
> >  DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring
> > diff --git a/lib/librte_pktdev/Makefile b/lib/librte_pktdev/Makefile
> > new file mode 100644
> > index 000..2d3b3a1
> > --- /dev/null
> > +++ b/lib/librte_pktdev/Makefile
> > @@ -0,0 +1,56 @@
> > +#   BSD LICENSE
> > +#
> > +#   Copyright(c) 2015 Intel Corporation. All rights reserved.
> > +#   All rights reserved.
> > +#
> > +#   Redistribution and use in source and binary forms, with or without
> > +#   modification, are permitted provided that the following conditions
> > +#   are met:
> > +#
> > +# * Redistributions of source code must retain the above copyright
> > +#   notice, this list of conditions and the following disclaimer.
> > +# * Redistributions in binary form must reproduce the above copyright
> > +#   notice, this list of conditions and the following disclaimer in
> > +#   the documentation and/or other materials provided with the
> > +#   distribution.
> > +# * Neither the name of Intel Corporation nor the names of its
> > +#   contributors may be used to endorse or promote products derived
> > +#   from this software without specific prior written permission.
> > +#
> > +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > +
> > +include $(RTE_SDK)/mk/rte.vars.mk
> > +
> > +#
> > +# library name
> > +#
> > +LIB = libpktdev.a
> > +
> > +CFLAGS += -O3
> > +CFLAGS += $(WERROR_FLAGS)
> > +
> > +EXPORT_MAP := rte_pktdev_version.map
> > +
> > +LIBABIVER := 1
> > +
> > +SRCS-y += rte_pktdev.c
> > +
> > +#
> > +# Export include files
> > +#
> > +SYMLINK-y-include += rte_pktdev.h
> > +
> > +# this lib depends upon no others:
> > +DEPDIRS-y +=
> > +
> > +include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/lib/librte_pktdev/rte_pktdev.c 

[dpdk-dev] cost of reading tsc register

2015-04-20 Thread Stephen Hemminger
On Mon, 20 Apr 2015 14:37:53 +
Ravi Kumar Iyer  wrote:

> Hi,
> We were doing some code optimizations , running DPDK based applications, and 
> chanced upon the rte_rdtsc function [ to read tsc timestamp register value ] 
> consuming cpu cycles of the order of 100clock cycles with a delta of upto 
> 40cycles at times [ 60-140 cycles]
> 
> We are actually building up a cpu intensive application which is also very 
> clock cycle sensitive and this is impacting our implementation.
> 
> To validate the same using a small/vanilla application we wrote a small code 
> and tested on a single core.
> Has anyone else faced a similar issue or are we doing something really 
> atrocious here.
> 
> Below is the pseudo snip of the same:
> 
> 
> 
> uint64_t g_tsc_cost[8] __rte_cache_aligned;
> 
> void test_tsc_cost()
> {
> uint8_t i = 0;
> for (i = 0; i < 8 ; i++)
> {
> g_tsc_cost[i] = rte_rdtsc();
>   }
> }
> int
> main(int argc, char **argv)
> {
> 
> int ret;
> unsigned lcore_id;
> 
> ret = rte_eal_init(argc, argv);
> if (ret < 0)
> rte_panic("Cannot init EAL\n");
> 
> memset(g_tsc_cost,0,64); /* warm the cache */
> 
> uint64_t sc = rte_rdtsc(); /* start count */
> test_tsc_cost();
> uint64_t ec = rte_rdtsc(); /* end count */
> 
> printf("\n Total cost = %lu\n",(ec-sc));
> 
> uint8_t i = 0;
> 
> for (i = 0; i < 8 ; i++)
> {
> printf("\n g_tsc_cost[%d]=%lu",i,g_tsc_cost[i]);
>/* here the values printed are 60-140 units apart */
> 
> }
> return 0;
> }
> 
> 
> Just to compare, On few bare metal implementations of non-intel processors, 
> we are seeing the similar code print values with a delta of 3-4 cycles and 
> thus its becoming a bit difficult to digest as well.  Grateful for any 
> help/guidance here.

The TSC instruction has its quirks. As far as I can tell:
 1. It kills instruction pipelining
 2. It is as expensive as a cache miss
 3. Counter values are not stable on some CPUs

In general, it is best to avoid getting dependent on it in real code.
Intel seems to only test on current generation Intel CPUs in their
lab and on bare metal. Don't read too much into the demo applications.

To get reasonable performance, I gave up on TSC and used approximate
loop cycles for tuning.


[dpdk-dev] [PATCH v4 00/12] mbuf: enhancements of mbuf clones

2015-04-20 Thread Olivier Matz
The first objective of this series is to fix the support of indirect
mbufs when the application reserves a private area in mbufs. It also
removes the limitation that rte_pktmbuf_clone() is only allowed on
direct (non-cloned) mbufs. The series also contains some enhancements
and fixes in the mbuf area that make the implementation of the
last patches easier.

Changes in v4:
- do not add a priv_size in mbuf structure, having a proper accessor
  to read it from the pool private area is clearer
- prepend some reworks in the mbuf area to simplify the implementation
  (fix mbuf initialization by not using a hardcoded mbuf size, add
  accessors for mbuf pool private area, add a helper to create a
  mbuf pool)

Changes in v3:
- an mbuf can now attach to another one that has a different private
  size. In this case, the m->priv_size corresponds to the size of the
  private area of the direct mbuf.
- add comments to reflect these changes
- minor style modifications

Changes in v2:
- do not change the use of MBUF_EXT_MEM() in vhost
- change rte_mbuf_from_baddr() to rte_mbuf_from_indirect(), removing
  one parameter
- fix and rework rte_pktmbuf_detach()
- move m->priv_size in second mbuf cache line
- fix mbuf free in test error case


Olivier Matz (12):
  mbuf: fix mbuf data room size calculation rte_pktmbuf_pool_init
  examples: always initialize mbuf_pool private area
  mbuf: add accessors to get data room size and private size
  mbuf: fix rte_pktmbuf_init when mbuf private size is not zero
  testpmd: use standard functions to initialize mbufs and mbuf pool
  mbuf: introduce a new helper to create a mbuf pool
  apps: use rte_pktmbuf_pool_create to create mbuf pools
  mbuf: fix clone support when application uses private mbuf data
  mbuf: allow to clone an indirect mbuf
  test/mbuf: rename mc variable in m
  test/mbuf: enhance mbuf refcnt test
  test/mbuf: verify that cloning a clone works properly

 app/test-pipeline/init.c   |  15 +-
 app/test-pmd/testpmd.c |  78 +
 app/test/test_distributor.c|  10 +-
 app/test/test_distributor_perf.c   |  10 +-
 app/test/test_kni.c|  16 +-
 app/test/test_link_bonding.c   |  10 +-
 app/test/test_link_bonding_mode4.c |  12 +-
 app/test/test_mbuf.c   | 110 +---
 app/test/test_pmd_perf.c   |  11 +-
 app/test/test_pmd_ring.c   |  10 +-
 app/test/test_reorder.c|  10 +-
 app/test/test_sched.c  |  16 +-
 app/test/test_table.c  |   9 +-
 app/test/test_table.h  |   3 +-
 doc/guides/rel_notes/updating_apps.rst |  16 ++
 examples/bond/main.c   |  10 +-
 examples/distributor/main.c|  11 +-
 examples/dpdk_qat/main.c   |  10 +-
 examples/exception_path/main.c |  14 +-
 examples/ip_fragmentation/main.c   |  18 +-
 examples/ip_pipeline/init.c|  28 +--
 examples/ipv4_multicast/main.c |  21 +--
 examples/kni/main.c|  12 +-
 examples/l2fwd-ivshmem/host/host.c |  10 +-
 examples/l2fwd-jobstats/main.c |  10 +-
 examples/l2fwd/main.c  |  11 +-
 examples/l3fwd-acl/main.c  |  11 +-
 examples/l3fwd-power/main.c|  11 +-
 examples/l3fwd-vf/main.c   |  12 +-
 examples/l3fwd/main.c  |  10 +-
 examples/link_status_interrupt/main.c  |  10 +-
 examples/load_balancer/init.c  |  12 +-
 examples/load_balancer/main.h  |   4 +-
 .../client_server_mp/mp_server/init.c  |  10 +-
 examples/multi_process/symmetric_mp/main.c |  10 +-
 examples/netmap_compat/bridge/bridge.c |  12 +-
 examples/packet_ordering/main.c|  11 +-
 examples/qos_meter/main.c  |   7 +-
 examples/qos_sched/init.c  |  10 +-
 examples/qos_sched/main.h  |   2 +-
 examples/quota_watermark/include/conf.h|   2 +-
 examples/quota_watermark/qw/main.c |   7 +-
 examples/rxtx_callbacks/main.c |  11 +-
 examples/skeleton/basicfwd.c   |  13 +-
 examples/vhost/main.c  |  31 ++--
 examples/vhost_xen/main.c  |  11 +-
 examples/vmdq/main.c   |  11 +-
 examples/vmdq_dcb/main.c   |  10 +-
 lib/librte_ether/rte_ethdev.c  |   4 +-
 lib/librte_mbuf/rte_mbuf.c |  63 +--
 

[dpdk-dev] [PATCH v4 01/12] mbuf: fix mbuf data room size calculation rte_pktmbuf_pool_init

2015-04-20 Thread Olivier Matz
Deduce the mbuf data room size from mempool->elt_size and priv_size,
instead of using a hardcoded value that is not related to the real
buffer size.

To use rte_pktmbuf_pool_init(), the user can either:
- give a NULL parameter to rte_pktmbuf_pool_init(): in this case, the
  private size is assumed to be 0, and the room size is
  mp->elt_size - sizeof(struct rte_mbuf).
- give the rte_pktmbuf_pool_private filled with appropriate
  data_room_size and priv_size values.

Signed-off-by: Olivier Matz 
---
 app/test-pmd/testpmd.c |  1 +
 doc/guides/rel_notes/updating_apps.rst | 12 
 examples/vhost/main.c  |  5 ++---
 lib/librte_mbuf/rte_mbuf.c | 27 ---
 lib/librte_mbuf/rte_mbuf.h |  3 ++-
 5 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 3057791..10e4347 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -443,6 +443,7 @@ testpmd_mbuf_pool_ctor(struct rte_mempool *mp,
mbp_ctor_arg = (struct mbuf_pool_ctor_arg *) opaque_arg;
mbp_priv = rte_mempool_get_priv(mp);
mbp_priv->mbuf_data_room_size = mbp_ctor_arg->seg_buf_size;
+   mbp_priv->mbuf_priv_size = 0;
 }

 static void
diff --git a/doc/guides/rel_notes/updating_apps.rst 
b/doc/guides/rel_notes/updating_apps.rst
index 4dbf268..f513615 100644
--- a/doc/guides/rel_notes/updating_apps.rst
+++ b/doc/guides/rel_notes/updating_apps.rst
@@ -4,6 +4,18 @@ Updating Applications from Previous Versions
 Although backward compatibility is being maintained across DPDK releases, code 
written for previous versions of the DPDK
 may require some code updates to benefit from performance and user experience 
enhancements provided in later DPDK releases.

+DPDK 2.0 to DPDK 2.1
+
+
+*   The second argument of rte_pktmbuf_pool_init(mempool, opaque) is now a
+pointer to a struct rte_pktmbuf_pool_private instead of a uint16_t
+casted into a pointer. Backward compatibility is preserved when the
+argument was NULL which is the majority of use cases, but not if the
+opaque pointer was not NULL, as it is not technically feasible. In
+this case, the application has to be modified to properly fill a
+rte_pktmbuf_pool_private structure and pass it to
+rte_pktmbuf_pool_init().
+
 DPDK 1.7 to DPDK 1.8
 

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index ad10f82..fc73d1e 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -2844,11 +2844,10 @@ static void
 setup_mempool_tbl(int socket, uint32_t index, char *pool_name,
char *ring_name, uint32_t nb_mbuf)
 {
-   uint16_t roomsize = VIRTIO_DESCRIPTOR_LEN_ZCP + RTE_PKTMBUF_HEADROOM;
vpool_array[index].pool
= rte_mempool_create(pool_name, nb_mbuf, MBUF_SIZE_ZCP,
MBUF_CACHE_SIZE_ZCP, sizeof(struct rte_pktmbuf_pool_private),
-   rte_pktmbuf_pool_init, (void *)(uintptr_t)roomsize,
+   rte_pktmbuf_pool_init, NULL,
rte_pktmbuf_init, NULL, socket, 0);
if (vpool_array[index].pool != NULL) {
vpool_array[index].ring
@@ -2870,7 +2869,7 @@ setup_mempool_tbl(int socket, uint32_t index, char 
*pool_name,
}

/* Need consider head room. */
-   vpool_array[index].buf_size = roomsize - RTE_PKTMBUF_HEADROOM;
+   vpool_array[index].buf_size = VIRTIO_DESCRIPTOR_LEN_ZCP;
} else {
rte_exit(EXIT_FAILURE, "mempool_create(%s) failed", pool_name);
}
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 526b18d..231cfb8 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -81,17 +81,30 @@ rte_ctrlmbuf_init(struct rte_mempool *mp,
 void
 rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg)
 {
-   struct rte_pktmbuf_pool_private *mbp_priv;
+   struct rte_pktmbuf_pool_private *user_mbp_priv, *mbp_priv;
+   struct rte_pktmbuf_pool_private default_mbp_priv;
uint16_t roomsz;

-   mbp_priv = rte_mempool_get_priv(mp);
-   roomsz = (uint16_t)(uintptr_t)opaque_arg;
+   RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf));

-   /* Use default data room size. */
-   if (0 == roomsz)
-   roomsz = 2048 + RTE_PKTMBUF_HEADROOM;
+   /* if no structure is provided, assume no mbuf private area */
+   user_mbp_priv = opaque_arg;
+   if (user_mbp_priv == NULL) {
+   default_mbp_priv.mbuf_priv_size = 0;
+   if (mp->elt_size > sizeof(struct rte_mbuf))
+   roomsz = mp->elt_size - sizeof(struct rte_mbuf);
+   else
+   roomsz = 0;
+   default_mbp_priv.mbuf_data_room_size = roomsz;
+   user_mbp_priv = &default_mbp_priv;
+   }

-   mbp_priv->mbuf_data_room_size = roomsz;
+   

[dpdk-dev] [PATCH v4 02/12] examples: always initialize mbuf_pool private area

2015-04-20 Thread Olivier Matz
The mbuf pool private area must always be populated in a mbuf pool.
The applications or drivers may expect that for a mbuf pool, the mbuf
pool private area (mbuf_data_room_size and mbuf_priv_size) are
properly filled.

Signed-off-by: Olivier Matz 
---
 examples/ip_fragmentation/main.c | 4 ++--
 examples/ip_pipeline/init.c  | 8 ++--
 examples/ipv4_multicast/main.c   | 6 --
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/examples/ip_fragmentation/main.c b/examples/ip_fragmentation/main.c
index 93ea2a1..cf63718 100644
--- a/examples/ip_fragmentation/main.c
+++ b/examples/ip_fragmentation/main.c
@@ -764,8 +764,8 @@ init_mem(void)

mp = rte_mempool_create(buf, NB_MBUF,
   sizeof(struct 
rte_mbuf), 32,
-  0,
-  NULL, NULL,
+  sizeof(struct 
rte_pktmbuf_pool_private),
+  
rte_pktmbuf_pool_init, NULL,
   rte_pktmbuf_init, 
NULL,
   socket, 0);
if (mp == NULL) {
diff --git a/examples/ip_pipeline/init.c b/examples/ip_pipeline/init.c
index 96aee2b..61d71c3 100644
--- a/examples/ip_pipeline/init.c
+++ b/examples/ip_pipeline/init.c
@@ -363,6 +363,8 @@ app_get_ring_resp(uint32_t core_id)
 static void
 app_init_mbuf_pools(void)
 {
+   struct rte_pktmbuf_pool_private indirect_mbp_priv;
+
/* Init the buffer pool */
RTE_LOG(INFO, USER1, "Creating the mbuf pool ...\n");
app.pool = rte_mempool_create(
@@ -380,13 +382,15 @@ app_init_mbuf_pools(void)

/* Init the indirect buffer pool */
RTE_LOG(INFO, USER1, "Creating the indirect mbuf pool ...\n");
+   indirect_mbp_priv.mbuf_data_room_size = 0;
+   indirect_mbp_priv.mbuf_priv_size = sizeof(struct app_pkt_metadata);
app.indirect_pool = rte_mempool_create(
"indirect mempool",
app.pool_size,
sizeof(struct rte_mbuf) + sizeof(struct app_pkt_metadata),
app.pool_cache_size,
-   0,
-   NULL, NULL,
+   sizeof(struct rte_pktmbuf_pool_private),
+   rte_pktmbuf_pool_init, &indirect_mbp_priv,
rte_pktmbuf_init, NULL,
rte_socket_id(),
0);
diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c
index eed5611..19832d8 100644
--- a/examples/ipv4_multicast/main.c
+++ b/examples/ipv4_multicast/main.c
@@ -699,14 +699,16 @@ main(int argc, char **argv)
rte_exit(EXIT_FAILURE, "Cannot init packet mbuf pool\n");

header_pool = rte_mempool_create("header_pool", NB_HDR_MBUF,
-   HDR_MBUF_SIZE, 32, 0, NULL, NULL, rte_pktmbuf_init, NULL,
+   HDR_MBUF_SIZE, 32, sizeof(struct rte_pktmbuf_pool_private),
+   rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
rte_socket_id(), 0);

if (header_pool == NULL)
rte_exit(EXIT_FAILURE, "Cannot init header mbuf pool\n");

clone_pool = rte_mempool_create("clone_pool", NB_CLONE_MBUF,
-   CLONE_MBUF_SIZE, 32, 0, NULL, NULL, rte_pktmbuf_init, NULL,
+   CLONE_MBUF_SIZE, 32, sizeof(struct rte_pktmbuf_pool_private),
+   rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
rte_socket_id(), 0);

if (clone_pool == NULL)
-- 
2.1.4



[dpdk-dev] [PATCH v4 03/12] mbuf: add accessors to get data room size and private size

2015-04-20 Thread Olivier Matz
The code retrieving the pool private area is duplicated in many
places; we can use a function for it.

Signed-off-by: Olivier Matz 
---
 lib/librte_ether/rte_ethdev.c|  4 +--
 lib/librte_mbuf/rte_mbuf.h   | 41 
 lib/librte_pmd_af_packet/rte_eth_af_packet.c |  6 ++--
 lib/librte_pmd_e1000/em_rxtx.c   |  5 ++--
 lib/librte_pmd_e1000/igb_rxtx.c  | 12 +++-
 lib/librte_pmd_fm10k/fm10k_ethdev.c  |  6 ++--
 lib/librte_pmd_i40e/i40e_ethdev_vf.c |  6 ++--
 lib/librte_pmd_i40e/i40e_rxtx.c  | 15 --
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c| 12 +++-
 lib/librte_pmd_pcap/rte_eth_pcap.c   |  5 +---
 lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c|  7 ++---
 11 files changed, 67 insertions(+), 52 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index e20cca5..ff06256 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1439,7 +1439,6 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t 
rx_queue_id,
int ret;
uint32_t mbp_buf_size;
struct rte_eth_dev *dev;
-   struct rte_pktmbuf_pool_private *mbp_priv;
struct rte_eth_dev_info dev_info;

/* This function is only safe when called from the primary process
@@ -1478,8 +1477,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t 
rx_queue_id,
(int) sizeof(struct rte_pktmbuf_pool_private));
return (-ENOSPC);
}
-   mbp_priv = rte_mempool_get_priv(mp);
-   mbp_buf_size = mbp_priv->mbuf_data_room_size;
+   mbp_buf_size = rte_pktmbuf_data_room_size(mp);

if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) {
PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d "
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 13fd626..a4146fa 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -642,6 +642,47 @@ void rte_pktmbuf_init(struct rte_mempool *mp, void 
*opaque_arg,
 void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg);

 /**
+ * Get the data room size of mbufs stored in a pktmbuf_pool
+ *
+ * The data room size is the amount of data that can be stored in a
+ * mbuf including the headroom (RTE_PKTMBUF_HEADROOM).
+ *
+ * @param mp
+ *   The packet mbuf pool.
+ * @return
+ *   The data room size of mbufs stored in this mempool.
+ */
+static inline uint16_t
+rte_pktmbuf_data_room_size(struct rte_mempool *mp)
+{
+   struct rte_pktmbuf_pool_private *mbp_priv;
+
+   mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
+   return mbp_priv->mbuf_data_room_size;
+}
+
+/**
+ * Get the application private size of mbufs stored in a pktmbuf_pool
+ *
+ * The private size of mbuf is a zone located between the rte_mbuf
+ * structure and the data buffer where an application can store data
+ * associated to a packet.
+ *
+ * @param mp
+ *   The packet mbuf pool.
+ * @return
+ *   The private size of mbufs stored in this mempool.
+ */
+static inline uint16_t
+rte_pktmbuf_priv_size(struct rte_mempool *mp)
+{
+   struct rte_pktmbuf_pool_private *mbp_priv;
+
+   mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
+   return mbp_priv->mbuf_priv_size;
+}
+
+/**
  * Reset the fields of a packet mbuf to their default values.
  *
  * The given mbuf must have only one segment.
diff --git a/lib/librte_pmd_af_packet/rte_eth_af_packet.c 
b/lib/librte_pmd_af_packet/rte_eth_af_packet.c
index f7e9ec9..bdd9628 100644
--- a/lib/librte_pmd_af_packet/rte_eth_af_packet.c
+++ b/lib/librte_pmd_af_packet/rte_eth_af_packet.c
@@ -348,15 +348,13 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 {
struct pmd_internals *internals = dev->data->dev_private;
struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id];
-   struct rte_pktmbuf_pool_private *mbp_priv;
uint16_t buf_size;

pkt_q->mb_pool = mb_pool;

/* Now get the space available for data in the mbuf */
-   mbp_priv = rte_mempool_get_priv(pkt_q->mb_pool);
-   buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
-  RTE_PKTMBUF_HEADROOM);
+   buf_size = (uint16_t)(rte_pktmbuf_data_room_size(pkt_q->mb_pool) -
+   RTE_PKTMBUF_HEADROOM);

if (ETH_FRAME_LEN > buf_size) {
RTE_LOG(ERR, PMD,
diff --git a/lib/librte_pmd_e1000/em_rxtx.c b/lib/librte_pmd_e1000/em_rxtx.c
index 8e20920..64d067c 100644
--- a/lib/librte_pmd_e1000/em_rxtx.c
+++ b/lib/librte_pmd_e1000/em_rxtx.c
@@ -1668,12 +1668,11 @@ eth_em_rx_init(struct rte_eth_dev *dev)
/* Determine RX bufsize. */
rctl_bsize = EM_MAX_BUF_SIZE;
for (i = 0; i < dev->data->nb_rx_queues; i++) {
-   struct rte_pktmbuf_pool_private *mbp_priv;
uint32_t buf_size;

rxq = dev->data->rx_queues[i];
- 

[dpdk-dev] [PATCH v4 04/12] mbuf: fix rte_pktmbuf_init when mbuf private size is not zero

2015-04-20 Thread Olivier Matz
Allow the user to use the default rte_pktmbuf_init() function even
if the mbuf private size is not 0.

Signed-off-by: Olivier Matz 
---
 lib/librte_mbuf/rte_mbuf.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 231cfb8..d7f0380 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -119,16 +119,19 @@ rte_pktmbuf_init(struct rte_mempool *mp,
 __attribute__((unused)) unsigned i)
 {
struct rte_mbuf *m = _m;
-   uint32_t buf_len = mp->elt_size - sizeof(struct rte_mbuf);
+   uint32_t mbuf_size, buf_len;

-   RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf));
+   mbuf_size = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(mp);
+   buf_len = rte_pktmbuf_data_room_size(mp);
+
+   RTE_MBUF_ASSERT(mp->elt_size >= mbuf_size);
+   RTE_MBUF_ASSERT(buf_len <= 0x);

memset(m, 0, mp->elt_size);

/* start of buffer is just after mbuf structure */
-   m->buf_addr = (char *)m + sizeof(struct rte_mbuf);
-   m->buf_physaddr = rte_mempool_virt2phy(mp, m) +
-   sizeof(struct rte_mbuf);
+   m->buf_addr = (char *)m + mbuf_size;
+   m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
m->buf_len = (uint16_t)buf_len;

/* keep some headroom between start of buffer and data */
-- 
2.1.4



[dpdk-dev] [PATCH v4 05/12] testpmd: use standard functions to initialize mbufs and mbuf pool

2015-04-20 Thread Olivier Matz
The rte_pktmbuf_pool_init() and rte_pktmbuf_init() functions now
support a non-hardcoded buffer length. We can remove the
specific functions used in testpmd and replace them by the standard
ones.

Signed-off-by: Olivier Matz 
---
 app/test-pmd/testpmd.c | 74 +-
 1 file changed, 7 insertions(+), 67 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 10e4347..1f2445e 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -393,83 +393,23 @@ set_def_fwd_config(void)
 /*
  * Configuration initialisation done once at init time.
  */
-struct mbuf_ctor_arg {
-   uint16_t seg_buf_offset; /**< offset of data in data segment of mbuf. */
-   uint16_t seg_buf_size;   /**< size of data segment in mbuf. */
-};
-
-struct mbuf_pool_ctor_arg {
-   uint16_t seg_buf_size; /**< size of data segment in mbuf. */
-};
-
-static void
-testpmd_mbuf_ctor(struct rte_mempool *mp,
- void *opaque_arg,
- void *raw_mbuf,
- __attribute__((unused)) unsigned i)
-{
-   struct mbuf_ctor_arg *mb_ctor_arg;
-   struct rte_mbuf*mb;
-
-   mb_ctor_arg = (struct mbuf_ctor_arg *) opaque_arg;
-   mb = (struct rte_mbuf *) raw_mbuf;
-
-   mb->pool = mp;
-   mb->buf_addr = (void *) ((char *)mb + mb_ctor_arg->seg_buf_offset);
-   mb->buf_physaddr = (uint64_t) (rte_mempool_virt2phy(mp, mb) +
-   mb_ctor_arg->seg_buf_offset);
-   mb->buf_len  = mb_ctor_arg->seg_buf_size;
-   mb->ol_flags = 0;
-   mb->data_off = RTE_PKTMBUF_HEADROOM;
-   mb->nb_segs  = 1;
-   mb->tx_offload   = 0;
-   mb->vlan_tci = 0;
-   mb->hash.rss = 0;
-}
-
-static void
-testpmd_mbuf_pool_ctor(struct rte_mempool *mp,
-  void *opaque_arg)
-{
-   struct mbuf_pool_ctor_arg  *mbp_ctor_arg;
-   struct rte_pktmbuf_pool_private *mbp_priv;
-
-   if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) {
-   printf("%s(%s) private_data_size %d < %d\n",
-  __func__, mp->name, (int) mp->private_data_size,
-  (int) sizeof(struct rte_pktmbuf_pool_private));
-   return;
-   }
-   mbp_ctor_arg = (struct mbuf_pool_ctor_arg *) opaque_arg;
-   mbp_priv = rte_mempool_get_priv(mp);
-   mbp_priv->mbuf_data_room_size = mbp_ctor_arg->seg_buf_size;
-   mbp_priv->mbuf_priv_size = 0;
-}
-
 static void
 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
 unsigned int socket_id)
 {
char pool_name[RTE_MEMPOOL_NAMESIZE];
struct rte_mempool *rte_mp;
-   struct mbuf_pool_ctor_arg mbp_ctor_arg;
-   struct mbuf_ctor_arg mb_ctor_arg;
uint32_t mb_size;

-   mbp_ctor_arg.seg_buf_size = (uint16_t) (RTE_PKTMBUF_HEADROOM +
-   mbuf_seg_size);
-   mb_ctor_arg.seg_buf_offset =
-   (uint16_t) RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_mbuf));
-   mb_ctor_arg.seg_buf_size = mbp_ctor_arg.seg_buf_size;
-   mb_size = mb_ctor_arg.seg_buf_offset + mb_ctor_arg.seg_buf_size;
+   mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name));

 #ifdef RTE_LIBRTE_PMD_XENVIRT
rte_mp = rte_mempool_gntalloc_create(pool_name, nb_mbuf, mb_size,
(unsigned) mb_mempool_cache,
sizeof(struct rte_pktmbuf_pool_private),
-   testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
-   testpmd_mbuf_ctor, &mb_ctor_arg,
+   rte_pktmbuf_pool_init, NULL,
+   rte_pktmbuf_init, NULL,
socket_id, 0);


@@ -479,15 +419,15 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
rte_mp = mempool_anon_create(pool_name, nb_mbuf, mb_size,
(unsigned) mb_mempool_cache,
sizeof(struct rte_pktmbuf_pool_private),
-   testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
-   testpmd_mbuf_ctor, &mb_ctor_arg,
+   rte_pktmbuf_pool_init, NULL,
+   rte_pktmbuf_init, NULL,
socket_id, 0);
else
rte_mp = rte_mempool_create(pool_name, nb_mbuf, mb_size,
(unsigned) mb_mempool_cache,
sizeof(struct rte_pktmbuf_pool_private),
-   testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
-   testpmd_mbuf_ctor, &mb_ctor_arg,
+   rte_pktmbuf_pool_init, NULL,
+   

[dpdk-dev] [PATCH v4 06/12] mbuf: introduce a new helper to create a mbuf pool

2015-04-20 Thread Olivier Matz
Add a new wrapper to rte_mempool_create() to simplify the creation
of a packet mbuf pool.

This wrapper can be used if there are no specific mempool flags, and
no specific mbuf or pool constructor function, which is most of the
use cases.

Signed-off-by: Olivier Matz 
---
 doc/guides/rel_notes/updating_apps.rst |  4 
 lib/librte_mbuf/rte_mbuf.c | 21 ++
 lib/librte_mbuf/rte_mbuf.h | 40 ++
 3 files changed, 65 insertions(+)

diff --git a/doc/guides/rel_notes/updating_apps.rst 
b/doc/guides/rel_notes/updating_apps.rst
index f513615..f4dd196 100644
--- a/doc/guides/rel_notes/updating_apps.rst
+++ b/doc/guides/rel_notes/updating_apps.rst
@@ -16,6 +16,10 @@ DPDK 2.0 to DPDK 2.1
 rte_pktmbuf_pool_private structure and pass it to
 rte_pktmbuf_pool_init().

+*   A simpler helper rte_pktmbuf_pool_create() can be used to create a
+packet mbuf pool. The old way using rte_mempool_create() is still
+supported though and is still used for more specific cases.
+
 DPDK 1.7 to DPDK 1.8
 

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index d7f0380..b013607 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -143,6 +143,27 @@ rte_pktmbuf_init(struct rte_mempool *mp,
m->port = 0xff;
 }

+/* helper to create a mbuf pool */
+struct rte_mempool *
+rte_pktmbuf_pool_create(const char *name, unsigned n,
+   unsigned cache_size, uint16_t priv_size, uint16_t data_room_size,
+   int socket_id)
+{
+   struct rte_pktmbuf_pool_private mbp_priv;
+   unsigned elt_size;
+
+
+   elt_size = sizeof(struct rte_mbuf) + (unsigned)priv_size +
+   (unsigned)data_room_size;
+   mbp_priv.mbuf_data_room_size = data_room_size;
+   mbp_priv.mbuf_priv_size = priv_size;
+
+   return rte_mempool_create(name, n, elt_size,
+   cache_size, sizeof(struct rte_pktmbuf_pool_private),
+   rte_pktmbuf_pool_init, &mbp_priv, rte_pktmbuf_init, NULL,
+   socket_id, 0);
+}
+
 /* do some sanity checks on a mbuf: panic if it fails */
 void
 rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header)
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index a4146fa..42db8e3 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -642,6 +642,46 @@ void rte_pktmbuf_init(struct rte_mempool *mp, void 
*opaque_arg,
 void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg);

 /**
+ * Create a mbuf pool.
+ *
+ * This function creates and initializes a packet mbuf pool. It is
+ * a wrapper to rte_mempool_create() with the proper packet constructor
+ * and mempool constructor.
+ *
+ * @param name
+ *   The name of the mbuf pool.
+ * @param n
+ *   The number of elements in the mbuf pool. The optimum size (in terms
+ *   of memory usage) for a mempool is when n is a power of two minus one:
+ *   n = (2^q - 1).
+ * @param cache_size
+ *   Size of the per-core object cache. See rte_mempool_create() for
+ *   details.
+ * @param priv_size
+ *   Size of application private are between the rte_mbuf structure
+ *   and the data buffer.
+ * @param data_room_size
+ *   Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM.
+ * @param socket_id
+ *   The socket identifier where the memory should be allocated. The
+ *   value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the
+ *   reserved zone.
+ * @return
+ *   The pointer to the new allocated mempool, on success. NULL on error
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *- E_RTE_NO_CONFIG - function could not get pointer to rte_config 
structure
+ *- E_RTE_SECONDARY - function was called from a secondary process instance
+ *- EINVAL - cache size provided is too large
+ *- ENOSPC - the maximum number of memzones has already been allocated
+ *- EEXIST - a memzone with the same name already exists
+ *- ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_mempool *
+rte_pktmbuf_pool_create(const char *name, unsigned n,
+   unsigned cache_size, uint16_t priv_size, uint16_t data_room_size,
+   int socket_id);
+
+/**
  * Get the data room size of mbufs stored in a pktmbuf_pool
  *
  * The data room size is the amount of data that can be stored in a
-- 
2.1.4



[dpdk-dev] [PATCH v4 07/12] apps: use rte_pktmbuf_pool_create to create mbuf pools

2015-04-20 Thread Olivier Matz
When it's possible, use the new helper to create the mbuf pools.
Most of the patch is trivial, except for the following files that
have some specifics (indirect mbufs):
- ip_fragmentation
- ip_pipeline
- ipv4_multicast
- vhost

Signed-off-by: Olivier Matz 
---
 app/test-pipeline/init.c   | 15 ++
 app/test-pmd/testpmd.c |  9 ++
 app/test/test_distributor.c| 10 ++-
 app/test/test_distributor_perf.c   | 10 ++-
 app/test/test_kni.c| 16 +++
 app/test/test_link_bonding.c   | 10 +++
 app/test/test_link_bonding_mode4.c | 12 +++-
 app/test/test_mbuf.c   | 22 +--
 app/test/test_pmd_perf.c   | 11 +++-
 app/test/test_pmd_ring.c   | 10 ++-
 app/test/test_reorder.c| 10 ++-
 app/test/test_sched.c  | 16 ++-
 app/test/test_table.c  |  9 ++
 app/test/test_table.h  |  3 +-
 examples/bond/main.c   | 10 ++-
 examples/distributor/main.c| 11 +++-
 examples/dpdk_qat/main.c   | 10 ++-
 examples/exception_path/main.c | 14 --
 examples/ip_fragmentation/main.c   | 18 
 examples/ip_pipeline/init.c| 32 --
 examples/ipv4_multicast/main.c | 23 ++--
 examples/kni/main.c| 12 +++-
 examples/l2fwd-ivshmem/host/host.c | 10 ++-
 examples/l2fwd-jobstats/main.c | 10 ++-
 examples/l2fwd/main.c  | 11 ++--
 examples/l3fwd-acl/main.c  | 11 +++-
 examples/l3fwd-power/main.c| 11 +++-
 examples/l3fwd-vf/main.c   | 12 +++-
 examples/l3fwd/main.c  | 10 +++
 examples/link_status_interrupt/main.c  | 10 ++-
 examples/load_balancer/init.c  | 12 ++--
 examples/load_balancer/main.h  |  4 +--
 .../client_server_mp/mp_server/init.c  | 10 ++-
 examples/multi_process/symmetric_mp/main.c | 10 +++
 examples/netmap_compat/bridge/bridge.c | 12 +++-
 examples/packet_ordering/main.c| 11 +++-
 examples/qos_meter/main.c  |  7 ++---
 examples/qos_sched/init.c  | 10 ++-
 examples/qos_sched/main.h  |  2 +-
 examples/quota_watermark/include/conf.h|  2 +-
 examples/quota_watermark/qw/main.c |  7 ++---
 examples/rxtx_callbacks/main.c | 11 +++-
 examples/skeleton/basicfwd.c   | 13 ++---
 examples/vhost/main.c  | 24 +---
 examples/vhost_xen/main.c  | 11 +++-
 examples/vmdq/main.c   | 11 +++-
 examples/vmdq_dcb/main.c   | 10 ++-
 lib/librte_pmd_bond/rte_eth_bond_alb.c | 16 +--
 48 files changed, 175 insertions(+), 386 deletions(-)

diff --git a/app/test-pipeline/init.c b/app/test-pipeline/init.c
index 05f4503..db2196b 100644
--- a/app/test-pipeline/init.c
+++ b/app/test-pipeline/init.c
@@ -85,8 +85,7 @@ struct app_params app = {
.ring_tx_size = 128,

/* Buffer pool */
-   .pool_buffer_size = 2048 + sizeof(struct rte_mbuf) +
-   RTE_PKTMBUF_HEADROOM,
+   .pool_buffer_size = 2048 + RTE_PKTMBUF_HEADROOM,
.pool_size = 32 * 1024,
.pool_cache_size = 256,

@@ -144,16 +143,8 @@ app_init_mbuf_pools(void)
 {
/* Init the buffer pool */
RTE_LOG(INFO, USER1, "Creating the mbuf pool ...\n");
-   app.pool = rte_mempool_create(
-   "mempool",
-   app.pool_size,
-   app.pool_buffer_size,
-   app.pool_cache_size,
-   sizeof(struct rte_pktmbuf_pool_private),
-   rte_pktmbuf_pool_init, NULL,
-   rte_pktmbuf_init, NULL,
-   rte_socket_id(),
-   0);
+   app.pool = rte_pktmbuf_pool_create("mempool", app.pool_size,
+   app.pool_cache_size, 0, app.pool_buffer_size, rte_socket_id());
if (app.pool == NULL)
rte_panic("Cannot create mbuf pool\n");
 }
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 1f2445e..8418db3 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -423,12 +423,9 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,

[dpdk-dev] [PATCH v4 08/12] mbuf: fix clone support when application uses private mbuf data

2015-04-20 Thread Olivier Matz
Add a new private_size field in mbuf structure that should
be initialized at mbuf pool creation. This field contains the
size of the application private data in mbufs.

Introduce new static inline functions rte_mbuf_from_indirect()
and rte_mbuf_to_baddr() to replace the existing macros, which
take the private size into account when attaching and detaching
mbufs.

Signed-off-by: Olivier Matz 
Reviewed-by: Zoltan Kiss 
---
 examples/vhost/main.c  |  4 ++--
 lib/librte_mbuf/rte_mbuf.c |  2 +-
 lib/librte_mbuf/rte_mbuf.h | 59 +++---
 3 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 22d6a4b..195d82f 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -138,7 +138,7 @@
 /* Number of descriptors per cacheline. */
 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))

-#define MBUF_EXT_MEM(mb)   (RTE_MBUF_FROM_BADDR((mb)->buf_addr) != (mb))
+#define MBUF_EXT_MEM(mb)   (rte_mbuf_from_indirect(mb) != (mb))

 /* mask of enabled ports */
 static uint32_t enabled_port_mask = 0;
@@ -1549,7 +1549,7 @@ attach_rxmbuf_zcp(struct virtio_net *dev)
 static inline void pktmbuf_detach_zcp(struct rte_mbuf *m)
 {
const struct rte_mempool *mp = m->pool;
-   void *buf = RTE_MBUF_TO_BADDR(m);
+   void *buf = rte_mbuf_to_baddr(m);
uint32_t buf_ofs;
uint32_t buf_len = mp->elt_size - sizeof(*m);
m->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof(*m);
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index b013607..784ae8b 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -129,7 +129,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,

memset(m, 0, mp->elt_size);

-   /* start of buffer is just after mbuf structure */
+   /* start of buffer is after mbuf structure and priv data */
m->buf_addr = (char *)m + mbuf_size;
m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
m->buf_len = (uint16_t)buf_len;
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 42db8e3..5c01c5b 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -320,16 +320,40 @@ struct rte_mbuf {
};
 } __rte_cache_aligned;

+static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
+
 /**
- * Given the buf_addr returns the pointer to corresponding mbuf.
+ * Return the mbuf owning the data buffer address of an indirect mbuf.
+ *
+ * @param mi
+ *   The pointer to the indirect mbuf.
+ * @return
+ *   The address of the direct mbuf corresponding to buffer_addr.
  */
-#define RTE_MBUF_FROM_BADDR(ba) (((struct rte_mbuf *)(ba)) - 1)
+static inline struct rte_mbuf *
+rte_mbuf_from_indirect(struct rte_mbuf *mi)
+{
+   struct rte_mbuf *md;
+   md = (struct rte_mbuf *)((char *)mi->buf_addr -
+  sizeof(*mi) - rte_pktmbuf_priv_size(mi->pool));
+   return md;
+}

 /**
- * Given the pointer to mbuf returns an address where it's  buf_addr
- * should point to.
+ * Return the buffer address embedded in the given mbuf.
+ *
+ * @param md
+ *   The pointer to the mbuf.
+ * @return
+ *   The address of the data buffer owned by the mbuf.
  */
-#define RTE_MBUF_TO_BADDR(mb)   (((struct rte_mbuf *)(mb)) + 1)
+static inline char *
+rte_mbuf_to_baddr(struct rte_mbuf *md)
+{
+   char *buffer_addr;
+   buffer_addr = (char *)md + sizeof(*md) + 
rte_pktmbuf_priv_size(md->pool);
+   return buffer_addr;
+}

 /**
  * Returns TRUE if given mbuf is indirect, or FALSE otherwise.
@@ -771,6 +795,7 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)

 /**
  * Attach packet mbuf to another packet mbuf.
+ *
  * After attachment we refer the mbuf we attached as 'indirect',
  * while mbuf we attached to as 'direct'.
  * Right now, not supported:
@@ -784,7 +809,6 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
  * @param md
  *   The direct packet mbuf.
  */
-
 static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
 {
RTE_MBUF_ASSERT(RTE_MBUF_DIRECT(md) &&
@@ -815,7 +839,8 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, 
struct rte_mbuf *md)
 }

 /**
- * Detach an indirect packet mbuf -
+ * Detach an indirect packet mbuf.
+ *
  *  - restore original mbuf address and length values.
  *  - reset pktmbuf data and data_len to their default values.
  *  All other fields of the given packet mbuf will be left intact.
@@ -823,22 +848,18 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf 
*mi, struct rte_mbuf *md)
  * @param m
  *   The indirect attached packet mbuf.
  */
-
 static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 {
-   const struct rte_mempool *mp = m->pool;
-   void *buf = RTE_MBUF_TO_BADDR(m);
-   uint32_t buf_len = mp->elt_size - sizeof(*m);
-   m->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof (*m);
+ 

[dpdk-dev] [PATCH v4 09/12] mbuf: allow to clone an indirect mbuf

2015-04-20 Thread Olivier Matz
Remove one limitation of rte_pktmbuf_attach(): "mbuf we're attaching to
must be direct".

Now, when we attach to an indirect mbuf:
- copy all the relevant fields (addr, len, offload, ...) as before
- get the pointer to the mbuf that embeds the data buffer (direct mbuf),
  and increase the reference counter of this one.

When detaching the mbuf, we can retrieve this direct mbuf as the pointer
is determined from the buffer address.

Signed-off-by: Olivier Matz 
---
 lib/librte_mbuf/rte_mbuf.h | 46 ++
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 5c01c5b..df54a46 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -799,43 +799,49 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct 
rte_mempool *mp)
  * After attachment we refer the mbuf we attached as 'indirect',
  * while mbuf we attached to as 'direct'.
  * Right now, not supported:
- *  - attachment to indirect mbuf (e.g. - md  has to be direct).
  *  - attachment for already indirect mbuf (e.g. - mi has to be direct).
  *  - mbuf we trying to attach (mi) is used by someone else
  *e.g. it's reference counter is greater then 1.
  *
  * @param mi
  *   The indirect packet mbuf.
- * @param md
- *   The direct packet mbuf.
+ * @param m
+ *   The packet mbuf we're attaching to.
  */
-static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
+static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 {
-   RTE_MBUF_ASSERT(RTE_MBUF_DIRECT(md) &&
-   RTE_MBUF_DIRECT(mi) &&
+   struct rte_mbuf *md;
+
+   RTE_MBUF_ASSERT(RTE_MBUF_DIRECT(mi) &&
rte_mbuf_refcnt_read(mi) == 1);

+   /* if m is not direct, get the mbuf that embeds the data */
+   if (RTE_MBUF_DIRECT(m))
+   md = m;
+   else
+   md = rte_mbuf_from_indirect(m);
+
rte_mbuf_refcnt_update(md, 1);
-   mi->buf_physaddr = md->buf_physaddr;
-   mi->buf_addr = md->buf_addr;
-   mi->buf_len = md->buf_len;
-
-   mi->next = md->next;
-   mi->data_off = md->data_off;
-   mi->data_len = md->data_len;
-   mi->port = md->port;
-   mi->vlan_tci = md->vlan_tci;
-   mi->tx_offload = md->tx_offload;
-   mi->hash = md->hash;
+   mi->buf_physaddr = m->buf_physaddr;
+   mi->buf_addr = m->buf_addr;
+   mi->buf_len = m->buf_len;
+
+   mi->next = m->next;
+   mi->data_off = m->data_off;
+   mi->data_len = m->data_len;
+   mi->port = m->port;
+   mi->vlan_tci = m->vlan_tci;
+   mi->tx_offload = m->tx_offload;
+   mi->hash = m->hash;

mi->next = NULL;
mi->pkt_len = mi->data_len;
mi->nb_segs = 1;
-   mi->ol_flags = md->ol_flags | IND_ATTACHED_MBUF;
-   mi->packet_type = md->packet_type;
+   mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
+   mi->packet_type = m->packet_type;

__rte_mbuf_sanity_check(mi, 1);
-   __rte_mbuf_sanity_check(md, 0);
+   __rte_mbuf_sanity_check(m, 0);
 }

 /**
-- 
2.1.4



[dpdk-dev] [PATCH v4 10/12] test/mbuf: rename mc variable in m

2015-04-20 Thread Olivier Matz
It's better to name the mbuf 'm' instead of 'mc' as it's not a clone.

Signed-off-by: Olivier Matz 
---
 app/test/test_mbuf.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index 4774263..2614598 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -320,43 +320,42 @@ fail:
 static int
 testclone_testupdate_testdetach(void)
 {
-   struct rte_mbuf *mc = NULL;
+   struct rte_mbuf *m = NULL;
struct rte_mbuf *clone = NULL;

/* alloc a mbuf */
-
-   mc = rte_pktmbuf_alloc(pktmbuf_pool);
-   if (mc == NULL)
+   m = rte_pktmbuf_alloc(pktmbuf_pool);
+   if (m == NULL)
GOTO_FAIL("ooops not allocating mbuf");

-   if (rte_pktmbuf_pkt_len(mc) != 0)
+   if (rte_pktmbuf_pkt_len(m) != 0)
GOTO_FAIL("Bad length");


/* clone the allocated mbuf */
-   clone = rte_pktmbuf_clone(mc, pktmbuf_pool);
+   clone = rte_pktmbuf_clone(m, pktmbuf_pool);
if (clone == NULL)
GOTO_FAIL("cannot clone data\n");
rte_pktmbuf_free(clone);

-   mc->next = rte_pktmbuf_alloc(pktmbuf_pool);
-   if(mc->next == NULL)
+   m->next = rte_pktmbuf_alloc(pktmbuf_pool);
+   if (m->next == NULL)
GOTO_FAIL("Next Pkt Null\n");

-   clone = rte_pktmbuf_clone(mc, pktmbuf_pool);
+   clone = rte_pktmbuf_clone(m, pktmbuf_pool);
if (clone == NULL)
GOTO_FAIL("cannot clone data\n");

/* free mbuf */
-   rte_pktmbuf_free(mc);
+   rte_pktmbuf_free(m);
rte_pktmbuf_free(clone);
-   mc = NULL;
+   m = NULL;
clone = NULL;
return 0;

 fail:
-   if (mc)
-   rte_pktmbuf_free(mc);
+   if (m)
+   rte_pktmbuf_free(m);
return -1;
 }
 #undef GOTO_FAIL
-- 
2.1.4



[dpdk-dev] [PATCH v4 11/12] test/mbuf: enhance mbuf refcnt test

2015-04-20 Thread Olivier Matz
Check that the data in the cloned mbuf is the same as in the
reference mbuf.
Check that the reference counter is incremented for each segment.

Signed-off-by: Olivier Matz 
---
 app/test/test_mbuf.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index 2614598..01838c6 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -75,6 +75,8 @@
 #define REFCNT_MBUF_NUM 64
 #define REFCNT_RING_SIZE(REFCNT_MBUF_NUM * REFCNT_MAX_REF)

+#define MAGIC_DATA  0x42424242
+
 #define MAKE_STRING(x)  # x

 static struct rte_mempool *pktmbuf_pool = NULL;
@@ -322,6 +324,7 @@ testclone_testupdate_testdetach(void)
 {
struct rte_mbuf *m = NULL;
struct rte_mbuf *clone = NULL;
+   uint32_t *data;

/* alloc a mbuf */
m = rte_pktmbuf_alloc(pktmbuf_pool);
@@ -331,21 +334,53 @@ testclone_testupdate_testdetach(void)
if (rte_pktmbuf_pkt_len(m) != 0)
GOTO_FAIL("Bad length");

+   rte_pktmbuf_append(m, sizeof(uint32_t));
+   data = rte_pktmbuf_mtod(m, uint32_t *);
+   *data = MAGIC_DATA;

/* clone the allocated mbuf */
clone = rte_pktmbuf_clone(m, pktmbuf_pool);
if (clone == NULL)
GOTO_FAIL("cannot clone data\n");
+
+   data = rte_pktmbuf_mtod(clone, uint32_t *);
+   if (*data != MAGIC_DATA)
+   GOTO_FAIL("invalid data in clone\n");
+
+   if (rte_mbuf_refcnt_read(m) != 2)
+   GOTO_FAIL("invalid refcnt in m\n");
+
+   /* free the clone */
rte_pktmbuf_free(clone);
+   clone = NULL;

+   /* same test with a chained mbuf */
m->next = rte_pktmbuf_alloc(pktmbuf_pool);
if (m->next == NULL)
GOTO_FAIL("Next Pkt Null\n");

+   rte_pktmbuf_append(m->next, sizeof(uint32_t));
+   data = rte_pktmbuf_mtod(m->next, uint32_t *);
+   *data = MAGIC_DATA;
+
clone = rte_pktmbuf_clone(m, pktmbuf_pool);
if (clone == NULL)
GOTO_FAIL("cannot clone data\n");

+   data = rte_pktmbuf_mtod(clone, uint32_t *);
+   if (*data != MAGIC_DATA)
+   GOTO_FAIL("invalid data in clone\n");
+
+   data = rte_pktmbuf_mtod(clone->next, uint32_t *);
+   if (*data != MAGIC_DATA)
+   GOTO_FAIL("invalid data in clone->next\n");
+
+   if (rte_mbuf_refcnt_read(m) != 2)
+   GOTO_FAIL("invalid refcnt in m\n");
+
+   if (rte_mbuf_refcnt_read(m->next) != 2)
+   GOTO_FAIL("invalid refcnt in m->next\n");
+
/* free mbuf */
rte_pktmbuf_free(m);
rte_pktmbuf_free(clone);
@@ -356,6 +391,8 @@ testclone_testupdate_testdetach(void)
 fail:
if (m)
rte_pktmbuf_free(m);
+   if (clone)
+   rte_pktmbuf_free(clone);
return -1;
 }
 #undef GOTO_FAIL
-- 
2.1.4



[dpdk-dev] [PATCH v4 12/12] test/mbuf: verify that cloning a clone works properly

2015-04-20 Thread Olivier Matz
Signed-off-by: Olivier Matz 
---
 app/test/test_mbuf.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c
index 01838c6..b5ae5b7 100644
--- a/app/test/test_mbuf.c
+++ b/app/test/test_mbuf.c
@@ -324,6 +324,7 @@ testclone_testupdate_testdetach(void)
 {
struct rte_mbuf *m = NULL;
struct rte_mbuf *clone = NULL;
+   struct rte_mbuf *clone2 = NULL;
uint32_t *data;

/* alloc a mbuf */
@@ -381,11 +382,34 @@ testclone_testupdate_testdetach(void)
if (rte_mbuf_refcnt_read(m->next) != 2)
GOTO_FAIL("invalid refcnt in m->next\n");

+   /* try to clone the clone */
+
+   clone2 = rte_pktmbuf_clone(clone, pktmbuf_pool);
+   if (clone2 == NULL)
+   GOTO_FAIL("cannot clone the clone\n");
+
+   data = rte_pktmbuf_mtod(clone2, uint32_t *);
+   if (*data != MAGIC_DATA)
+   GOTO_FAIL("invalid data in clone2\n");
+
+   data = rte_pktmbuf_mtod(clone2->next, uint32_t *);
+   if (*data != MAGIC_DATA)
+   GOTO_FAIL("invalid data in clone2->next\n");
+
+   if (rte_mbuf_refcnt_read(m) != 3)
+   GOTO_FAIL("invalid refcnt in m\n");
+
+   if (rte_mbuf_refcnt_read(m->next) != 3)
+   GOTO_FAIL("invalid refcnt in m->next\n");
+
/* free mbuf */
rte_pktmbuf_free(m);
rte_pktmbuf_free(clone);
+   rte_pktmbuf_free(clone2);
+
m = NULL;
clone = NULL;
+   clone2 = NULL;
return 0;

 fail:
@@ -393,6 +417,8 @@ fail:
rte_pktmbuf_free(m);
if (clone)
rte_pktmbuf_free(clone);
+   if (clone2)
+   rte_pktmbuf_free(clone2);
return -1;
 }
 #undef GOTO_FAIL
-- 
2.1.4



[dpdk-dev] Saving packet timestamp when reading from pcap

2015-04-20 Thread Dor Green
To test my program and for some other uses I sometimes use vdev
(libpcap pmd) to read data from a pcap file.

Those tests would be a lot easier if the packet timestamp (which is in
the cap) was supplied by DPDK, but alas it is not.

So that I could access it, I placed it in the mbuf's userdata for the time being.
In rte_eth_pcap.c I added this after line 171:
 mbuf->userdata = (void*) header.ts.tv_sec;
Obviously this isn't the prettiest and it lacks the microseconds the
struct supplies.

Does anyone have any better ideas on how to do it, or is this a
feature planned for any future version (seems easy enough to add some
more fields to the mbuf struct to accommodate this, and maybe in the
future hardware timestamps, etc.?)

Thanks, Dor.


[dpdk-dev] cost of reading tsc register

2015-04-20 Thread Matthew Hall
On Mon, Apr 20, 2015 at 02:37:53PM +, Ravi Kumar Iyer wrote:
> We were doing some code optimizations , running DPDK based applications, and 
> chanced upon the rte_rdtsc function [ to read tsc timestamp register value ] 
> consuming cpu cycles of the order of 100clock cycles with a delta of upto 
> 40cycles at times [ 60-140 cycles]
> 
> We are actually building up a cpu intensive application which is also very 
> clock cycle sensitive and this is impacting our implementation.
> 
> To validate the same using a small/vanilla application we wrote a small code 
> and tested on a single core.
> Has anyone else faced a similar issue or are we doing something really 
> atrocious here.

What happened when you tried rte_rdtsc_precise ?

Matthew.


[dpdk-dev] [RFC PATCH] ethdev: remove old flow director API

2015-04-20 Thread Neil Horman
On Mon, Apr 20, 2015 at 04:11:43PM +0200, Thomas Monjalon wrote:
> It's time to remove this old API.
> It seems some work is still needed to rely only on eth_ctrl API.
> At least ixgbe, i40e and testpmd must be fixed.
> Jingjing, do you think it's possible to remove all these structures
> from rte_ethdev.h?
> 
> Thanks
> 
NAK.

I'm certainly not opposed to removing the APIs if they are truly no longer
needed.  But they have been codified as part of the ABI, so the deprecation
schedule needs to be followed.  Given what you've said above, it seems like that
might be worthwhile anyway, as it will provide the needed runway to allow users
to convert to the new API.

Neil



[dpdk-dev] QoS Question

2015-04-20 Thread Greg Smith
Hi DPDK team,

The docs on QoS (http://dpdk.org/doc/guides/prog_guide/qos_framework.html# ) 
describe the traffic class (TC) as follows:
1 - The  TCs of the same pipe handled in strict priority order.
2 - Upper limit enforced per TC at the pipe level.
3 - Lower priority TCs able to reuse pipe bandwidth currently unused by higher 
priority TCs.
4 - When subport TC is oversubscribed (configuration time event), pipe TC upper 
limit is capped to a dynamically adjusted value that is shared by all the 
subport pipes.

Can someone describe how and when the TC upper limit is "dynamically" changed?

For example, assume there's a 1Gb/s port and a single 1Gb/s subport and 2000 
pipes each of 1Mb/s (total pipes = 2Gb/s which is > the 1Gb/s subport which I 
think means "oversubscribed" as used in the doc). Each Pipe has a single TC.
In that case, would each pipe be shaped to an upper limit of 0.5 Mb/s?
What if there was no traffic on 1999 pipes, would the single active pipe still 
be limited to 0.5 Mb/s?
What if the number of pipes changes without restarting the OS, how does that 
change the behavior?

BTW, great docs overall, thanks for writing those up.

Thanks,

Greg Smith





[dpdk-dev] [PATCH v2 0/4] bonding corrections and additions

2015-04-20 Thread Eric Kinzie
This patchset makes a couple of small corrections to the bonding driver
and introduces the ability to use an external state machine for mode
4 operation.

Changes in v2:
  . eliminate external_sm field in 802.3ad configuration 
(rte_eth_bond_8023ad_conf).
  . stop bonding device before changing the periodic callback function.
start again if needed.
  . remove unnecessary calls to valid_bonded_port_id().
  . do not check for NULL tx_ring.
  . return error in rte_eth_bond_8023ad_ext_slowtx() if packet is not LACP.
  . remove check for external sm configuration in periodic callback
  . check for valid LACPDU in test application's rx callback
  . add "Fixes:" tags

Eric Kinzie (4):
  bond mode 4: copy entire config structure
  bond mode 4: do not ignore multicast
  bond mode 4: allow external state machine
  bond mode 4: tests for external state machine

 app/test/test_link_bonding_mode4.c|  217 +++--
 lib/librte_pmd_bond/rte_eth_bond_8023ad.c |  174 +
 lib/librte_pmd_bond/rte_eth_bond_8023ad.h |   44 +
 lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h |2 +
 lib/librte_pmd_bond/rte_eth_bond_pmd.c|1 +
 5 files changed, 427 insertions(+), 11 deletions(-)

-- 
1.7.10.4



[dpdk-dev] [PATCH v2 1/4] bond mode 4: copy entire config structure

2015-04-20 Thread Eric Kinzie
From: Eric Kinzie 

  Copy all needed fields from the mode8023ad_private structure in
  bond_mode_8023ad_conf_get().  This helps ensure that a subsequent call
  to rte_eth_bond_8023ad_setup() is not passed uninitialized data that
  would result in either incorrect behavior or a failed sanity check.

Fixes: 46fb43683679 ("bond: add mode 4")

Signed-off-by: Eric Kinzie 
---
 lib/librte_pmd_bond/rte_eth_bond_8023ad.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c 
b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
index 97a828e..1009d5b 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
@@ -1013,6 +1013,7 @@ bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / 
ms_ticks;
conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
conf->update_timeout_ms = mode4->update_timeout_us / 1000;
+   conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
 }

 void
-- 
1.7.10.4



[dpdk-dev] [PATCH v2 2/4] bond mode 4: do not ignore multicast

2015-04-20 Thread Eric Kinzie
From: Eric Kinzie 

The bonding PMD in mode 4 puts all enslaved interfaces into promiscuous
mode in order to receive LACPDUs and must filter unwanted packets
after the traffic has been "collected".  Allow broadcast and multicast
through so that ARP and IPv6 neighbor discovery continue to work.

Fixes: 46fb43683679 ("bond: add mode 4")

Signed-off-by: Eric Kinzie 
---
 app/test/test_link_bonding_mode4.c |7 +--
 lib/librte_pmd_bond/rte_eth_bond_pmd.c |1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/app/test/test_link_bonding_mode4.c 
b/app/test/test_link_bonding_mode4.c
index 02380f9..5a726af 100644
--- a/app/test/test_link_bonding_mode4.c
+++ b/app/test/test_link_bonding_mode4.c
@@ -755,8 +755,11 @@ test_mode4_rx(void)
rte_eth_macaddr_get(test_params.bonded_port_id, _mac);
ether_addr_copy(_mac, _mac);

-   /* Assert that dst address is not bonding address */
-   dst_mac.addr_bytes[0]++;
+   /* Assert that dst address is not bonding address.  Do not set the
+* least significant bit of the zero byte as this would create a
+* multicast address.
+*/
+   dst_mac.addr_bytes[0] += 2;

/* First try with promiscuous mode enabled.
 * Add 2 packets to each slave. First with bonding MAC address, second 
with
diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c 
b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
index c937e6b..1691300 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
@@ -170,6 +170,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf 
**bufs,
 * mode and packet address does not match. */
if (unlikely(hdr->ether_type == ether_type_slow_be ||
!collecting || (!promisc &&
+   !is_multicast_ether_addr(>d_addr) 
&&
!is_same_ether_addr(_mac, 
>d_addr {

if (hdr->ether_type == ether_type_slow_be) {
-- 
1.7.10.4



[dpdk-dev] [PATCH v2 3/4] bond mode 4: allow external state machine

2015-04-20 Thread Eric Kinzie
From: Eric Kinzie 

  Provide functions to allow an external 802.3ad state machine to transmit
  and receive LACPDUs and to set the collection/distribution flags on
  slave interfaces.

Signed-off-by: Eric Kinzie 
---
 lib/librte_pmd_bond/rte_eth_bond_8023ad.c |  173 +
 lib/librte_pmd_bond/rte_eth_bond_8023ad.h |   44 ++
 lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h |2 +
 3 files changed, 219 insertions(+)

diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c 
b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
index 1009d5b..326e899 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
@@ -42,6 +42,8 @@

 #include "rte_eth_bond_private.h"

+static void bond_mode_8023ad_ext_periodic_cb(void *arg);
+
 #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
 #define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
bond_dbg_get_time_diff_ms(), slave_id, \
@@ -1014,6 +1016,7 @@ bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
conf->update_timeout_ms = mode4->update_timeout_us / 1000;
conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
+   conf->slowrx_cb = mode4->slowrx_cb;
 }

 void
@@ -1035,8 +1038,11 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev,
conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
+   conf->slowrx_cb = NULL;
}

+   bond_mode_8023ad_stop(dev);
+
mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
@@ -1045,6 +1051,10 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev,
mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
mode4->update_timeout_us = conf->update_timeout_ms * 1000;
+   mode4->slowrx_cb = conf->slowrx_cb;
+
+   if (dev->data->dev_started)
+   bond_mode_8023ad_start(dev);
 }

 int
@@ -1062,6 +1072,13 @@ bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
 int
 bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
 {
+   struct bond_dev_private *internals = bond_dev->data->dev_private;
+   struct mode8023ad_private *mode4 = >mode4;
+
+   if (mode4->slowrx_cb)
+   return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 
1000,
+   _mode_8023ad_ext_periodic_cb, bond_dev);
+
return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
_mode_8023ad_periodic_cb, bond_dev);
 }
@@ -1069,6 +1086,13 @@ bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
 void
 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
 {
+   struct bond_dev_private *internals = bond_dev->data->dev_private;
+   struct mode8023ad_private *mode4 = >mode4;
+
+   if (mode4->slowrx_cb) {
+   rte_eal_alarm_cancel(_mode_8023ad_ext_periodic_cb, 
bond_dev);
+   return;
+   }
rte_eal_alarm_cancel(_mode_8023ad_periodic_cb, bond_dev);
 }

@@ -1215,3 +1239,152 @@ rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t 
slave_id,
info->agg_port_id = port->aggregator_port_id;
return 0;
 }
+
+int
+rte_eth_bond_8023ad_ext_collect(uint8_t port_id, uint8_t slave_id, int enabled)
+{
+   struct rte_eth_dev *bond_dev;
+   struct bond_dev_private *internals;
+   struct mode8023ad_private *mode4;
+   struct port *port;
+
+   if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
+   return -EINVAL;
+
+   bond_dev = _eth_devices[port_id];
+
+   if (!bond_dev->data->dev_started)
+   return -EINVAL;
+
+   internals = bond_dev->data->dev_private;
+   if (find_slave_by_id(internals->active_slaves,
+   internals->active_slave_count, slave_id) ==
+   internals->active_slave_count)
+   return -EINVAL;
+
+   mode4 = >mode4;
+   if (mode4->slowrx_cb == NULL)
+   return -EINVAL;
+
+   port = _8023ad_ports[slave_id];
+
+   if (enabled)
+   ACTOR_STATE_SET(port, COLLECTING);
+   else
+   ACTOR_STATE_CLR(port, COLLECTING);
+
+   return 0;
+}
+
+int
+rte_eth_bond_8023ad_ext_distrib(uint8_t port_id, uint8_t slave_id, int enabled)
+{
+   struct rte_eth_dev *bond_dev;
+   struct bond_dev_private *internals;
+   struct mode8023ad_private *mode4;
+   struct port *port;
+
+   if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
+   return -EINVAL;
+
+   bond_dev = 

[dpdk-dev] [PATCH v2 4/4] bond mode 4: tests for external state machine

2015-04-20 Thread Eric Kinzie
From: Eric Kinzie 

  This adds test cases for exercising the external state machine API to
  the mode 4 autotest.

Signed-off-by: Eric Kinzie 
---
 app/test/test_link_bonding_mode4.c |  210 ++--
 1 file changed, 201 insertions(+), 9 deletions(-)

diff --git a/app/test/test_link_bonding_mode4.c 
b/app/test/test_link_bonding_mode4.c
index 5a726af..c191ac5 100644
--- a/app/test/test_link_bonding_mode4.c
+++ b/app/test/test_link_bonding_mode4.c
@@ -155,6 +155,8 @@ static struct rte_eth_conf default_pmd_conf = {
.lpbk_mode = 0,
 };

+static uint8_t lacpdu_rx_count[RTE_MAX_ETHPORTS] = {0, };
+
 #define FOR_EACH(_i, _item, _array, _size) \
for (_i = 0, _item = &_array[0]; _i < _size && (_item = &_array[_i]); 
_i++)

@@ -324,8 +326,26 @@ remove_slave(struct slave_conf *slave)
return 0;
 }

+static void
+lacp_recv_cb(uint8_t slave_id, struct rte_mbuf *lacp_pkt)
+{
+   struct ether_hdr *hdr;
+   struct slow_protocol_frame *slow_hdr;
+
+   RTE_VERIFY(lacp_pkt != NULL);
+
+   hdr = rte_pktmbuf_mtod(lacp_pkt, struct ether_hdr *);
+   RTE_VERIFY(hdr->ether_type == rte_cpu_to_be_16(ETHER_TYPE_SLOW));
+
+   slow_hdr = rte_pktmbuf_mtod(lacp_pkt, struct slow_protocol_frame *);
+   RTE_VERIFY(slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_LACP);
+
+   lacpdu_rx_count[slave_id]++;
+   rte_pktmbuf_free(lacp_pkt);
+}
+
 static int
-initialize_bonded_device_with_slaves(uint8_t slave_count, uint8_t start)
+initialize_bonded_device_with_slaves(uint8_t slave_count, uint8_t external_sm)
 {
uint8_t i;

@@ -341,9 +361,17 @@ initialize_bonded_device_with_slaves(uint8_t slave_count, 
uint8_t start)
rte_eth_bond_8023ad_setup(test_params.bonded_port_id, NULL);
rte_eth_promiscuous_disable(test_params.bonded_port_id);

-   if (start)
-   
TEST_ASSERT_SUCCESS(rte_eth_dev_start(test_params.bonded_port_id),
-   "Failed to start bonded device");
+   if (external_sm) {
+   struct rte_eth_bond_8023ad_conf conf;
+
+   rte_eth_bond_8023ad_conf_get(test_params.bonded_port_id, );
+   conf.slowrx_cb = lacp_recv_cb;
+   rte_eth_bond_8023ad_setup(test_params.bonded_port_id, );
+
+   }
+
+   TEST_ASSERT_SUCCESS(rte_eth_dev_start(test_params.bonded_port_id),
+   "Failed to start bonded device");

return TEST_SUCCESS;
 }
@@ -648,7 +676,7 @@ test_mode4_lacp(void)
 {
int retval;

-   retval = initialize_bonded_device_with_slaves(TEST_LACP_SLAVE_COUT, 1);
+   retval = initialize_bonded_device_with_slaves(TEST_LACP_SLAVE_COUT, 0);
TEST_ASSERT_SUCCESS(retval, "Failed to initialize bonded device");

/* Test LACP handshake function */
@@ -746,7 +774,7 @@ test_mode4_rx(void)
struct ether_addr dst_mac;
struct ether_addr bonded_mac;

-   retval = initialize_bonded_device_with_slaves(TEST_PROMISC_SLAVE_COUNT, 
1);
+   retval = initialize_bonded_device_with_slaves(TEST_PROMISC_SLAVE_COUNT, 
0);
TEST_ASSERT_SUCCESS(retval, "Failed to initialize bonded device");

retval = bond_handshake();
@@ -923,7 +951,7 @@ test_mode4_tx_burst(void)
struct ether_addr dst_mac = { { 0x00, 0xFF, 0x00, 0xFF, 0x00, 0x00 } };
struct ether_addr bonded_mac;

-   retval = initialize_bonded_device_with_slaves(TEST_TX_SLAVE_COUNT, 1);
+   retval = initialize_bonded_device_with_slaves(TEST_TX_SLAVE_COUNT, 0);
TEST_ASSERT_SUCCESS(retval, "Failed to initialize bonded device");

retval = bond_handshake();
@@ -1107,7 +1135,7 @@ test_mode4_marker(void)
uint8_t i, j;
const uint16_t ethtype_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

-   retval = initialize_bonded_device_with_slaves(TEST_MARKER_SLAVE_COUT, 
1);
+   retval = initialize_bonded_device_with_slaves(TEST_MARKER_SLAVE_COUT, 
0);
TEST_ASSERT_SUCCESS(retval, "Failed to initialize bonded device");

/* Test LACP handshake function */
@@ -1192,7 +1220,7 @@ test_mode4_expired(void)

struct rte_eth_bond_8023ad_conf conf;

-   retval = initialize_bonded_device_with_slaves(TEST_EXPIRED_SLAVE_COUNT, 
1);
+   retval = initialize_bonded_device_with_slaves(TEST_EXPIRED_SLAVE_COUNT, 
0);
/* Set custom timeouts to make test last shorter. */
rte_eth_bond_8023ad_conf_get(test_params.bonded_port_id, );
conf.fast_periodic_ms = 100;
@@ -1274,6 +1302,156 @@ test_mode4_expired(void)
 }

 static int
+test_mode4_ext_ctrl(void)
+{
+   /*
+* configure bonded interface without the external sm enabled
+*   . try to transmit lacpdu (should fail)
+*   . try to set collecting and distributing flags (should fail)
+* reconfigure w/external sm
+*   . transmit one lacpdu on each slave using new api
+*   . make sure each slave receives one lacpdu using the callback api
+*   . 

[dpdk-dev] [PATCH v2] Clean up rte_memcpy.h

2015-04-20 Thread Ravi Kerur
This version contains changes for removing unnecessary typecasting only.
Backing out remaining changes i.e. loop-unrolling. Though loop-unrolling
makes sense from a more-space/less-time perspective, the code generated by
GCC 4.8.2 with "gcc -O3 -mavx -s" and "gcc -O3 -m64 -s" for loops of 2,
4 and 8 iterations is the same, and "memcpy perf" from "make test" reveals
similar results for "with and without loop". Will investigate this later.

Ravi Kerur (1):
  Clean up rte_memcpy.h file

 .../common/include/arch/x86/rte_memcpy.h   | 340 +++--
 1 file changed, 175 insertions(+), 165 deletions(-)

-- 
1.9.1



[dpdk-dev] [PATCH v2] Clean up rte_memcpy.h file

2015-04-20 Thread Ravi Kerur
Remove unnecessary type casting in functions.

Tested on Ubuntu (14.04 x86_64) with "make test".
"make test" results match the results with baseline.
"Memcpy perf" results match the results with baseline.

Signed-off-by: Ravi Kerur 
---
 .../common/include/arch/x86/rte_memcpy.h   | 340 +++--
 1 file changed, 175 insertions(+), 165 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h 
b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index 6a57426..839d4ec 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -106,8 +106,8 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
 static inline void
 rte_mov64(uint8_t *dst, const uint8_t *src)
 {
-   rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
-   rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+   rte_mov32(dst + 0 * 32, src + 0 * 32);
+   rte_mov32(dst + 1 * 32, src + 1 * 32);
 }

 /**
@@ -117,10 +117,10 @@ rte_mov64(uint8_t *dst, const uint8_t *src)
 static inline void
 rte_mov128(uint8_t *dst, const uint8_t *src)
 {
-   rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
-   rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
-   rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32);
-   rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
+   rte_mov32(dst + 0 * 32, src + 0 * 32);
+   rte_mov32(dst + 1 * 32, src + 1 * 32);
+   rte_mov32(dst + 2 * 32, src + 2 * 32);
+   rte_mov32(dst + 3 * 32, src + 3 * 32);
 }

 /**
@@ -130,14 +130,14 @@ rte_mov128(uint8_t *dst, const uint8_t *src)
 static inline void
 rte_mov256(uint8_t *dst, const uint8_t *src)
 {
-   rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
-   rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
-   rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32);
-   rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
-   rte_mov32((uint8_t *)dst + 4 * 32, (const uint8_t *)src + 4 * 32);
-   rte_mov32((uint8_t *)dst + 5 * 32, (const uint8_t *)src + 5 * 32);
-   rte_mov32((uint8_t *)dst + 6 * 32, (const uint8_t *)src + 6 * 32);
-   rte_mov32((uint8_t *)dst + 7 * 32, (const uint8_t *)src + 7 * 32);
+   rte_mov32(dst + 0 * 32, src + 0 * 32);
+   rte_mov32(dst + 1 * 32, src + 1 * 32);
+   rte_mov32(dst + 2 * 32, src + 2 * 32);
+   rte_mov32(dst + 3 * 32, src + 3 * 32);
+   rte_mov32(dst + 4 * 32, src + 4 * 32);
+   rte_mov32(dst + 5 * 32, src + 5 * 32);
+   rte_mov32(dst + 6 * 32, src + 6 * 32);
+   rte_mov32(dst + 7 * 32, src + 7 * 32);
 }

 /**
@@ -150,13 +150,16 @@ rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t 
n)
__m256i ymm0, ymm1;

while (n >= 64) {
-   ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t 
*)src + 0 * 32));
+
+   ymm0 = _mm256_loadu_si256((const __m256i *)(src + 0 * 32));
+   ymm1 = _mm256_loadu_si256((const __m256i *)(src + 1 * 32));
+
+   _mm256_storeu_si256((__m256i *)(dst + 0 * 32), ymm0);
+   _mm256_storeu_si256((__m256i *)(dst + 1 * 32), ymm1);
+
n -= 64;
-   ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t 
*)src + 1 * 32));
-   src = (const uint8_t *)src + 64;
-   _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
-   _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
-   dst = (uint8_t *)dst + 64;
+   src = src + 64;
+   dst = dst + 64;
}
 }

@@ -170,34 +173,39 @@ rte_mov256blocks(uint8_t *dst, const uint8_t *src, size_t 
n)
__m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;

while (n >= 256) {
-   ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t 
*)src + 0 * 32));
+
+   ymm0 = _mm256_loadu_si256((const __m256i *)(src + 0 * 32));
+   ymm1 = _mm256_loadu_si256((const __m256i *)(src + 1 * 32));
+   ymm2 = _mm256_loadu_si256((const __m256i *)(src + 2 * 32));
+   ymm3 = _mm256_loadu_si256((const __m256i *)(src + 3 * 32));
+   ymm4 = _mm256_loadu_si256((const __m256i *)(src + 4 * 32));
+   ymm5 = _mm256_loadu_si256((const __m256i *)(src + 5 * 32));
+   ymm6 = _mm256_loadu_si256((const __m256i *)(src + 6 * 32));
+   ymm7 = _mm256_loadu_si256((const __m256i *)(src + 7 * 32));
+
+   _mm256_storeu_si256((__m256i *)(dst + 0 * 32), ymm0);
+   _mm256_storeu_si256((__m256i *)(dst + 1 * 32), ymm1);
+   _mm256_storeu_si256((__m256i *)(dst + 2 * 32), ymm2);
+   _mm256_storeu_si256((__m256i *)(dst + 3 * 32), ymm3);
+   _mm256_storeu_si256((__m256i *)(dst + 4 * 32), ymm4);
+