Re: [dpdk-dev] [PATCH v6] net/virtio-user: add support for server mode

2018-04-06 Thread Yang, Zhiyong
Hi Jianfeng,

> -Original Message-
> From: Tan, Jianfeng
> Sent: Friday, April 6, 2018 2:13 AM
> To: Yang, Zhiyong ; dev@dpdk.org
> Cc: maxime.coque...@redhat.com; Bie, Tiwei ; Wang,
> Dong1 ; Wang, Zhihong 
> Subject: Re: [PATCH v6] net/virtio-user: add support for server mode
> 
> 
> 
> On 4/6/2018 8:18 AM, zhiyong.y...@intel.com wrote:
> > In a container environment if the vhost-user backend restarts, there's
> > no way for it to reconnect to virtio-user. To address this, support
> > for server mode is added. In this mode the socket file is created by
> > virtio-user, which the backend then connects to. This means that if
> > the backend restarts, it can reconnect to virtio-user and continue
> communications.
> >
> > With the current implementation, LSC is enabled at the virtio-user side to
> > accept the incoming connection.
> >
> > Release note is updated in this patch.
> >
> > Signed-off-by: Zhiyong Yang 
> > ---
> >
> > Changes in V6:
> > 1. fix reporting wrong link status in server mode.
> > 2. fix some code style issues.
> >
> > Changes in V5:
> > 1. Support server mode virtio-user startup in non-blocking mode.
> > 2. rebase on top of dpdk-next-virtio.
> >
> > Changes in V4:
> > 1. Don't create new pthread any more and use librte_eal interrupt thread.
> > 2. virtio-user doesn't work in blocking mode any more for the first
> connection.
> > Client mode vhost-user starts up first, then server mode virtio-user
> > creates the socket file and starts up. This keeps consistency with the
> > usage of client mode virtio-user.
> >
> > Changes in V3:
> > 1. use the EAL epoll mechanism instead of vhost events. No longer export
> > the vhost event APIs.
> > 2. rebase the code on top of dpdk-next-virtio
> >
> > Changes in V2:
> > 1. split two patches 1/5 and 2/5 from v1 patchset to fix some existing
> > issues which are not strongly related to support for server mode.
> > 2. move fdset related functions to librte_eal from librte_vhost, exposed
> > as new APIs.
> > 3. release note is added in the patch 5/5.
> > 4. squash data structure change patch into 4/5 according to Maxime's
> suggestion.
> >
> >   doc/guides/rel_notes/release_18_05.rst   |   6 ++
> >   drivers/net/virtio/virtio_user/vhost_user.c  |  45 --
> >   drivers/net/virtio/virtio_user/virtio_user_dev.c |  40 +++--
> >   drivers/net/virtio/virtio_user/virtio_user_dev.h |   3 +
> >   drivers/net/virtio/virtio_user_ethdev.c  | 101
> ---
> >   5 files changed, 171 insertions(+), 24 deletions(-)
> >
> > diff --git a/doc/guides/rel_notes/release_18_05.rst
> > b/doc/guides/rel_notes/release_18_05.rst
> > index 9cc77f893..f8897b2e9 100644
> > --- a/doc/guides/rel_notes/release_18_05.rst
> > +++ b/doc/guides/rel_notes/release_18_05.rst
> > @@ -58,6 +58,12 @@ New Features
> > * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
> > * Added support for DROP action in flow API.
> >
> > +* **Added support for virtio-user server mode.**
> > +  In a container environment if the vhost-user backend restarts,
> > +there's no way
> > +  for it to reconnect to virtio-user. To address this, support for
> > +server mode
> > +  is added. In this mode the socket file is created by virtio-user,
> > +which the
> > +  backend connects to. This means that if the backend restarts, it
> > +can reconnect
> > +  to virtio-user and continue communications.
> >
> >   API Changes
> >   ---
> > diff --git a/drivers/net/virtio/virtio_user/vhost_user.c
> > b/drivers/net/virtio/virtio_user/vhost_user.c
> > index 91c6449bb..a6df97a00 100644
> > --- a/drivers/net/virtio/virtio_user/vhost_user.c
> > +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> > @@ -378,6 +378,30 @@ vhost_user_sock(struct virtio_user_dev *dev,
> > return 0;
> >   }
> >
> > +#define MAX_VIRTIO_USER_BACKLOG 1
> > +static int
> > +virtio_user_start_server(struct virtio_user_dev *dev, struct
> > +sockaddr_un *un) {
> > +   int ret;
> > +   int flag;
> > +   int fd = dev->listenfd;
> > +
> > +   ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> > +   if (ret < 0) {
> > +   PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and
> try again\n",
> > +   dev->path, strerror(errno));
> > +   return -1;
> > +   }
> > +   ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> > +   if (ret < 0)
> > +   return -1;
> > +
> > +   flag = fcntl(fd, F_GETFL);
> > +   fcntl(fd, F_SETFL, flag | O_NONBLOCK);
> > +
> > +   return 0;
> > +}
> > +
> >   /**
> >* Set up environment to talk with a vhost user backend.
> >*
> > @@ -405,13 +429,24 @@ vhost_user_setup(struct virtio_user_dev *dev)
> > memset(&un, 0, sizeof(un));
> > un.sun_family = AF_UNIX;
> > snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> > -   if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > -   PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> > -   close(fd);
> > -   return -1;
> > +
> > + 

Re: [dpdk-dev] [PATCH v5] net/virtio-user: add support for server mode

2018-04-06 Thread Yang, Zhiyong


> -Original Message-
> From: Bie, Tiwei
> Sent: Thursday, April 5, 2018 4:29 PM
> To: Yang, Zhiyong 
> Cc: dev@dpdk.org; maxime.coque...@redhat.com; tho...@monjalon.net;
> Tan, Jianfeng ; Wang, Zhihong
> ; Wang, Dong1 
> Subject: Re: [PATCH v5] net/virtio-user: add support for server mode
> 
> On Thu, Apr 05, 2018 at 01:17:53AM +0800, zhiyong.y...@intel.com wrote:



> > @@ -337,16 +343,21 @@ virtio_user_dev_init(struct virtio_user_dev *dev,
> char *path, int queues,
> > return -1;
> > }
> >
> > -   if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL)
> < 0) {
> > -   PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
> > -   return -1;
> > -   }
> > +   if (dev->vhostfd >= 0) {
> > +   if (dev->ops->send_request(dev,
> VHOST_USER_SET_OWNER, NULL) < 0) {
> > +   PMD_INIT_LOG(ERR, "set_owner fails: %s",
> strerror(errno));
> > +   return -1;
> > +   }
> >
> > -   if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
> > -   &dev->device_features) < 0) {
> > -   PMD_INIT_LOG(ERR, "get_features failed: %s",
> strerror(errno));
> > -   return -1;
> > +   if (dev->ops->send_request(dev,
> VHOST_USER_GET_FEATURES,
> > +   &dev->device_features) < 0) {
> > +   PMD_INIT_LOG(ERR, "get_features failed: %s",
> strerror(errno));
> > +   return -1;
> > +   }
> > +   } else {
> > +   dev->device_features =
> VIRTIO_USER_SUPPORTED_FEATURES;
> 
> If the backend doesn't support e.g. VIRTIO_RING_F_INDIRECT_DESC.
> Will it cause any problem?
> 
vhost-user will compare the virtio-user and vhost-user features. As you said,
if VIRTIO_RING_F_INDIRECT_DESC is not supported, vhost-user hits a failure:
it closes the connecting socket, and later virtio-user detects the broken
connection via LSC.
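
Roughly, the check on the vhost-user side amounts to something like this
(just a sketch of the idea, not the actual librte_vhost code; names are
illustrative):

#include <stdint.h>

/*
 * If the frontend assumed a feature bit the backend cannot offer, feature
 * negotiation fails; the backend then tears down the connection and the
 * virtio-user side notices the dead socket through the LSC interrupt.
 */
static int
check_negotiated_features(uint64_t frontend_features, uint64_t backend_features)
{
	if (frontend_features & ~backend_features)
		return -1; /* e.g. VIRTIO_RING_F_INDIRECT_DESC is missing */

	return 0;
}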

thanks
zhiyong


Re: [dpdk-dev] [PATCH v3 08/21] net/virtio: implement receive path for packed queues

2018-04-06 Thread Maxime Coquelin

Hi Jens,

On 04/05/2018 12:10 PM, Jens Freimann wrote:

From: Yuanhan Liu 

Implement the receive part here. No support for mergeable buffers yet.

Signed-off-by: Jens Freimann 
Signed-off-by: Yuanhan Liu 
---
  drivers/net/virtio/virtio_ethdev.c |  10 ++-
  drivers/net/virtio/virtio_ethdev.h |   2 +
  drivers/net/virtio/virtio_rxtx.c   | 137 -
  3 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 089a161ac..dc220c743 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1315,10 +1315,15 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
  {
struct virtio_hw *hw = eth_dev->data->dev_private;
  
-	if (hw->use_simple_rx) {

+   /* workarount for packed vqs which don't support mrg_rxbuf at this 
point */


I don't think you need such workarounds, just advertise the packed ring
layout feature once full support is introduced.

Also I'm not clear why the workaround is needed here, as you set
virtio_recv_pkts_packed whether mrg is on or not.




Re: [dpdk-dev] [PATCH v3 07/21] net/virtio: implement transmit path for packed queues

2018-04-06 Thread Maxime Coquelin



On 04/05/2018 12:10 PM, Jens Freimann wrote:

This implements the transmit path for devices with
support for Virtio 1.1.

Add the feature bit for Virtio 1.1 and enable code to
add buffers to vring and mark descriptors as available.

This is based on a patch by Yuanhan Liu.

Signed-off-by: Jens Freiman 
---
  drivers/net/virtio/virtio_ethdev.c |   8 ++-
  drivers/net/virtio/virtio_ethdev.h |   3 ++
  drivers/net/virtio/virtio_rxtx.c   | 102 -
  3 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index cccefafe9..089a161ac 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -383,6 +383,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t 
vtpci_queue_idx)
vq->hw = hw;
vq->vq_queue_index = vtpci_queue_idx;
vq->vq_nentries = vq_size;
+   if (vtpci_packed_queue(hw))
+   vq->vq_ring.avail_wrap_counter = 1;
  
  	/*

 * Reserve a memzone for vring elements
@@ -1328,7 +1330,11 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
eth_dev->rx_pkt_burst = &virtio_recv_pkts;
}
  
-	if (hw->use_simple_tx) {

+   if (vtpci_packed_queue(hw)) {
+   PMD_INIT_LOG(INFO, "virtio: using virtio 1.1 Tx path on port 
%u",
+   eth_dev->data->port_id);
+   eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
+   } else if (hw->use_simple_tx) {
PMD_INIT_LOG(INFO, "virtio: using simple Tx path on port %u",
eth_dev->data->port_id);
eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index bb40064ea..d457013cb 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -36,6 +36,7 @@
 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |\
 1u << VIRTIO_RING_F_INDIRECT_DESC |\
 1ULL << VIRTIO_F_VERSION_1   |   \
+1ULL << VIRTIO_F_RING_PACKED |   \


Should it really advertise VIRTIO_F_RING_PACKED unconditionally, as it
is not yet fully supported? (non-pow2, indirect descs, etc...)


 1ULL << VIRTIO_F_IOMMU_PLATFORM)
  
  #define VIRTIO_PMD_SUPPORTED_GUEST_FEATURES	\


Re: [dpdk-dev] [PATCH v3 07/21] net/virtio: implement transmit path for packed queues

2018-04-06 Thread Jens Freimann

On Fri, Apr 06, 2018 at 09:56:06AM +0200, Maxime Coquelin wrote:



On 04/05/2018 12:10 PM, Jens Freimann wrote:

This implements the transmit path for devices with
support for Virtio 1.1.

Add the feature bit for Virtio 1.1 and enable code to
add buffers to vring and mark descriptors as available.

This is based on a patch by Yuanhan Liu.

Signed-off-by: Jens Freiman 
---
 drivers/net/virtio/virtio_ethdev.c |   8 ++-
 drivers/net/virtio/virtio_ethdev.h |   3 ++
 drivers/net/virtio/virtio_rxtx.c   | 102 -
 3 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index cccefafe9..089a161ac 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -383,6 +383,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t 
vtpci_queue_idx)
vq->hw = hw;
vq->vq_queue_index = vtpci_queue_idx;
vq->vq_nentries = vq_size;
+   if (vtpci_packed_queue(hw))
+   vq->vq_ring.avail_wrap_counter = 1;
/*
 * Reserve a memzone for vring elements
@@ -1328,7 +1330,11 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
eth_dev->rx_pkt_burst = &virtio_recv_pkts;
}
-   if (hw->use_simple_tx) {
+   if (vtpci_packed_queue(hw)) {
+   PMD_INIT_LOG(INFO, "virtio: using virtio 1.1 Tx path on port 
%u",
+   eth_dev->data->port_id);
+   eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
+   } else if (hw->use_simple_tx) {
PMD_INIT_LOG(INFO, "virtio: using simple Tx path on port %u",
eth_dev->data->port_id);
eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index bb40064ea..d457013cb 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -36,6 +36,7 @@
 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |\
 1u << VIRTIO_RING_F_INDIRECT_DESC |\
 1ULL << VIRTIO_F_VERSION_1   |   \
+1ULL << VIRTIO_F_RING_PACKED |   \


Should it really advertise VIRTIO_F_RING_PACKED unconditionally, as it
is not yet fully supported? (non-pow2, indirect descs, etc...)


We can advertise packed ring but have VIRTIO_F_INDIRECT_DESC disabled.
non-pow2 needs to be integrated though and will be in v4.
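
A rough sketch of what I have in mind on the driver side, mirroring how the
other not-yet-supported bits are masked in virtio_negotiate_features()
(illustrative only, not the final patch):

	if (req_features & (1ULL << VIRTIO_F_RING_PACKED)) {
		/* indirect descriptors are not implemented yet for packed ring */
		req_features &= ~(1ull << VIRTIO_RING_F_INDIRECT_DESC);
	}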

regards,
Jens 



 1ULL << VIRTIO_F_IOMMU_PLATFORM)
 #define VIRTIO_PMD_SUPPORTED_GUEST_FEATURES\


Re: [dpdk-dev] [PATCH v3 08/21] net/virtio: implement receive path for packed queues

2018-04-06 Thread Jens Freimann

On Fri, Apr 06, 2018 at 09:51:32AM +0200, Maxime Coquelin wrote:

Hi Jens,

On 04/05/2018 12:10 PM, Jens Freimann wrote:

From: Yuanhan Liu 

Implement the receive part here. No support for mergeable buffers yet.

Signed-off-by: Jens Freimann 
Signed-off-by: Yuanhan Liu 
---
 drivers/net/virtio/virtio_ethdev.c |  10 ++-
 drivers/net/virtio/virtio_ethdev.h |   2 +
 drivers/net/virtio/virtio_rxtx.c   | 137 -
 3 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 089a161ac..dc220c743 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1315,10 +1315,15 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 {
struct virtio_hw *hw = eth_dev->data->dev_private;
-   if (hw->use_simple_rx) {
+   /* workarount for packed vqs which don't support mrg_rxbuf at this 
point */


I don't think you need such workarounds, just advertise the packed ring
layout feature once full support is introduced.

Also I'm not clear why the workaround is needed here, as you set
virtio_recv_pkts_packed whether mrg is on or not.


yes, I'll change it in v4 to advertise packed virtqueues as the last
patch.

Thanks!

regards,
Jens 





Re: [dpdk-dev] [PATCH v3 10/21] vhost: turn of indirect descriptors for packed virtqueues

2018-04-06 Thread Maxime Coquelin



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Signed-off-by: Jens Freimann 
---
  lib/librte_vhost/socket.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 72d769e6a..05193e368 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -852,6 +852,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
  
  #ifndef RTE_LIBRTE_VHOST_PQ

vsocket->features &= ~(1ULL << VIRTIO_F_RING_PACKED);
+   vsocket->features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);

Hmm... shouldn't it be the opposite?

  #endif
  
  	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
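
I would have expected something along these lines instead (just a sketch,
assuming the intent is to drop indirect descriptors only when packed ring
support is built in):

#ifndef RTE_LIBRTE_VHOST_PQ
	vsocket->features &= ~(1ULL << VIRTIO_F_RING_PACKED);
#else
	/* indirect descriptors are not supported yet with packed ring */
	vsocket->features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
#endif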




Re: [dpdk-dev] [PATCH v3 11/21] vhost: add virtio 1.1 defines

2018-04-06 Thread Maxime Coquelin



On 04/05/2018 12:10 PM, Jens Freimann wrote:

This should actually be in the kernel header file, but it isn't
yet. For now let's use our own headers.


I think it is not just temporary, as we will always want to be able to
build with older kernels.


Signed-off-by: Jens Freimann 
---
  lib/librte_vhost/vhost.h  |  4 
  lib/librte_vhost/virtio-1.1.h | 18 ++
  2 files changed, 22 insertions(+)
  create mode 100644 lib/librte_vhost/virtio-1.1.h

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index c14a90529..3004c26c1 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -177,6 +177,10 @@ struct vhost_msg {
  #ifndef VIRTIO_F_VERSION_1
   #define VIRTIO_F_VERSION_1 32
  #endif
+#ifndef VIRTIO_F_RING_PACKED
+ #define VIRTIO_F_RING_PACKED 34
+#endif
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
  
  /* Features supported by this builtin vhost-user net driver. */

  #define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
diff --git a/lib/librte_vhost/virtio-1.1.h b/lib/librte_vhost/virtio-1.1.h
new file mode 100644
index 0..7b48caed7
--- /dev/null
+++ b/lib/librte_vhost/virtio-1.1.h
@@ -0,0 +1,18 @@


You need to add a license for the file.


+#ifndef __VIRTIO_PACKED_H
+#define __VIRTIO_PACKED_H
+
+#define VRING_DESC_F_NEXT   1
+#define VRING_DESC_F_WRITE  2
+#define VRING_DESC_F_INDIRECT   4
+
+#define VRING_DESC_F_AVAIL  (1ULL << 7)
+#define VRING_DESC_F_USED  (1ULL << 15)
+
+struct vring_desc_packed {
+   uint64_t addr;
+   uint32_t len;
+   uint16_t index;
+   uint16_t flags;
+};
+
+#endif /* __VIRTIO_PACKED_H */



Re: [dpdk-dev] [PATCH v3 12/21] vhost: vring address setup for packed queues

2018-04-06 Thread Maxime Coquelin



On 04/05/2018 12:10 PM, Jens Freimann wrote:

From: Yuanhan Liu 

Add code to set up packed queues when enabled.

Signed-off-by: Yuanhan Liu 
Signed-off-by: Jens Freimann 
---
  lib/librte_vhost/vhost.h  |  1 +
  lib/librte_vhost/vhost_user.c | 21 -
  2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 3004c26c1..20d78f883 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -74,6 +74,7 @@ struct batch_copy_elem {
   */
  struct vhost_virtqueue {
struct vring_desc   *desc;
+   struct vring_desc_packed   *desc_packed;
struct vring_avail  *avail;
struct vring_used   *used;
uint32_tsize;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 157cf2f60..183893e46 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -469,6 +469,23 @@ translate_ring_addresses(struct virtio_net *dev, int 
vq_index)
struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
struct vhost_vring_addr *addr = &vq->ring_addrs;
  
+	if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) {

+   vq->desc_packed = (struct vring_desc_packed *) ring_addr_to_vva
+   (dev, vq, addr->desc_user_addr, 
sizeof(vq->desc_packed));

sizeof(*vq->desc_packed)
or
sizeof(struct vring_desc_packed) for consistency.


+   vq->desc = NULL;
+   vq->avail = NULL;
+   vq->used = NULL;
+   vq->log_guest_addr = 0;
+
+   if (vq->last_used_idx != 0) {
+   RTE_LOG(WARNING, VHOST_CONFIG,
+   "last_used_idx (%u) not 0\n",
+   vq->last_used_idx);
+   vq->last_used_idx = 0;
+   }
+   return dev;
+   }
+
/* The addresses are converted from QEMU virtual to Vhost virtual. */
if (vq->desc && vq->avail && vq->used)
return dev;
@@ -481,6 +498,7 @@ translate_ring_addresses(struct virtio_net *dev, int 
vq_index)
dev->vid);
return dev;
}
+   vq->desc_packed = NULL;
  
  	dev = numa_realloc(dev, vq_index);

vq = dev->virtqueue[vq_index];
@@ -853,7 +871,8 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct 
VhostUserMsg *pmsg)
  static int
  vq_is_ready(struct vhost_virtqueue *vq)
  {
-   return vq && vq->desc && vq->avail && vq->used &&
+   return vq &&
+  (vq->desc_packed || (vq->desc && vq->avail && vq->used)) &&
   vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
   vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
  }



Re: [dpdk-dev] [PATCH v3 12/21] vhost: vring address setup for packed queues

2018-04-06 Thread Jens Freimann

On Fri, Apr 06, 2018 at 10:19:28AM +0200, Maxime Coquelin wrote:



On 04/05/2018 12:10 PM, Jens Freimann wrote:

From: Yuanhan Liu 

Add code to set up packed queues when enabled.

Signed-off-by: Yuanhan Liu 
Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/vhost.h  |  1 +
 lib/librte_vhost/vhost_user.c | 21 -
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 3004c26c1..20d78f883 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -74,6 +74,7 @@ struct batch_copy_elem {
  */
 struct vhost_virtqueue {
struct vring_desc   *desc;
+   struct vring_desc_packed   *desc_packed;
struct vring_avail  *avail;
struct vring_used   *used;
uint32_tsize;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 157cf2f60..183893e46 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -469,6 +469,23 @@ translate_ring_addresses(struct virtio_net *dev, int 
vq_index)
struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
struct vhost_vring_addr *addr = &vq->ring_addrs;
+   if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) {
+   vq->desc_packed = (struct vring_desc_packed *) ring_addr_to_vva
+   (dev, vq, addr->desc_user_addr, 
sizeof(vq->desc_packed));

sizeof(*vq->desc_packed)
or
sizeof(struct vring_desc_packed) for consistency.


yes, will fix. thanks!

regards,
Jens 



+   vq->desc = NULL;
+   vq->avail = NULL;
+   vq->used = NULL;
+   vq->log_guest_addr = 0;
+
+   if (vq->last_used_idx != 0) {
+   RTE_LOG(WARNING, VHOST_CONFIG,
+   "last_used_idx (%u) not 0\n",
+   vq->last_used_idx);
+   vq->last_used_idx = 0;
+   }
+   return dev;
+   }
+
/* The addresses are converted from QEMU virtual to Vhost virtual. */
if (vq->desc && vq->avail && vq->used)
return dev;
@@ -481,6 +498,7 @@ translate_ring_addresses(struct virtio_net *dev, int 
vq_index)
dev->vid);
return dev;
}
+   vq->desc_packed = NULL;
dev = numa_realloc(dev, vq_index);
vq = dev->virtqueue[vq_index];
@@ -853,7 +871,8 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct 
VhostUserMsg *pmsg)
 static int
 vq_is_ready(struct vhost_virtqueue *vq)
 {
-   return vq && vq->desc && vq->avail && vq->used &&
+   return vq &&
+  (vq->desc_packed || (vq->desc && vq->avail && vq->used)) &&
   vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
   vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
 }



Re: [dpdk-dev] [PATCH 2/3] usertools: add hv_uio_setup script

2018-04-06 Thread Bruce Richardson
On Thu, Apr 05, 2018 at 05:22:42PM -0700, Stephen Hemminger wrote:
> On Thu, 5 Apr 2018 23:57:47 +
> "Ananyev, Konstantin"  wrote:
> 
> > > -Original Message-
> > > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Thomas Monjalon
> > > Sent: Thursday, April 5, 2018 10:11 PM
> > > To: Richardson, Bruce 
> > > Cc: Stephen Hemminger ; dev@dpdk.org; Stephen 
> > > Hemminger 
> > > Subject: Re: [dpdk-dev] [PATCH 2/3] usertools: add hv_uio_setup script
> > > 
> > > 05/04/2018 23:07, Bruce Richardson:  
> > > > On Thu, Apr 05, 2018 at 10:43:39PM +0200, Thomas Monjalon wrote:  
> > > > > 05/04/2018 21:13, Stephen Hemminger:  
> > > > > > Small script to rebind netvsc kernel device to Hyper-V
> > > > > > networking PMD. It could be integrated in dpdk-bind, but dpdk-bind
> > > > > > is focused on PCI, and that would get messy.
> > > > > >
> > > > > > Eventually, this functionality will be built into netvsc driver
> > > > > > (see vdev_netvsc as an example).  
> > > > >
> > > > > I believe we should avoid creating such script.
> > > > > The direction to go, for hotplug, is to remove dpdk-devbind.py,
> > > > > and implement kernel binding in PMDs (with EAL helpers).
> > > > >  
> > > > I'm not convinced at all that that is the direction to go. I instead 
> > > > would
> > > > prefer to see all binding happen outside DPDK. I believe having udev or
> > > > similar manage bindings, set up via e.g driverctl[1], is a far better 
> > > > path.  
> > > 
> > > This is a system admin tool, and only for Linux.
> > > Having the binding logic inside DPDK, allows the application to control
> > > how hotplug behave.  
> > 
> > I also don't think that DPDK application should control hotplug behavior 
> > logic.
> > It is clearly up to the system admin to make such decisions. 
> > Konstantin
> 
> My preference would be to get driverctl working as a standard tool.
> But it requires kernel changes to work with vmbus.
> 
+1

I don't think that binding should be done by DPDK for a couple of reasons:
1. There are already daemons and kernel support out there, such as udev,
   for managing devices on a system level. I'd rather not see DPDK duplicate
   functionality, when we can re-use what is there. Also there exists the
   possibility of conflict, e.g. what if udev has a rule for a device, and
   DPDK also tries to manage it at the same time.

2. I believe that the app is the wrong place to manage the binding of
   devices, since it's up to the system administrator not the app to determine
   the exact setup for the platform. If apps are to manage binding, then each
   app will have to expose to the user/sysadmin cmdline options to specify
   what devices should be hotplugged into the app or not, and what drivers
   they should be bound to. Not all NICs hotplugged to a platform are for
   DPDK use, and they won't all want to use the igb_uio or the vfio_pci
   drivers. Better that that is configured for each platform on the platform
   itself.

I really feel that the driverctl approach is the best one - yes it's linux
only for now, but architecturally I think it's the proper solution.

/Bruce


Re: [dpdk-dev] [PATCH 00/14] net/qede/base: update PMD version to 2.8.0.1

2018-04-06 Thread Ferruh Yigit
On 4/1/2018 6:46 AM, Rasesh Mody wrote:
> Hi,
> 
> This patch set updates QEDE base driver to use FW version 8.33.12.0.
> It contains some base driver enhancements and fixes. The PMD version
> is updated to 2.8.0.1.
> 
> Please apply.
> 
> Thanks!
> -Rasesh
> 
> Rasesh Mody (14):
>   net/qede/base: use path ID for HW init
>   net/qede/base: protect DMAE transactions
>   net/qede/base: add DMAE sanity check
>   net/qede/base: upgrade FW to 8.33.12.0
>   net/qede/base: symantic changes
>   net/qede/base: add new chain API
>   net/qede/base: allow changing VF MAC address
>   net/qede/base: add MFW support for driver load timeout
>   net/qede/base: refine error handling
>   net/qede/base: add stats counter for link state
>   net/qede/base: add APIs for xcvr
>   net/qede/base: fix to support OVLAN mode
>   net/qede/base: add packet pacing support
>   net/qede: update PMD version to 2.8.0.1

Hi Rasesh,

Getting build errors for 32-bit [1], I didn't dig into which commit caused them.
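
The usual fix for these on 32-bit builds is to print u64 values with the
PRIx64 format macro (from <inttypes.h>) instead of %lx, roughly like below
(the variable names are placeholders, not the actual qede fields):

	DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
		   "src: addr 0x%" PRIx64 ", dst: addr 0x%" PRIx64 "\n",
		   (uint64_t)src_addr, (uint64_t)dst_addr);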


[1]
In file included from .../dpdk/drivers/net/qede/base/ecore.h:50:0,
 from .../dpdk/drivers/net/qede/base/ecore_hw.c:12:
.../dpdk/drivers/net/qede/base/ecore_hw.c: In function 
‘ecore_dmae_execute_command’:
.../dpdk/drivers/net/qede/base/../qede_logs.h:48:5: error: format ‘%lx’ expects
argument of type ‘long unsigned int’, but argument 7 has type ‘u64 {aka long
long unsigned int}’ [-Werror=format=]
 "[%s:%d(%s)]" fmt,   \
 ^
.../dpdk/drivers/net/qede/base/ecore_hw.c:789:3: note: in expansion of macro
‘DP_VERBOSE’
   DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
   ^~
.../dpdk/drivers/net/qede/base/ecore_hw.c:790:69: note: format string is defined
here
   "No buffers allocated. Avoid DMAE transaction [{src: addr 0x%lx, type
%d}, {dst: addr 0x%lx, type %d}, size %d].\n",
   ~~^
   %llx
In file included from .../dpdk/drivers/net/qede/base/ecore.h:50:0,
 from .../dpdk/drivers/net/qede/base/ecore_hw.c:12:
.../dpdk/drivers/net/qede/base/../qede_logs.h:48:5: error: format ‘%lx’ expects
argument of type ‘long unsigned int’, but argument 9 has type ‘u64 {aka long
long unsigned int}’ [-Werror=format=]
 "[%s:%d(%s)]" fmt,   \
 ^
.../dpdk/drivers/net/qede/base/ecore_hw.c:789:3: note: in expansion of macro
‘DP_VERBOSE’
   DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
   ^~
.../dpdk/drivers/net/qede/base/ecore_hw.c:790:97: note: format string is defined
here
   "No buffers allocated. Avoid DMAE transaction [{src: addr 0x%lx, type
%d}, {dst: addr 0x%lx, type %d}, size %d].\n",

  ~~^

  %llx
In file included from .../dpdk/drivers/net/qede/base/ecore.h:50:0,
 from .../dpdk/drivers/net/qede/base/ecore_hw.c:12:
.../dpdk/drivers/net/qede/base/ecore_hw.c: In function ‘ecore_dmae_sanity’:
.../dpdk/drivers/net/qede/base/../qede_logs.h:48:5: error: format ‘%lx’ expects
argument of type ‘long unsigned int’, but argument 8 has type ‘long long
unsigned int’ [-Werror=format=]
 "[%s:%d(%s)]" fmt,   \
 ^
.../dpdk/drivers/net/qede/base/ecore_hw.c:995:2: note: in expansion of macro
‘DP_VERBOSE’
  DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
  ^~
.../dpdk/drivers/net/qede/base/ecore_hw.c:996:44: note: format string is defined
here
  "DMAE sanity [%s]: src_addr={phys 0x%lx, virt %p}, dst_addr={phys 0x%lx,
virt %p}, size 0x%x\n",
  ~~^
  %llx
In file included from .../dpdk/drivers/net/qede/base/ecore.h:50:0,
 from .../dpdk/drivers/net/qede/base/ecore_hw.c:12:
.../dpdk/drivers/net/qede/base/../qede_logs.h:48:5: error: format ‘%lx’ expects
argument of type ‘long unsigned int’, but argument 10 has type ‘long long
unsigned int’ [-Werror=format=]
 "[%s:%d(%s)]" fmt,   \
 ^
.../dpdk/drivers/net/qede/base/ecore_hw.c:995:2: note: in expansion of macro
‘DP_VERBOSE’
  DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
  ^~
.../dpdk/drivers/net/qede/base/ecore_hw.c:996:76: note: format string is defined
here
  "DMAE sanity [%s]: src_addr={phys 0x%lx, virt %p}, dst_addr={phys 0x%lx,
virt %p}, size 0x%x\n",
  ~~^
  %llx
In file included from .../dpdk/drivers/net/qede/base/ecore.h:50:0,
 from .../dpdk/drivers/net/qede/base/ecore_hw.c:12:
.../dpdk/drivers/net/qede/base/../qede_logs.h:25:4: error: format ‘%lx’ expects
argument of type ‘long unsigned int’, but argument 7 has type ‘long long
unsigned int’ [-Werror=format=]
"[QEDE PMD: (%s)]%s:" fmt, \
^
.../dpdk/drivers/net/qede/base/ecore_hw.c:1018:4: note: in expansion of macro
‘DP_NOTICE’
DP_NOTICE(p_hwfn, false,
^
.../dpdk/drivers/net/qede/base/ecore_hw.c:1019:41: note: format string is
defined here
   "DMAE sanity [%s]: addr={phys 0x%

Re: [dpdk-dev] [PATCH] net/i40e/vf: reset scatter_rx flag when configuration complies

2018-04-06 Thread Zhang, Helin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Zhang, Qi Z
> Sent: Monday, April 2, 2018 8:14 PM
> To: eduse...@gmail.com
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/i40e/vf: reset scatter_rx flag when
> configuration complies
> 
> Hi Serra:
> 
> > -Original Message-
> > From: eduse...@gmail.com [mailto:eduse...@gmail.com]
> > Sent: Friday, March 30, 2018 4:13 AM
> > To: Zhang, Qi Z 
> > Cc: dev@dpdk.org; Eduard Serra 
> > Subject: [PATCH] net/i40e/vf: reset scatter_rx flag when configuration
> > complies
> >
> > From: Eduard Serra 
> >
> > Scatter RX (scattered_rx) flag is currently not being resetted when
> > new configuration is suplied to reconfigure a port, rendering the rx
> > callback logic to
> 
> s/suplied/supplied
> 
> > always fall through the scatter branch.
> >
> > Signed-off-by: Eduard Serra 
> > ---
> >  drivers/net/i40e/i40e_ethdev_vf.c | 2 ++
> >  drivers/net/i40e/i40e_rxtx.c  | 2 ++
> >  2 files changed, 4 insertions(+)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
> > b/drivers/net/i40e/i40e_ethdev_vf.c
> > index 750d849..a072154 100644
> > --- a/drivers/net/i40e/i40e_ethdev_vf.c
> > +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> > @@ -1750,6 +1750,8 @@ i40evf_rxq_init(struct rte_eth_dev *dev, struct
> > i40e_rx_queue *rxq)
> > if (dev_data->dev_conf.rxmode.enable_scatter ||
> > (rxq->max_pkt_len + 2 * I40E_VLAN_TAG_SIZE) > buf_size) {
> > dev_data->scattered_rx = 1;
> > +   } else {
> > +   dev_data->scattered_rx = 0;
> 
> Scattered_rx should not be reset if any queue already requires it. You should
> reset it before the queue loop.
Does it mean a NACK?

/Helin

> 
> > }
> >
> > return 0;
> > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > b/drivers/net/i40e/i40e_rxtx.c index
> > 1217e5a..8698747 100644
> > --- a/drivers/net/i40e/i40e_rxtx.c
> > +++ b/drivers/net/i40e/i40e_rxtx.c
> > @@ -2561,6 +2561,8 @@ i40e_rx_queue_init(struct i40e_rx_queue *rxq)
> > /* Check if scattered RX needs to be used. */
> > if ((rxq->max_pkt_len + 2 * I40E_VLAN_TAG_SIZE) > buf_size) {
> > dev_data->scattered_rx = 1;
> > +   } else {
> > +   dev_data->scattered_rx = 0;
> 
> Same as above.
> 
> Regards
> Qi
> 
> > }
> >
> > /* Init the RX tail regieter. */
> > --
> > 2.7.4



Re: [dpdk-dev] [PATCH] net/e1000: add mac_addr_set set to em

2018-04-06 Thread Zhang, Helin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Zhao1, Wei
> Sent: Monday, April 2, 2018 2:17 PM
> To: Chas Williams; dev@dpdk.org
> Cc: Lu, Wenzhuo; Chas Williams
> Subject: Re: [dpdk-dev] [PATCH] net/e1000: add mac_addr_set set to em
> 
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Chas Williams
> > Sent: Monday, February 19, 2018 8:40 AM
> > To: dev@dpdk.org
> > Cc: Lu, Wenzhuo ; Chas Williams 
> > Subject: [dpdk-dev] [PATCH] net/e1000: add mac_addr_set set to em
> >
> > From: Chas Williams 
> >
> > Based on the equivalent code in the igb driver.
> >
> > Signed-off-by: Chas Williams 
> > ---
> >  drivers/net/e1000/em_ethdev.c | 12 
> >  1 file changed, 12 insertions(+)
> >
> > diff --git a/drivers/net/e1000/em_ethdev.c
> > b/drivers/net/e1000/em_ethdev.c index 242375f..5bb9cc9 100644
> > --- a/drivers/net/e1000/em_ethdev.c
> > +++ b/drivers/net/e1000/em_ethdev.c
> > @@ -94,6 +94,8 @@ static int em_get_rx_buffer_size(struct e1000_hw
> > *hw); static int eth_em_rar_set(struct rte_eth_dev *dev, struct
> > ether_addr *mac_addr,
> >   uint32_t index, uint32_t pool);
> >  static void eth_em_rar_clear(struct rte_eth_dev *dev, uint32_t
> > index);
> > +static void eth_em_default_mac_addr_set(struct rte_eth_dev *dev,
> > +struct ether_addr *addr);
> >
> >  static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev,
> >struct ether_addr *mc_addr_set, @@ -190,6
> +192,7 @@ static
> > const struct eth_dev_ops eth_em_ops = {
> > .dev_led_off  = eth_em_led_off,
> > .flow_ctrl_get= eth_em_flow_ctrl_get,
> > .flow_ctrl_set= eth_em_flow_ctrl_set,
> > +   .mac_addr_set = eth_em_default_mac_addr_set,
> > .mac_addr_add = eth_em_rar_set,
> > .mac_addr_remove  = eth_em_rar_clear,
> > .set_mc_addr_list = eth_em_set_mc_addr_list,
> > @@ -1809,6 +1812,15 @@ eth_em_rar_clear(struct rte_eth_dev *dev,
> > uint32_t index)
> > e1000_rar_set(hw, addr, index);
> >  }
> >
> > +static void
> > +eth_em_default_mac_addr_set(struct rte_eth_dev *dev,
> > +   struct ether_addr *addr)
> > +{
> > +   eth_em_rar_clear(dev, 0);
> > +
> > +   eth_em_rar_set(dev, (void *)addr, 0, 0); }
> > +
> >  static int
> >  eth_em_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)  {
> > --
> > 2.9.5
> 
> 
>  I think this patch is needed by em to enable configuration of the default
> Receive Address Register. If a DPDK user calls the rte function
> rte_eth_dev_default_mac_addr_set(), it can do this configuration like other
> NICs.
> 
> Acked-by: Wei Zhao 
Applied to dpdk-next-net-intel, thanks!

/Helin


Re: [dpdk-dev] [PATCH v4] net/i40e: add flow RSS queue index check

2018-04-06 Thread Zhang, Helin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Zhang, Qi Z
> Sent: Wednesday, April 4, 2018 9:26 PM
> To: Zhao1, Wei; dev@dpdk.org; sta...@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4] net/i40e: add flow RSS queue index check
> 
> 
> 
> > -Original Message-
> > From: Zhao1, Wei
> > Sent: Wednesday, April 4, 2018 4:06 PM
> > To: dev@dpdk.org; sta...@dpdk.org
> > Cc: Zhang, Qi Z ; Zhao1, Wei
> > 
> > Subject: [PATCH v4] net/i40e: add flow RSS queue index check
> 
> Title should start with "fix".
> 
> >
> > A queue index check is needed for the RSS queue region in order to avoid
> > errors from invalid configuration.
> >
> > Fixes: ecad87d22383 ("net/i40e: move RSS to flow API")
> > Signed-off-by: Wei Zhao 
> > Tested-by: Peng Yuan 
> 
> Should be Tested-by: Yuan Peng 
> 
> Acked-by: Qi Zhang 
Applied to dpdk-next-net-intel, thanks!

/Helin


Re: [dpdk-dev] [PATCH] net/i40e: fix flow RSS queue region error

2018-04-06 Thread Zhang, Helin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Zhang, Qi Z
> Sent: Wednesday, April 4, 2018 12:06 PM
> To: Zhao1, Wei; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/i40e: fix flow RSS queue region error
> 
> 
> 
> > -Original Message-
> > From: Zhao1, Wei
> > Sent: Tuesday, April 3, 2018 2:09 PM
> > To: dev@dpdk.org
> > Cc: Zhang, Qi Z ; Zhao1, Wei
> > 
> > Subject: [PATCH] net/i40e: fix flow RSS queue region error
> >
> > Queue region comparison error in configuration parameters.
> >
> > Fixes: ecad87d22383 ("net/i40e: move RSS to flow API")
> > Signed-off-by: Wei Zhao 
> > Tested-by: Peng Yuan 
> 
> Acked-by: Qi Zhang 
Applied to dpdk-next-net-intel, thanks!

/Helin


Re: [dpdk-dev] [PATCH v3 13/21] vhost: add helpers for packed virtqueues

2018-04-06 Thread Maxime Coquelin



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Add some helper functions to set/check descriptor flags
and toggle the used wrap counter.

Signed-off-by: Jens Freimann 
---
  lib/librte_vhost/virtio-1.1.h | 44 +++
  1 file changed, 44 insertions(+)

diff --git a/lib/librte_vhost/virtio-1.1.h b/lib/librte_vhost/virtio-1.1.h
index 7b48caed7..e77d7aa6c 100644
--- a/lib/librte_vhost/virtio-1.1.h
+++ b/lib/librte_vhost/virtio-1.1.h

Shouldn't the file be named virtio-packed.h?


@@ -15,4 +15,48 @@ struct vring_desc_packed {
uint16_t flags;
  };
  
+static inline void

+toggle_wrap_counter(struct vhost_virtqueue *vq)
+{
+   vq->used_wrap_counter ^= 1;
+}
+
+static inline int
+desc_is_avail(struct vhost_virtqueue *vq, struct vring_desc_packed *desc)
+{
+   if (vq->used_wrap_counter == 1) {
+   if ((desc->flags & VRING_DESC_F_AVAIL) &&
+   !(desc->flags & VRING_DESC_F_USED))
+   return 1;
+   }
+   if (vq->used_wrap_counter == 0) {
+   if (!(desc->flags & VRING_DESC_F_AVAIL) &&
+   (desc->flags & VRING_DESC_F_USED))
+   return 1;
+   }
+   return 0;
+}
+
+static inline void
+_set_desc_used(struct vring_desc_packed *desc, int wrap_counter)
+{
+   uint16_t flags = desc->flags;
+
+   if (wrap_counter == 1) {
+   flags |= VRING_DESC_F_USED;
+   flags |= VRING_DESC_F_AVAIL;
+   } else {
+   flags &= ~VRING_DESC_F_USED;
+   flags &= ~VRING_DESC_F_AVAIL;
+   }
+
+   desc->flags = flags;
+}
+
+static inline void
+set_desc_used(struct vhost_virtqueue *vq, struct vring_desc_packed *desc)
+{
+   _set_desc_used(desc, vq->used_wrap_counter);
+}
+


Maybe prefix all with vring_
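
e.g. something like this (untested, just to illustrate the naming and a
possible simplification):

static inline int
vring_desc_is_avail(struct vhost_virtqueue *vq, struct vring_desc_packed *desc)
{
	int avail = !!(desc->flags & VRING_DESC_F_AVAIL);
	int used = !!(desc->flags & VRING_DESC_F_USED);

	/* available when AVAIL matches the wrap counter and USED does not */
	return avail == vq->used_wrap_counter && used != vq->used_wrap_counter;
}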

  #endif /* __VIRTIO_PACKED_H */



Re: [dpdk-dev] [PATCH v3 13/21] vhost: add helpers for packed virtqueues

2018-04-06 Thread Jens Freimann

On Fri, Apr 06, 2018 at 11:20:10AM +0200, Maxime Coquelin wrote:



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Add some helper functions to set/check descriptor flags
and toggle the used wrap counter.

Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/virtio-1.1.h | 44 +++
 1 file changed, 44 insertions(+)

diff --git a/lib/librte_vhost/virtio-1.1.h b/lib/librte_vhost/virtio-1.1.h
index 7b48caed7..e77d7aa6c 100644
--- a/lib/librte_vhost/virtio-1.1.h
+++ b/lib/librte_vhost/virtio-1.1.h

Shouldn't the file be named virtio-packed.h?


yes, will rename it. 


@@ -15,4 +15,48 @@ struct vring_desc_packed {
uint16_t flags;
 };
+static inline void
+toggle_wrap_counter(struct vhost_virtqueue *vq)
+{
+   vq->used_wrap_counter ^= 1;
+}
+
+static inline int
+desc_is_avail(struct vhost_virtqueue *vq, struct vring_desc_packed *desc)
+{
+   if (vq->used_wrap_counter == 1) {
+   if ((desc->flags & VRING_DESC_F_AVAIL) &&
+   !(desc->flags & VRING_DESC_F_USED))
+   return 1;
+   }
+   if (vq->used_wrap_counter == 0) {
+   if (!(desc->flags & VRING_DESC_F_AVAIL) &&
+   (desc->flags & VRING_DESC_F_USED))
+   return 1;
+   }
+   return 0;
+}
+
+static inline void
+_set_desc_used(struct vring_desc_packed *desc, int wrap_counter)
+{
+   uint16_t flags = desc->flags;
+
+   if (wrap_counter == 1) {
+   flags |= VRING_DESC_F_USED;
+   flags |= VRING_DESC_F_AVAIL;
+   } else {
+   flags &= ~VRING_DESC_F_USED;
+   flags &= ~VRING_DESC_F_AVAIL;
+   }
+
+   desc->flags = flags;
+}
+
+static inline void
+set_desc_used(struct vhost_virtqueue *vq, struct vring_desc_packed *desc)
+{
+   _set_desc_used(desc, vq->used_wrap_counter);
+}
+


Maybe prefix all with vring_

 #endif /* __VIRTIO_PACKED_H */


ok

Thanks!

regards,
Jens 




[dpdk-dev] [PATCH v7] net/virtio-user: add support for server mode

2018-04-06 Thread zhiyong . yang
In a container environment if the vhost-user backend restarts, there's
no way for it to reconnect to virtio-user. To address this, support for
server mode is added. In this mode the socket file is created by virtio-
user, which the backend then connects to. This means that if the backend
restarts, it can reconnect to virtio-user and continue communications.

With the current implementation, LSC is enabled at the virtio-user side to
accept the incoming connection.

Server mode virtio-user only works with vhost-user.

Release note is updated in this patch.

Signed-off-by: Zhiyong Yang 
---

Cc: maxime.coque...@redhat.com
Cc: jianfeng@intel.com
Cc: tiwei@intel.com
Cc: zhihong.w...@intel.com
Cc: dong1.w...@intel.com
Cc: tho...@monjalon.net

Changes in V7:
1. avoid misusing vhost-kernel in server mode virtio-user.
2. move the function definition is_vhost_user_by_type before
virtio_user_start_device so that it can be called.
3. add comments in the code to state feature negotiation limit.

Changes in V6:
1. fix reporting wrong link status in server mode.
2. fix some code style issues.

Changes in V5:
1. Support server mode virtio-user startup in non-blocking mode.
2. rebase on top of dpdk-next-virtio.

Changes in V4:
1. Don't create new pthread any more and use librte_eal interrupt thread
instead. 

Changes in V3:
1. use the EAL epoll mechanism instead of vhost events. No longer export the
vhost event APIs.
2. rebase the code on top of dpdk-next-virtio

Changes in V2:
1. split two patches 1/5 and 2/5 from v1 patchset to fix some existing issues
which are not strongly related to support for server mode
2. move fdset related functions to librte_eal from librte_vhost exposed as
new APIs.
3. release note is added in the patch 5/5.
4. squash data structure change patch into 4/5 according to Maxime's suggestion.

 doc/guides/rel_notes/release_18_05.rst   |   6 ++
 drivers/net/virtio/virtio_user/vhost_user.c  |  45 --
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 101 ---
 drivers/net/virtio/virtio_user/virtio_user_dev.h |   3 +
 drivers/net/virtio/virtio_user_ethdev.c  | 101 ---
 5 files changed, 209 insertions(+), 47 deletions(-)

diff --git a/doc/guides/rel_notes/release_18_05.rst 
b/doc/guides/rel_notes/release_18_05.rst
index 9cc77f893..f8897b2e9 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -58,6 +58,12 @@ New Features
   * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
   * Added support for DROP action in flow API.
 
+* **Added support for virtio-user server mode.**
+  In a container environment if the vhost-user backend restarts, there's no way
+  for it to reconnect to virtio-user. To address this, support for server mode
+  is added. In this mode the socket file is created by virtio-user, which the
+  backend connects to. This means that if the backend restarts, it can 
reconnect
+  to virtio-user and continue communications.
 
 API Changes
 ---
diff --git a/drivers/net/virtio/virtio_user/vhost_user.c 
b/drivers/net/virtio/virtio_user/vhost_user.c
index 91c6449bb..a6df97a00 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -378,6 +378,30 @@ vhost_user_sock(struct virtio_user_dev *dev,
return 0;
 }
 
+#define MAX_VIRTIO_USER_BACKLOG 1
+static int
+virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
+{
+   int ret;
+   int flag;
+   int fd = dev->listenfd;
+
+   ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
+   if (ret < 0) {
+   PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try 
again\n",
+   dev->path, strerror(errno));
+   return -1;
+   }
+   ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
+   if (ret < 0)
+   return -1;
+
+   flag = fcntl(fd, F_GETFL);
+   fcntl(fd, F_SETFL, flag | O_NONBLOCK);
+
+   return 0;
+}
+
 /**
  * Set up environment to talk with a vhost user backend.
  *
@@ -405,13 +429,24 @@ vhost_user_setup(struct virtio_user_dev *dev)
memset(&un, 0, sizeof(un));
un.sun_family = AF_UNIX;
snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
-   if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
-   PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
-   close(fd);
-   return -1;
+
+   if (dev->is_server) {
+   dev->listenfd = fd;
+   if (virtio_user_start_server(dev, &un) < 0) {
+   PMD_DRV_LOG(ERR, "virtio-user startup fails in server 
mode");
+   close(fd);
+   return -1;
+   }
+   dev->vhostfd = -1;
+   } else {
+   if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+   PMD_DRV_LOG(ERR, 

Re: [dpdk-dev] [PATCH v3 14/21] vhost: dequeue for packed queues

2018-04-06 Thread Maxime Coquelin



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Implement code to dequeue and process descriptors from
the vring if VIRTIO_F_RING_PACKED is enabled.

Check if descriptor was made available by driver by looking at
VIRTIO_F_DESC_AVAIL flag in descriptor. If so dequeue and set
the used flag VIRTIO_F_DESC_USED to the current value of the
used wrap counter.

Used ring wrap counter needs to be toggled when last descriptor is
written out. This allows the host/guest to detect new descriptors even
after the ring has wrapped.

Signed-off-by: Jens Freimann 
---
  lib/librte_vhost/vhost.c  |   1 +
  lib/librte_vhost/vhost.h  |   1 +
  lib/librte_vhost/virtio_net.c | 228 ++
  3 files changed, 230 insertions(+)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 1f17cdd75..eb5a98875 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -185,6 +185,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
  
  	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;

vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+   vq->used_wrap_counter = 1;
  
  	vhost_user_iotlb_init(dev, vring_idx);

/* Backends are set to -1 indicating an inactive device. */
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 20d78f883..c8aa946fd 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -112,6 +112,7 @@ struct vhost_virtqueue {
  
  	struct batch_copy_elem	*batch_copy_elems;

uint16_tbatch_copy_nb_elems;
+   uint16_tused_wrap_counter;
  
  	rte_rwlock_t	iotlb_lock;

rte_rwlock_tiotlb_pending_lock;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ed7198dbb..7eea1da04 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -19,6 +19,7 @@
  
  #include "iotlb.h"

  #include "vhost.h"
+#include "virtio-1.1.h"
  
  #define MAX_PKT_BURST 32
  
@@ -1118,6 +1119,233 @@ restore_mbuf(struct rte_mbuf *m)

}
  }
  
+static inline uint16_t

+dequeue_desc_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
+struct vring_desc_packed *descs)
+{
+   struct vring_desc_packed *desc;
+   uint64_t desc_addr;
+   uint32_t desc_avail, desc_offset;
+   uint32_t mbuf_avail, mbuf_offset;
+   uint32_t cpy_len;
+   struct rte_mbuf *cur = m, *prev = m;
+   struct virtio_net_hdr *hdr = NULL;
+   uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
+   int wrap_counter = vq->used_wrap_counter;
+   int rc = 0;
+
+   rte_spinlock_lock(&vq->access_lock);
+
+   if (unlikely(vq->enabled == 0))
+   goto out;


It is unbalanced as it would unlock iotlb_rd_lock that isn't locked yet.
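
Something like the pattern below would keep it balanced (a sketch only, the
label names are illustrative):

	rte_spinlock_lock(&vq->access_lock);

	if (unlikely(vq->enabled == 0))
		goto out_access_unlock; /* iotlb read lock not taken yet */

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	/* descriptor processing; later errors jump to "out" */

out:
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_unlock(vq);
out_access_unlock:
	rte_spinlock_unlock(&vq->access_lock);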


+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_rd_lock(vq);
+
+   desc = &descs[vq->last_used_idx & (vq->size - 1)];
+   if (unlikely((desc->len < dev->vhost_hlen)) ||
+   (desc->flags & VRING_DESC_F_INDIRECT)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "INDIRECT not supported yet\n");
+   rc = -1;
+   goto out;
+   }
+
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ sizeof(*desc), VHOST_ACCESS_RO);
+
+   if (unlikely(!desc_addr)) {
+   rc = -1;
+   goto out;
+   }
+
+   if (virtio_net_with_host_offload(dev)) {
+   hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+   rte_prefetch0(hdr);
+   }
+
+   /*
+* A virtio driver normally uses at least 2 desc buffers
+* for Tx: the first for storing the header, and others
+* for storing the data.
+*/
+   if (likely((desc->len == dev->vhost_hlen) &&
+  (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+   if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+   toggle_wrap_counter(vq);
+
+   desc = &descs[vq->last_used_idx & (vq->size - 1)];
+
+   if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "INDIRECT not supported yet\n");
+   rc = -1;
+   goto out;
+   }
+
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ sizeof(*desc), VHOST_ACCESS_RO);
+   if (unlikely(!desc_addr)) {
+   rc = -1;
+   goto out;
+   }
+
+   desc_offset = 0;
+   desc_avail  = desc->len;
+   } else {
+   desc_avail  = desc->len - dev->vhost_hlen;
+   desc_offset = dev->vhost_hlen;
+   }
+
+   rte

Re: [dpdk-dev] [PATCH v3 15/21] vhost: packed queue enqueue path

2018-04-06 Thread Maxime Coquelin



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Implement enqueue of packets to the receive virtqueue.

Set descriptor flag VIRTQ_DESC_F_USED and toggle used wrap counter if
last descriptor in ring is used. Perform a write memory barrier before
flags are written to descriptor.

Chained descriptors are not supported with this patch.

Signed-off-by: Jens Freimann 
---
  lib/librte_vhost/virtio_net.c | 129 ++
  1 file changed, 129 insertions(+)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 7eea1da04..578e5612e 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -695,6 +695,135 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
return pkt_idx;
  }
  
+static inline uint32_t __attribute__((always_inline))

+vhost_enqueue_burst_packed(struct virtio_net *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+   struct vhost_virtqueue *vq;
+   struct vring_desc_packed *descs;
+   uint16_t idx;
+   uint16_t mask;
+   uint16_t i;
+
+   vq = dev->virtqueue[queue_id];
+
+   rte_spinlock_lock(&vq->access_lock);
+
+   if (unlikely(vq->enabled == 0)) {
+   i = 0;
+   goto out_access_unlock;
+   }
+
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_rd_lock(vq);
+
+   descs = vq->desc_packed;
+   mask = vq->size - 1;
+
+   for (i = 0; i < count; i++) {
+   uint32_t desc_avail, desc_offset;
+   uint32_t mbuf_avail, mbuf_offset;
+   uint32_t cpy_len;
+   struct vring_desc_packed *desc;
+   uint64_t desc_addr;
+   struct virtio_net_hdr_mrg_rxbuf *hdr;
+   struct rte_mbuf *m = pkts[i];
+
+   /* XXX: there is an assumption that no desc will be chained */

Is this assumption still true?
If not, what is the plan to fix this?


+   idx = vq->last_used_idx & mask;
+   desc = &descs[idx];
+
+   if (!desc_is_avail(vq, desc))

IIUC, it means the ring is full.
I think this is an unlikely case, so maybe better to use the unlikely
macro here.
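
i.e.:

		if (unlikely(!desc_is_avail(vq, desc)))
			break;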


+   break;
+   rte_smp_rmb();
+
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ sizeof(*desc), VHOST_ACCESS_RW);
+   /*
+* Checking of 'desc_addr' placed outside of 'unlikely' macro
+* to avoid performance issue with some versions of gcc (4.8.4
+* and 5.3.0) which otherwise stores offset on the stack instead
+* of in a register.
+*/
+   if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
+   break;
+
+   hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr;
+   virtio_enqueue_offload(m, &hdr->hdr);
+   vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+   PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+
+   desc_offset = dev->vhost_hlen;
+   desc_avail  = desc->len - dev->vhost_hlen;
+
+   mbuf_avail  = rte_pktmbuf_data_len(m);
+   mbuf_offset = 0;
+   while (mbuf_avail != 0 || m->next != NULL) {
+   /* done with current mbuf, fetch next */
+   if (mbuf_avail == 0) {
+   m = m->next;
+
+   mbuf_offset = 0;
+   mbuf_avail  = rte_pktmbuf_data_len(m);
+   }
+
+   /* done with current desc buf, fetch next */
+   if (desc_avail == 0) {
+   if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
+   /* Room in vring buffer is not enough */
+   goto out;
+   }
+
+   idx = (idx+1) & (vq->size - 1);
+   desc = &descs[idx];
+   if (unlikely(!desc_is_avail(vq, desc)))
+   goto out ;
+
+   desc_addr = vhost_iova_to_vva(dev, vq, 
desc->addr,
+ sizeof(*desc),
+ VHOST_ACCESS_RW);
+   if (unlikely(!desc_addr))
+   goto out;
+
+   desc_offset = 0;
+   desc_avail  = desc->len;
+   }
+
+   cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+   rte_memcpy((void *)((uintptr_t)(desc_addr + 
desc_offset)),
+   rte

Re: [dpdk-dev] [PATCH v3 17/21] net/virtio: disable ctrl virtqueue for packed rings

2018-04-06 Thread Maxime Coquelin



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Signed-off-by: Jens Freiman 
---
  drivers/net/virtio/virtio_ethdev.c | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index dc220c743..7367d9c5d 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1157,6 +1157,13 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t 
req_features)
req_features &= ~(1ull << VIRTIO_F_RING_PACKED);
  #endif
  
+	if (req_features & (1ULL << VIRTIO_F_RING_PACKED)) {

+   req_features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
+   req_features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
+   req_features &= ~(1ull << VIRTIO_NET_F_CTRL_RX);
+   req_features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN);
+   }
+


Does packed ring not support ctrl vqs, or is it just a workaround while
it is implemented?


/*
 * Negotiate features: Subset of device feature bits are written back
 * guest feature bits.



Re: [dpdk-dev] [PATCH] net/octeneontx: fix warnings

2018-04-06 Thread Ferruh Yigit
On 4/6/2018 7:55 AM, Jerin Jacob wrote:
> -Original Message-
>> Date: Thu,  5 Apr 2018 08:12:28 -0700
>> From: Stephen Hemminger 
>> To: jerin.ja...@caviumnetworks.com
>> Cc: dev@dpdk.org, Stephen Hemminger 
>> Subject: [PATCH] net/octeneontx: fix warnings
>> X-Mailer: git-send-email 2.16.3
>>
>> This is fix for Coverity Devect 268319 about uninitialized speed
> 
> s/Devect/defect
> 
>> in an error case. Also drop unnecessary assignment.
> 
> Coverity issue: 268319
> Fixes: 4fac7c0a147e ("net/octeontx: add link update")
> 
> CC: sta...@dpdk.org
> 

>> Signed-off-by: Stephen Hemminger 

> Acked-by: Jerin Jacob 

Applied to dpdk-next-net/master, thanks.

(with suggested commit log updates)


Re: [dpdk-dev] [PATCH v3 17/21] net/virtio: disable ctrl virtqueue for packed rings

2018-04-06 Thread Jens Freimann

On Fri, Apr 06, 2018 at 11:38:50AM +0200, Maxime Coquelin wrote:



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Signed-off-by: Jens Freiman 
---
 drivers/net/virtio/virtio_ethdev.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index dc220c743..7367d9c5d 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1157,6 +1157,13 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t 
req_features)
req_features &= ~(1ull << VIRTIO_F_RING_PACKED);
 #endif
+   if (req_features & (1ULL << VIRTIO_F_RING_PACKED)) {
+   req_features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
+   req_features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
+   req_features &= ~(1ull << VIRTIO_NET_F_CTRL_RX);
+   req_features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN);
+   }
+


Does packed ring not support ctrl vqs, or is it just a workaround while
it is implemented?


packed queues do support control virtqueues, but I had not implemented it
yet. I have a patch for it though and will include it in v4.

regards,
Jens 



/*
 * Negotiate features: Subset of device feature bits are written back
 * guest feature bits.



Re: [dpdk-dev] [PATCH v3 17/21] net/virtio: disable ctrl virtqueue for packed rings

2018-04-06 Thread Maxime Coquelin



On 04/06/2018 11:43 AM, Jens Freimann wrote:

On Fri, Apr 06, 2018 at 11:38:50AM +0200, Maxime Coquelin wrote:



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Signed-off-by: Jens Freiman 
---
 drivers/net/virtio/virtio_ethdev.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c

index dc220c743..7367d9c5d 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1157,6 +1157,13 @@ virtio_negotiate_features(struct virtio_hw 
*hw, uint64_t req_features)

 req_features &= ~(1ull << VIRTIO_F_RING_PACKED);
 #endif
+    if (req_features & (1ULL << VIRTIO_F_RING_PACKED)) {
+    req_features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
+    req_features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
+    req_features &= ~(1ull << VIRTIO_NET_F_CTRL_RX);
+    req_features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN);
+    }
+


Does packed ring not support ctrl vqs, or is it just a workaround while
it is implemented?


packed queues do support control virtqueues, but I had not implemented it
yet. I have a patch for it though and will include it in v4.


Great. If you didn't have the patch, I think a comment would be welcome,
but that doesn't apply since you have the patch in your queue!

Thanks,
Maxime



regards,
Jens



 /*
  * Negotiate features: Subset of device feature bits are written 
back

  * guest feature bits.



Re: [dpdk-dev] [PATCH v2] net/bonding: switch to new offloading API

2018-04-06 Thread Radu Nicolau


On 3/22/2018 6:13 PM, Ferruh Yigit wrote:

Switch to new ethdev offloading API.

Signed-off-by: Ferruh Yigit 
---
Cc: Radu Nicolau 
Cc: Matan Azrad 

v2:
* added [rt]x_queue_offload_capa to driver internal struct
* increased [rt]x_offload_capa size to 64bit as same as ethdev
---
  
Acked-by: Radu Nicolau  



Re: [dpdk-dev] [dpdk-stable] [PATCH] net/bonding: fix setting VLAN ID on slave ports

2018-04-06 Thread Ferruh Yigit
On 4/5/2018 12:06 PM, Radu Nicolau wrote:
> 
> On 4/3/2018 5:01 PM, Chas Williams wrote:
>> From: Chas Williams
>>
>> The pos returned is just the offset of the slab.  You need to use this
>> to offset the bits in the slab.
>>
>> Fixes: c771e4ef38 ("net/bonding: enable slave VLAN filter")
>> Cc:sta...@dpdk.org
>>
>> Signed-off-by: Chas Williams
>> ---
>>   
> Acked-by: Radu Nicolau  

Applied to dpdk-next-net/master, thanks.
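
To restate the defect being fixed here: rte_bitmap_scan() returns the bit
offset of a whole 64-bit slab together with the slab value, so each VLAN ID
is that offset plus the index of a set bit within the slab, not the offset
alone. A minimal sketch of the intended usage (the function name and the
single-slab loop are illustrative, not the bonding driver code):

/*
 * Sketch only: handles the first non-empty slab; the real code loops
 * until the scan wraps around.
 */
#include <stdint.h>
#include <rte_bitmap.h>
#include <rte_ethdev.h>

static void
sync_slave_vlans(struct rte_bitmap *vlan_bmp, uint16_t slave_port_id)
{
        uint32_t pos = 0, i;
        uint64_t slab = 0;

        if (rte_bitmap_scan(vlan_bmp, &pos, &slab) == 0)
                return;                 /* no VLAN IDs configured */

        for (i = 0; i < RTE_BITMAP_SLAB_BIT_SIZE; i++) {
                if (slab & (1ULL << i))
                        /* VLAN ID = slab offset + bit index, not pos alone */
                        rte_eth_dev_vlan_filter(slave_port_id, pos + i, 1);
        }
}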


Re: [dpdk-dev] [dpdk-stable] [PATCH v2] net/bonding: clear dev_started if start fails

2018-04-06 Thread Ferruh Yigit
On 4/5/2018 11:58 AM, Radu Nicolau wrote:
> 
> 
> On 3/23/2018 5:05 PM, Chas Williams wrote:
>> From: "Charles (Chas) Williams" 
>>
>> There are several error paths where the bonding device may not start.
>> Clear dev_started before we return if we take one of these paths.
>>
>> Fixes: 2efb58cbab ("bond: new link bonding library")
>> Cc: sta...@dpdk.org
>>
>> Signed-off-by: Chas Williams 
>> ---
>>
> Acked-by: Radu Nicolau  

Applied to dpdk-next-net/master, thanks.


Re: [dpdk-dev] [PATCH v3 18/21] net/virtio: add support for mergeable buffers with packed virtqueues

2018-04-06 Thread Maxime Coquelin

Maybe better to place this patch right after the non-mrg one.

On 04/05/2018 12:10 PM, Jens Freimann wrote:

Implement support for receiving merged buffers in virtio when packed virtqueues
are enabled.

Signed-off-by: Jens Freimann 
---
  drivers/net/virtio/virtio_ethdev.c |  10 ++--
  drivers/net/virtio/virtio_rxtx.c   | 107 +
  drivers/net/virtio/virtqueue.h |   1 +
  3 files changed, 104 insertions(+), 14 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 7367d9c5d..a3c3376d7 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1322,15 +1322,15 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
  {
struct virtio_hw *hw = eth_dev->data->dev_private;
  
-	/* workarount for packed vqs which don't support mrg_rxbuf at this point */

-   if (vtpci_packed_queue(hw) && vtpci_with_feature(hw, 
VIRTIO_NET_F_MRG_RXBUF)) {
-   eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
-   } else if (hw->use_simple_rx) {
+   if (hw->use_simple_rx) {
PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
eth_dev->data->port_id);
eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
} else if (vtpci_packed_queue(hw)) {
-   eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
+   if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
+   eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
+   else
+   eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
} else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
PMD_INIT_LOG(INFO,
"virtio: using mergeable buffer Rx path on port %u",
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 9220ae661..a48ca6aaa 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -155,8 +155,8 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
while (dp->flags & VRING_DESC_F_NEXT) {
-   desc_idx_last = dp->next;
-   dp = &vq->vq_ring.desc[dp->next];
+   desc_idx_last = desc_idx++;
+   dp = &vq->vq_ring.desc[desc_idx];


Are you sure this change is in the right patch?


}
}
dxp->ndescs = 0;
@@ -177,6 +177,76 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
dp->next = VQ_RING_DESC_CHAIN_END;
  }
  


Re: [dpdk-dev] [PATCH v3 14/21] vhost: dequeue for packed queues

2018-04-06 Thread Jens Freimann

On Fri, Apr 06, 2018 at 11:30:10AM +0200, Maxime Coquelin wrote:



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Implement code to dequeue and process descriptors from
the vring if VIRTIO_F_RING_PACKED is enabled.

Check whether a descriptor was made available by the driver by looking at
the VIRTIO_F_DESC_AVAIL flag in the descriptor. If so, dequeue it and set
the used flag VIRTIO_F_DESC_USED to the current value of the used wrap
counter.

The used ring wrap counter needs to be toggled when the last descriptor is
written out. This allows the host/guest to detect new descriptors even
after the ring has wrapped.
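
For reference, the availability check sketched below follows the packed-ring
convention described above; the structure layout and flag bit positions are
assumptions for illustration and may differ from the patch:

/*
 * Sketch only: a packed-ring descriptor is available to the device when
 * its AVAIL flag matches the current wrap counter and its USED flag does
 * not.
 */
#include <stdint.h>

#define SKETCH_DESC_F_AVAIL     (1 << 7)        /* assumed bit positions */
#define SKETCH_DESC_F_USED      (1 << 15)

struct desc_packed_sketch {
        uint64_t addr;
        uint32_t len;
        uint16_t id;
        uint16_t flags;
};

static inline int
desc_is_avail(const struct desc_packed_sketch *desc, int wrap_counter)
{
        int avail = !!(desc->flags & SKETCH_DESC_F_AVAIL);
        int used  = !!(desc->flags & SKETCH_DESC_F_USED);

        /* available iff AVAIL == wrap counter and USED != wrap counter */
        return avail == wrap_counter && used != wrap_counter;
}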

Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/vhost.c  |   1 +
 lib/librte_vhost/vhost.h  |   1 +
 lib/librte_vhost/virtio_net.c | 228 ++
 3 files changed, 230 insertions(+)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 1f17cdd75..eb5a98875 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -185,6 +185,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+   vq->used_wrap_counter = 1;
vhost_user_iotlb_init(dev, vring_idx);
/* Backends are set to -1 indicating an inactive device. */
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 20d78f883..c8aa946fd 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -112,6 +112,7 @@ struct vhost_virtqueue {
struct batch_copy_elem  *batch_copy_elems;
uint16_tbatch_copy_nb_elems;
+   uint16_tused_wrap_counter;
rte_rwlock_tiotlb_lock;
rte_rwlock_tiotlb_pending_lock;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ed7198dbb..7eea1da04 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -19,6 +19,7 @@
 #include "iotlb.h"
 #include "vhost.h"
+#include "virtio-1.1.h"
 #define MAX_PKT_BURST 32
@@ -1118,6 +1119,233 @@ restore_mbuf(struct rte_mbuf *m)
}
 }
+static inline uint16_t
+dequeue_desc_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
+struct vring_desc_packed *descs)
+{
+   struct vring_desc_packed *desc;
+   uint64_t desc_addr;
+   uint32_t desc_avail, desc_offset;
+   uint32_t mbuf_avail, mbuf_offset;
+   uint32_t cpy_len;
+   struct rte_mbuf *cur = m, *prev = m;
+   struct virtio_net_hdr *hdr = NULL;
+   uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
+   int wrap_counter = vq->used_wrap_counter;
+   int rc = 0;
+
+   rte_spinlock_lock(&vq->access_lock);
+
+   if (unlikely(vq->enabled == 0))
+   goto out;


It is unbalanced as it would unlock iotlb_rd_lock that isn't locked yet.


yes, will fix



+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_rd_lock(vq);
+
+   desc = &descs[vq->last_used_idx & (vq->size - 1)];
+   if (unlikely((desc->len < dev->vhost_hlen)) ||
+   (desc->flags & VRING_DESC_F_INDIRECT)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "INDIRECT not supported yet\n");
+   rc = -1;
+   goto out;
+   }
+
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ sizeof(*desc), VHOST_ACCESS_RO);
+
+   if (unlikely(!desc_addr)) {
+   rc = -1;
+   goto out;
+   }
+
+   if (virtio_net_with_host_offload(dev)) {
+   hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+   rte_prefetch0(hdr);
+   }
+
+   /*
+* A virtio driver normally uses at least 2 desc buffers
+* for Tx: the first for storing the header, and others
+* for storing the data.
+*/
+   if (likely((desc->len == dev->vhost_hlen) &&
+  (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+   if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+   toggle_wrap_counter(vq);
+
+   desc = &descs[vq->last_used_idx & (vq->size - 1)];
+
+   if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "INDIRECT not supported yet\n");
+   rc = -1;
+   goto out;
+   }
+
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ sizeof(*desc), VHOST_ACCESS_RO);
+   if (unlikely(!desc_addr)) {
+   rc = -1;
+   goto out;
+   }
+
+   desc_offset = 0;
+   desc_avail  = desc->len;
+   } else {
+   desc_avail  = desc->len - dev->vhost_hlen;
+ 

Re: [dpdk-dev] [PATCH v2] net/bonding: switch to new offloading API

2018-04-06 Thread Ferruh Yigit
On 4/6/2018 10:57 AM, Radu Nicolau wrote:
> 
> On 3/22/2018 6:13 PM, Ferruh Yigit wrote:
>> Switch to new ethdev offloading API.
>>
>> Signed-off-by: Ferruh Yigit 
>> ---
>> Cc: Radu Nicolau 
>> Cc: Matan Azrad 
>>
>> v2:
>> * added [rt]x_queue_offload_capa to driver internal struct
>> * increased [rt]x_offload_capa size to 64bit as same as ethdev
>> ---
>>  
> Acked-by: Radu Nicolau 

Re: [dpdk-dev] [PATCH V18 3/5] eal: add failure handler mechanism for hot plug

2018-04-06 Thread Guo, Jia



On 4/4/2018 10:58 AM, Zhang, Qi Z wrote:



-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Jeff Guo
Sent: Wednesday, April 4, 2018 2:17 AM
To: step...@networkplumber.org; Richardson, Bruce
; Yigit, Ferruh ;
Ananyev, Konstantin ;
gaetan.ri...@6wind.com; Wu, Jingjing ;
tho...@monjalon.net; mo...@mellanox.com; Van Haaren, Harry
; Tan, Jianfeng 
Cc: jblu...@infradead.org; shreyansh.j...@nxp.com; dev@dpdk.org; Guo, Jia
; Zhang, Helin 
Subject: [dpdk-dev] [PATCH V18 3/5] eal: add failure handler mechanism for
hot plug

This patch introduces an API (rte_dev_handle_hot_unplug) to handle device
hot unplug events. When a device is hot-unplugged, its resources become
invalid; if those resources are still read or written unexpectedly, the
system will crash. The API lets the user register a hot unplug handler:
when a hot unplug failure occurs, the working thread is blocked until the
uevent mechanism has recovered and remapped the memory, which guarantees
that the application keeps running smoothly.

Signed-off-by: Jeff Guo 
---
v16->v15:
add document and signal bus handler
---
  doc/guides/rel_notes/release_18_05.rst|   6 ++
  lib/librte_eal/common/include/rte_dev.h   |  19 +
  lib/librte_eal/linuxapp/eal/eal_dev.c | 134
+-
  lib/librte_eal/linuxapp/igb_uio/igb_uio.c |   4 +
  lib/librte_eal/rte_eal_version.map|   1 +
  5 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_18_05.rst
b/doc/guides/rel_notes/release_18_05.rst
index 37e00c4..3aacbf1 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -51,6 +51,12 @@ New Features
* ``rte_dev_event_callback_register`` and
``rte_dev_event_callback_unregister``
  are for the user's callbacks register and unregister.

+* **Added hot plug failure handler.**
+
+  Added a failure handler machenism to handle hot plug removal.
+
+  * ``rte_dev_handle_hot_unplug`` for handle hot plug removel failure.
+
  API Changes
  ---

diff --git a/lib/librte_eal/common/include/rte_dev.h
b/lib/librte_eal/common/include/rte_dev.h
index 4c78938..7075e56 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -361,4 +361,23 @@ rte_dev_event_monitor_start(void);
   */
  int __rte_experimental
  rte_dev_event_monitor_stop(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * It can be used to register the device signal bus handler, and save
+the
+ * current environment of each thread, when signal bus error invoke,
+the
+ * handler would restore the environment by long jmp to each working
+ * thread, then block the thread  to waiting until the memory recovery
+ * and remapping be finished, that would guaranty the system not crash
+ * when the device be hot unplug.
+ *
+ * @param none
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_handle_hot_unplug(void);
  #endif /* _RTE_DEV_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c
b/lib/librte_eal/linuxapp/eal/eal_dev.c
index 9f2ee40..fabb37a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_dev.c
+++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
@@ -4,6 +4,9 @@

  #include 
  #include 
+#include 
+#include 
+#include 
  #include 
  #include 

@@ -12,12 +15,17 @@
  #include 
  #include 
  #include 
+#include 
+#include 

  #include "eal_private.h"

  static struct rte_intr_handle intr_handle = {.fd = -1 };  static bool
monitor_started;

+pthread_mutex_t failure_recovery_lock;
+pthread_cond_t failure_recovery_cond;
+
  #define EAL_UEV_MSG_LEN 4096
  #define EAL_UEV_MSG_ELEM_LEN 128

@@ -29,6 +37,22 @@ enum eal_dev_event_subsystem {
EAL_DEV_EVENT_SUBSYSTEM_MAX
  };

+static RTE_DEFINE_PER_LCORE(sigjmp_buf, unplug_longjmp_env);
+
+static void sigbus_handler(int signum __rte_unused) {
+   RTE_LOG(DEBUG, EAL, "receive SIGBUS error!\n");
+   siglongjmp(RTE_PER_LCORE(unplug_longjmp_env), 1); }
+
+static int cmp_dev_name(const struct rte_device *dev,
+   const void *_name)
+{
+   const char *name = _name;
+
+   return strcmp(dev->name, name);
+}
+
  static int
  dev_uev_socket_fd_create(void)
  {
@@ -135,16 +159,114 @@ dev_uev_receive(int fd, struct rte_dev_event
*uevent)
return 0;
  }

+static int
+dev_uev_remove_handler(struct rte_device *dev) {
+   struct rte_bus *bus = rte_bus_find_by_device_name(dev->name);
+   int ret;
+
+   if (!dev)
+   return -1;
+
+   if (bus->handle_hot_unplug) {
+   /**
+* call bus ops to handle hot unplug.
+*/
+   ret = bus->handle_hot_unplug(dev);
+   if (ret) {
+   RTE_LOG(ERR, EAL,
+   "It cannot handle hot unplug for device (%s) "
+   "on the bus.\n ",
+   dev->name);
+   return r

Re: [dpdk-dev] [PATCH V18 1/5] bus: introduce device hot unplug handle

2018-04-06 Thread Guo, Jia

thanks.


On 4/4/2018 12:31 PM, Tan, Jianfeng wrote:



-Original Message-
From: Guo, Jia
Sent: Wednesday, April 4, 2018 2:17 AM
To: step...@networkplumber.org; Richardson, Bruce; Yigit, Ferruh;
Ananyev, Konstantin; gaetan.ri...@6wind.com; Wu, Jingjing;
tho...@monjalon.net; mo...@mellanox.com; Van Haaren, Harry; Tan,
Jianfeng
Cc: jblu...@infradead.org; shreyansh.j...@nxp.com; dev@dpdk.org; Guo,
Jia; Zhang, Helin
Subject: [PATCH V18 1/5] bus: introduce device hot unplug handle

For device hot unplug, we need some preparatory measures so that we do not
hit a memory fault after the device has been plugged out of the system, and
so that the running data path can recover rather than break.
This allows the buses to handle the device hot unplug event.
The following patch shows how to handle this case for the PCI bus.

Please squash this patch with the next one.


Signed-off-by: Jeff Guo 
---
v16->v15:
split patch, and remove the ops from RTE_VERIFY
---
  lib/librte_eal/common/include/rte_bus.h | 15 +++
  1 file changed, 15 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_bus.h
b/lib/librte_eal/common/include/rte_bus.h
index 6fb0834..ecd8b1c 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -168,6 +168,19 @@ typedef int (*rte_bus_unplug_t)(struct rte_device
*dev);
  typedef int (*rte_bus_parse_t)(const char *name, void *addr);

  /**
+ * Implementation specific hot unplug handler function which is responsible
+ * for handle the failure when hot unplug the device, guaranty the system
+ * would not crash in the case.
+ * @param dev
+ * Pointer of the device structure.
+ *
+ * @return
+ * 0 on success.
+ * !0 on error.
+ */
+typedef int (*rte_bus_handle_hot_unplug_t)(struct rte_device *dev);
+
+/**
   * Bus scan policies
   */
  enum rte_bus_scan_mode {
@@ -209,6 +222,8 @@ struct rte_bus {
rte_bus_plug_t plug; /**< Probe single device for drivers */
rte_bus_unplug_t unplug; /**< Remove single device from driver
*/
rte_bus_parse_t parse;   /**< Parse a device name */
+   rte_bus_handle_hot_unplug_t handle_hot_unplug; /**< handle when device
+   hot unplug */

May be just rte_bus_hot_unplug_t hot_unplug /**< Handle hot unplug device event 
*/


struct rte_bus_conf conf;/**< Bus configuration */
rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu
class */
  };
--
2.7.4




Re: [dpdk-dev] [PATCH V18 2/5] bus/pci: implement handle hot unplug operation

2018-04-06 Thread Guo, Jia



On 4/4/2018 1:25 PM, Tan, Jianfeng wrote:



-Original Message-
From: Guo, Jia
Sent: Wednesday, April 4, 2018 2:17 AM
To: step...@networkplumber.org; Richardson, Bruce; Yigit, Ferruh;
Ananyev, Konstantin; gaetan.ri...@6wind.com; Wu, Jingjing;
tho...@monjalon.net; mo...@mellanox.com; Van Haaren, Harry; Tan,
Jianfeng
Cc: jblu...@infradead.org; shreyansh.j...@nxp.com; dev@dpdk.org; Guo,
Jia; Zhang, Helin
Subject: [PATCH V18 2/5] bus/pci: implement handle hot unplug operation

When handling a device hot unplug event, remap the device resources to
dummy memory to avoid bus read/write errors.

Signed-off-by: Jeff Guo 
---
v16->v15;
split patch, merge some function to be simple
---
  drivers/bus/pci/pci_common.c | 42

  drivers/bus/pci/pci_common_uio.c | 33
+++
  drivers/bus/pci/private.h| 12 
  3 files changed, 87 insertions(+)

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 2a00f36..fa077ec 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -474,6 +474,47 @@ pci_find_device(const struct rte_device *start,
rte_dev_cmp_t cmp,
  }

  static int
+pci_handle_hot_unplug(struct rte_device *dev)
+{
+   struct rte_pci_device *pdev;
+   int ret;
+
+   if (dev == NULL)
+   return -EINVAL;
+
+   pdev = RTE_DEV_TO_PCI(dev);
+
+   /* remap resources for devices */
+   switch (pdev->kdrv) {
+   case RTE_KDRV_VFIO:
+#ifdef VFIO_PRESENT
+   /* TODO */
+#endif

What's the difference between uio and vfio? We can just fall through?
The VFIO mapping is functionally different from UIO, and the key point is
that I haven't implemented VFIO hotplug yet, so it is left as a TODO.

+   break;
+   case RTE_KDRV_IGB_UIO:
+   case RTE_KDRV_UIO_GENERIC:
+   if (rte_eal_using_phys_addrs()) {

Why do we care whether we are using physical addresses?

Please check the mapping function.

+   /* map resources for devices that use uio */
+   ret = pci_uio_remap_resource(pdev);
+   }
+   break;
+   case RTE_KDRV_NIC_UIO:
+   ret = pci_uio_remap_resource(pdev);
+   break;
+   default:
+   RTE_LOG(DEBUG, EAL,
+   "  Not managed by a supported kernel driver, 
skipped\n");
+   ret = 1;

-1 for such case?

thanks.

+   break;
+   }
+
+   if (ret != 0)
+   RTE_LOG(ERR, EAL, "failed to handle hot unplug of %s",
+   pdev->name);
+   return ret;
+}
+
+static int
  pci_plug(struct rte_device *dev)
  {
return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
@@ -503,6 +544,7 @@ struct rte_pci_bus rte_pci_bus = {
.unplug = pci_unplug,
.parse = pci_parse,
.get_iommu_class = rte_pci_get_iommu_class,
+   .handle_hot_unplug = pci_handle_hot_unplug,
},
.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
diff --git a/drivers/bus/pci/pci_common_uio.c
b/drivers/bus/pci/pci_common_uio.c
index 54bc20b..468ade4 100644
--- a/drivers/bus/pci/pci_common_uio.c
+++ b/drivers/bus/pci/pci_common_uio.c
@@ -146,6 +146,39 @@ pci_uio_unmap(struct mapped_pci_resource
*uio_res)
}
  }

+/* remap the PCI resource of a PCI device in private virtual memory */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev)

Why's this function uio specific? I think we can move it to pci_common.c.

Not convinced, because VFIO is not included in this patch set.

+{
+   int i;
+   uint64_t phaddr;
+   void *map_address;
+
+   if (dev == NULL)
+   return -1;
+
+   /* Map all BARs */

s/Map/Remap
  

+   for (i = 0; i != PCI_MAX_RESOURCE; i++) {
+   /* skip empty BAR */
+   phaddr = dev->mem_resource[i].phys_addr;
+   if (phaddr == 0)
+   continue;

How about just simple:

if (dev->mem_resource[i].phys_addr == 0)


+   pci_unmap_resource(dev->mem_resource[i].addr,
+   (size_t)dev->mem_resource[i].len);
+   map_address = pci_map_resource(
+   dev->mem_resource[i].addr, -1, 0,
+   (size_t)dev->mem_resource[i].len,
+   MAP_ANONYMOUS | MAP_FIXED);
+   if (map_address == MAP_FAILED) {
+   RTE_LOG(ERR, EAL,
+   "Cannot remap resource for device %s\n",
dev->name);
+   return -1;
+   }
+   }
+
+   return 0;
+}
+
  static struct mapped_pci_resource *
  pci_uio_find_resource(struct rte_pci_device *dev)
  {
diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
index 88fa587..7a862ef 100644
--- a/drivers/bus/pci/private.h
+++ b

[dpdk-dev] [PATCH V19 2/4] eal: add failure handler mechanism for hot plug

2018-04-06 Thread Jeff Guo
This patch introduces an API (rte_dev_handle_hot_unplug) to handle device
hot unplug events. When a device is hot-unplugged, its resources become
invalid; if those resources are still read or written unexpectedly, the
system will crash. The API lets the user register a hot unplug handler:
when a hot unplug failure occurs, the working thread is blocked until the
uevent mechanism has recovered and remapped the memory, which guarantees
that the application keeps running smoothly.
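
The blocking described above relies on the classic sigsetjmp()/siglongjmp()
pattern: each worker saves its context, and when a stale access to the
unplugged device raises SIGBUS, the handler long-jumps back so the worker
can wait for the remap to complete. A stripped-down sketch of that pattern
(the symbol names are illustrative stand-ins, not the exact EAL code):

#include <setjmp.h>
#include <signal.h>
#include <string.h>
#include <pthread.h>

/* Stand-ins for the per-lcore jump buffer and recovery condition. */
static __thread sigjmp_buf unplug_jmp_env;
static pthread_mutex_t recovery_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t recovery_cond = PTHREAD_COND_INITIALIZER;

static void
sigbus_handler(int signum __attribute__((unused)))
{
        /* jump back into the worker that faulted on the unplugged device */
        siglongjmp(unplug_jmp_env, 1);
}

static void
install_sigbus_handler(void)
{
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = sigbus_handler;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGBUS, &sa, NULL);
}

/* Called by each worker before it touches device memory. */
static int
handle_hot_unplug(void)
{
        if (sigsetjmp(unplug_jmp_env, 1) == 0)
                return 0;       /* normal path: context saved, keep polling */

        /* reached via siglongjmp(): wait until remapping has finished
         * (a real implementation would loop on a predicate) */
        pthread_mutex_lock(&recovery_lock);
        pthread_cond_wait(&recovery_cond, &recovery_lock);
        pthread_mutex_unlock(&recovery_lock);
        return 1;               /* caller can log and continue its loop */
}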

Signed-off-by: Jeff Guo 
---
v19->v18:
add a note about the limitation on multiple hotplug events
---
 doc/guides/rel_notes/release_18_05.rst  |   6 ++
 kernel/linux/igb_uio/igb_uio.c  |   4 +
 lib/librte_eal/common/include/rte_dev.h |  19 +
 lib/librte_eal/linuxapp/eal/eal_dev.c   | 140 +++-
 lib/librte_eal/rte_eal_version.map  |   1 +
 5 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_18_05.rst 
b/doc/guides/rel_notes/release_18_05.rst
index cb9e050..2707e73 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -70,6 +70,12 @@ New Features
 
   Linux uevent is supported as backend of this device event notification 
framework.
 
+* **Added hot plug failure handler.**
+
+  Added a failure handler machenism to handle hot unplug device.
+
+  * ``rte_dev_handle_hot_unplug`` for handle hot unplug device failure.
+
 API Changes
 ---
 
diff --git a/kernel/linux/igb_uio/igb_uio.c b/kernel/linux/igb_uio/igb_uio.c
index 4cae4dd..293c310 100644
--- a/kernel/linux/igb_uio/igb_uio.c
+++ b/kernel/linux/igb_uio/igb_uio.c
@@ -344,6 +344,10 @@ igbuio_pci_release(struct uio_info *info, struct inode 
*inode)
struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *dev = udev->pdev;
 
+   /* check if device has been remove before release */
+   if ((&dev->dev.kobj)->state_remove_uevent_sent == 1)
+   return -1;
+
mutex_lock(&udev->lock);
if (--udev->refcnt > 0) {
mutex_unlock(&udev->lock);
diff --git a/lib/librte_eal/common/include/rte_dev.h 
b/lib/librte_eal/common/include/rte_dev.h
index a5203e7..17c446d 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -361,4 +361,23 @@ rte_dev_event_monitor_start(void);
  */
 int __rte_experimental
 rte_dev_event_monitor_stop(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * It can be used to register the device signal bus handler, and save the
+ * current environment for each thread, when signal bus error invoke, the
+ * handler would restore the environment by long jmp to each working
+ * thread previous locate, then block the thread to waiting until the memory
+ * recovery and remapping be finished, that would guaranty the system not
+ * crash when the device be hot unplug.
+ *
+ * @param none
+ * @return
+ *   - From a successful direct invocation, zero.
+ *   - From a call of siglongjmp(), non_zero.
+ */
+int __rte_experimental
+rte_dev_handle_hot_unplug(void);
 #endif /* _RTE_DEV_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c 
b/lib/librte_eal/linuxapp/eal/eal_dev.c
index 9478a39..84b7efc 100644
--- a/lib/librte_eal/linuxapp/eal/eal_dev.c
+++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
@@ -4,6 +4,9 @@
 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 #include 
 
@@ -13,12 +16,17 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "eal_private.h"
 
 static struct rte_intr_handle intr_handle = {.fd = -1 };
 static bool monitor_started;
 
+pthread_mutex_t failure_recovery_lock;
+pthread_cond_t failure_recovery_cond;
+
 #define EAL_UEV_MSG_LEN 4096
 #define EAL_UEV_MSG_ELEM_LEN 128
 
@@ -32,6 +40,22 @@ enum eal_dev_event_subsystem {
EAL_DEV_EVENT_SUBSYSTEM_MAX
 };
 
+static RTE_DEFINE_PER_LCORE(sigjmp_buf, unplug_longjmp_env);
+
+static void sigbus_handler(int signum __rte_unused)
+{
+   RTE_LOG(DEBUG, EAL, "receive SIGBUS error!\n");
+   siglongjmp(RTE_PER_LCORE(unplug_longjmp_env), 1);
+}
+
+static int cmp_dev_name(const struct rte_device *dev,
+   const void *_name)
+{
+   const char *name = _name;
+
+   return strcmp(dev->name, name);
+}
+
 static int
 dev_uev_socket_fd_create(void)
 {
@@ -132,6 +156,31 @@ dev_uev_parse(const char *buf, struct rte_dev_event 
*event, int length)
return 0;
 }
 
+static int
+dev_uev_remove_handler(struct rte_device *dev)
+{
+   struct rte_bus *bus = rte_bus_find_by_device_name(dev->name);
+   int ret;
+
+   if (!dev)
+   return -1;
+
+   if (bus->handle_hot_unplug) {
+   /**
+* call bus ops to handle hot unplug.
+*/
+   ret = bus->handle_hot_unplug(dev);
+   if (ret) {
+   RTE_LOG(ERR, EAL,
+   "It cannot handle hot unplug for device (%s) "
+   "on the bus.\n ",
+   

[dpdk-dev] [PATCH V19 3/4] eal: add driver auto bind for hot insertion

2018-04-06 Thread Jeff Guo
Normally we use driverctl or dpdk-devbind.py to bind a kernel driver before
the application runs, but there is no function to automatically bind a
driver at runtime. This patch introduces a new API
(rte_dev_bind_kernel_driver) that lets the user bind a specific kernel
driver according to their own policy. This prepares for the next step of
attaching the device, so the application keeps running smoothly when a
hotplug event occurs.

Signed-off-by: Jeff Guo 
---
v19->v18:
no change
---
 doc/guides/rel_notes/release_18_05.rst  |  8 +++--
 lib/librte_eal/bsdapp/eal/eal_dev.c |  7 +
 lib/librte_eal/common/include/rte_dev.h | 16 ++
 lib/librte_eal/linuxapp/eal/eal_dev.c   | 54 +
 lib/librte_eal/rte_eal_version.map  |  1 +
 5 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/doc/guides/rel_notes/release_18_05.rst 
b/doc/guides/rel_notes/release_18_05.rst
index 2707e73..f8822d3 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -70,11 +70,13 @@ New Features
 
   Linux uevent is supported as backend of this device event notification 
framework.
 
-* **Added hot plug failure handler.**
+* **Added hot plug failure handler and kernel driver auto-bind func**
 
-  Added a failure handler machenism to handle hot unplug device.
+  Added a failure handler machenism to handle hot unplug device, and added an 
kernel driver
+  auto bind function for hot plug insertion. The list of new APIs:
 
-  * ``rte_dev_handle_hot_unplug`` for handle hot unplug device failure.
+  * ``rte_dev_handle_hot_unplug`` for handle hot uplug device failure.
+  * ``rte_dev_bind_kernel_driver`` for hot plug insertion.
 
 API Changes
 ---
diff --git a/lib/librte_eal/bsdapp/eal/eal_dev.c 
b/lib/librte_eal/bsdapp/eal/eal_dev.c
index 1c6c51b..e953a87 100644
--- a/lib/librte_eal/bsdapp/eal/eal_dev.c
+++ b/lib/librte_eal/bsdapp/eal/eal_dev.c
@@ -19,3 +19,10 @@ rte_dev_event_monitor_stop(void)
RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
return -1;
 }
+
+int __rte_experimental
+rte_dev_bind_driver(const char *dev_name, enum rte_kernel_driver kdrv_type)
+{
+   RTE_LOG(ERR, EAL, "Bind driver is not supported for FreeBSD\n");
+   return -1;
+}
diff --git a/lib/librte_eal/common/include/rte_dev.h 
b/lib/librte_eal/common/include/rte_dev.h
index 17c446d..35f45d3 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -380,4 +380,20 @@ rte_dev_event_monitor_stop(void);
  */
 int __rte_experimental
 rte_dev_handle_hot_unplug(void);
+
+/**
+ * It can be used to bind a device to a specific type of kernel driver.
+ *
+ * @param dev_name
+ *  The device name.
+ * @param kdrv_type
+ *  The specific kernel driver's type.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_bind_kernel_driver(const char *dev_name,
+  enum rte_kernel_driver kdrv_type);
 #endif /* _RTE_DEV_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c 
b/lib/librte_eal/linuxapp/eal/eal_dev.c
index 84b7efc..2ad7444 100644
--- a/lib/librte_eal/linuxapp/eal/eal_dev.c
+++ b/lib/librte_eal/linuxapp/eal/eal_dev.c
@@ -4,6 +4,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -359,3 +360,56 @@ rte_dev_event_monitor_stop(void)
 
return 0;
 }
+
+int __rte_experimental
+rte_dev_bind_kernel_driver(const char *dev_name,
+  enum rte_kernel_driver kdrv_type)
+{
+   const char *kdrv_name = NULL;
+   char drv_override_path[1024];
+   int drv_override_fd = -1;
+
+   if (!dev_name || !kdrv_type)
+   return -1;
+
+   switch (kdrv_type) {
+   case RTE_KDRV_IGB_UIO:
+   kdrv_name = "igb_uio";
+   break;
+   case RTE_KDRV_VFIO:
+   kdrv_name = "vfio-pci";
+   break;
+   case RTE_KDRV_UIO_GENERIC:
+   kdrv_name = "uio_pci_generic";
+   break;
+   case RTE_KDRV_NIC_UIO:
+   RTE_LOG(ERR, EAL, "Don't support to bind nic uio driver.\n");
+   goto err;
+   default:
+   break;
+   }
+
+   snprintf(drv_override_path, sizeof(drv_override_path),
+   "/sys/bus/pci/devices/%s/driver_override", dev_name);
+
+   /* specify the driver for a device by writing to driver_override */
+   drv_override_fd = open(drv_override_path, O_WRONLY);
+   if (drv_override_fd < 0) {
+   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+   drv_override_path, strerror(errno));
+   goto err;
+   }
+
+   if (write(drv_override_fd, kdrv_name, sizeof(kdrv_name)) < 0) {
+   RTE_LOG(ERR, EAL,
+   "Error: bind failed - Cannot write "
+   "driver %s to device %s\n", kdrv_name, dev_name);
+   goto err;
+   }
+
+   close(drv

[dpdk-dev] [PATCH V19 1/4] bus/pci: introduce device hot unplug handle

2018-04-06 Thread Jeff Guo
For device hot unplug, we need some preparatory measures so that we do not
hit a memory fault after the device has been plugged out of the system, and
so that the running data path can recover rather than break.
This allows the buses to handle the device hot unplug event.
The patch only enables the ops for the PCI bus: when handling a device hot
unplug event, it remaps the device resources to dummy memory to avoid bus
read/write errors. Other buses can implement this ops themselves accordingly.
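
The "dummy memory" remap boils down to an anonymous, fixed-address mmap()
placed over the BAR's existing virtual address, so stale loads and stores
land in private zero pages instead of faulting on the vanished device. A
bare-bones sketch of that technique (an illustration of the mmap flags,
not the pci_uio_remap_resource() helper itself):

#include <stddef.h>
#include <stdio.h>
#include <sys/mman.h>

/* "addr" and "len" would come from the saved BAR resource table. */
static int
remap_bar_to_dummy(void *addr, size_t len)
{
        void *p;

        /* MAP_FIXED reuses the exact address the data path already holds;
         * MAP_ANONYMOUS backs it with plain zero pages. */
        p = mmap(addr, len, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return -1;
        }
        return 0;
}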

Signed-off-by: Jeff Guo 
---
v19->v18:
fix some typos and squash patches
---
 drivers/bus/pci/pci_common.c| 42 +
 drivers/bus/pci/pci_common_uio.c| 32 +
 drivers/bus/pci/private.h   | 12 ++
 lib/librte_eal/common/include/rte_bus.h | 15 
 4 files changed, 101 insertions(+)

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 2a00f36..09192ed 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -474,6 +474,47 @@ pci_find_device(const struct rte_device *start, 
rte_dev_cmp_t cmp,
 }
 
 static int
+pci_handle_hot_unplug(struct rte_device *dev)
+{
+   struct rte_pci_device *pdev;
+   int ret;
+
+   if (dev == NULL)
+   return -EINVAL;
+
+   pdev = RTE_DEV_TO_PCI(dev);
+
+   /* remap resources for devices */
+   switch (pdev->kdrv) {
+   case RTE_KDRV_VFIO:
+#ifdef VFIO_PRESENT
+   /* TODO */
+#endif
+   break;
+   case RTE_KDRV_IGB_UIO:
+   case RTE_KDRV_UIO_GENERIC:
+   if (rte_eal_using_phys_addrs()) {
+   /* map resources for devices that use uio */
+   ret = pci_uio_remap_resource(pdev);
+   }
+   break;
+   case RTE_KDRV_NIC_UIO:
+   ret = pci_uio_remap_resource(pdev);
+   break;
+   default:
+   RTE_LOG(DEBUG, EAL,
+   "  Not managed by a supported kernel driver, 
skipped\n");
+   ret = -1;
+   break;
+   }
+
+   if (ret != 0)
+   RTE_LOG(ERR, EAL, "failed to handle hot unplug of %s",
+   pdev->name);
+   return ret;
+}
+
+static int
 pci_plug(struct rte_device *dev)
 {
return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
@@ -503,6 +544,7 @@ struct rte_pci_bus rte_pci_bus = {
.unplug = pci_unplug,
.parse = pci_parse,
.get_iommu_class = rte_pci_get_iommu_class,
+   .handle_hot_unplug = pci_handle_hot_unplug,
},
.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c
index 54bc20b..31a4094 100644
--- a/drivers/bus/pci/pci_common_uio.c
+++ b/drivers/bus/pci/pci_common_uio.c
@@ -146,6 +146,38 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res)
}
 }
 
+/* remap the PCI resource of a PCI device in private virtual memory */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev)
+{
+   int i;
+   void *map_address;
+
+   if (dev == NULL)
+   return -1;
+
+   /* Remap all BARs */
+   for (i = 0; i != PCI_MAX_RESOURCE; i++) {
+   /* skip empty BAR */
+   if (dev->mem_resource[i].phys_addr == 0)
+   continue;
+   pci_unmap_resource(dev->mem_resource[i].addr,
+   (size_t)dev->mem_resource[i].len);
+   map_address = pci_map_resource(
+   dev->mem_resource[i].addr, -1, 0,
+   (size_t)dev->mem_resource[i].len,
+   MAP_ANONYMOUS | MAP_FIXED);
+   if (map_address == MAP_FAILED) {
+   RTE_LOG(ERR, EAL,
+   "Cannot remap resource for device %s\n",
+   dev->name);
+   return -1;
+   }
+   }
+
+   return 0;
+}
+
 static struct mapped_pci_resource *
 pci_uio_find_resource(struct rte_pci_device *dev)
 {
diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
index 88fa587..7a862ef 100644
--- a/drivers/bus/pci/private.h
+++ b/drivers/bus/pci/private.h
@@ -173,6 +173,18 @@ void pci_uio_free_resource(struct rte_pci_device *dev,
struct mapped_pci_resource *uio_res);
 
 /**
+ * remap the pci uio resource..
+ *
+ * @param dev
+ *   Point to the struct rte pci device.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev);
+
+/**
  * Map device memory to uio resource
  *
  * This function is private to EAL.
diff --git a/lib/librte_eal/common/include/rte_bus.h 
b/lib/librte_eal/common/include/rte_bus.h
index 6fb0834..729ff34 100644
--- a/lib/libr

[dpdk-dev] [PATCH V19 0/4] add hot plug failure and auto bind handler

2018-04-06 Thread Jeff Guo
Previously, the device event monitor mechanism was introduced.
But for device hot unplug, if we want the data path not to break when a
device is hot-plugged in or out, we still need some preparatory work for
device detach and attach, so that we do not hit a memory fault after the
device has been plugged out of the system, and so that the user can
directly attach a device that has been auto-bound to the specific kernel
driver.

This patch set introduces two APIs to handle failure and auto-bind for the
hot plug feature, and also uses testpmd to show an example of how to use
these two APIs to process hot plug events, so that the flow runs smoothly
as in the cases below (see the sketch after this list):

1) hot plug removal:
plugout->failure handle->stop forward->stop port->close port->detach port

2) hot plug insertion:
plugin->kernel driver auto bind->attach port->start port

With this mechanism, every user, such as the fail-safe driver or testpmd,
will be able to develop their own hotplug application if they enable the
device event monitor.
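
A hedged sketch of what such an application-level handler can look like,
modeled on the testpmd-style helpers used in patch 4/4; the helper names,
header and bind policy below are assumptions for illustration, not a fixed
API:

#include <stdint.h>
#include <rte_dev.h>
#include "testpmd.h"            /* portid_t and the port helpers (assumed) */

static void
app_dev_event_callback(char *device_name, enum rte_dev_event_type type,
                       void *param)
{
        portid_t port_id = (portid_t)(intptr_t)param;

        switch (type) {
        case RTE_DEV_EVENT_REMOVE:
                /* plugout -> failure handle -> stop fwd/port -> close -> detach */
                stop_packet_forwarding();
                stop_port(port_id);
                close_port(port_id);
                detach_port(port_id);
                break;
        case RTE_DEV_EVENT_ADD:
                /* plugin -> auto bind kernel driver -> attach (then start) */
                rte_dev_bind_kernel_driver(device_name, RTE_KDRV_IGB_UIO);
                attach_port(device_name);
                break;
        default:
                break;
        }
}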

patchset history:
v19->v18:
add a note about the multiple-hotplug limitation, fix some typos, squash patches.

v18->v15:
add document, add signal bus handler, refine the code to be more clear.

the prior patch history please check the patch set
"add device event monitor framework"

Jeff Guo (4):
  bus/pci: introduce device hot unplug handle
  eal: add failure handler mechanism for hot plug
  eal: add driver auto bind for hot insertion
  app/testpmd: use auto handle for hotplug

 app/test-pmd/testpmd.c  | 199 
 app/test-pmd/testpmd.h  |   9 ++
 doc/guides/rel_notes/release_18_05.rst  |   8 ++
 drivers/bus/pci/pci_common.c|  42 +++
 drivers/bus/pci/pci_common_uio.c|  32 +
 drivers/bus/pci/private.h   |  12 ++
 kernel/linux/igb_uio/igb_uio.c  |   4 +
 lib/librte_eal/bsdapp/eal/eal_dev.c |   7 ++
 lib/librte_eal/common/include/rte_bus.h |  15 +++
 lib/librte_eal/common/include/rte_dev.h |  35 ++
 lib/librte_eal/linuxapp/eal/eal_dev.c   | 194 ++-
 lib/librte_eal/rte_eal_version.map  |   2 +
 12 files changed, 534 insertions(+), 25 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH V19 4/4] app/testpmd: use auto handle for hotplug

2018-04-06 Thread Jeff Guo
Use testpmd as an example to show how an application can smoothly handle
failure when a device is hot-removed, and how to auto-bind the kernel
driver in preparation for attaching the device when it is hot-inserted.

Signed-off-by: Jeff Guo 
---
v19->v18:
clean code
---
 app/test-pmd/testpmd.c | 199 +++--
 app/test-pmd/testpmd.h |   9 +++
 2 files changed, 184 insertions(+), 24 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index d2c122a..d7fa913 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -285,6 +285,8 @@ uint8_t lsc_interrupt = 1; /* enabled by default */
  */
 uint8_t rmv_interrupt = 1; /* enabled by default */
 
+#define HOT_PLUG_FOR_ALL_DEVICE -1
+#define ALL_CALLBACK -1
 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
 
 /*
@@ -387,6 +389,8 @@ uint8_t bitrate_enabled;
 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
 
+static struct hotplug_request_list hp_list;
+
 /* Forward function declarations */
 static void map_port_queue_stats_mapping_registers(portid_t pi,
   struct rte_port *port);
@@ -397,9 +401,13 @@ static int eth_event_callback(portid_t port_id,
 static void eth_dev_event_callback(char *device_name,
enum rte_dev_event_type type,
void *param);
-static int eth_dev_event_callback_register(void);
-static int eth_dev_event_callback_unregister(void);
+static int eth_dev_event_callback_register(portid_t port_id);
+static int eth_dev_event_callback_unregister(portid_t port_id);
+
+static bool in_hotplug_list(const char *dev_name);
 
+static int hotplug_list_add(struct rte_device *device,
+   enum rte_kernel_driver device_kdrv);
 
 /*
  * Check if all the ports are started.
@@ -1120,11 +1128,17 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t 
pkt_fwd)
uint64_t tics_datum;
uint64_t tics_current;
uint8_t idx_port, cnt_ports;
+   int ret;
 
cnt_ports = rte_eth_dev_count();
tics_datum = rte_rdtsc();
tics_per_1sec = rte_get_timer_hz();
 #endif
+   if (hot_plug) {
+   ret = rte_dev_handle_hot_unplug();
+   if (ret)
+   printf("The device is being hot unplug!\n");
+   }
fsm = &fwd_streams[fc->stream_idx];
nb_fs = fc->stream_nb;
do {
@@ -1863,15 +1877,24 @@ reset_port(portid_t pid)
 }
 
 static int
-eth_dev_event_callback_register(void)
+eth_dev_event_callback_register(portid_t port_id)
 {
int ret;
+   char *device_name;
 
+   /* if port id equal -1, unregister event callbacks for all device. */
+   if (port_id == (portid_t)HOT_PLUG_FOR_ALL_DEVICE) {
+   device_name = NULL;
+   } else {
+   device_name = strdup(rte_eth_devices[port_id].device->name);
+   if (!device_name)
+   return -1;
+   }
/* register the device event callback */
-   ret = rte_dev_event_callback_register(NULL,
-   eth_dev_event_callback, NULL);
+   ret = rte_dev_event_callback_register(device_name,
+   eth_dev_event_callback, (void *)(intptr_t)port_id);
if (ret) {
-   printf("Failed to register device event callback\n");
+   printf("Failed to register device event callback.\n");
return -1;
}
 
@@ -1880,15 +1903,25 @@ eth_dev_event_callback_register(void)
 
 
 static int
-eth_dev_event_callback_unregister(void)
+eth_dev_event_callback_unregister(portid_t port_id)
 {
int ret;
+   char *device_name;
+
+   /* if port id equal -1, unregister all device event callbacks */
+   if (port_id == (portid_t)HOT_PLUG_FOR_ALL_DEVICE) {
+   device_name = NULL;
+   } else {
+   device_name = strdup(rte_eth_devices[port_id].device->name);
+   if (!device_name)
+   return -1;
+   }
 
/* unregister the device event callback */
-   ret = rte_dev_event_callback_unregister(NULL,
-   eth_dev_event_callback, NULL);
-   if (ret < 0) {
-   printf("Failed to unregister device event callback\n");
+   ret = rte_dev_event_callback_unregister(device_name,
+   eth_dev_event_callback, (void *)(intptr_t)port_id);
+   if (ret) {
+   printf("Failed to unregister device event callback.\n");
return -1;
}
 
@@ -1911,6 +1944,8 @@ attach_port(char *identifier)
if (rte_eth_dev_attach(identifier, &pi))
return;
 
+   eth_dev_event_callback_register(pi);
+
socket_id = (unsigned)rte_eth_dev_socket_id(pi);
/* if socket_id is invalid, set to 0 */
if (check_socket_id(socket_id) < 0)
@@ -1922,6 +1957,12 @@ attach_port(char *identifier)
 
ports[pi].port_s

[dpdk-dev] [PATCH] eal: fix clang compilation error on ARM64

2018-04-06 Thread Pavan Nikhilesh
Use __atomic_exchange_n instead of __atomic_exchange_(2/4/8).

Fixes: ff2863570fcc ("eal: introduce atomic exchange operation")

Signed-off-by: Pavan Nikhilesh 
---
 lib/librte_eal/common/include/generic/rte_atomic.h | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h 
b/lib/librte_eal/common/include/generic/rte_atomic.h
index 8652c0264..91a6d615a 100644
--- a/lib/librte_eal/common/include/generic/rte_atomic.h
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -12,7 +12,9 @@
  * This file defines a generic API for atomic operations.
  */
 
+#include 
 #include 
+
 #include 
 
 #ifdef __DOXYGEN__
@@ -212,7 +214,7 @@ rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val);
 static inline uint16_t
 rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
 {
-   return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
+   return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
 }
 #endif
 
@@ -491,7 +493,7 @@ rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val);
 static inline uint32_t
 rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
 {
-   return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
+   return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
 }
 #endif
 
@@ -769,7 +771,7 @@ rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val);
 static inline uint64_t
 rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
 {
-   return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
+   return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
 }
 #endif
 
-- 
2.17.0
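
The generic builtin infers the operand width from the pointer type, so a
single spelling covers the 16/32/64-bit cases that previously needed the
_2/_4/_8 suffixes. A small standalone illustration:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint16_t v16 = 1;
        uint64_t v64 = 2;

        /* size is inferred from the pointer type, no _2/_4/_8 suffix needed */
        uint16_t old16 = __atomic_exchange_n(&v16, (uint16_t)7, __ATOMIC_SEQ_CST);
        uint64_t old64 = __atomic_exchange_n(&v64, (uint64_t)9, __ATOMIC_SEQ_CST);

        printf("old16=%u new16=%u old64=%lu new64=%lu\n",
               old16, v16, (unsigned long)old64, (unsigned long)v64);
        return 0;
}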



Re: [dpdk-dev] [PATCH v3 1/4] app/eventdev: add event timer adapter as a producer

2018-04-06 Thread Jerin Jacob
-Original Message-
> Date: Thu,  5 Apr 2018 17:23:26 +0530
> From: Pavan Nikhilesh 
> To: jerin.ja...@caviumnetworks.com, santosh.shu...@caviumnetworks.com,
>  erik.g.carri...@intel.com
> Cc: dev@dpdk.org, Pavan Nikhilesh 
> Subject: [dpdk-dev] [PATCH v3 1/4] app/eventdev: add event timer adapter as
>  a producer
> X-Mailer: git-send-email 2.16.3
> 
> Add event timer adapter as producer option that can be selected by
> passing --prod_type_timerdev.
> 
> Signed-off-by: Pavan Nikhilesh 

Acked-by: Jerin Jacob 


Re: [dpdk-dev] [PATCH v3 2/4] app/eventdev: add burst mode for event timer adapter

2018-04-06 Thread Jerin Jacob
-Original Message-
> Date: Thu,  5 Apr 2018 17:23:27 +0530
> From: Pavan Nikhilesh 
> To: jerin.ja...@caviumnetworks.com, santosh.shu...@caviumnetworks.com,
>  erik.g.carri...@intel.com
> Cc: dev@dpdk.org, Pavan Nikhilesh 
> Subject: [dpdk-dev] [PATCH v3 2/4] app/eventdev: add burst mode for event
>  timer adapter
> X-Mailer: git-send-email 2.16.3
> 
> Add burst mode for event timer adapter that can be selected by passing
> --prod_type_timerdev_burst.
> 
> Signed-off-by: Pavan Nikhilesh 

Acked-by: Jerin Jacob 



Re: [dpdk-dev] [PATCH v3 3/4] app/eventdev: add options to configure event timer adapter

2018-04-06 Thread Jerin Jacob
-Original Message-
> Date: Thu,  5 Apr 2018 17:23:28 +0530
> From: Pavan Nikhilesh 
> To: jerin.ja...@caviumnetworks.com, santosh.shu...@caviumnetworks.com,
>  erik.g.carri...@intel.com
> Cc: dev@dpdk.org, Pavan Nikhilesh 
> Subject: [dpdk-dev] [PATCH v3 3/4] app/eventdev: add options to configure
>  event timer adapter
> X-Mailer: git-send-email 2.16.3
> 
> Add options to configure expiry timeout, max number of timers and number
> of event timer adapters through command line parameters.
> 
> Signed-off-by: Pavan Nikhilesh 
Acked-by: Jerin Jacob 




Re: [dpdk-dev] [PATCH v3 1/4] app/eventdev: add event timer adapter as a producer

2018-04-06 Thread Jerin Jacob
-Original Message-
> Date: Thu,  5 Apr 2018 17:23:26 +0530
> From: Pavan Nikhilesh 
> To: jerin.ja...@caviumnetworks.com, santosh.shu...@caviumnetworks.com,
>  erik.g.carri...@intel.com
> Cc: dev@dpdk.org, Pavan Nikhilesh 
> Subject: [dpdk-dev] [PATCH v3 1/4] app/eventdev: add event timer adapter as
>  a producer
> X-Mailer: git-send-email 2.16.3
> 
> Add event timer adapter as producer option that can be selected by
> passing --prod_type_timerdev.
> 
> Signed-off-by: Pavan Nikhilesh 
> +static inline int
> +perf_event_timer_producer(void *arg)
> +{
> + struct prod_data *p  = arg;
> + struct test_perf *t = p->t;
> + struct evt_options *opt = t->opt;
> + uint32_t flow_counter = 0;
> + uint64_t count = 0;
> + uint64_t arm_latency = 0;
> + const uint8_t nb_timer_adptrs = opt->nb_timer_adptrs;
> + const uint32_t nb_flows = t->nb_flows;
> + const uint64_t nb_timers = opt->nb_timers;
> + struct rte_mempool *pool = t->pool;
> + struct perf_elt *m = NULL;

NULL assignment is not required.

> + struct rte_event_timer_adapter **adptr = t->timer_adptr;
> + uint64_t timeout_ticks = opt->optm_bkt_tck_nsec ?
> + (opt->nb_bkt_tcks * opt->bkt_tck_nsec)
> + / opt->optm_bkt_tck_nsec : opt->nb_bkt_tcks;
> +
>  struct perf_elt {
> + struct rte_event_timer tim;

This will make perf_elt span two cache lines; use a union + offset scheme
to fit the timestamp into the first cache line.

>   uint64_t timestamp;
>  } __rte_cache_aligned;

With above changes:
Acked-by: Jerin Jacob 



Re: [dpdk-dev] [RFT] vmxnet3: coverity reported defect

2018-04-06 Thread Neil Horman
On Thu, Apr 05, 2018 at 08:36:18AM -0700, Stephen Hemminger wrote:
> I noticed Coverity defect in DPDK number 124563 was assigned to me.
> 
> It looks like vmxnet3 driver code doesn't handle case where host incorrectly
> sends a frame with out setting SOP bit in first segment. This would lead to
> rxq->start_seq being NULL.
> 
> Maybe something like this would fix it.  I don't have VMware (or time) to 
> test.
> 
> diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c 
> b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> index 57557492e0f5..3118d94add90 100644
> --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
> +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> @@ -813,6 +813,13 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf 
> **rx_pkts, uint16_t nb_pkts)
>  
>   RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
>  
> + if (unlikely(start == NULL)) {
> + PMD_RX_LOG(ERR, "Missing sop");
> +
> + rte_pktmbuf_free_seg(rxm);
> + goto rcd_done;
> + }
> +
>   start->pkt_len += rxm->data_len;
>   start->nb_segs++;
>  
This looks reasonable to me, though my vmware instance is down at the moment, so
I'm unable to test
> 
> PS: the email for the VMXNET3 maintainer  Shrikrishna Khare 
>  bounces.
> 
I think it needs to be updated to point at pv-driv...@vmware.com; that's what
they use in the kernel these days.

Neil



[dpdk-dev] [PATCH] net/tap: fix memcpy with incorrect size

2018-04-06 Thread Pavan Nikhilesh
Fix incorrect sizeof operation being used for getting mac addr size.

Found while compiling with arm64 clang.
drivers/net/tap/rte_eth_tap.c:1410:40: error: argument to 'sizeof' in
'memcpy' call is the same pointer type 'struct ether_addr *' as the
destination; expected 'struct ether_addr' or an explicit length
[-Werror,-Wsizeof-pointer-memaccess]
   rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(mac_addr));
  ~~^~~~

Fixes: bcab6c1d27fa ("net/tap: allow user MAC to be passed as args")

Signed-off-by: Pavan Nikhilesh 
---
 drivers/net/tap/rte_eth_tap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 3e4f7a8e8..6ed4a8a02 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -1406,7 +1406,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char 
*tap_name,
if (is_zero_ether_addr(mac_addr))
eth_random_addr((uint8_t *)&pmd->eth_addr);
else
-   rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(mac_addr));
+   rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(*mac_addr));
 
/* Immediately create the netdevice (this will create the 1st queue). */
/* rx queue */
-- 
2.17.0



Re: [dpdk-dev] [PATCH v3 4/4] doc: update test eventdev documentation

2018-04-06 Thread Jerin Jacob
-Original Message-
> Date: Thu,  5 Apr 2018 17:23:29 +0530
> From: Pavan Nikhilesh 
> To: jerin.ja...@caviumnetworks.com, santosh.shu...@caviumnetworks.com,
>  erik.g.carri...@intel.com
> Cc: dev@dpdk.org, Pavan Nikhilesh 
> Subject: [dpdk-dev] [PATCH v3 4/4] doc: update test eventdev documentation
> X-Mailer: git-send-email 2.16.3
> 
> Update documentation to include new options added for using event timer
> adapter as a producer.
> 
> Signed-off-by: Pavan Nikhilesh 
> ---
>  doc/guides/tools/testeventdev.rst | 60 
> +++
>  1 file changed, 60 insertions(+)
> 
> diff --git a/doc/guides/tools/testeventdev.rst 
> b/doc/guides/tools/testeventdev.rst
> index 77480ffe9..eb3bc3ae5 100644
> --- a/doc/guides/tools/testeventdev.rst
> +++ b/doc/guides/tools/testeventdev.rst
> @@ -123,6 +123,36 @@ The following are the application command-line options:
>  
>  Use ethernet device as producer.
>  
> +* ``--prod_type_timerdev``
> +
> +Use event timer adapter as producer.
> +
> + * ``--prod_type_timerdev_burst``
> +
> +Use burst mode event timer adapter as producer.
> +
> + * ``--bkt_tck_nsec``
> +
> +Used to dictate number of nano seconds between bucket traversal of 
> the
> +event timer adapter. Refer `rte_event_timer_adapter_conf`.

IMO, instead of "bkt_tck_nsec", it makes sense to call it
"resolution" or "timer_tick_ns" (the term used in the
rte_event_timer_adapter_conf spec). If you agree, please change the code and
documentation accordingly.


> +
> + * ``--max_tmo_nsec``
> +
> +Used to configure event timer adapter max arm timeout in nano 
> seconds.
> +
> + * ``--nb_bkt_tcks``
> +
> +Dictate the number of bucket ticks after which the event timer 
> expires.

IMO, instead of "nb_bkt_tcks", it makes sense to call it
"expiry_ns" or something similar. Please express all configuration
options in nanoseconds so that the user does not need to know the
internals of the implementation.

> +
> + * ``--nb_timers``
> +
> +Number of event timers each producer core will generate.
> +
> + * ``--nb_timer_adptrs``
> +
> +Number of event timer adapters to be used. Each adapter is used in
> +round robin manner by the producer cores.
> +
>  Eventdev Tests
>  --
>  
> @@ -347,6 +377,13 @@ Supported application command line options are 
> following::
>  --fwd_latency
>  --queue_priority
>  --prod_type_ethdev
> +--prod_type_timerdev_burst
> +--prod_type_timerdev
> +--bkt_tck_nsec

see above

> +--max_tmo_nsec
> +--nb_bkt_tcks

see above

> +--nb_timers
> +--nb_timer_adptrs
>  
>  Example
>  ^^^
> @@ -365,6 +402,14 @@ Example command to run perf queue test with ethernet 
> ports:
> sudo build/app/dpdk-test-eventdev --vdev=event_sw0 -- \
>  --test=perf_queue --plcores=2 --wlcore=3 --stlist=p 
> --prod_type_ethdev
>  
> +Example command to run perf queue test with event timer adapter:
> +
> +.. code-block:: console
> +
> +   sudo  build/app/dpdk-test-eventdev --vdev="event_octeontx" -- \
> +--wlcores 4 --plcores 12 --test perf_queue --stlist=a \
> +--prod_type_timerdev --fwd_latency
> +
>  PERF_ATQ Test
>  ~~~
>  
> @@ -431,6 +476,13 @@ Supported application command line options are 
> following::
>  --worker_deq_depth
>  --fwd_latency
>  --prod_type_ethdev
> +--prod_type_timerdev_burst
> +--prod_type_timerdev
> +--bkt_tck_nsec

see above

> +--max_tmo_nsec
> +--nb_bkt_tcks

see above.

> +--nb_timers
> +--nb_timer_adptrs

With above name changes in documentation and code:
Acked-by: Jerin Jacob 
  


Re: [dpdk-dev] [PATCH] net/tap: fix memcpy with incorrect size

2018-04-06 Thread Yang, Zhiyong

> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Pavan Nikhilesh
> Sent: Friday, April 6, 2018 7:31 PM
> To: jerin.ja...@caviumnetworks.com; Yigit, Ferruh ;
> pascal.ma...@6wind.com; Varghese, Vipin ;
> tho...@monjalon.net
> Cc: dev@dpdk.org; Pavan Nikhilesh 
> Subject: [dpdk-dev] [PATCH] net/tap: fix memcpy with incorrect size
> 
> Fix incorrect sizeof operation being used for getting mac addr size.
> 
> Found while compiling with arm64 clang.
> drivers/net/tap/rte_eth_tap.c:1410:40: error: argument to 'sizeof' in
> 'memcpy' call is the same pointer type 'struct ether_addr *' as the
> destination; expected 'struct ether_addr' or an explicit length
> [-Werror,-Wsizeof-pointer-memaccess]
>rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(mac_addr));
>   ~~^~~~
> 
> Fixes: bcab6c1d27fa ("net/tap: allow user MAC to be passed as args")
> 
> Signed-off-by: Pavan Nikhilesh 
> ---

Good catch!

Acked-by: Zhiyong Yang 


Re: [dpdk-dev] [PATCH v3 00/68] Memory Hotplug for DPDK

2018-04-06 Thread Hemant Agrawal
Hi Thomas

> > -Original Message-
> > From: Thomas Monjalon [mailto:tho...@monjalon.net]
> > Sent: Thursday, April 05, 2018 7:43 PM
> > To: Shreyansh Jain 
> > Cc: Anatoly Burakov ; dev@dpdk.org;
> > keith.wi...@intel.com; jianfeng@intel.com;
> > andras.kov...@ericsson.com; laszlo.vadk...@ericsson.com;
> > benjamin.wal...@intel.com; bruce.richard...@intel.com;
> > konstantin.anan...@intel.com; kuralamudhan.ramakrish...@intel.com;
> > louise.m.d...@intel.com; nelio.laranje...@6wind.com;
> > ys...@mellanox.com; peppe...@japf.ch; jerin.ja...@caviumnetworks.com;
> > Hemant Agrawal ; olivier.m...@6wind.com;
> > gowrishanka...@linux.vnet.ibm.com
> > Subject: Re: [dpdk-dev] [PATCH v3 00/68] Memory Hotplug for DPDK
> > Importance: High
> >
> > 05/04/2018 16:24, Shreyansh Jain:
> > > Physical addressing cases for both, dpaa/dpaa2, depend heavily on
> > > the fact that physical addressing was the base and was available in
> > > sorted manner. This is reversed/negated with hotplugging support.
> > > So, rework of both the drivers is required from this perspective.
> > > There are some suggestions floated by Anatoly and internally, but
> > > work still needs to be done.
> > > It also impacts a lot of use-cases for virtualization (no-iommu).
> >
> > So what is your recommendation?
> > Can you rework PA case in dpaa/dpaa2 drivers within 18.05 timeframe?
> >
> We will like 2-3 more days on this before we can ack/nack this patch.
> We are working on priority on this.  PA case rework is not a trivial change.

The patch is good to go. However, we will be making changes in dpaa/dpaa2 
drivers to fix the PA issues shortly (within 18.05 timeframe)

Anatoly needs to take care of the following:
1. Comment by Shreyansh on " Re: [dpdk-dev] [PATCH v3 50/68] eal: replace 
memzone array with fbarray"
2. I could not apply the patches cleanly on current master.

Tested-by: Hemant Agrawal 
 
> Regards,
> Hemant
> 



Re: [dpdk-dev] [PATCH 1/2] net/sfc: support loopback mode configuration

2018-04-06 Thread Ferruh Yigit
On 4/4/2018 12:10 PM, Andrew Rybchenko wrote:
> All loopback modes are listed in efx_loopback_type_t.
> Available loopback modes are listed per link speed in
> the enc_loopback_types member of the efx_nic_cfg_t.
> 
> Signed-off-by: Andrew Rybchenko 
> Reviewed-by: Andy Moreton 
> Reviewed-by: Ivan Malov 

I am getting the following build error with clang; can you please check?


.../dpdk/drivers/net/sfc/base/efx_port.c:122:6: error: comparison of constant 64
with expression of type 'efx_loopback_type_t' (aka 'enum efx_loopback_type_e')
is always true [-Werror,-Wtautological-constant-out-of-range-compare]
if (EFX_TEST_QWORD_BIT(encp->enc_loopback_types[link_mode],
^~~
.../dpdk/drivers/net/sfc/base/efx_types.h:1590:28: note: expanded from macro
'EFX_TEST_QWORD_BIT'
#define EFX_TEST_QWORD_BIT  EFX_TEST_QWORD_BIT64
^
.../dpdk/drivers/net/sfc/base/efx_types.h:1412:22: note: expanded from macro
'EFX_TEST_QWORD_BIT64'
__CPU_TO_LE_64(EFX_SHIFT64(_bit, FIX_LINT(0 != 0)
~~~^~~
.../dpdk/drivers/net/sfc/base/efx_types.h:1290:32: note: expanded from macro
'EFX_SHIFT64'
(((_bit) >= (_base) && (_bit) < (_base) + 64) ? \
  ^
.../dpdk/drivers/net/sfc/base/efx_types.h:269:50: note: expanded from macro
'__CPU_TO_LE_64'
#define __CPU_TO_LE_64(_x)  ((uint64_t)__NOSWAP64(_x))
   ~~~^~~
.../dpdk/drivers/net/sfc/base/efx_types.h:238:26: note: expanded from macro
'__NOSWAP64'
#define __NOSWAP64(_x)  (_x)
 ^~
1 error generated.



Re: [dpdk-dev] [PATCH 2/2] app/testpmd: add commands to set loopback mode

2018-04-06 Thread Ferruh Yigit
On 4/4/2018 12:10 PM, Andrew Rybchenko wrote:
> Signed-off-by: Andrew Rybchenko 
> Reviewed-by: Roman Zhukov 
> Reviewed-by: Ivan Malov 


Re: [dpdk-dev] [PATCH 0/6] enable easier app compilation testing with meson

2018-04-06 Thread Van Haaren, Harry
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Bruce Richardson
> Sent: Thursday, March 29, 2018 2:55 PM
> To: dev@dpdk.org
> Cc: hemant.agra...@nxp.com; shreyansh.j...@nxp.com; Richardson, Bruce
> 
> Subject: [dpdk-dev] [PATCH 0/6] enable easier app compilation testing with
> meson
> 
> Summary:
>   With this set you can test building all applicable examples by
>   calling meson with "-Dexamples=all"
> 
> When building DPDK with meson, it's possible to specify a list of sample
> apps to have built along with the main code. However, specifying a full
> list of all apps can be unwieldy, so this set adds support for passing
> "all" as the examples to be built.
> 
> With "all", meson just adds all subdirectories of "examples" to the build,
> so the first few patches are ensuring that we don't get an error by
> attempting to build an unsupported application. On linux, only 7 apps were
> unsupported, in that they had not been given a meson.build file. On
> FreeBSD, a few others had to have their meson.build files updated to report
> them as unsupported.
> 
> In terms of behaviour, the meson.build file for each app will report if the
> app can be built or not. If "all" is requested, then a message is printed
> and the meson run can continue. If, however, the app is requested by name,
> then an error is reported and the meson run halts.
> 
> The final two patches in the series are more cleanup, the former improves
> error reporting, while the last patch is a performance improvement. Meson
> runs quickly enough in the normal case, but with a full set of examples,
> the dependency chain resolution can slow things down. Reducing the lists of
> dependencies makes a noticeable difference in this case. [NOTE: this
> slowness and speedup only applies to the meson run; the actual build using
> ninja is as fast as ever!]
> 
> Bruce Richardson (6):
>   examples: add empty meson files for unsupported examples
>   examples/l2fwd-cat: make build dependent on pqos library
>   examples: disable unsupported examples on BSD
>   examples: allow building all examples as part of meson build
>   examples: improve error report for missing meson deps
>   drivers/dpaa*: reduce meson dependency lists

Working fine for me here!

Series-Tested-by: Harry van Haaren 



Re: [dpdk-dev] [PATCH v3 15/21] vhost: packed queue enqueue path

2018-04-06 Thread Jens Freimann

On Fri, Apr 06, 2018 at 11:36:03AM +0200, Maxime Coquelin wrote:



On 04/05/2018 12:10 PM, Jens Freimann wrote:

Implement enqueue of packets to the receive virtqueue.

Set the descriptor flag VIRTQ_DESC_F_USED and toggle the used wrap counter if
the last descriptor in the ring is used. Perform a write memory barrier before
the flags are written to the descriptor.

Chained descriptors are not supported with this patch.

Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/virtio_net.c | 129 ++
 1 file changed, 129 insertions(+)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 7eea1da04..578e5612e 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -695,6 +695,135 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
return pkt_idx;
 }
+static inline uint32_t __attribute__((always_inline))
+vhost_enqueue_burst_packed(struct virtio_net *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+   struct vhost_virtqueue *vq;
+   struct vring_desc_packed *descs;
+   uint16_t idx;
+   uint16_t mask;
+   uint16_t i;
+
+   vq = dev->virtqueue[queue_id];
+
+   rte_spinlock_lock(&vq->access_lock);
+
+   if (unlikely(vq->enabled == 0)) {
+   i = 0;
+   goto out_access_unlock;
+   }
+
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_rd_lock(vq);
+
+   descs = vq->desc_packed;
+   mask = vq->size - 1;
+
+   for (i = 0; i < count; i++) {
+   uint32_t desc_avail, desc_offset;
+   uint32_t mbuf_avail, mbuf_offset;
+   uint32_t cpy_len;
+   struct vring_desc_packed *desc;
+   uint64_t desc_addr;
+   struct virtio_net_hdr_mrg_rxbuf *hdr;
+   struct rte_mbuf *m = pkts[i];
+
+   /* XXX: there is an assumption that no desc will be chained */

Is this assumption still true?
If not, what is the plan to fix this?


This is a leftover from the prototype code. I checked the code and
don't see what it could still relate to except if it is supposed to
mean indirect instead of chained. I think the comment can be removed. 



+   idx = vq->last_used_idx & mask;
+   desc = &descs[idx];
+
+   if (!desc_is_avail(vq, desc))

IIUC, it means the ring is full.
I think this is an unlikely case, so maybe better to use the unlikely
macro here.


yes, we can use unlikely here, will fix.

thanks!

regards,
Jens 
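
For context on the two review points above (the AVAIL/USED wrap-counter check
and the unlikely() hint), here is a rough, self-contained sketch of the idea.
The flag bit positions follow the packed-ring proposal, but the helper and
parameter names are illustrative rather than the exact DPDK definitions.

#include <stdbool.h>
#include <stdint.h>

#define SKETCH_DESC_F_AVAIL	(1 << 7)
#define SKETCH_DESC_F_USED	(1 << 15)

#define sketch_unlikely(x)	__builtin_expect(!!(x), 0)

/* A packed-ring descriptor is available to the device when its AVAIL bit
 * matches the expected wrap counter and its USED bit does not.
 */
static inline bool
sketch_desc_is_avail(uint16_t flags, bool wrap_counter)
{
	bool avail = !!(flags & SKETCH_DESC_F_AVAIL);
	bool used = !!(flags & SKETCH_DESC_F_USED);

	return avail == wrap_counter && used != wrap_counter;
}

int
sketch_try_enqueue(uint16_t desc_flags, bool wrap_counter)
{
	/* Ring full is the cold path, hence the reviewer's unlikely() hint. */
	if (sketch_unlikely(!sketch_desc_is_avail(desc_flags, wrap_counter)))
		return -1;

	/* ...copy the mbuf into the descriptor's buffer, then issue a write
	 * memory barrier before flipping the descriptor's AVAIL/USED flags...
	 */
	return 0;
}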


Re: [dpdk-dev] [PATCH v2 4/4] ethdev: Add metadata flow and action items support

2018-04-06 Thread Adrien Mazarguil
On Thu, Apr 05, 2018 at 06:49:32PM +0200, Thomas Monjalon wrote:
> 05/04/2018 15:51, Declan Doherty:
> > +struct rte_flow_item_metadata {
> > +   uint32_t id;/**< field identifier */
> > +   uint32_t size;  /**< field size */
> > +   uint8_t bytes[];/**< field value */
> > +};
> 
> Spotted C99 syntax of flexible array.
> Are we OK with all supported compilers?

I also thought they were a good idea at first but got rid of them in
rte_flow [1] for the following reasons:

- Not valid/standard C++.
- Can't be statically initialized.

Both can be overcome by relying on compiler extensions, however their use
should be restricted to a minimum in public APIs for portability reasons.

[1] http://dpdk.org/ml/archives/dev/2018-April/095307.html

-- 
Adrien Mazarguil
6WIND
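
As a quick illustration of the two points above (hypothetical struct names,
not rte_flow definitions): a flexible array member cannot take a static
initializer in standard C and is not valid C++, whereas a pointer plus an
explicit size keeps the public structure statically initializable.

#include <stdint.h>

struct item_fam {
	uint32_t id;
	uint32_t size;
	uint8_t bytes[];	/* flexible array member */
};

/* Not standard C (only a compiler extension) and not C++:
 *   static struct item_fam bad = { 1, 4, { 0xde, 0xad, 0xbe, 0xef } };
 */

static const uint8_t value[4] = { 0xde, 0xad, 0xbe, 0xef };

struct item_ptr {
	uint32_t id;
	uint32_t size;
	const uint8_t *bytes;	/* caller-provided buffer */
};

static const struct item_ptr good = { 1, sizeof(value), value };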


[dpdk-dev] [PATCH v3 3/4] ethdev: Add group action type to rte_flow

2018-04-06 Thread Declan Doherty
Add a group action type which defines a terminating action that
allows a matched flow to be redirected to a group. This allows logical
flow table hierarchies to be managed through rte_flow.

Signed-off-by: Declan Doherty 
---
 doc/guides/prog_guide/rte_flow.rst | 23 +++
 lib/librte_ether/rte_flow.h| 15 +++
 2 files changed, 38 insertions(+)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 106fb93..2f0a47a 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1557,6 +1557,29 @@ set of overlay header type.
| ``item type`` | Item type of tunnel end-point to decapsulate |
+---+--+
 
+
+Action: ``GROUP``
+^
+
+Redirects packets to a group on the current device.
+
+In a hierarchy of groups, which can be used to represent physical or logical
+flow tables on the device, this action allows the terminating action to be a
+group on that device.
+
+- Terminating by default.
+
+.. _table_rte_flow_action_group:
+
+.. table:: GROUP
+
+   +--+-+
+   | Field| Value   |
+   +==+=+
+   | ``id``   | Group ID to redirect packets to |
+   +--+-+
+
+
 Negative types
 ~~
 
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 6d94423..968a23b 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -1251,6 +1251,21 @@ struct rte_flow_action_tunnel_decap {
 };
 
 /**
+ * RTE_FLOW_ACTION_TYPE_GROUP
+ *
+ * Redirects packets to a group on the current device.
+ *
+ * In a hierarchy of groups, which can be used to represent physical or logical
+ * flow tables on the device, this action allows the terminating action to be a
+ * group on that device.
+ *
+ * Terminating by default.
+ */
+struct rte_flow_action_group {
+   uint32_t id;
+};
+
+/**
  * Definition of a single action.
  *
  * A list of actions is terminated by a END action.
-- 
2.7.4



[dpdk-dev] [PATCH v3 1/4] ethdev: add group counter support to rte_flow

2018-04-06 Thread Declan Doherty
Add new RTE_FLOW_ACTION_TYPE_GROUP_COUNT action type to enable shared
counters across multiple flows on a single port or across multiple
flows on multiple ports within the same switch domain.

Introduce new API rte_flow_query_group_count to allow querying of group
counters.

Signed-off-by: Declan Doherty 
---
 doc/guides/prog_guide/rte_flow.rst  | 35 +
 lib/librte_ether/rte_ethdev_version.map |  8 +
 lib/librte_ether/rte_flow.c | 21 +
 lib/librte_ether/rte_flow.h | 56 -
 lib/librte_ether/rte_flow_driver.h  |  6 
 5 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 961943d..fd33d19 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1698,6 +1698,41 @@ Return values:
 
 - 0 on success, a negative errno value otherwise and ``rte_errno`` is set.
 
+
+Group Count Query
+~
+
+Query group counter which can be associated with multiple flows on a specified
+port.
+
+This function allows retrieving of group counters. A group counter is a
+counter which can be shared among multiple flows on a single port or among
+multiple flows on multiple ports within the same switch domain. Data is
+gathered by special actions which must be present in the flow rule
+definition.
+
+.. code-block:: c
+
+   int
+   rte_flow_query_group_count(uint16_t port_id,
+  uint32_t group_counter_id,
+  struct rte_flow_query_count *count,
+   struct rte_flow_error *error);
+
+Arguments:
+
+- ``port_id``: port identifier of Ethernet device.
+- ``group_counter_id``: group counter identifier.
+- ``count``: group counter parameters.
+- ``error``: perform verbose error reporting if not NULL. PMDs initialize
+  this structure in case of error only.
+
+Return values:
+
+- 0 on success, a negative errno value otherwise and ``rte_errno`` is set.
+
+
+
 Isolated mode
 -
 
diff --git a/lib/librte_ether/rte_ethdev_version.map 
b/lib/librte_ether/rte_ethdev_version.map
index 34df6c8..cff6807 100644
--- a/lib/librte_ether/rte_ethdev_version.map
+++ b/lib/librte_ether/rte_ethdev_version.map
@@ -229,3 +229,11 @@ EXPERIMENTAL {
rte_mtr_stats_update;
 
 } DPDK_17.11;
+
+
+EXPERIMENTAL {
+   global:
+
+   rte_flow_query_group_count
+
+} DPDK_18.05;
diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
index 38f2d27..e10b1d0 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ether/rte_flow.c
@@ -418,3 +418,24 @@ rte_flow_copy(struct rte_flow_desc *desc, size_t len,
}
return 0;
 }
+
+int __rte_experimental
+rte_flow_query_group_count(uint16_t port_id,
+   uint32_t group_count_id,
+   struct rte_flow_query_count *count,
+   struct rte_flow_error *error)
+{
+   struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+   const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+   if (!ops)
+   return -rte_errno;
+   if (likely(!!ops->query_group_count))
+   return flow_err(port_id,
+   ops->query_group_count(dev, group_count_id,
+  count, error),
+   error);
+   return rte_flow_error_set(error, ENOSYS,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, rte_strerror(ENOSYS));
+}
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 13e4202..7d1f89d 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -1010,7 +1010,19 @@ enum rte_flow_action_type {
 *
 * See struct rte_flow_action_security.
 */
-   RTE_FLOW_ACTION_TYPE_SECURITY
+   RTE_FLOW_ACTION_TYPE_SECURITY,
+
+   /**
+* Enable a shared flow group counter for flow. Group counters can be
+* associated with multiples flows on the same port or on port within
+* the same switch domain if supported by that device.
+*
+* Group counters can be retrieved and reset through
+* rte_flow_query_group_count()
+*
+* See struct rte_flow_action_group_count.
+*/
+   RTE_FLOW_ACTION_TYPE_GROUP_COUNT
 };
 
 /**
@@ -1149,6 +1161,18 @@ struct rte_flow_action_security {
 };
 
 /**
+ * RTE_FLOW_ACTION_TYPE_GROUP_COUNT
+ *
+ * A packet/byte counter which can be shared across a group of flows programmed
+ * on the same port/switch domain.
+ *
+ * Non-terminating by default.
+ */
+struct rte_flow_action_group_count {
+   uint32_t id;
+};
+
+/**
  * Definition of a single action.
  *
  * A list of actions is terminated by a END action.
@@ -1476,6 +1500,36 @@ rte_flow_copy(struct rte_flow_desc *fd, size_t len,
  const struct rte_flow_item *items,
  const struct r
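
A hedged usage sketch of the query API proposed above, assuming the prototype
shown in the diff; the counter id is illustrative and error handling is
minimal.

#include <inttypes.h>
#include <stdio.h>
#include <rte_flow.h>

int
print_group_counter(uint16_t port_id, uint32_t group_counter_id)
{
	struct rte_flow_query_count stats = { .reset = 0 };
	struct rte_flow_error error;
	int ret;

	ret = rte_flow_query_group_count(port_id, group_counter_id,
					 &stats, &error);
	if (ret != 0) {
		printf("group counter query failed: %s\n",
		       error.message ? error.message : "unknown");
		return ret;
	}

	printf("group %" PRIu32 ": %" PRIu64 " packets, %" PRIu64 " bytes\n",
	       group_counter_id, stats.hits, stats.bytes);
	return 0;
}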

[dpdk-dev] [PATCH v3 2/4] ethdev: Add tunnel encap/decap actions

2018-04-06 Thread Declan Doherty
Add new flow action types and associated action data structures to
support the encapsulation and decapsulation of the virtual tunnel
endpoints.

The RTE_FLOW_ACTION_TYPE_TUNNEL_ENCAP action will cause the matching
flow to be encapsulated in the virtual tunnel endpoint overlay
defined in the tunnel_encap action data.

The RTE_FLOW_ACTION_TYPE_TUNNEL_DECAP action will cause all virtual
tunnel endpoint overlays, up to and including the first instance of
the flow item type defined in the tunnel_decap action data, to be
stripped from the matching flows.

Signed-off-by: Declan Doherty 
---
 doc/guides/prog_guide/rte_flow.rst | 77 --
 lib/librte_ether/rte_flow.h| 84 --
 2 files changed, 155 insertions(+), 6 deletions(-)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index fd33d19..106fb93 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -997,9 +997,11 @@ Actions
 
 Each possible action is represented by a type. Some have associated
 configuration structures. Several actions combined in a list can be assigned
-to a flow rule. That list is not ordered.
+to a flow rule. That list is not ordered, with the exception of  actions which
+modify the packet itself, these packet modification actions must be specified
+in the explicit order in which they are to be executed.
 
-They fall in three categories:
+They fall in four categories:
 
 - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
   processing matched packets by subsequent flow rules, unless overridden
@@ -1008,8 +1010,11 @@ They fall in three categories:
 - Non-terminating actions (PASSTHRU, DUP) that leave matched packets up for
   additional processing by subsequent flow rules.
 
+- Non-terminating meta actions that do not affect the fate of packets but 
result
+  in modification of the packet itself (SECURITY, TUNNEL_ENCAP, TUNNEL_DECAP).
+
 - Other non-terminating meta actions that do not affect the fate of packets
-  (END, VOID, MARK, FLAG, COUNT, SECURITY).
+  (END, VOID, MARK, FLAG, COUNT).
 
 When several actions are combined in a flow rule, they should all have
 different types (e.g. dropping a packet twice is not possible).
@@ -1486,6 +1491,72 @@ fields in the pattern items.
| 1 | END  |
+---+--+
 
+
+Action: ``TUNNEL_ENCAP``
+^^
+
+Performs an encapsulation action by encapsulating the flows matched by the
+pattern items according to the network overlay defined in the
+``rte_flow_action_tunnel_encap`` pattern items.
+
+This action modifies the payload of matched flows. The pattern items specified
+in the ``rte_flow_action_tunnel_encap`` action structure must defined a valid
+set of overlay headers, from the Ethernet header up to the overlay header. The
+pattern must be terminated with the RTE_FLOW_ITEM_TYPE_END item type.
+
+- Non-terminating by default.
+
+.. _table_rte_flow_action_tunnel_encap:
+
+.. table:: TUNNEL_ENCAP
+
+   +-+-+
+   | Field   | Value   |
+   +=+=+
+   | ``pattern`` | Virtual tunnel end-point pattern definition |
+   +-+-+
+
+
+.. _table_rte_flow_action_tunnel_encap_example:
+
+.. table:: IPv4 VxLAN flow pattern example.
+
+   +---+--++
+   | Index | Flow Item Type   | Flow Item  |
+   +===+==++
+   | 0 | RTE_FLOW_ITEM_TYPE_ETH   | eth item   |
+   +---+--++
+   | 1 | RTE_FLOW_ITEM_TYPE_IPV4  | ipv4 item  |
+   +---+--++
+   | 2 | RTE_FLOW_ITEM_TYPE_UDP   | udp item   |
+   +---+--++
+   | 3 | RTE_FLOW_ITEM_TYPE_VXLAN | vxlan item |
+   +---+--++
+   | 4 | RTE_FLOW_ITEM_TYPE_END   | NULL   |
+   +---+--++
+
+
+Action: ``TUNNEL_DECAP``
+^^
+
+Performs a decapsulation action by stripping all headers of the virtual tunnel
+end-point overlay up to the header defined by the flow item type of flows
+matched by the pattern items.
+
+This action modifies the payload of matched flows. The flow item type specified
+in the ``rte_flow_action_tunnel_decap`` action structure must defined a valid
+set of overlay header type.
+
+- Non-terminating by default.
+
+.. _table_rte_flow_action_tunnel_decap:
+
+   +---+--+
+   | Field | Value|
+   +===+==+
+   | ``item type`` | Item type of tunnel end-point to decapsulate |
+   +---+

[dpdk-dev] [PATCH v3 0/4] ethdev: Additions to support tunnel encap/decap offload

2018-04-06 Thread Declan Doherty
This patchset contains the revised proposal to manage virtual
tunnel endpoint hardware acceleration based on community
feedback on the RFC
(http://dpdk.org/ml/archives/dev/2017-December/084676.html). This
proposal is purely enabled through rte_flow APIs with the
addition of some new features which were previously provided
by the rte_tep APIs proposed in the original RFC. This patchset
ultimately aims to enable the configuration of inline data path
encapsulation and decapsulation of tunnel endpoint network
overlays on accelerated IO devices.

V2:
Split new functions into separate patches, and add additional
documentation.

V3:
Extended the description of group counter in documentation.
Renamed VTEP to TUNNEL.
Fixed C99 syntax.

The summary of the additions to the rte_flow are as follows:

- Add new flow actions RTE_FLOW_ACTION_TYPE_TUNNEL_ENCAP and
RTE_FLOW_ACTION_TYPE_TUNNEL_DECAP to rte_flow to support specification
of encapsulation and decapsulation of virtual tunnel endpoints in
hardware.

- Updates the matching pattern item definition
description to specify that all actions which modify a packet
must be specified in the explicit order in which they are to be executed.

- Introduces support for the use of pipeline metadata in
the flow pattern definition and the population of metadata fields
from flow actions.

- Adds group counters to enable statistics to be kept on groups of
flows, such as all ingress/egress flows of a tunnel.

- Adds group_action to allow a flow termination to be a group/table
within the device.

A high level summary of the proposed usage model is as follows:

1. Decapsulation
1.1. Decapsulation of the tunnel outer headers and forwarding of all
 traffic to the same queue(s) or port would have the following flow
 parameters; pseudocode is used here.

struct rte_flow_attr attr = { .ingress = 1 };

struct rte_flow_item pattern[] = {
{ .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_item },
{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_item },
{ .type = RTE_FLOW_ITEM_TYPE_VxLAN, .spec = &vxlan_item },
{ .type = RTE_FLOW_ITEM_TYPE_END }
};

struct rte_flow_action actions[] = {
{ .type = RTE_FLOW_ACTION_TYPE_TUNNEL_DECAP, .type = VxLAN },
{ .type = RTE_FLOW_ACTION_TYPE_VF, .conf = &vf_action  },
{ .type = RTE_FLOW_ACTION_TYPE_END }
}

1.2.

Decapsulation of tunnel outer headers and matching on inner
headers, and forwarding to the same queue/s or port.

1.2.1.

The same scenario as above, but either the application
or the hardware requires configuration as 2 logically independent
operations (viewing it as 2 logical tables). The first stage is
the flow rule that defines the pattern to match the tunnel
and the action to decapsulate the packet; the second stage
table matches the inner header and defines the actions,
e.g. forward to port.

flow rule for outer header on table 0

struct rte_flow_attr attr = { .ingress = 1, .table = 0 };

struct rte_flow_item pattern[] = {
{ .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_item },
{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_item },
{ .type = RTE_FLOW_ITEM_TYPE_VxLAN, .spec = &vxlan_item },
{ .type = RTE_FLOW_ITEM_TYPE_END }
};

struct rte_flow_action actions[] = {
{ .type = RTE_FLOW_ACTION_TYPE_GROUP_COUNT, .conf = &tunnel_counter },
{ .type = RTE_FLOW_ACTION_TYPE_METADATA, .conf = &metadata_action },
{ .type = RTE_FLOW_ACTION_TYPE_TUNNEL_DECAP, .conf = VxLAN },
{
  .type = RTE_FLOW_ACTION_TYPE_GROUP,
  .conf = &group_action = { .id = 1 }
},
{ .type = RTE_FLOW_ACTION_TYPE_END }
}

flow rule for inner header on table 1

struct rte_flow_attr attr = { .ingress = 1, .table = 1 };

struct rte_flow_item pattern[] = {
{ .type = RTE_FLOW_ITEM_TYPE_METADATA,  .spec = &metadata_item },
{ .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_item },
{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
{ .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &tcp_item },
{ .type = RTE_FLOW_ITEM_TYPE_END }
};

struct rte_flow_action actions[] = {
{
  .type = RTE_FLOW_ACTION_TYPE_PORT_ID,
  .conf = &port_id_action = { port_id }
},
{ .type = RTE_FLOW_ACTION_TYPE_END }
}

Note that the metadata action in the flow rule in table 0 generates
the metadata in the pipeline, which is then used as part of the flow
pattern in table 1 to specify the exact flow to match against. In the
case where exact match rules are provided by the application,
this metadata could be extracted by the PMD from the flow pattern
of the group 0 rule; the matching metadata will then need to be
explicitly provided by the application in the flow pattern for the flow
rule in group 1. For devices capable of wildcard matching, the hardware
must 

[dpdk-dev] [PATCH v3 4/4] ethdev: Add metadata flow and action items support

2018-04-06 Thread Declan Doherty
Introduces a new action type RTE_FLOW_ACTION_TYPE_METADATA which enables
metadata extraction from a packet into a specified metadata container
for consumption in further pipeline stages or for propagation to the host
interface.

As a complementary function to the new metadata action type, this patch also
introduces a new flow item type which enables flow patterns to specify a
specific metadata container as a matching criterion for a flow rule.

Signed-off-by: Declan Doherty 
---
 doc/guides/prog_guide/rte_flow.rst | 85 ++
 lib/librte_ether/rte_flow.h| 42 +++
 2 files changed, 127 insertions(+)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 2f0a47a..9b2b0e3 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1580,6 +1580,91 @@ group on that device.
+--+-+
 
 
+
+Action: ``METADATA``
+
+
+Action extracts data from packet into user specified metadata field in pipeline
+for use in downstream processing stages or for propagation to host interface.
+
+The pattern mask is used to define the data which is to be extracted from the
+packet. The mask pattern types defined in the action metadata pattern must
+match the flow pattern definitions up to the last flow item from which data is
+to be extracted.
+
+- Non-terminating by default.
+
+.. _table_rte_flow_action_metadata:
+
+.. table:: METADATA
+
+   +--+---+
+   | Field| Value |
+   +==+===+
+   | ``id``   | Metadata field Identifier |
+   +--+---+
+   | ``pattern``  | Extraction mask pattern   |
+   +--+---+
+
+The example below demonstrates how the extraction mask to extract the source/
+destination IPv4 address, the UDP destination port and and the VxLAN VNI can be
+specified.
+
+.. _table_rte_flow_action_metadata_example:
+
+.. table:: IPv4 VxLAN metadata extraction
+
+   +---+--+---+
+   | Index | Flow Item Type   | Flow Mask |
+   +===+==+===+
+   | 0 | RTE_FLOW_ITEM_TYPE_ETH   | .dst = "\x00\x00\x00\x00\x00\x00" |
+   |   |  +---+
+   |   |  | .src = "\x00\x00\x00\x00\x00\x00" |
+   |   |  +---+
+   |   |  | .type = RTE_BE16(0x0) |
+   +---+--+---+
+   | 1 | RTE_FLOW_ITEM_TYPE_IPV4  | .src_addr = RTE_BE32(0x)  |
+   |   |  +---+
+   |   |  | .dst_addr = RTE_BE32(0x)  |
+   +---+--+---+
+   | 2 | RTE_FLOW_ITEM_TYPE_UDP   | .src_port = RTE_BE16(0x0) |
+   |   |  +---+
+   |   |  | .dst_port = RTE_BE16(0x)  |
+   +---+--+---+
+   | 3 | RTE_FLOW_ITEM_TYPE_VXLAN | .vni = "\xff\xff\xff" |
+   +---+--+---+
+   | 4 | RTE_FLOW_ITEM_TYPE_END   | NULL  |
+   +---+--+---+
+
+If only the VxLAN VNI extraction was required then the extraction mask would be
+as follows.
+
+.. _table_rte_flow_action_metadata_example_2:
+
+.. table::  VxLAN VNI metadata extraction
+
+   +---+--+---+
+   | Index | Flow Item Type   | Flow Mask |
+   +===+==+===+
+   | 0 | RTE_FLOW_ITEM_TYPE_ETH   | .dst = "\x00\x00\x00\x00\x00\x00" |
+   |   |  +---+
+   |   |  | .src = "\x00\x00\x00\x00\x00\x00" |
+   |   |  +---+
+   |   |  | .type = RTE_BE16(0x0) |
+   +---+--+---+
+   | 1 | RTE_FLOW_ITEM_TYPE_IPV4  | .src_addr = RTE_BE32(0x0) |
+   |   |  +---+
+   |   |  | .dst_addr = RTE_BE32(0x0) |
+   +---+--+---+
+   | 2 | RTE_FLOW_ITEM_TYPE_UDP   | .src_port = RTE_B
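
A minimal sketch of the VxLAN-VNI-only extraction mask from the second table
above. Only the VXLAN item carries a non-zero mask, so only the VNI would be
copied into the metadata container; the action's "id" and "pattern" field names
follow the documentation table and the configuration struct below is a
hypothetical stand-in, not the proposed definition.

#include <rte_flow.h>

static const struct rte_flow_item_vxlan vni_only_mask = {
	.vni = "\xff\xff\xff",
};

static struct rte_flow_item metadata_mask_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },	/* all-zero mask */
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },	/* all-zero mask */
	{ .type = RTE_FLOW_ITEM_TYPE_UDP },	/* all-zero mask */
	{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .mask = &vni_only_mask },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};

/* Hypothetical layout of the proposed action configuration. */
struct sketch_action_metadata {
	uint32_t id;			/* metadata container identifier */
	struct rte_flow_item *pattern;	/* extraction mask pattern */
};

static struct sketch_action_metadata extract_vni = {
	.id = 1,
	.pattern = metadata_mask_pattern,
};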

[dpdk-dev] [PATCH v5 01/18] net/axgbe: add minimal dev init and uninit support

2018-04-06 Thread Ravi Kumar
Add an Ethernet poll mode driver for AMD 10G devices embedded in
AMD EPYC™ EMBEDDED 3000 family processors.

Signed-off-by: Ravi Kumar 
---
 MAINTAINERS |  6 ++
 config/common_base  |  6 ++
 doc/guides/rel_notes/release_18_05.rst  |  5 ++
 drivers/net/Makefile|  1 +
 drivers/net/axgbe/Makefile  | 27 
 drivers/net/axgbe/axgbe_common.h| 50 +++
 drivers/net/axgbe/axgbe_ethdev.c| 97 +
 drivers/net/axgbe/axgbe_ethdev.h| 23 +++
 drivers/net/axgbe/axgbe_logs.h  | 26 
 drivers/net/axgbe/rte_pmd_axgbe_version.map |  4 ++
 mk/rte.app.mk   |  1 +
 11 files changed, 246 insertions(+)
 create mode 100644 drivers/net/axgbe/Makefile
 create mode 100644 drivers/net/axgbe/axgbe_common.h
 create mode 100644 drivers/net/axgbe/axgbe_ethdev.c
 create mode 100644 drivers/net/axgbe/axgbe_ethdev.h
 create mode 100644 drivers/net/axgbe/axgbe_logs.h
 create mode 100644 drivers/net/axgbe/rte_pmd_axgbe_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index ed3251d..1bf7c03 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -349,6 +349,12 @@ M: Ferruh Yigit 
 T: git://dpdk.org/next/dpdk-next-net
 F: doc/guides/nics/features/default.ini
 
+AMD AXGBE PMD
+M: Ravi Kumar 
+F: drivers/net/axgbe/
+F: doc/guides/nics/axgbe.rst
+F: doc/guides/nics/features/axgbe.ini
+
 Link bonding
 M: Declan Doherty 
 F: drivers/net/bonding/
diff --git a/config/common_base b/config/common_base
index c09c7cf..6c7e7fd 100644
--- a/config/common_base
+++ b/config/common_base
@@ -145,6 +145,12 @@ CONFIG_RTE_LIBRTE_ARK_DEBUG_STATS=n
 CONFIG_RTE_LIBRTE_ARK_DEBUG_TRACE=n
 
 #
+# Compile AMD PMD
+#
+CONFIG_RTE_LIBRTE_AXGBE_PMD=y
+CONFIG_RTE_LIBRTE_AXGBE_PMD_DEBUG=n
+
+#
 # Compile burst-oriented Broadcom PMD driver
 #
 CONFIG_RTE_LIBRTE_BNX2X_PMD=n
diff --git a/doc/guides/rel_notes/release_18_05.rst 
b/doc/guides/rel_notes/release_18_05.rst
index e5fac1c..2db 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -58,6 +58,11 @@ New Features
   * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
   * Added support for DROP action in flow API.
 
+* **Added Ethernet poll mode driver for AMD XGBE devices.**
+
+  Added the new ``axgbe`` ethernet poll mode driver for AMD XGBE devices.
+  See the :doc:`../nics/axgbe` nic driver guide for more details on this
+  new driver.
 
 API Changes
 ---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 37ca19a..dc5047e 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -12,6 +12,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += af_packet
 DIRS-$(CONFIG_RTE_LIBRTE_ARK_PMD) += ark
 DIRS-$(CONFIG_RTE_LIBRTE_AVF_PMD) += avf
 DIRS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += avp
+DIRS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe
 DIRS-$(CONFIG_RTE_LIBRTE_BNX2X_PMD) += bnx2x
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += bonding
 DIRS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += cxgbe
diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile
new file mode 100644
index 000..9d9c8d9
--- /dev/null
+++ b/drivers/net/axgbe/Makefile
@@ -0,0 +1,27 @@
+#   SPDX-License-Identifier: BSD-3-Clause
+#   Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_axgbe.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_pmd_axgbe_version.map
+
+LIBABIVER := 1
+
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool
+LDLIBS += -lrte_pci -lrte_bus_pci
+LDLIBS += -lrte_ethdev
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_ethdev.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
new file mode 100644
index 000..3591d77
--- /dev/null
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -0,0 +1,50 @@
+/*   SPDX-License-Identifier: BSD-3-Clause
+ *   Copyright(c) 2018 Advanced Micro Devices, Inc. All rights reserved.
+ *   Copyright(c) 2018 Synopsys, Inc. All rights reserved.
+ */
+
+#ifndef __AXGBE_COMMON_H__
+#define __AXGBE_COMMON_H__
+
+#include "axgbe_logs.h"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define BIT(nr)   (1 << (nr))
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+#define AXGBE_HZ   250
+
+#endif /* __AXGBE_COMMON_H__ */
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
new file mode 100644
index 000..8d7ff28
--- /dev/null
+++ b/dri

[dpdk-dev] [PATCH v5 03/18] net/axgbe: add phy register map and helper macros

2018-04-06 Thread Ravi Kumar
Added PHY-related register definitions.

Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/axgbe_phy.h | 192 ++
 1 file changed, 192 insertions(+)
 create mode 100644 drivers/net/axgbe/axgbe_phy.h

diff --git a/drivers/net/axgbe/axgbe_phy.h b/drivers/net/axgbe/axgbe_phy.h
new file mode 100644
index 000..77ee20a
--- /dev/null
+++ b/drivers/net/axgbe/axgbe_phy.h
@@ -0,0 +1,192 @@
+/*   SPDX-License-Identifier: BSD-3-Clause
+ *   Copyright(c) 2018 Advanced Micro Devices, Inc. All rights reserved.
+ *   Copyright(c) 2018 Synopsys, Inc. All rights reserved.
+ */
+
+#ifndef __AXGBE_PHY_H__
+#define __AXGBE_PHY_H__
+
+#define SPEED_10    10
+#define SPEED_100   100
+#define SPEED_1000  1000
+#define SPEED_2500  2500
+#define SPEED_10000 10000
+
+
+/* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit
+ * IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips.
+ */
+#define MII_ADDR_C45 (1 << 30)
+
+/* Basic mode status register. */
+#define BMSR_LSTATUS0x0004  /* Link status */
+
+/* Status register 1. */
+#define MDIO_STAT1_LSTATUS  BMSR_LSTATUS
+
+/* Generic MII registers. */
+#define MII_BMCR   0x00/* Basic mode control register */
+#define MII_BMSR   0x01/* Basic mode status register  */
+#define MII_PHYSID10x02/* PHYS ID 1   */
+#define MII_PHYSID20x03/* PHYS ID 2   */
+#define MII_ADVERTISE  0x04/* Advertisement control reg   */
+#define MII_LPA0x05/* Link partner ability reg
*/
+#define MII_EXPANSION  0x06/* Expansion register  */
+#define MII_CTRL1000   0x09/* 1000BASE-T control  */
+#define MII_STAT1000   0x0a/* 1000BASE-T status   */
+#defineMII_MMD_CTRL0x0d/* MMD Access Control Register 
*/
+#defineMII_MMD_DATA0x0e/* MMD Access Data Register */
+#define MII_ESTATUS0x0f/* Extended Status */
+#define MII_DCOUNTER   0x12/* Disconnect counter  */
+#define MII_FCSCOUNTER 0x13/* False carrier counter   */
+#define MII_NWAYTEST   0x14/* N-way auto-neg test reg */
+#define MII_RERRCOUNTER0x15/* Receive error counter   
*/
+#define MII_SREVISION  0x16/* Silicon revision*/
+#define MII_RESV1  0x17/* Reserved... */
+#define MII_LBRERROR   0x18/* Lpback, rx, bypass error*/
+#define MII_PHYADDR0x19/* PHY address */
+#define MII_RESV2  0x1a/* Reserved... */
+#define MII_TPISTATUS  0x1b/* TPI status for 10mbps   */
+#define MII_NCONFIG0x1c/* Network interface config*/
+
+/* Basic mode control register. */
+#define BMCR_RESV  0x003f  /* Unused...   */
+#define BMCR_SPEED1000 0x0040  /* MSB of Speed (1000) */
+#define BMCR_CTST  0x0080  /* Collision test  */
+#define BMCR_FULLDPLX  0x0100  /* Full duplex */
+#define BMCR_ANRESTART 0x0200  /* Auto negotiation restart*/
+#define BMCR_ISOLATE   0x0400  /* Isolate data paths from MII */
+#define BMCR_PDOWN 0x0800  /* Enable low power state  */
+#define BMCR_ANENABLE  0x1000  /* Enable auto negotiation */
+#define BMCR_SPEED100  0x2000  /* Select 100Mbps  */
+#define BMCR_LOOPBACK  0x4000  /* TXD loopback bits   */
+#define BMCR_RESET 0x8000  /* Reset to default state  */
+#define BMCR_SPEED10   0x0000  /* Select 10Mbps   */
+
+
+/* MDIO Manageable Devices (MMDs). */
+#define MDIO_MMD_PMAPMD1   /* Physical Medium Attachment
+* Physical Medium Dependent
+*/
+#define MDIO_MMD_WIS   2   /* WAN Interface Sublayer */
+#define MDIO_MMD_PCS   3   /* Physical Coding Sublayer */
+#define MDIO_MMD_PHYXS 4   /* PHY Extender Sublayer */
+#define MDIO_MMD_DTEXS 5   /* DTE Extender Sublayer */
+#define MDIO_MMD_TC6   /* Transmission Convergence */
+#define MDIO_MMD_AN7   /* Auto-Negotiation */
+#define MDIO_MMD_C22EXT29  /* Clause 22 extension */
+#define MDIO_MMD_VEND1 30  /* Vendor specific 1 */
+#define MDIO_MMD_VEND2 31  /* Vendor specific 2 */
+
+/* Generic MDIO registers. */
+#define MDIO_CTRL1 MII_BMCR
+#define MDIO_STAT1 MII_BMSR
+#define MDIO_DEVID1MII_PHYSID1
+#define MDIO_DEVID2MII_PHYSID2
+#define MDIO_SPEED 4   /* Speed ability */
+#define MDIO_DEVS1 5   /

[dpdk-dev] [PATCH v5 04/18] net/axgbe: add structures for MAC initialization and reset

2018-04-06 Thread Ravi Kumar
Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/Makefile   |   1 +
 drivers/net/axgbe/axgbe_dev.c|  45 +
 drivers/net/axgbe/axgbe_ethdev.c | 301 -
 drivers/net/axgbe/axgbe_ethdev.h | 349 +++
 4 files changed, 694 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/axgbe/axgbe_dev.c

diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile
index 9d9c8d9..6b8da12 100644
--- a/drivers/net/axgbe/Makefile
+++ b/drivers/net/axgbe/Makefile
@@ -23,5 +23,6 @@ LDLIBS += -lrte_ethdev
 # all source are stored in SRCS-y
 #
 SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_dev.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/axgbe/axgbe_dev.c b/drivers/net/axgbe/axgbe_dev.c
new file mode 100644
index 000..70a796b
--- /dev/null
+++ b/drivers/net/axgbe/axgbe_dev.c
@@ -0,0 +1,45 @@
+/*   SPDX-License-Identifier: BSD-3-Clause
+ *   Copyright(c) 2018 Advanced Micro Devices, Inc. All rights reserved.
+ *   Copyright(c) 2018 Synopsys, Inc. All rights reserved.
+ */
+
+#include "axgbe_ethdev.h"
+#include "axgbe_common.h"
+#include "axgbe_phy.h"
+
+static int __axgbe_exit(struct axgbe_port *pdata)
+{
+   unsigned int count = 2000;
+
+   /* Issue a software reset */
+   AXGMAC_IOWRITE_BITS(pdata, DMA_MR, SWR, 1);
+   rte_delay_us(10);
+
+   /* Poll Until Poll Condition */
+   while (--count && AXGMAC_IOREAD_BITS(pdata, DMA_MR, SWR))
+   rte_delay_us(500);
+
+   if (!count)
+   return -EBUSY;
+
+   return 0;
+}
+
+static int axgbe_exit(struct axgbe_port *pdata)
+{
+   int ret;
+
+   /* To guard against possible incorrectly generated interrupts,
+* issue the software reset twice.
+*/
+   ret = __axgbe_exit(pdata);
+   if (ret)
+   return ret;
+
+   return __axgbe_exit(pdata);
+}
+
+void axgbe_init_function_ptrs_dev(struct axgbe_hw_if *hw_if)
+{
+   hw_if->exit = axgbe_exit;
+}
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index 8d7ff28..8f4 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -4,6 +4,8 @@
  */
 
 #include "axgbe_ethdev.h"
+#include "axgbe_common.h"
+#include "axgbe_phy.h"
 
 static int eth_axgbe_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_axgbe_dev_uninit(struct rte_eth_dev *eth_dev);
@@ -22,6 +24,190 @@ static const struct rte_pci_id pci_id_axgbe_map[] = {
{ .vendor_id = 0, },
 };
 
+static struct axgbe_version_data axgbe_v2a = {
+   .xpcs_access= AXGBE_XPCS_ACCESS_V2,
+   .mmc_64bit  = 1,
+   .tx_max_fifo_size   = 229376,
+   .rx_max_fifo_size   = 229376,
+   .tx_tstamp_workaround   = 1,
+   .ecc_support= 1,
+   .i2c_support= 1,
+};
+
+static struct axgbe_version_data axgbe_v2b = {
+   .xpcs_access= AXGBE_XPCS_ACCESS_V2,
+   .mmc_64bit  = 1,
+   .tx_max_fifo_size   = 65536,
+   .rx_max_fifo_size   = 65536,
+   .tx_tstamp_workaround   = 1,
+   .ecc_support= 1,
+   .i2c_support= 1,
+};
+
+static void axgbe_get_all_hw_features(struct axgbe_port *pdata)
+{
+   unsigned int mac_hfr0, mac_hfr1, mac_hfr2;
+   struct axgbe_hw_features *hw_feat = &pdata->hw_feat;
+
+   mac_hfr0 = AXGMAC_IOREAD(pdata, MAC_HWF0R);
+   mac_hfr1 = AXGMAC_IOREAD(pdata, MAC_HWF1R);
+   mac_hfr2 = AXGMAC_IOREAD(pdata, MAC_HWF2R);
+
+   memset(hw_feat, 0, sizeof(*hw_feat));
+
+   hw_feat->version = AXGMAC_IOREAD(pdata, MAC_VR);
+
+   /* Hardware feature register 0 */
+   hw_feat->gmii= AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, GMIISEL);
+   hw_feat->vlhash  = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, VLHASH);
+   hw_feat->sma = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, SMASEL);
+   hw_feat->rwk = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, RWKSEL);
+   hw_feat->mgk = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, MGKSEL);
+   hw_feat->mmc = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, MMCSEL);
+   hw_feat->aoe = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, ARPOFFSEL);
+   hw_feat->ts  = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, TSSEL);
+   hw_feat->eee = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, EEESEL);
+   hw_feat->tx_coe  = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, TXCOESEL);
+   hw_feat->rx_coe  = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, RXCOESEL);
+   hw_feat->addn_mac= AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R,
+ ADDMACADRSEL);
+   hw_feat->ts_src  = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, TSSTSSEL);
+   hw_feat->sa_vlan_ins = AXGMAC_GET_BITS(mac_hfr0, MAC_HWF0R

[dpdk-dev] [PATCH v5 02/18] net/axgbe: add register map and related macros

2018-04-06 Thread Ravi Kumar
Added DMA and MAC related register definitions.

Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/axgbe_common.h | 1644 ++
 1 file changed, 1644 insertions(+)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 3591d77..298e794 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -47,4 +47,1648 @@
 
 #define AXGBE_HZ   250
 
+/* DMA register offsets */
+#define DMA_MR 0x3000
+#define DMA_SBMR   0x3004
+#define DMA_ISR0x3008
+#define DMA_AXIARCR0x3010
+#define DMA_AXIAWCR0x3018
+#define DMA_AXIAWRCR   0x301c
+#define DMA_DSR0   0x3020
+#define DMA_DSR1   0x3024
+#define EDMA_TX_CONTROL0x3040
+#define EDMA_RX_CONTROL0x3044
+
+/* DMA register entry bit positions and sizes */
+#define DMA_AXIARCR_DRC_INDEX  0
+#define DMA_AXIARCR_DRC_WIDTH  4
+#define DMA_AXIARCR_DRD_INDEX  4
+#define DMA_AXIARCR_DRD_WIDTH  2
+#define DMA_AXIARCR_TEC_INDEX  8
+#define DMA_AXIARCR_TEC_WIDTH  4
+#define DMA_AXIARCR_TED_INDEX  12
+#define DMA_AXIARCR_TED_WIDTH  2
+#define DMA_AXIARCR_THC_INDEX  16
+#define DMA_AXIARCR_THC_WIDTH  4
+#define DMA_AXIARCR_THD_INDEX  20
+#define DMA_AXIARCR_THD_WIDTH  2
+#define DMA_AXIAWCR_DWC_INDEX  0
+#define DMA_AXIAWCR_DWC_WIDTH  4
+#define DMA_AXIAWCR_DWD_INDEX  4
+#define DMA_AXIAWCR_DWD_WIDTH  2
+#define DMA_AXIAWCR_RPC_INDEX  8
+#define DMA_AXIAWCR_RPC_WIDTH  4
+#define DMA_AXIAWCR_RPD_INDEX  12
+#define DMA_AXIAWCR_RPD_WIDTH  2
+#define DMA_AXIAWCR_RHC_INDEX  16
+#define DMA_AXIAWCR_RHC_WIDTH  4
+#define DMA_AXIAWCR_RHD_INDEX  20
+#define DMA_AXIAWCR_RHD_WIDTH  2
+#define DMA_AXIAWCR_RDC_INDEX  24
+#define DMA_AXIAWCR_RDC_WIDTH  4
+#define DMA_AXIAWCR_RDD_INDEX  28
+#define DMA_AXIAWCR_RDD_WIDTH  2
+#define DMA_AXIAWRCR_TDWC_INDEX0
+#define DMA_AXIAWRCR_TDWC_WIDTH4
+#define DMA_AXIAWRCR_TDWD_INDEX4
+#define DMA_AXIAWRCR_TDWD_WIDTH4
+#define DMA_AXIAWRCR_RDRC_INDEX8
+#define DMA_AXIAWRCR_RDRC_WIDTH4
+#define DMA_ISR_MACIS_INDEX17
+#define DMA_ISR_MACIS_WIDTH1
+#define DMA_ISR_MTLIS_INDEX16
+#define DMA_ISR_MTLIS_WIDTH1
+#define DMA_MR_INTM_INDEX  12
+#define DMA_MR_INTM_WIDTH  2
+#define DMA_MR_SWR_INDEX   0
+#define DMA_MR_SWR_WIDTH   1
+#define DMA_SBMR_WR_OSR_INDEX  24
+#define DMA_SBMR_WR_OSR_WIDTH  6
+#define DMA_SBMR_RD_OSR_INDEX  16
+#define DMA_SBMR_RD_OSR_WIDTH  6
+#define DMA_SBMR_AAL_INDEX 12
+#define DMA_SBMR_AAL_WIDTH 1
+#define DMA_SBMR_EAME_INDEX11
+#define DMA_SBMR_EAME_WIDTH1
+#define DMA_SBMR_BLEN_256_INDEX7
+#define DMA_SBMR_BLEN_256_WIDTH1
+#define DMA_SBMR_BLEN_32_INDEX 4
+#define DMA_SBMR_BLEN_32_WIDTH 1
+#define DMA_SBMR_UNDEF_INDEX   0
+#define DMA_SBMR_UNDEF_WIDTH   1
+
+/* DMA register values */
+#define DMA_DSR_RPS_WIDTH  4
+#define DMA_DSR_TPS_WIDTH  4
+#define DMA_DSR_Q_WIDTH(DMA_DSR_RPS_WIDTH + 
DMA_DSR_TPS_WIDTH)
+#define DMA_DSR0_RPS_START 8
+#define DMA_DSR0_TPS_START 12
+#define DMA_DSRX_FIRST_QUEUE   3
+#define DMA_DSRX_INC   4
+#define DMA_DSRX_QPR   4
+#define DMA_DSRX_RPS_START 0
+#define DMA_DSRX_TPS_START 4
+#define DMA_TPS_STOPPED0x00
+#define DMA_TPS_SUSPENDED  0x06
+
+/* DMA channel register offsets
+ *   Multiple channels can be active.  The first channel has registers
+ *   that begin at 0x3100.  Each subsequent channel has registers that
+ *   are accessed using an offset of 0x80 from the previous channel.
+ */
+#define DMA_CH_BASE0x3100
+#define DMA_CH_INC 0x80
+
+#define DMA_CH_CR  0x00
+#define DMA_CH_TCR 0x04
+#define DMA_CH_RCR 0x08
+#define DMA_CH_TDLR_HI 0x10
+#define DMA_CH_TDLR_LO 0x14
+#define DMA_CH_RDLR_HI 0x18
+#define DMA_CH_RDLR_LO 0x1c
+#define DMA_CH_TDTR_LO 0x24
+#define DMA_CH_RDTR_LO 0x2c
+#define DMA_CH_TDRLR   0x30
+#define DMA_CH_RDRLR   0x34
+#define DMA_CH_IER 0x38
+#define DMA_CH_RIWT0x3c
+#d
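
As a small illustration of the per-channel addressing described in the comment
above: channel N's copy of a register sits at DMA_CH_BASE + N * DMA_CH_INC plus
the register's own offset. The helper name below is illustrative, not part of
the driver.

#include <stdint.h>

#define SKETCH_DMA_CH_BASE	0x3100
#define SKETCH_DMA_CH_INC	0x80
#define SKETCH_DMA_CH_TCR	0x04

static inline uint32_t
sketch_dma_ch_reg_addr(unsigned int channel, uint32_t reg)
{
	return SKETCH_DMA_CH_BASE + channel * SKETCH_DMA_CH_INC + reg;
}

/* e.g. sketch_dma_ch_reg_addr(2, SKETCH_DMA_CH_TCR)
 *	== 0x3100 + 2 * 0x80 + 0x04 == 0x3204
 */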

[dpdk-dev] [PATCH v5 05/18] net/axgbe: add phy initialization and related apis

2018-04-06 Thread Ravi Kumar
Added device PHY initialization, read/write and other
maintenance APIs to be used within the PMD.

Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/Makefile |   3 +
 drivers/net/axgbe/axgbe_dev.c  | 188 ++
 drivers/net/axgbe/axgbe_ethdev.c   |  11 +
 drivers/net/axgbe/axgbe_ethdev.h   | 172 ++
 drivers/net/axgbe/axgbe_i2c.c  | 331 ++
 drivers/net/axgbe/axgbe_mdio.c |  81 +
 drivers/net/axgbe/axgbe_phy_impl.c | 677 +
 7 files changed, 1463 insertions(+)
 create mode 100644 drivers/net/axgbe/axgbe_i2c.c
 create mode 100644 drivers/net/axgbe/axgbe_mdio.c
 create mode 100644 drivers/net/axgbe/axgbe_phy_impl.c

diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile
index 6b8da12..efd9f0f 100644
--- a/drivers/net/axgbe/Makefile
+++ b/drivers/net/axgbe/Makefile
@@ -24,5 +24,8 @@ LDLIBS += -lrte_ethdev
 #
 SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_ethdev.c
 SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_mdio.c
+SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_phy_impl.c
+SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_i2c.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/axgbe/axgbe_dev.c b/drivers/net/axgbe/axgbe_dev.c
index 70a796b..4a45ee6 100644
--- a/drivers/net/axgbe/axgbe_dev.c
+++ b/drivers/net/axgbe/axgbe_dev.c
@@ -7,6 +7,187 @@
 #include "axgbe_common.h"
 #include "axgbe_phy.h"
 
+/* query busy bit */
+static int mdio_complete(struct axgbe_port *pdata)
+{
+   if (!AXGMAC_IOREAD_BITS(pdata, MAC_MDIOSCCDR, BUSY))
+   return 1;
+
+   return 0;
+}
+
+static int axgbe_write_ext_mii_regs(struct axgbe_port *pdata, int addr,
+   int reg, u16 val)
+{
+   unsigned int mdio_sca, mdio_sccd;
+   uint64_t timeout;
+
+   mdio_sca = 0;
+   AXGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg);
+   AXGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr);
+   AXGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca);
+
+   mdio_sccd = 0;
+   AXGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, DATA, val);
+   AXGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 1);
+   AXGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1);
+   AXGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd);
+
+   timeout = rte_get_timer_cycles() + rte_get_timer_hz();
+   while (time_before(rte_get_timer_cycles(), timeout)) {
+   rte_delay_us(100);
+   if (mdio_complete(pdata))
+   return 0;
+   }
+
+   PMD_DRV_LOG(ERR, "Mdio write operation timed out\n");
+   return -ETIMEDOUT;
+}
+
+static int axgbe_read_ext_mii_regs(struct axgbe_port *pdata, int addr,
+  int reg)
+{
+   unsigned int mdio_sca, mdio_sccd;
+   uint64_t timeout;
+
+   mdio_sca = 0;
+   AXGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg);
+   AXGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr);
+   AXGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca);
+
+   mdio_sccd = 0;
+   AXGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 3);
+   AXGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1);
+   AXGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd);
+
+   timeout = rte_get_timer_cycles() + rte_get_timer_hz();
+
+   while (time_before(rte_get_timer_cycles(), timeout)) {
+   rte_delay_us(100);
+   if (mdio_complete(pdata))
+   goto success;
+   }
+
+   PMD_DRV_LOG(ERR, "Mdio read operation timed out\n");
+   return -ETIMEDOUT;
+
+success:
+   return AXGMAC_IOREAD_BITS(pdata, MAC_MDIOSCCDR, DATA);
+}
+
+static int axgbe_set_ext_mii_mode(struct axgbe_port *pdata, unsigned int port,
+ enum axgbe_mdio_mode mode)
+{
+   unsigned int reg_val = 0;
+
+   switch (mode) {
+   case AXGBE_MDIO_MODE_CL22:
+   if (port > AXGMAC_MAX_C22_PORT)
+   return -EINVAL;
+   reg_val |= (1 << port);
+   break;
+   case AXGBE_MDIO_MODE_CL45:
+   break;
+   default:
+   return -EINVAL;
+   }
+   AXGMAC_IOWRITE(pdata, MAC_MDIOCL22R, reg_val);
+
+   return 0;
+}
+
+static int axgbe_read_mmd_regs_v2(struct axgbe_port *pdata,
+ int prtad __rte_unused, int mmd_reg)
+{
+   unsigned int mmd_address, index, offset;
+   int mmd_data;
+
+   if (mmd_reg & MII_ADDR_C45)
+   mmd_address = mmd_reg & ~MII_ADDR_C45;
+   else
+   mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff);
+
+   /* The PCS registers are accessed using mmio. The underlying
+* management interface uses indirect addressing to access the MMD
+* register sets. This requires accessing of the PCS register in two
+* phases, an address phase and a data phase.
+*
+* The mmio interface is based on 16-bit offsets and values. 
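
The comment above is cut off by the archive; as a hedged sketch of the general
two-phase scheme it describes (the window-select offset, window base and window
size below are illustrative, not the axgbe register map): the address phase
selects which block of MMD registers is visible through the mmio window, and
the data phase then accesses the 16-bit register inside that window.

#include <stdint.h>

#define SKETCH_WINDOW_SELECT	0x00	/* window-select register */
#define SKETCH_WINDOW_BASE	0x20	/* start of the data window */
#define SKETCH_WINDOW_SIZE	0x100	/* window size, in 16-bit registers */

static uint16_t
sketch_pcs_indirect_read(volatile uint16_t *pcs_regs, uint32_t mmd_address)
{
	uint32_t index = mmd_address / SKETCH_WINDOW_SIZE;	/* address phase */
	uint32_t offset = mmd_address % SKETCH_WINDOW_SIZE;	/* data phase */

	pcs_regs[SKETCH_WINDOW_SELECT] = (uint16_t)index;	/* select window */
	return pcs_regs[SKETCH_WINDOW_BASE + offset];		/* read within it */
}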

[dpdk-dev] [PATCH v5 08/18] net/axgbe: add transmit and receive queue setup apis

2018-04-06 Thread Ravi Kumar
Add support for the data path setup APIs defined for PMDs.

Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/Makefile   |   1 +
 drivers/net/axgbe/axgbe_ethdev.c |  82 +
 drivers/net/axgbe/axgbe_ethdev.h |  36 ++
 drivers/net/axgbe/axgbe_rxtx.c   | 241 +++
 drivers/net/axgbe/axgbe_rxtx.h   | 167 +++
 5 files changed, 527 insertions(+)
 create mode 100644 drivers/net/axgbe/axgbe_rxtx.c
 create mode 100644 drivers/net/axgbe/axgbe_rxtx.h

diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile
index efd9f0f..e1e5f53 100644
--- a/drivers/net/axgbe/Makefile
+++ b/drivers/net/axgbe/Makefile
@@ -27,5 +27,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_dev.c
 SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_mdio.c
 SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_phy_impl.c
 SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_i2c.c
+SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_rxtx.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index 7d2efa3..3b5f1ae 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -3,6 +3,7 @@
  *   Copyright(c) 2018 Synopsys, Inc. All rights reserved.
  */
 
+#include "axgbe_rxtx.h"
 #include "axgbe_ethdev.h"
 #include "axgbe_common.h"
 #include "axgbe_phy.h"
@@ -10,6 +11,9 @@
 static int eth_axgbe_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_axgbe_dev_uninit(struct rte_eth_dev *eth_dev);
 static void axgbe_dev_interrupt_handler(void *param);
+static void axgbe_dev_close(struct rte_eth_dev *dev);
+static void axgbe_dev_info_get(struct rte_eth_dev *dev,
+  struct rte_eth_dev_info *dev_info);
 
 /* The set of PCI devices this driver supports */
 #define AMD_PCI_VENDOR_ID   0x1022
@@ -47,6 +51,27 @@ static struct axgbe_version_data axgbe_v2b = {
.i2c_support= 1,
 };
 
+static const struct rte_eth_desc_lim rx_desc_lim = {
+   .nb_max = AXGBE_MAX_RING_DESC,
+   .nb_min = AXGBE_MIN_RING_DESC,
+   .nb_align = 8,
+};
+
+static const struct rte_eth_desc_lim tx_desc_lim = {
+   .nb_max = AXGBE_MAX_RING_DESC,
+   .nb_min = AXGBE_MIN_RING_DESC,
+   .nb_align = 8,
+};
+
+static const struct eth_dev_ops axgbe_eth_dev_ops = {
+   .dev_close= axgbe_dev_close,
+   .dev_infos_get= axgbe_dev_info_get,
+   .rx_queue_setup   = axgbe_dev_rx_queue_setup,
+   .rx_queue_release = axgbe_dev_rx_queue_release,
+   .tx_queue_setup   = axgbe_dev_tx_queue_setup,
+   .tx_queue_release = axgbe_dev_tx_queue_release,
+};
+
 /*
  * Interrupt handler triggered by NIC  for handling
  * specific interrupt.
@@ -71,6 +96,57 @@ axgbe_dev_interrupt_handler(void *param)
rte_intr_enable(&pdata->pci_dev->intr_handle);
 }
 
+/* Clear all resources like TX/RX queues. */
+static void
+axgbe_dev_close(struct rte_eth_dev *dev)
+{
+   axgbe_dev_clear_queues(dev);
+}
+
+static void
+axgbe_dev_info_get(struct rte_eth_dev *dev,
+  struct rte_eth_dev_info *dev_info)
+{
+   struct axgbe_port *pdata = dev->data->dev_private;
+
+   dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+   dev_info->max_rx_queues = pdata->tx_ring_count;
+   dev_info->max_tx_queues = pdata->rx_ring_count;
+   dev_info->min_rx_bufsize = AXGBE_RX_MIN_BUF_SIZE;
+   dev_info->max_rx_pktlen = AXGBE_RX_MAX_BUF_SIZE;
+   dev_info->max_mac_addrs = AXGBE_MAX_MAC_ADDRS;
+   dev_info->speed_capa =  ETH_LINK_SPEED_10G;
+
+   dev_info->rx_offload_capa =
+   DEV_RX_OFFLOAD_IPV4_CKSUM |
+   DEV_RX_OFFLOAD_UDP_CKSUM  |
+   DEV_RX_OFFLOAD_TCP_CKSUM;
+
+   dev_info->tx_offload_capa =
+   DEV_TX_OFFLOAD_IPV4_CKSUM  |
+   DEV_TX_OFFLOAD_UDP_CKSUM   |
+   DEV_TX_OFFLOAD_TCP_CKSUM;
+
+   if (pdata->hw_feat.rss) {
+   dev_info->flow_type_rss_offloads = AXGBE_RSS_OFFLOAD;
+   dev_info->reta_size = pdata->hw_feat.hash_table_size;
+   dev_info->hash_key_size =  AXGBE_RSS_HASH_KEY_SIZE;
+   }
+
+   dev_info->rx_desc_lim = rx_desc_lim;
+   dev_info->tx_desc_lim = tx_desc_lim;
+
+   dev_info->default_rxconf = (struct rte_eth_rxconf) {
+   .rx_free_thresh = AXGBE_RX_FREE_THRESH,
+   };
+
+   dev_info->default_txconf = (struct rte_eth_txconf) {
+   .tx_free_thresh = AXGBE_TX_FREE_THRESH,
+   .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
+   ETH_TXQ_FLAGS_NOOFFLOADS,
+   };
+}
+
 static void axgbe_get_all_hw_features(struct axgbe_port *pdata)
 {
unsigned int mac_hfr0, mac_hfr1, mac_hfr2;
@@ -250,6 +326,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
uint32_t reg, mac_lo, mac_hi;
int ret;
 
+   eth_dev->dev_ops = &axgbe_eth_dev_ops;
+
/*
 * For secondary processes, we 

[dpdk-dev] [PATCH v5 06/18] net/axgbe: add phy programming apis

2018-04-06 Thread Ravi Kumar
Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/axgbe_dev.c  |   27 +
 drivers/net/axgbe/axgbe_mdio.c |  963 +
 drivers/net/axgbe/axgbe_phy_impl.c | 1397 
 3 files changed, 2387 insertions(+)

diff --git a/drivers/net/axgbe/axgbe_dev.c b/drivers/net/axgbe/axgbe_dev.c
index 4a45ee6..82fe7bf 100644
--- a/drivers/net/axgbe/axgbe_dev.c
+++ b/drivers/net/axgbe/axgbe_dev.c
@@ -188,6 +188,30 @@ static void axgbe_write_mmd_regs(struct axgbe_port *pdata, 
int prtad,
}
 }
 
+static int axgbe_set_speed(struct axgbe_port *pdata, int speed)
+{
+   unsigned int ss;
+
+   switch (speed) {
+   case SPEED_1000:
+   ss = 0x03;
+   break;
+   case SPEED_2500:
+   ss = 0x02;
+   break;
+   case SPEED_10000:
+   ss = 0x00;
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   if (AXGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) != ss)
+   AXGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, ss);
+
+   return 0;
+}
+
 static int __axgbe_exit(struct axgbe_port *pdata)
 {
unsigned int count = 2000;
@@ -224,9 +248,12 @@ void axgbe_init_function_ptrs_dev(struct axgbe_hw_if 
*hw_if)
 {
hw_if->exit = axgbe_exit;
 
+
hw_if->read_mmd_regs = axgbe_read_mmd_regs;
hw_if->write_mmd_regs = axgbe_write_mmd_regs;
 
+   hw_if->set_speed = axgbe_set_speed;
+
hw_if->set_ext_mii_mode = axgbe_set_ext_mii_mode;
hw_if->read_ext_mii_regs = axgbe_read_ext_mii_regs;
hw_if->write_ext_mii_regs = axgbe_write_ext_mii_regs;
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 691ff79..914f34f 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -7,6 +7,963 @@
 #include "axgbe_common.h"
 #include "axgbe_phy.h"
 
+static void axgbe_an37_clear_interrupts(struct axgbe_port *pdata)
+{
+   int reg;
+
+   reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT);
+   reg &= ~AXGBE_AN_CL37_INT_MASK;
+   XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT, reg);
+}
+
+static void axgbe_an37_disable_interrupts(struct axgbe_port *pdata)
+{
+   int reg;
+
+   reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL);
+   reg &= ~AXGBE_AN_CL37_INT_MASK;
+   XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg);
+
+   reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL);
+   reg &= ~AXGBE_PCS_CL37_BP;
+   XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL, reg);
+}
+
+static void axgbe_an73_clear_interrupts(struct axgbe_port *pdata)
+{
+   XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+}
+
+static void axgbe_an73_disable_interrupts(struct axgbe_port *pdata)
+{
+   XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+}
+
+static void axgbe_an73_enable_interrupts(struct axgbe_port *pdata)
+{
+   XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK,
+   AXGBE_AN_CL73_INT_MASK);
+}
+
+static void axgbe_an_enable_interrupts(struct axgbe_port *pdata)
+{
+   switch (pdata->an_mode) {
+   case AXGBE_AN_MODE_CL73:
+   case AXGBE_AN_MODE_CL73_REDRV:
+   axgbe_an73_enable_interrupts(pdata);
+   break;
+   case AXGBE_AN_MODE_CL37:
+   case AXGBE_AN_MODE_CL37_SGMII:
+   PMD_DRV_LOG(ERR, "Unsupported AN_MOD_37\n");
+   break;
+   default:
+   break;
+   }
+}
+
+static void axgbe_an_clear_interrupts_all(struct axgbe_port *pdata)
+{
+   axgbe_an73_clear_interrupts(pdata);
+   axgbe_an37_clear_interrupts(pdata);
+}
+
+static void axgbe_an73_enable_kr_training(struct axgbe_port *pdata)
+{
+   unsigned int reg;
+
+   reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+
+   reg |= AXGBE_KR_TRAINING_ENABLE;
+   XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+}
+
+static void axgbe_an73_disable_kr_training(struct axgbe_port *pdata)
+{
+   unsigned int reg;
+
+   reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+
+   reg &= ~AXGBE_KR_TRAINING_ENABLE;
+   XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+}
+
+static void axgbe_kr_mode(struct axgbe_port *pdata)
+{
+   /* Enable KR training */
+   axgbe_an73_enable_kr_training(pdata);
+
+   /* Set MAC to 10G speed */
+   pdata->hw_if.set_speed(pdata, SPEED_10000);
+
+   /* Call PHY implementation support to complete rate change */
+   pdata->phy_if.phy_impl.set_mode(pdata, AXGBE_MODE_KR);
+}
+
+static void axgbe_kx_2500_mode(struct axgbe_port *pdata)
+{
+   /* Disable KR training */
+   axgbe_an73_disable_kr_training(pdata);
+
+   /* Set MAC to 2.5G speed */
+   pdata->hw_if.set_speed(pdata, SPEED_2500);
+
+   /* Call PHY implementation support to complete rate change */
+   pdata->phy_if.phy_impl.set_mode(pdata, 

[dpdk-dev] [PATCH v5 07/18] net/axgbe: add interrupt handler for autonegotiation

2018-04-06 Thread Ravi Kumar
Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/axgbe_ethdev.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index d4cf279..7d2efa3 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -9,6 +9,7 @@
 
 static int eth_axgbe_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_axgbe_dev_uninit(struct rte_eth_dev *eth_dev);
+static void axgbe_dev_interrupt_handler(void *param);
 
 /* The set of PCI devices this driver supports */
 #define AMD_PCI_VENDOR_ID   0x1022
@@ -46,6 +47,30 @@ static struct axgbe_version_data axgbe_v2b = {
.i2c_support= 1,
 };
 
+/*
+ * Interrupt handler triggered by NIC  for handling
+ * specific interrupt.
+ *
+ * @param handle
+ *  Pointer to interrupt handle.
+ * @param param
+ *  The address of parameter (struct rte_eth_dev *) regsitered before.
+ *
+ * @return
+ *  void
+ */
+static void
+axgbe_dev_interrupt_handler(void *param)
+{
+   struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+   struct axgbe_port *pdata = dev->data->dev_private;
+
+   pdata->phy_if.an_isr(pdata);
+
+   /* Enable interrupts since disabled after generation*/
+   rte_intr_enable(&pdata->pci_dev->intr_handle);
+}
+
 static void axgbe_get_all_hw_features(struct axgbe_port *pdata)
 {
unsigned int mac_hfr0, mac_hfr1, mac_hfr2;
@@ -347,6 +372,9 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
return ret;
}
 
+   rte_intr_callback_register(&pci_dev->intr_handle,
+  axgbe_dev_interrupt_handler,
+  (void *)eth_dev);
PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
 eth_dev->data->port_id, pci_dev->id.vendor_id,
 pci_dev->id.device_id);
@@ -357,15 +385,24 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
 static int
 eth_axgbe_dev_uninit(struct rte_eth_dev *eth_dev)
 {
+   struct rte_pci_device *pci_dev;
+
PMD_INIT_FUNC_TRACE();
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
 
+   pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
/*Free macaddres*/
rte_free(eth_dev->data->mac_addrs);
eth_dev->data->mac_addrs = NULL;
 
+   /* disable uio intr before callback unregister */
+   rte_intr_disable(&pci_dev->intr_handle);
+   rte_intr_callback_unregister(&pci_dev->intr_handle,
+axgbe_dev_interrupt_handler,
+(void *)eth_dev);
+
return 0;
 }
 
-- 
2.7.4



[dpdk-dev] [PATCH v5 12/18] net/axgbe: add link status update

2018-04-06 Thread Ravi Kumar
Added support to update device link status atomically.

Signed-off-by: Ravi Kumar 
---
 doc/guides/nics/features/axgbe.ini |  1 +
 drivers/net/axgbe/axgbe_ethdev.c   | 30 ++
 2 files changed, 31 insertions(+)

diff --git a/doc/guides/nics/features/axgbe.ini 
b/doc/guides/nics/features/axgbe.ini
index f644128..2dbff18 100644
--- a/doc/guides/nics/features/axgbe.ini
+++ b/doc/guides/nics/features/axgbe.ini
@@ -5,6 +5,7 @@
 ;
 [Features]
 Speed capabilities   = Y
+Link status  = Y
 Jumbo frame  = Y
 RSS hash = Y
 CRC offload  = Y
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index a293058..3123572 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -15,6 +15,8 @@ static int  axgbe_dev_start(struct rte_eth_dev *dev);
 static void axgbe_dev_stop(struct rte_eth_dev *dev);
 static void axgbe_dev_interrupt_handler(void *param);
 static void axgbe_dev_close(struct rte_eth_dev *dev);
+static int axgbe_dev_link_update(struct rte_eth_dev *dev,
+int wait_to_complete);
 static void axgbe_dev_info_get(struct rte_eth_dev *dev,
   struct rte_eth_dev_info *dev_info);
 
@@ -71,6 +73,7 @@ static const struct eth_dev_ops axgbe_eth_dev_ops = {
.dev_start= axgbe_dev_start,
.dev_stop = axgbe_dev_stop,
.dev_close= axgbe_dev_close,
+   .link_update  = axgbe_dev_link_update,
.dev_infos_get= axgbe_dev_info_get,
.rx_queue_setup   = axgbe_dev_rx_queue_setup,
.rx_queue_release = axgbe_dev_rx_queue_release,
@@ -216,6 +219,33 @@ axgbe_dev_close(struct rte_eth_dev *dev)
axgbe_dev_clear_queues(dev);
 }
 
+/* return 0 means link status changed, -1 means not changed */
+static int
+axgbe_dev_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete __rte_unused)
+{
+   struct axgbe_port *pdata = dev->data->dev_private;
+   struct rte_eth_link link;
+   int ret = 0;
+
+   PMD_INIT_FUNC_TRACE();
+   rte_delay_ms(800);
+
+   pdata->phy_if.phy_status(pdata);
+
+   memset(&link, 0 , sizeof(struct rte_eth_link));
+   link.link_duplex = pdata->phy.duplex;
+   link.link_status = pdata->phy_link;
+   link.link_speed = pdata->phy_speed;
+   link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+ ETH_LINK_SPEED_FIXED);
+   ret = rte_eth_linkstatus_set(dev, &link);
+   if (ret == -1)
+   PMD_DRV_LOG(ERR, "No change in link status\n");
+
+   return ret;
+}
+
 static void
 axgbe_dev_info_get(struct rte_eth_dev *dev,
   struct rte_eth_dev_info *dev_info)
-- 
2.7.4
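
As a side note, the atomically updated link state above is consumed through
the generic ethdev API. A minimal application-side sketch (not part of the
patch; the helper name and the retry/delay values are arbitrary) using
rte_eth_link_get_nowait():

#include <stdio.h>
#include <string.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>

/* Poll the port until it reports link up or the retry budget runs out. */
static void
wait_for_link_up(uint16_t port_id)
{
	struct rte_eth_link link;
	int i;

	for (i = 0; i < 90; i++) {
		memset(&link, 0, sizeof(link));
		rte_eth_link_get_nowait(port_id, &link); /* non-blocking query */
		if (link.link_status == ETH_LINK_UP) {
			printf("Port %u up, %u Mbps, %s-duplex\n",
			       port_id, link.link_speed,
			       link.link_duplex == ETH_LINK_FULL_DUPLEX ?
			       "full" : "half");
			return;
		}
		rte_delay_ms(100);
	}
	printf("Port %u link is still down\n", port_id);
}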



[dpdk-dev] [PATCH v5 09/18] net/axgbe: add DMA programming and dev start and stop apis

2018-04-06 Thread Ravi Kumar
This patch adds support for programming the DMA engine and implements
the DPDK device start and stop APIs.

Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/axgbe_dev.c| 844 +++
 drivers/net/axgbe/axgbe_ethdev.c |  96 +
 2 files changed, 940 insertions(+)

diff --git a/drivers/net/axgbe/axgbe_dev.c b/drivers/net/axgbe/axgbe_dev.c
index 82fe7bf..d05f9c8 100644
--- a/drivers/net/axgbe/axgbe_dev.c
+++ b/drivers/net/axgbe/axgbe_dev.c
@@ -6,6 +6,13 @@
 #include "axgbe_ethdev.h"
 #include "axgbe_common.h"
 #include "axgbe_phy.h"
+#include "axgbe_rxtx.h"
+
+static inline unsigned int axgbe_get_max_frame(struct axgbe_port *pdata)
+{
+   return pdata->eth_dev->data->mtu + ETHER_HDR_LEN +
+   ETHER_CRC_LEN + VLAN_HLEN;
+}
 
 /* query busy bit */
 static int mdio_complete(struct axgbe_port *pdata)
@@ -212,6 +219,191 @@ static int axgbe_set_speed(struct axgbe_port *pdata, int 
speed)
return 0;
 }
 
+static int axgbe_disable_tx_flow_control(struct axgbe_port *pdata)
+{
+   unsigned int max_q_count, q_count;
+   unsigned int reg, reg_val;
+   unsigned int i;
+
+   /* Clear MTL flow control */
+   for (i = 0; i < pdata->rx_q_count; i++)
+   AXGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 0);
+
+   /* Clear MAC flow control */
+   max_q_count = AXGMAC_MAX_FLOW_CONTROL_QUEUES;
+   q_count = RTE_MIN(pdata->tx_q_count,
+   max_q_count);
+   reg = MAC_Q0TFCR;
+   for (i = 0; i < q_count; i++) {
+   reg_val = AXGMAC_IOREAD(pdata, reg);
+   AXGMAC_SET_BITS(reg_val, MAC_Q0TFCR, TFE, 0);
+   AXGMAC_IOWRITE(pdata, reg, reg_val);
+
+   reg += MAC_QTFCR_INC;
+   }
+
+   return 0;
+}
+
+static int axgbe_enable_tx_flow_control(struct axgbe_port *pdata)
+{
+   unsigned int max_q_count, q_count;
+   unsigned int reg, reg_val;
+   unsigned int i;
+
+   /* Set MTL flow control */
+   for (i = 0; i < pdata->rx_q_count; i++) {
+   unsigned int ehfc = 0;
+
+   /* Flow control thresholds are established */
+   if (pdata->rx_rfd[i])
+   ehfc = 1;
+
+   AXGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, ehfc);
+   }
+
+   /* Set MAC flow control */
+   max_q_count = AXGMAC_MAX_FLOW_CONTROL_QUEUES;
+   q_count = RTE_MIN(pdata->tx_q_count,
+   max_q_count);
+   reg = MAC_Q0TFCR;
+   for (i = 0; i < q_count; i++) {
+   reg_val = AXGMAC_IOREAD(pdata, reg);
+
+   /* Enable transmit flow control */
+   AXGMAC_SET_BITS(reg_val, MAC_Q0TFCR, TFE, 1);
+   /* Set pause time */
+   AXGMAC_SET_BITS(reg_val, MAC_Q0TFCR, PT, 0xffff);
+
+   AXGMAC_IOWRITE(pdata, reg, reg_val);
+
+   reg += MAC_QTFCR_INC;
+   }
+
+   return 0;
+}
+
+static int axgbe_disable_rx_flow_control(struct axgbe_port *pdata)
+{
+   AXGMAC_IOWRITE_BITS(pdata, MAC_RFCR, RFE, 0);
+
+   return 0;
+}
+
+static int axgbe_enable_rx_flow_control(struct axgbe_port *pdata)
+{
+   AXGMAC_IOWRITE_BITS(pdata, MAC_RFCR, RFE, 1);
+
+   return 0;
+}
+
+static int axgbe_config_tx_flow_control(struct axgbe_port *pdata)
+{
+   if (pdata->tx_pause)
+   axgbe_enable_tx_flow_control(pdata);
+   else
+   axgbe_disable_tx_flow_control(pdata);
+
+   return 0;
+}
+
+static int axgbe_config_rx_flow_control(struct axgbe_port *pdata)
+{
+   if (pdata->rx_pause)
+   axgbe_enable_rx_flow_control(pdata);
+   else
+   axgbe_disable_rx_flow_control(pdata);
+
+   return 0;
+}
+
+static void axgbe_config_flow_control(struct axgbe_port *pdata)
+{
+   axgbe_config_tx_flow_control(pdata);
+   axgbe_config_rx_flow_control(pdata);
+
+   AXGMAC_IOWRITE_BITS(pdata, MAC_RFCR, PFCE, 0);
+}
+
+static void axgbe_queue_flow_control_threshold(struct axgbe_port *pdata,
+  unsigned int queue,
+  unsigned int q_fifo_size)
+{
+   unsigned int frame_fifo_size;
+   unsigned int rfa, rfd;
+
+   frame_fifo_size = AXGMAC_FLOW_CONTROL_ALIGN(axgbe_get_max_frame(pdata));
+
+   /* This path deals with just maximum frame sizes which are
+* limited to a jumbo frame of 9,000 (plus headers, etc.)
+* so we can never exceed the maximum allowable RFA/RFD
+* values.
+*/
+   if (q_fifo_size <= 2048) {
+   /* rx_rfd to zero to signal no flow control */
+   pdata->rx_rfa[queue] = 0;
+   pdata->rx_rfd[queue] = 0;
+   return;
+   }
+
+   if (q_fifo_size <= 4096) {
+   /* Between 2048 and 4096 */
+   pdata->rx_rfa[queue] = 0;   /* Full - 1024 bytes */
+   pdata->rx_rfd[queue] = 1;   /* Full - 1536 bytes */
+   return;
+   }

[dpdk-dev] [PATCH v5 13/18] net/axgbe: add configure flow control while link adjustment

2018-04-06 Thread Ravi Kumar
Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/axgbe_mdio.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 914f34f..2296de7 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -674,6 +674,19 @@ static void axgbe_an_init(struct axgbe_port *pdata)
 static void axgbe_phy_adjust_link(struct axgbe_port *pdata)
 {
if (pdata->phy.link) {
+   /* Flow control support */
+   pdata->pause_autoneg = pdata->phy.pause_autoneg;
+
+   if (pdata->tx_pause != (unsigned int)pdata->phy.tx_pause) {
+   pdata->hw_if.config_tx_flow_control(pdata);
+   pdata->tx_pause = pdata->phy.tx_pause;
+   }
+
+   if (pdata->rx_pause != (unsigned int)pdata->phy.rx_pause) {
+   pdata->hw_if.config_rx_flow_control(pdata);
+   pdata->rx_pause = pdata->phy.rx_pause;
+   }
+
/* Speed support */
if (pdata->phy_speed != pdata->phy.speed)
pdata->phy_speed = pdata->phy.speed;
-- 
2.7.4
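
The patch above ties flow-control reconfiguration to the PHY link-adjust
path. At the ethdev level the equivalent, application-visible knob is
rte_eth_dev_flow_ctrl_set(); the sketch below is a generic illustration of
that API (it is not taken from this driver, and whether the request is
honoured depends on the PMD providing a flow_ctrl_set callback):

#include <string.h>
#include <rte_ethdev.h>

/* Request full (Rx+Tx) pause-frame flow control on a port. */
static int
enable_pause_frames(uint16_t port_id)
{
	struct rte_eth_fc_conf fc_conf;

	memset(&fc_conf, 0, sizeof(fc_conf));
	fc_conf.mode = RTE_FC_FULL;     /* generate and honour pause frames */
	fc_conf.pause_time = 0xffff;    /* pause quanta advertised in frames */
	fc_conf.autoneg = 1;            /* let link autonegotiation decide */

	return rte_eth_dev_flow_ctrl_set(port_id, &fc_conf);
}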



[dpdk-dev] [PATCH v5 11/18] doc: add documents for AMD axgbe Ethernet PMD

2018-04-06 Thread Ravi Kumar
Signed-off-by: Ravi Kumar 
---
 doc/guides/nics/axgbe.rst  | 86 ++
 doc/guides/nics/features/axgbe.ini | 14 +++
 doc/guides/nics/index.rst  |  1 +
 3 files changed, 101 insertions(+)
 create mode 100644 doc/guides/nics/axgbe.rst
 create mode 100644 doc/guides/nics/features/axgbe.ini

diff --git a/doc/guides/nics/axgbe.rst b/doc/guides/nics/axgbe.rst
new file mode 100644
index 000..b6812c2
--- /dev/null
+++ b/doc/guides/nics/axgbe.rst
@@ -0,0 +1,86 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
+
+AXGBE Poll Mode Driver
+======================
+
+The AXGBE poll mode driver library (**librte_pmd_axgbe**) implements support
+for the AMD 10 Gbps family of adapters. It is compiled and tested on standard
+Linux distributions such as Ubuntu.
+
+Detailed information about SoCs that use these devices can be found here:
+
+- `AMD EPYC™ EMBEDDED 3000 family 
`_.
+
+
+Supported Features
+------------------
+
+AXGBE PMD has support for:
+
+- Base L2 features
+- TSS (Transmit Side Scaling)
+- RSS (Receive Side Scaling)
+- Checksum offload
+- Jumbo frames up to 9K
+
+
+Configuration Information
+-------------------------
+
+The following options can be modified in the ``.config`` file. Please note that
+enabling debugging options may affect system performance.
+
+- ``CONFIG_RTE_LIBRTE_AXGBE_PMD`` (default **y**)
+
+  Toggle compilation of axgbe PMD.
+
+- ``CONFIG_RTE_LIBRTE_AXGBE_PMD_DEBUG`` (default **n**)
+
+  Toggle display for PMD debug related messages.
+
+
+Building DPDK
+-------------
+
+See the :ref:`DPDK Getting Started Guide for Linux ` for
+instructions on how to build DPDK.
+
+By default the AXGBE PMD library will be built into the DPDK library.
+
+For configuring and using UIO frameworks, please also refer to :ref:`the
+documentation that comes with the DPDK suite `.
+
+
+Prerequisites and Pre-conditions
+--------------------------------
+- Prepare the system as recommended by the DPDK suite.
+
+- Bind the intended AMD device to the ``igb_uio`` or ``vfio-pci`` module.
+
+The system is now ready to run DPDK applications.
+
+
+Usage Example
+-------------
+
+Refer to the document :ref:`compiling and testing a PMD for a NIC 
`
+for details.
+
+Example output:
+
+.. code-block:: console
+
+   [...]
+   EAL: PCI device :02:00.4 on NUMA socket 0
+   EAL:   probe driver: 1022:1458 net_axgbe
+   Interactive-mode selected
+   USER1: create a new mbuf pool : n=171456, size=2176, 
socket=0
+   USER1: create a new mbuf pool : n=171456, size=2176, 
socket=1
+   USER1: create a new mbuf pool : n=171456, size=2176, 
socket=2
+   USER1: create a new mbuf pool : n=171456, size=2176, 
socket=3
+   Configuring Port 0 (socket 0)
+   Port 0: 00:00:1A:1C:6A:17
+   Checking link statuses...
   Port 0 Link Up - speed 10000 Mbps - full-duplex
+   Done
+   testpmd>
diff --git a/doc/guides/nics/features/axgbe.ini 
b/doc/guides/nics/features/axgbe.ini
new file mode 100644
index 000..f644128
--- /dev/null
+++ b/doc/guides/nics/features/axgbe.ini
@@ -0,0 +1,14 @@
+;
+; Supported features of the 'axgbe' network poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Speed capabilities   = Y
+Jumbo frame  = Y
+RSS hash = Y
+CRC offload  = Y
+L3 checksum offload  = Y
+L4 checksum offload  = Y
+Linux UIO= Y
+x86-64   = Y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 51c453d..ea9110c 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -13,6 +13,7 @@ Network Interface Controller Drivers
 build_and_test
 ark
 avp
+axgbe
 bnx2x
 bnxt
 cxgbe
-- 
2.7.4



[dpdk-dev] [PATCH v5 10/18] net/axgbe: add transmit and receive data path apis

2018-04-06 Thread Ravi Kumar
Add a scalar implementation for the Rx data path.
Add scalar and vector (SSE) implementations for the Tx data path.

Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/Makefile |   3 +
 drivers/net/axgbe/axgbe_ethdev.c   |  22 +-
 drivers/net/axgbe/axgbe_rxtx.c | 433 +
 drivers/net/axgbe/axgbe_rxtx.h |  19 ++
 drivers/net/axgbe/axgbe_rxtx_vec_sse.c |  93 +++
 5 files changed, 569 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/axgbe/axgbe_rxtx_vec_sse.c

diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile
index e1e5f53..72215ae 100644
--- a/drivers/net/axgbe/Makefile
+++ b/drivers/net/axgbe/Makefile
@@ -28,5 +28,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_mdio.c
 SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_phy_impl.c
 SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_i2c.c
 SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_rxtx.c
+ifeq ($(CONFIG_RTE_ARCH_X86),y)
+SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_rxtx_vec_sse.c
+endif
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index f8cfbd8..a293058 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -102,9 +102,22 @@ axgbe_dev_interrupt_handler(void *param)
 {
struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
struct axgbe_port *pdata = dev->data->dev_private;
+   unsigned int dma_isr, dma_ch_isr;
 
pdata->phy_if.an_isr(pdata);
-
+   /*DMA related interrupts*/
+   dma_isr = AXGMAC_IOREAD(pdata, DMA_ISR);
+   if (dma_isr) {
+   if (dma_isr & 1) {
+   dma_ch_isr =
+   AXGMAC_DMA_IOREAD((struct axgbe_rx_queue *)
+ pdata->rx_queues[0],
+ DMA_CH_SR);
+   AXGMAC_DMA_IOWRITE((struct axgbe_rx_queue *)
+  pdata->rx_queues[0],
+  DMA_CH_SR, dma_ch_isr);
+   }
+   }
/* Enable interrupts since disabled after generation*/
rte_intr_enable(&pdata->pci_dev->intr_handle);
 }
@@ -166,6 +179,8 @@ axgbe_dev_start(struct rte_eth_dev *dev)
 
/* phy start*/
pdata->phy_if.phy_start(pdata);
+   axgbe_dev_enable_tx(dev);
+   axgbe_dev_enable_rx(dev);
 
axgbe_clear_bit(AXGBE_STOPPED, &pdata->dev_state);
axgbe_clear_bit(AXGBE_DOWN, &pdata->dev_state);
@@ -185,6 +200,8 @@ axgbe_dev_stop(struct rte_eth_dev *dev)
return;
 
axgbe_set_bit(AXGBE_STOPPED, &pdata->dev_state);
+   axgbe_dev_disable_tx(dev);
+   axgbe_dev_disable_rx(dev);
 
pdata->phy_if.phy_stop(pdata);
pdata->hw_if.exit(pdata);
@@ -423,6 +440,7 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
int ret;
 
eth_dev->dev_ops = &axgbe_eth_dev_ops;
+   eth_dev->rx_pkt_burst = &axgbe_recv_pkts;
 
/*
 * For secondary processes, we don't initialise any further as primary
@@ -573,6 +591,8 @@ eth_axgbe_dev_uninit(struct rte_eth_dev *eth_dev)
rte_free(eth_dev->data->mac_addrs);
eth_dev->data->mac_addrs = NULL;
eth_dev->dev_ops = NULL;
+   eth_dev->rx_pkt_burst = NULL;
+   eth_dev->tx_pkt_burst = NULL;
axgbe_dev_clear_queues(eth_dev);
 
/* disable uio intr before callback unregister */
diff --git a/drivers/net/axgbe/axgbe_rxtx.c b/drivers/net/axgbe/axgbe_rxtx.c
index 1dff7c8..e96e2be 100644
--- a/drivers/net/axgbe/axgbe_rxtx.c
+++ b/drivers/net/axgbe/axgbe_rxtx.c
@@ -113,6 +113,197 @@ int axgbe_dev_rx_queue_setup(struct rte_eth_dev *dev, 
uint16_t queue_idx,
return 0;
 }
 
+static void axgbe_prepare_rx_stop(struct axgbe_port *pdata,
+ unsigned int queue)
+{
+   unsigned int rx_status;
+   unsigned long rx_timeout;
+
+   /* The Rx engine cannot be stopped if it is actively processing
+* packets. Wait for the Rx queue to empty the Rx fifo.  Don't
+* wait forever though...
+*/
+   rx_timeout = rte_get_timer_cycles() + (AXGBE_DMA_STOP_TIMEOUT *
+  rte_get_timer_hz());
+
+   while (time_before(rte_get_timer_cycles(), rx_timeout)) {
+   rx_status = AXGMAC_MTL_IOREAD(pdata, queue, MTL_Q_RQDR);
+   if ((AXGMAC_GET_BITS(rx_status, MTL_Q_RQDR, PRXQ) == 0) &&
+   (AXGMAC_GET_BITS(rx_status, MTL_Q_RQDR, RXQSTS) == 0))
+   break;
+
+   rte_delay_us(900);
+   }
+
+   if (!time_before(rte_get_timer_cycles(), rx_timeout))
+   PMD_DRV_LOG(ERR,
+   "timed out waiting for Rx queue %u to empty\n",
+   queue);
+}
+
+void axgbe_dev_disable_rx(struct rte_eth_dev *dev)
+{
+   struct axgbe_rx_queue *rxq;
+

[dpdk-dev] [PATCH v5 15/18] net/axgbe: support generic transmit and receive stats api

2018-04-06 Thread Ravi Kumar
This patch adds support for the port statistics API defined
for Ethernet PMDs.

Signed-off-by: Ravi Kumar 
---
 doc/guides/nics/axgbe.rst  |  1 +
 doc/guides/nics/features/axgbe.ini |  1 +
 drivers/net/axgbe/axgbe_ethdev.c   | 52 ++
 3 files changed, 54 insertions(+)

diff --git a/doc/guides/nics/axgbe.rst b/doc/guides/nics/axgbe.rst
index 009be03..e30f494 100644
--- a/doc/guides/nics/axgbe.rst
+++ b/doc/guides/nics/axgbe.rst
@@ -20,6 +20,7 @@ AXGBE PMD has support for:
 - Base L2 features
 - TSS (Transmit Side Scaling)
 - Promiscuous mode
+- Port statistics
 - Multicast mode
 - RSS (Receive Side Scaling)
 - Checksum offload
diff --git a/doc/guides/nics/features/axgbe.ini 
b/doc/guides/nics/features/axgbe.ini
index 9f4d38f..042ff1e 100644
--- a/doc/guides/nics/features/axgbe.ini
+++ b/doc/guides/nics/features/axgbe.ini
@@ -13,5 +13,6 @@ RSS hash = Y
 CRC offload  = Y
 L3 checksum offload  = Y
 L4 checksum offload  = Y
+Basic stats  = Y
 Linux UIO= Y
 x86-64   = Y
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index e5495bd..8c7b0ee 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -21,6 +21,9 @@ static void axgbe_dev_allmulticast_enable(struct rte_eth_dev 
*dev);
 static void axgbe_dev_allmulticast_disable(struct rte_eth_dev *dev);
 static int axgbe_dev_link_update(struct rte_eth_dev *dev,
 int wait_to_complete);
+static int axgbe_dev_stats_get(struct rte_eth_dev *dev,
+   struct rte_eth_stats *stats);
+static void axgbe_dev_stats_reset(struct rte_eth_dev *dev);
 static void axgbe_dev_info_get(struct rte_eth_dev *dev,
   struct rte_eth_dev_info *dev_info);
 
@@ -82,6 +85,8 @@ static const struct eth_dev_ops axgbe_eth_dev_ops = {
.allmulticast_enable  = axgbe_dev_allmulticast_enable,
.allmulticast_disable = axgbe_dev_allmulticast_disable,
.link_update  = axgbe_dev_link_update,
+   .stats_get= axgbe_dev_stats_get,
+   .stats_reset  = axgbe_dev_stats_reset,
.dev_infos_get= axgbe_dev_info_get,
.rx_queue_setup   = axgbe_dev_rx_queue_setup,
.rx_queue_release = axgbe_dev_rx_queue_release,
@@ -294,6 +299,53 @@ axgbe_dev_link_update(struct rte_eth_dev *dev,
return ret;
 }
 
+static int
+axgbe_dev_stats_get(struct rte_eth_dev *dev,
+   struct rte_eth_stats *stats)
+{
+   struct axgbe_rx_queue *rxq;
+   struct axgbe_tx_queue *txq;
+   unsigned int i;
+
+   for (i = 0; i < dev->data->nb_rx_queues; i++) {
+   rxq = dev->data->rx_queues[i];
+   stats->q_ipackets[i] = rxq->pkts;
+   stats->ipackets += rxq->pkts;
+   stats->q_ibytes[i] = rxq->bytes;
+   stats->ibytes += rxq->bytes;
+   }
+   for (i = 0; i < dev->data->nb_tx_queues; i++) {
+   txq = dev->data->tx_queues[i];
+   stats->q_opackets[i] = txq->pkts;
+   stats->opackets += txq->pkts;
+   stats->q_obytes[i] = txq->bytes;
+   stats->obytes += txq->bytes;
+   }
+
+   return 0;
+}
+
+static void
+axgbe_dev_stats_reset(struct rte_eth_dev *dev)
+{
+   struct axgbe_rx_queue *rxq;
+   struct axgbe_tx_queue *txq;
+   unsigned int i;
+
+   for (i = 0; i < dev->data->nb_rx_queues; i++) {
+   rxq = dev->data->rx_queues[i];
+   rxq->pkts = 0;
+   rxq->bytes = 0;
+   rxq->errors = 0;
+   }
+   for (i = 0; i < dev->data->nb_tx_queues; i++) {
+   txq = dev->data->tx_queues[i];
+   txq->pkts = 0;
+   txq->bytes = 0;
+   txq->errors = 0;
+   }
+}
+
 static void
 axgbe_dev_info_get(struct rte_eth_dev *dev,
   struct rte_eth_dev_info *dev_info)
-- 
2.7.4
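
For context, the per-queue counters filled in above surface through the
generic statistics API. A minimal application-side sketch (not part of the
patch; the helper name is made up) using rte_eth_stats_get() and
rte_eth_stats_reset():

#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

/* Print basic counters for a port, then clear them. */
static void
dump_and_clear_stats(uint16_t port_id)
{
	struct rte_eth_stats stats;

	if (rte_eth_stats_get(port_id, &stats) != 0) {
		printf("port %u: stats not available\n", port_id);
		return;
	}
	printf("port %u: rx %" PRIu64 " pkts / %" PRIu64 " bytes, "
	       "tx %" PRIu64 " pkts / %" PRIu64 " bytes\n",
	       port_id, stats.ipackets, stats.ibytes,
	       stats.opackets, stats.obytes);
	rte_eth_stats_reset(port_id);   /* start a fresh measurement window */
}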



[dpdk-dev] [PATCH v5 17/18] net/axgbe: add workaround for axgbe ethernet training bug

2018-04-06 Thread Ravi Kumar
Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/axgbe_common.h   |   8 +++
 drivers/net/axgbe/axgbe_ethdev.c   |   2 +
 drivers/net/axgbe/axgbe_ethdev.h   |   6 ++
 drivers/net/axgbe/axgbe_mdio.c |  13 -
 drivers/net/axgbe/axgbe_phy_impl.c | 117 +
 5 files changed, 144 insertions(+), 2 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 64c7a7f..97a80f5 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -1247,6 +1247,10 @@
 #define MDIO_VEND2_AN_STAT 0x8002
 #endif
 
+#ifndef MDIO_VEND2_PMA_CDR_CONTROL
+#define MDIO_VEND2_PMA_CDR_CONTROL 0x8056
+#endif
+
 #ifndef MDIO_CTRL1_SPEED1G
 #define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100)
 #endif
@@ -1294,6 +1298,10 @@
 #define AXGBE_AN_CL37_PCS_MODE_SGMII   0x04
 #define AXGBE_AN_CL37_TX_CONFIG_MASK   0x08
 
+#define AXGBE_PMA_CDR_TRACK_EN_MASK0x01
+#define AXGBE_PMA_CDR_TRACK_EN_OFF 0x00
+#define AXGBE_PMA_CDR_TRACK_EN_ON  0x01
+
 /*generic*/
 #define __iomem
 
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index 61a600c..096154c 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -50,6 +50,7 @@ static struct axgbe_version_data axgbe_v2a = {
.tx_tstamp_workaround   = 1,
.ecc_support= 1,
.i2c_support= 1,
+   .an_cdr_workaround  = 1,
 };
 
 static struct axgbe_version_data axgbe_v2b = {
@@ -61,6 +62,7 @@ static struct axgbe_version_data axgbe_v2b = {
.tx_tstamp_workaround   = 1,
.ecc_support= 1,
.i2c_support= 1,
+   .an_cdr_workaround  = 1,
 };
 
 static const struct rte_eth_desc_lim rx_desc_lim = {
diff --git a/drivers/net/axgbe/axgbe_ethdev.h b/drivers/net/axgbe/axgbe_ethdev.h
index 7bd2900..b1cd298 100644
--- a/drivers/net/axgbe/axgbe_ethdev.h
+++ b/drivers/net/axgbe/axgbe_ethdev.h
@@ -337,6 +337,10 @@ struct axgbe_phy_impl_if {
/* Process results of auto-negotiation */
enum axgbe_mode (*an_outcome)(struct axgbe_port *);
 
+   /* Pre/Post auto-negotiation support */
+   void (*an_pre)(struct axgbe_port *port);
+   void (*an_post)(struct axgbe_port *port);
+
/* Pre/Post KR training enablement support */
void (*kr_training_pre)(struct axgbe_port *);
void (*kr_training_post)(struct axgbe_port *);
@@ -431,6 +435,7 @@ struct axgbe_version_data {
unsigned int tx_tstamp_workaround;
unsigned int ecc_support;
unsigned int i2c_support;
+   unsigned int an_cdr_workaround;
 };
 
 /*
@@ -450,6 +455,7 @@ struct axgbe_port {
void *xprop_regs;   /* AXGBE property registers */
void *xi2c_regs;/* AXGBE I2C CSRs */
 
+   bool cdr_track_early;
/* XPCS indirect addressing lock */
unsigned int xpcs_window_def_reg;
unsigned int xpcs_window_sel_reg;
diff --git a/drivers/net/axgbe/axgbe_mdio.c b/drivers/net/axgbe/axgbe_mdio.c
index 2296de7..2721e5c 100644
--- a/drivers/net/axgbe/axgbe_mdio.c
+++ b/drivers/net/axgbe/axgbe_mdio.c
@@ -287,10 +287,14 @@ static void axgbe_an73_disable(struct axgbe_port *pdata)
 {
axgbe_an73_set(pdata, false, false);
axgbe_an73_disable_interrupts(pdata);
+   pdata->an_start = 0;
 }
 
 static void axgbe_an_restart(struct axgbe_port *pdata)
 {
+   if (pdata->phy_if.phy_impl.an_pre)
+   pdata->phy_if.phy_impl.an_pre(pdata);
+
switch (pdata->an_mode) {
case AXGBE_AN_MODE_CL73:
case AXGBE_AN_MODE_CL73_REDRV:
@@ -307,6 +311,9 @@ static void axgbe_an_restart(struct axgbe_port *pdata)
 
 static void axgbe_an_disable(struct axgbe_port *pdata)
 {
+   if (pdata->phy_if.phy_impl.an_post)
+   pdata->phy_if.phy_impl.an_post(pdata);
+
switch (pdata->an_mode) {
case AXGBE_AN_MODE_CL73:
case AXGBE_AN_MODE_CL73_REDRV:
@@ -482,9 +489,9 @@ static enum axgbe_an axgbe_an73_incompat_link(struct 
axgbe_port *pdata)
return AXGBE_AN_NO_LINK;
}
 
-   axgbe_an73_disable(pdata);
+   axgbe_an_disable(pdata);
axgbe_switch_mode(pdata);
-   axgbe_an73_restart(pdata);
+   axgbe_an_restart(pdata);
 
return AXGBE_AN_INCOMPAT_LINK;
 }
@@ -553,6 +560,8 @@ static void axgbe_an73_state_machine(struct axgbe_port 
*pdata)
pdata->kr_state = AXGBE_RX_BPA;
pdata->kx_state = AXGBE_RX_BPA;
pdata->an_start = 0;
+   if (pdata->phy_if.phy_impl.an_post)
+   pdata->phy_if.phy_impl.an_post(pdata);
}
 
if (cur_state != pdata->an_state)
diff --git a/drivers/net/axgbe/axgbe_phy_impl.c 
b/drivers/net/axgbe/axgbe_phy_impl.c
index 19bd4be..dfa908d 100644
--- a/drivers/net/axgbe/axgbe_phy_impl.c
+++ b/

[dpdk-dev] [PATCH v5 14/18] net/axgbe: add promiscuous mode support

2018-04-06 Thread Ravi Kumar
This patch enables promiscuous and multicast mode support
for the AXGBE PMD.

Signed-off-by: Ravi Kumar 
---
 doc/guides/nics/axgbe.rst  |  2 ++
 doc/guides/nics/features/axgbe.ini |  2 ++
 drivers/net/axgbe/axgbe_ethdev.c   | 48 ++
 3 files changed, 52 insertions(+)

diff --git a/doc/guides/nics/axgbe.rst b/doc/guides/nics/axgbe.rst
index b6812c2..009be03 100644
--- a/doc/guides/nics/axgbe.rst
+++ b/doc/guides/nics/axgbe.rst
@@ -19,6 +19,8 @@ AXGBE PMD has support for:
 
 - Base L2 features
 - TSS (Transmit Side Scaling)
+- Promiscuous mode
+- Multicast mode
 - RSS (Receive Side Scaling)
 - Checksum offload
 - Jumbo frames up to 9K
diff --git a/doc/guides/nics/features/axgbe.ini 
b/doc/guides/nics/features/axgbe.ini
index 2dbff18..9f4d38f 100644
--- a/doc/guides/nics/features/axgbe.ini
+++ b/doc/guides/nics/features/axgbe.ini
@@ -7,6 +7,8 @@
 Speed capabilities   = Y
 Link status  = Y
 Jumbo frame  = Y
+Promiscuous mode = Y
+Allmulticast mode= Y
 RSS hash = Y
 CRC offload  = Y
 L3 checksum offload  = Y
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index 3123572..e5495bd 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -15,6 +15,10 @@ static int  axgbe_dev_start(struct rte_eth_dev *dev);
 static void axgbe_dev_stop(struct rte_eth_dev *dev);
 static void axgbe_dev_interrupt_handler(void *param);
 static void axgbe_dev_close(struct rte_eth_dev *dev);
+static void axgbe_dev_promiscuous_enable(struct rte_eth_dev *dev);
+static void axgbe_dev_promiscuous_disable(struct rte_eth_dev *dev);
+static void axgbe_dev_allmulticast_enable(struct rte_eth_dev *dev);
+static void axgbe_dev_allmulticast_disable(struct rte_eth_dev *dev);
 static int axgbe_dev_link_update(struct rte_eth_dev *dev,
 int wait_to_complete);
 static void axgbe_dev_info_get(struct rte_eth_dev *dev,
@@ -73,6 +77,10 @@ static const struct eth_dev_ops axgbe_eth_dev_ops = {
.dev_start= axgbe_dev_start,
.dev_stop = axgbe_dev_stop,
.dev_close= axgbe_dev_close,
+   .promiscuous_enable   = axgbe_dev_promiscuous_enable,
+   .promiscuous_disable  = axgbe_dev_promiscuous_disable,
+   .allmulticast_enable  = axgbe_dev_allmulticast_enable,
+   .allmulticast_disable = axgbe_dev_allmulticast_disable,
.link_update  = axgbe_dev_link_update,
.dev_infos_get= axgbe_dev_info_get,
.rx_queue_setup   = axgbe_dev_rx_queue_setup,
@@ -219,6 +227,46 @@ axgbe_dev_close(struct rte_eth_dev *dev)
axgbe_dev_clear_queues(dev);
 }
 
+static void
+axgbe_dev_promiscuous_enable(struct rte_eth_dev *dev)
+{
+   PMD_INIT_FUNC_TRACE();
+   struct axgbe_port *pdata = dev->data->dev_private;
+
+   AXGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, 1);
+}
+
+static void
+axgbe_dev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+   PMD_INIT_FUNC_TRACE();
+   struct axgbe_port *pdata = dev->data->dev_private;
+
+   AXGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, 0);
+}
+
+static void
+axgbe_dev_allmulticast_enable(struct rte_eth_dev *dev)
+{
+   PMD_INIT_FUNC_TRACE();
+   struct axgbe_port *pdata = dev->data->dev_private;
+
+   if (AXGMAC_IOREAD_BITS(pdata, MAC_PFR, PM))
+   return;
+   AXGMAC_IOWRITE_BITS(pdata, MAC_PFR, PM, 1);
+}
+
+static void
+axgbe_dev_allmulticast_disable(struct rte_eth_dev *dev)
+{
+   PMD_INIT_FUNC_TRACE();
+   struct axgbe_port *pdata = dev->data->dev_private;
+
+   if (!AXGMAC_IOREAD_BITS(pdata, MAC_PFR, PM))
+   return;
+   AXGMAC_IOWRITE_BITS(pdata, MAC_PFR, PM, 0);
+}
+
 /* return 0 means link status changed, -1 means not changed */
 static int
 axgbe_dev_link_update(struct rte_eth_dev *dev,
-- 
2.7.4
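
For context, the callbacks added above are reached through the generic
ethdev wrappers. A minimal application-side sketch (not part of the patch;
the helper name is made up):

#include <stdio.h>
#include <rte_ethdev.h>

/* Accept all unicast and multicast frames, e.g. for traffic monitoring. */
static void
enable_receive_all(uint16_t port_id)
{
	rte_eth_promiscuous_enable(port_id);
	rte_eth_allmulticast_enable(port_id);

	/* The current state can be read back: 1 = enabled, 0 = disabled. */
	printf("port %u: promisc=%d allmulti=%d\n", port_id,
	       rte_eth_promiscuous_get(port_id),
	       rte_eth_allmulticast_get(port_id));
}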



[dpdk-dev] [PATCH v5 16/18] net/axgbe: add support for build 32-bit mode

2018-04-06 Thread Ravi Kumar
Signed-off-by: Ravi Kumar 
---
 doc/guides/nics/features/axgbe.ini |  1 +
 drivers/net/axgbe/axgbe_common.h   | 53 ++
 drivers/net/axgbe/axgbe_ethdev.c   | 10 ---
 drivers/net/axgbe/axgbe_ethdev.h   |  8 +++---
 drivers/net/axgbe/axgbe_rxtx.c | 12 -
 drivers/net/axgbe/axgbe_rxtx.h |  4 +--
 6 files changed, 50 insertions(+), 38 deletions(-)

diff --git a/doc/guides/nics/features/axgbe.ini 
b/doc/guides/nics/features/axgbe.ini
index 042ff1e..ab4da55 100644
--- a/doc/guides/nics/features/axgbe.ini
+++ b/doc/guides/nics/features/axgbe.ini
@@ -15,4 +15,5 @@ L3 checksum offload  = Y
 L4 checksum offload  = Y
 Basic stats  = Y
 Linux UIO= Y
+x86-32   = Y
 x86-64   = Y
diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 298e794..64c7a7f 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -1385,7 +1385,7 @@ do {  
\
  *  register definitions formed using the input names
  */
 #define AXGMAC_IOREAD(_pdata, _reg)\
-   rte_read32((void *)((_pdata)->xgmac_regs + (_reg)))
+   rte_read32((uint8_t *)((_pdata)->xgmac_regs) + (_reg))
 
 #define AXGMAC_IOREAD_BITS(_pdata, _reg, _field)   \
GET_BITS(AXGMAC_IOREAD((_pdata), _reg), \
@@ -1393,7 +1393,8 @@ do {  
\
 _reg##_##_field##_WIDTH)
 
 #define AXGMAC_IOWRITE(_pdata, _reg, _val) \
-   rte_write32((_val), (void *)((_pdata)->xgmac_regs + (_reg)))
+   rte_write32((_val), \
+   (uint8_t *)((_pdata)->xgmac_regs) + (_reg))
 
 #define AXGMAC_IOWRITE_BITS(_pdata, _reg, _field, _val)
\
 do {   \
@@ -1409,8 +1410,8 @@ do {  
\
  *  base register value is calculated by the queue or traffic class number
  */
 #define AXGMAC_MTL_IOREAD(_pdata, _n, _reg)\
-   rte_read32((void *)((_pdata)->xgmac_regs +  \
-MTL_Q_BASE + ((_n) * MTL_Q_INC) + (_reg)))
+   rte_read32((uint8_t *)((_pdata)->xgmac_regs) +  \
+MTL_Q_BASE + ((_n) * MTL_Q_INC) + (_reg))
 
 #define AXGMAC_MTL_IOREAD_BITS(_pdata, _n, _reg, _field)   \
GET_BITS(AXGMAC_MTL_IOREAD((_pdata), (_n), (_reg)), \
@@ -1418,8 +1419,8 @@ do {  
\
 _reg##_##_field##_WIDTH)
 
 #define AXGMAC_MTL_IOWRITE(_pdata, _n, _reg, _val) \
-   rte_write32((_val), (void *)((_pdata)->xgmac_regs + \
- MTL_Q_BASE + ((_n) * MTL_Q_INC) + (_reg)))
+   rte_write32((_val), (uint8_t *)((_pdata)->xgmac_regs) +\
+ MTL_Q_BASE + ((_n) * MTL_Q_INC) + (_reg))
 
 #define AXGMAC_MTL_IOWRITE_BITS(_pdata, _n, _reg, _field, _val)
\
 do {   \
@@ -1435,7 +1436,7 @@ do {  
\
  *  base register value is obtained from the ring
  */
 #define AXGMAC_DMA_IOREAD(_channel, _reg)  \
-   rte_read32((void *)((_channel)->dma_regs + (_reg)))
+   rte_read32((uint8_t *)((_channel)->dma_regs) + (_reg))
 
 #define AXGMAC_DMA_IOREAD_BITS(_channel, _reg, _field) \
GET_BITS(AXGMAC_DMA_IOREAD((_channel), _reg),   \
@@ -1443,7 +1444,8 @@ do {  
\
 _reg##_##_field##_WIDTH)
 
 #define AXGMAC_DMA_IOWRITE(_channel, _reg, _val)   \
-   rte_write32((_val), (void *)((_channel)->dma_regs + (_reg)))
+   rte_write32((_val), \
+   (uint8_t *)((_channel)->dma_regs) + (_reg))
 
 #define AXGMAC_DMA_IOWRITE_BITS(_channel, _reg, _field, _val)  \
 do {   \
@@ -1468,16 +1470,18 @@ do {
\
 _prefix##_##_field##_WIDTH, (_val))
 
 #define XPCS32_IOWRITE(_pdata, _off, _val) \
-   rte_write32(_val, (void *)((_pdata)->xpcs_regs + (_off)))
+   rte_write32(_val,   \
+   (uint8_t *)((_pdata)->xpcs_regs) + (_off))
 
 #define XPCS32_IOREAD(_pdata, _off)\
-   rte_read32((void *)((_pdata)->xpcs_regs + (_off)))
+   rte_read32((uint8_t *)((_pdata)->xpcs_regs) + (_off))
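
The recurring change in this patch replaces pointer arithmetic on void *
with arithmetic on uint8_t *. A small stand-alone sketch of the pattern
(the helper name is made up; the stated rationale is an assumption:
arithmetic on void * is a GNU C extension, so casting to uint8_t * keeps
the byte offset well-defined ISO C and avoids warnings in stricter builds):

#include <stdint.h>

/* Byte-offset register read: cast the void * base to uint8_t * so that the
 * offset arithmetic is portable rather than relying on sizeof(void) == 1. */
static inline uint32_t
read_reg32(void *base, uintptr_t off)
{
	volatile uint32_t *addr = (volatile uint32_t *)((uint8_t *)base + off);

	return *addr;
}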

[dpdk-dev] [PATCH v5 18/18] net/axgbe : support meson build

2018-04-06 Thread Ravi Kumar
Signed-off-by: Ravi Kumar 
---
 drivers/net/axgbe/meson.build | 19 +++
 drivers/net/meson.build   |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/axgbe/meson.build

diff --git a/drivers/net/axgbe/meson.build b/drivers/net/axgbe/meson.build
new file mode 100644
index 000..548
--- /dev/null
+++ b/drivers/net/axgbe/meson.build
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
+
+if host_machine.system() != 'linux'
+   build = false
+endif
+
+sources = files('axgbe_ethdev.c',
+   'axgbe_dev.c',
+   'axgbe_mdio.c',
+   'axgbe_phy_impl.c',
+   'axgbe_i2c.c',
+   'axgbe_rxtx.c')
+
+cflags += '-Wno-cast-qual'
+
+if arch_subdir == 'x86'
+   sources += files('axgbe_rxtx_vec_sse.c')
+endif
diff --git a/drivers/net/meson.build b/drivers/net/meson.build
index aae817c..b7cac4a 100644
--- a/drivers/net/meson.build
+++ b/drivers/net/meson.build
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-drivers = ['af_packet', 'bonding', 'dpaa', 'dpaa2',
+drivers = ['af_packet', 'axgbe', 'bonding', 'dpaa', 'dpaa2',
'e1000', 'enic', 'fm10k', 'i40e', 'ixgbe',
'null', 'octeontx', 'pcap', 'ring',
'sfc', 'thunderx', 'virtio']
-- 
2.7.4



Re: [dpdk-dev] [PATCH v4 01/17] net/axgbe: add minimal dev init and uninit support

2018-04-06 Thread Kumar, Ravi1
>On 4/5/2018 7:39 AM, Ravi Kumar wrote:
>> add ethernet poll mode driver for AMD 10G devices embedded in AMD 
>> EPYC™ EMBEDDED 3000 family processors
>> 
>> Signed-off-by: Ravi Kumar 
><...>
>

Hi Ferruh,

Please check my comments inline.

>> @@ -410,6 +410,12 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
>>  CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y
>>  
>>  #
>> +# Compile AMD PMD
>> +#
>> +CONFIG_RTE_LIBRTE_AXGBE_DEBUG_INIT=n
>> +CONFIG_RTE_LIBRTE_AXGBE_PMD=y
>
>
>Please add alphabetically.
>
>RTE_LIBRTE_AXGBE_DEBUG_INIT is used for data path logs, otherwise it should be 
>dynamic logging. So the name is wrong for the purpose, it is no more "init"
>debug log, you have dynamic log for init already.
>And in documentation this has been documented as "Toggle display of 
>initialization related messages" which seems wrong as well.

[Ravi] Done. Corrected document as well.

>
><...>
>
>> @@ -12,6 +12,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += af_packet
>>  DIRS-$(CONFIG_RTE_LIBRTE_ARK_PMD) += ark
>>  DIRS-$(CONFIG_RTE_LIBRTE_AVF_PMD) += avf
>>  DIRS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += avp
>> +DIRS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe
>
>Please consider adding meson support too. Perhaps with a separate patch to 
>this set.

[Ravi] Added meson build support in a separate patch.

>
><...>
>
>> +
>> +#
>> +# all source are stored in SRCS-y
>> +#
>> +SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_ethdev.c
>
>Shared build causing build error, you need to add dependent libraries [1], and 
>please test shared library builds:
>
>[1] something like:
> +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring  +LDLIBS += 
> -lrte_ethdev -lrte_net -lrte_kvargs  +LDLIBS += -lrte_bus_pci
>
><...>
>

[Ravi] Supported shared builds. Thanks.

>> +RTE_PMD_REGISTER_PCI(net_axgbe, rte_axgbe_pmd); 
>> +RTE_PMD_REGISTER_PCI_TABLE(net_axgbe, pci_id_axgbe_map); 
>> +RTE_PMD_REGISTER_KMOD_DEP(net_axgbe, "* igb_uio | uio_pci_generic | 
>> +vfio-pci");
>
>Is vfio-pci supported?
>
>Documentation says:
>"
>AXGBE PMD works only with legacy interrupts. Load ``igb_uio`` module in legacy 
>interrupt mode using module params.
>
>Bind the intended AMD device to igb_uio module "
><...>

[Ravi] PMD supports vfio-pci. Corrected old document.


Re: [dpdk-dev] [PATCH v4 17/17] net/axgbe: add workaround for axgbe ethernet training bug

2018-04-06 Thread Kumar, Ravi1
>On 4/5/2018 7:39 AM, Ravi Kumar wrote:
>> Signed-off-by: Ravi Kumar 
>
>Can you please give more information what is the bug solved here? What problem 
>observed if it is not fixed? This may help people having problem.
>

Hi Ferruh,

This bug is internal and the details are not useful to end users. Without this
fix there may be some performance drop, but there is no functional regression.

Regards,
Ravi


Re: [dpdk-dev] [PATCH v4 10/17] net/axgbe: add transmit and receive data path apis

2018-04-06 Thread Kumar, Ravi1
>On 4/5/2018 7:39 AM, Ravi Kumar wrote:
>> Supported scalar implementation for RX data path Supported scalar and 
>> vector implementation for TX data path
>> 
>> Signed-off-by: Ravi Kumar 
>> ---
>>  drivers/net/axgbe/Makefile |   1 +
>>  drivers/net/axgbe/axgbe_ethdev.c   |  22 +-
>>  drivers/net/axgbe/axgbe_rxtx.c | 429 
>> +
>>  drivers/net/axgbe/axgbe_rxtx.h |  19 ++
>>  drivers/net/axgbe/axgbe_rxtx_vec_sse.c |  93 +++
>>  5 files changed, 563 insertions(+), 1 deletion(-)  create mode 100644 
>> drivers/net/axgbe/axgbe_rxtx_vec_sse.c
>> 
>> diff --git a/drivers/net/axgbe/Makefile b/drivers/net/axgbe/Makefile 
>> index 9fd7b5e..aff7917 100644
>> --- a/drivers/net/axgbe/Makefile
>> +++ b/drivers/net/axgbe/Makefile
>> @@ -24,5 +24,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_mdio.c
>>  SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_phy_impl.c
>>  SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_i2c.c
>>  SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_rxtx.c
>> +SRCS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe_rxtx_vec_sse.c
>
>This needs to be protected with x86 checks. PMD is enabled by default in 
>config, which means it will be enabled for other architectures too, like arm 
>and ibm, and this file will cause build error for them.
>

Hi Ferruh,

Protected vector implementation under CONFIG_RTE_ARCH_X86 checks. Thanks.

Regards,
Ravi


[dpdk-dev] [PATCH] lib/librte_vhost: fix meson build

2018-04-06 Thread Fan Zhang
Fixes: 7834b5c82bf3 ("lib/librte_vhost: update makefile")

This patch fixes some meson build bugs.

Signed-off-by: Fan Zhang 
---
 lib/librte_vhost/meson.build | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build
index f01ef67fc..dee154e40 100644
--- a/lib/librte_vhost/meson.build
+++ b/lib/librte_vhost/meson.build
@@ -10,6 +10,6 @@ endif
 version = 4
 allow_experimental_apis = true
 sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vhost.c', 'vhost_user.c',
-   'virtio_net.c', 'virtio_crypto.c')
+   'virtio_net.c', 'vhost_crypto.c')
 headers = files('rte_vhost.h', 'rte_vhost_crypto.h')
-deps += ['ethdev', 'cryptodev', 'pci']
+deps += ['ethdev', 'cryptodev', 'pci', 'hash']
-- 
2.13.6



Re: [dpdk-dev] [PATCH v3 00/68] Memory Hotplug for DPDK

2018-04-06 Thread Burakov, Anatoly

On 06-Apr-18 1:01 PM, Hemant Agrawal wrote:

Hi Thomas


-Original Message-
From: Thomas Monjalon [mailto:tho...@monjalon.net]
Sent: Thursday, April 05, 2018 7:43 PM
To: Shreyansh Jain 
Cc: Anatoly Burakov ; dev@dpdk.org;
keith.wi...@intel.com; jianfeng@intel.com;
andras.kov...@ericsson.com; laszlo.vadk...@ericsson.com;
benjamin.wal...@intel.com; bruce.richard...@intel.com;
konstantin.anan...@intel.com; kuralamudhan.ramakrish...@intel.com;
louise.m.d...@intel.com; nelio.laranje...@6wind.com;
ys...@mellanox.com; peppe...@japf.ch; jerin.ja...@caviumnetworks.com;
Hemant Agrawal ; olivier.m...@6wind.com;
gowrishanka...@linux.vnet.ibm.com
Subject: Re: [dpdk-dev] [PATCH v3 00/68] Memory Hotplug for DPDK
Importance: High

05/04/2018 16:24, Shreyansh Jain:

Physical addressing cases for both, dpaa/dpaa2, depend heavily on
the fact that physical addressing was the base and was available in
sorted manner. This is reversed/negated with hotplugging support.
So, rework of both the drivers is required from this perspective.
There are some suggestions floated by Anatoly and internally, but
work still needs to be done.
It also impacts a lot of use-cases for virtualization (no-iommu).


So what is your recommendation?
Can you rework PA case in dpaa/dpaa2 drivers within 18.05 timeframe?


We will like 2-3 more days on this before we can ack/nack this patch.
We are working on priority on this.  PA case rework is not a trivial change.


The patch is good to go. However, we will be making changes in dpaa/dpaa2 
drivers to fix the PA issues shortly (within 18.05 timeframe)


That's great to hear!



Anatoly needs to take care of following:
1. Comment by Shreyansh on " Re: [dpdk-dev] [PATCH v3 50/68] eal: replace memzone 
array with fbarray"


Yes, that is already fixed in both github and upcoming v4.


2. I could not apply the patches cleanly on current master.


The patchset has dependencies, listed in the cover letter. I'll rebase 
on latest master before sending v4 just in case.




Tested-by: Hemant Agrawal 
  

Regards,
Hemant







--
Thanks,
Anatoly


Re: [dpdk-dev] [PATCH v3 19/21] vhost: support mergeable rx buffers with packed queues

2018-04-06 Thread Maxime Coquelin



On 04/05/2018 12:10 PM, Jens Freimann wrote:

This implements support for mergeable receive buffers in vhost when using
packed virtqueues. The difference to split virtqueues is not big, it differs
mostly where descriptor flags are touched and virtio features are checked.

Signed-off-by: Jens Freimann 
---
  lib/librte_vhost/vhost.c  |   2 +
  lib/librte_vhost/virtio_net.c | 160 +-
  2 files changed, 127 insertions(+), 35 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index eb5a98875..3c633e71e 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -580,6 +580,8 @@ rte_vhost_enable_guest_notification(int vid, uint16_t 
queue_id, int enable)
  
  	if (dev == NULL)

return -1;
+   if (dev->features & (1ULL << VIRTIO_F_RING_PACKED))
+   return -1;


Is this change really related to mergeable buffers?

  
  	if (enable) {

RTE_LOG(ERR, VHOST_CONFIG,
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 18e67fdc1..b82c24081 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -401,17 +401,53 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
  }
  
  static __rte_always_inline int

-fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-uint32_t avail_idx, uint32_t *vec_idx,
-struct buf_vector *buf_vec, uint16_t *desc_chain_head,
-uint16_t *desc_chain_len)
+__fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+struct buf_vector *buf_vec,
+uint32_t *len, uint32_t *vec_id)
+{
+   uint16_t idx = vq->last_avail_idx & (vq->size - 1);
+   struct vring_desc_packed *descs= vq->desc_packed;
+   uint32_t _vec_id = *vec_id;
+
+   if (vq->desc_packed[idx].flags & VRING_DESC_F_INDIRECT) {
+   descs = (struct vring_desc_packed *)(uintptr_t)
+   vhost_iova_to_vva(dev, vq, vq->desc_packed[idx].addr,
+   vq->desc_packed[idx].len,
+   VHOST_ACCESS_RO);
+   if (unlikely(!descs))
+   return -1;
+
+   idx = 0;
+   }
+
+   while (1) {
+   if (unlikely(_vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
+   return -1;
+
+   *len += descs[idx & (vq->size - 1)].len;
+   buf_vec[_vec_id].buf_addr = descs[idx].addr;
+   buf_vec[_vec_id].buf_len  = descs[idx].len;
+   buf_vec[_vec_id].desc_idx = idx;
+   _vec_id++;
+
+   if ((descs[idx & (vq->size - 1)].flags & VRING_DESC_F_NEXT) == 
0)
+   break;
+
+   idx++;
+   }
+   *vec_id = _vec_id;
+
+   return 0;
+}
+
+static __rte_always_inline int
+__fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+struct buf_vector *buf_vec,
+uint32_t *len, uint32_t *vec_id, uint32_t avail_idx)
  {
uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
-   uint32_t vec_id = *vec_idx;
-   uint32_t len= 0;
struct vring_desc *descs = vq->desc;
-
-   *desc_chain_head = idx;
+   uint32_t _vec_id = *vec_id;
  
  	if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {

descs = (struct vring_desc *)(uintptr_t)
@@ -425,20 +461,53 @@ fill_vec_buf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
}
  
  	while (1) {

-   if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
+   if (unlikely(_vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
return -1;
  
-		len += descs[idx].len;

-   buf_vec[vec_id].buf_addr = descs[idx].addr;
-   buf_vec[vec_id].buf_len  = descs[idx].len;
-   buf_vec[vec_id].desc_idx = idx;
-   vec_id++;
+   *len += descs[idx].len;
+   buf_vec[_vec_id].buf_addr = descs[idx].addr;
+   buf_vec[_vec_id].buf_len  = descs[idx].len;
+   buf_vec[_vec_id].desc_idx = idx;
+   _vec_id++;
  
  		if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)

break;
  
  		idx = descs[idx].next;

}
+   *vec_id = _vec_id;
+
+   return 0;
+}
+
+static __rte_always_inline int
+fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+uint32_t avail_idx, uint32_t *vec_idx,
+struct buf_vector *buf_vec, uint16_t *desc_chain_head,
+uint16_t *desc_chain_len)
+{
+   uint16_t idx;
+   uint32_t vec_id = *vec_idx;
+   uint32_t len= 0;
+
+   if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) {
+   idx = vq->last_avail_idx & (vq->size - 1);

Re: [dpdk-dev] [PATCH] lib/librte_vhost: fix meson build

2018-04-06 Thread Maxime Coquelin



On 04/06/2018 02:43 PM, Fan Zhang wrote:

Fixes: 7834b5c82bf3 ("lib/librte_vhost: update makefile")

This patch fixes some meson build bugs.

Signed-off-by: Fan Zhang 
---
  lib/librte_vhost/meson.build | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build
index f01ef67fc..dee154e40 100644
--- a/lib/librte_vhost/meson.build
+++ b/lib/librte_vhost/meson.build
@@ -10,6 +10,6 @@ endif
  version = 4
  allow_experimental_apis = true
  sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vhost.c', 'vhost_user.c',
-   'virtio_net.c', 'virtio_crypto.c')
+   'virtio_net.c', 'vhost_crypto.c')
  headers = files('rte_vhost.h', 'rte_vhost_crypto.h')
-deps += ['ethdev', 'cryptodev', 'pci']
+deps += ['ethdev', 'cryptodev', 'pci', 'hash']



Acked-by: Maxime Coquelin 

I will squash the change in patch 7834b5c82bf3 if not applied yet.

As discussed on IRC, we should ideally have such dependencies only if
vhost-crypto build is enabled.

Thanks!
Maxime


Re: [dpdk-dev] [PATCH 2/4] net/szedata2: use dynamically allocated queues

2018-04-06 Thread Ferruh Yigit
On 4/4/2018 2:42 PM, Matej Vido wrote:
> Previously the queues were the part of private data structure of the
> Ethernet device.
> Now the queues are allocated at setup thus numa-aware allocation is
> possible.

Hi Matej,

Yes by default [rt]x_queues are allocated via rte_zmalloc, which uses 
SOCKET_ID_ANY.

And in burst functions, we do:
 nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts);

So there is an access to rx_queues in each rte_eth_rx_burst() call.

I wonder if you observe any performance difference with this update?
And what about moving to the ethdev layer instead of keeping local to the PMD?

> 
> Signed-off-by: Matej Vido 

<...>



[dpdk-dev] [PATCH v3 00/11] Bunch of flow API-related fixes

2018-04-06 Thread Adrien Mazarguil
This series contains several fixes for rte_flow and its implementation in
PMDs and testpmd. Upcoming work on the flow API depends on it.

v3 changes:

- Rebased series.
- Dropped unnecessary "net/sfc: fix endian conversions in flow API".
- Dropped "ethdev: fix ABI version in meson build", handled by prior commit
  d9736a248785 ("ethdev: fix library version in meson build").

v2 changes:

- mlx5 fix (patch #3).
- bnxt fix (patch #4).
- sfc fix (patch #6).
- Missing include (patch #13).

Adrien Mazarguil (11):
  net/mlx4: fix RSS resource leak in case of error
  net/mlx4: fix ignored RSS hash types
  net/mlx5: fix RSS flow action bounds check
  net/bnxt: fix matching of flow API item masks
  app/testpmd: fix flow completion for RSS queues
  app/testpmd: fix lack of flow action configuration
  app/testpmd: fix RSS flow action configuration
  app/testpmd: fix missing RSS fields in flow action
  ethdev: fix shallow copy of flow API RSS action
  ethdev: fix missing boolean values in flow command
  ethdev: fix missing include in flow API

 app/test-pmd/cmdline_flow.c | 255 ---
 app/test-pmd/config.c   | 160 +-
 app/test-pmd/testpmd.h  |  13 ++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |   8 +
 drivers/net/bnxt/bnxt_filter.c  |  14 +-
 drivers/net/mlx4/mlx4_flow.c|  17 +-
 drivers/net/mlx5/mlx5_flow.c|   9 +
 lib/librte_ether/rte_flow.c | 145 +
 lib/librte_ether/rte_flow.h |   2 +
 9 files changed, 495 insertions(+), 128 deletions(-)

-- 
2.11.0


[dpdk-dev] [PATCH v3 01/11] net/mlx4: fix RSS resource leak in case of error

2018-04-06 Thread Adrien Mazarguil
When memory cannot be allocated for a flow rule, its RSS context reference
is not dropped.

Fixes: 078b8b452e6b ("net/mlx4: add RSS flow rule action support")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Cc: Shahaf Shuler 
---
 drivers/net/mlx4/mlx4_flow.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 2d55bfe03..a3b4480b4 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -820,11 +820,14 @@ mlx4_flow_prepare(struct priv *priv,
},
};
 
-   if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
+   if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) {
+   if (temp.rss)
+   mlx4_rss_put(temp.rss);
return rte_flow_error_set
(error, -rte_errno,
 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 "flow rule handle allocation failure");
+   }
/* Most fields will be updated by second pass. */
*flow = (struct rte_flow){
.ibv_attr = temp.ibv_attr,
-- 
2.11.0


[dpdk-dev] [PATCH v3 02/11] net/mlx4: fix ignored RSS hash types

2018-04-06 Thread Adrien Mazarguil
When an unsupported hash type is part of a RSS configuration structure, it
is silently ignored instead of triggering an error. This may lead
applications to assume that such types are accepted, while they are in fact
not part of the resulting flow rules.

Fixes: 078b8b452e6b ("net/mlx4: add RSS flow rule action support")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Cc: Shahaf Shuler 
---
 drivers/net/mlx4/mlx4_flow.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index a3b4480b4..4d26df326 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -706,6 +706,7 @@ mlx4_flow_prepare(struct priv *priv,
const struct rte_flow_action_queue *queue;
const struct rte_flow_action_rss *rss;
const struct rte_eth_rss_conf *rss_conf;
+   uint64_t fields;
unsigned int i;
 
case RTE_FLOW_ACTION_TYPE_VOID:
@@ -780,10 +781,15 @@ mlx4_flow_prepare(struct priv *priv,
" of the context size";
goto exit_action_not_supported;
}
+   rte_errno = 0;
+   fields = mlx4_conv_rss_hf(priv, rss_conf->rss_hf);
+   if (fields == (uint64_t)-1 && rte_errno) {
+   msg = "unsupported RSS hash type requested";
+   goto exit_action_not_supported;
+   }
flow->rss = mlx4_rss_get
-   (priv,
-mlx4_conv_rss_hf(priv, rss_conf->rss_hf),
-rss_conf->rss_key, rss->num, rss->queue);
+   (priv, fields, rss_conf->rss_key, rss->num,
+rss->queue);
if (!flow->rss) {
msg = "either invalid parameters or not enough"
" resources for additional multi-queue"
-- 
2.11.0


[dpdk-dev] [PATCH v3 03/11] net/mlx5: fix RSS flow action bounds check

2018-04-06 Thread Adrien Mazarguil
The number of queues provided by the application is not checked against
parser's supported maximum.

Fixes: 3d821d6fea40 ("net/mlx5: support RSS action flow rule")
Cc: sta...@dpdk.org
Cc: Nelio Laranjeiro 

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_flow.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index e6af3243d..f051fbef5 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -16,6 +16,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include 
 #include 
 #include 
 #include 
@@ -713,6 +714,14 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
return -rte_errno;
}
}
+   if (rss->num > RTE_DIM(parser->queues)) {
+   rte_flow_error_set(error, EINVAL,
+  RTE_FLOW_ERROR_TYPE_ACTION,
+  actions,
+  "too many queues for RSS"
+  " context");
+   return -rte_errno;
+   }
for (n = 0; n < rss->num; ++n) {
if (rss->queue[n] >= priv->rxqs_n) {
rte_flow_error_set(error, EINVAL,
-- 
2.11.0


[dpdk-dev] [PATCH v3 04/11] net/bnxt: fix matching of flow API item masks

2018-04-06 Thread Adrien Mazarguil
Some values are interpreted without endian conversion and/or without
taking the proper mask into account.

Fixes: 5ef3b79fdfe6 ("net/bnxt: support flow filter ops")
Cc: sta...@dpdk.org
Cc: Ajit Khaparde 

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/bnxt/bnxt_filter.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_filter.c b/drivers/net/bnxt/bnxt_filter.c
index 96b382ba8..0f9c1c9ae 100644
--- a/drivers/net/bnxt/bnxt_filter.c
+++ b/drivers/net/bnxt/bnxt_filter.c
@@ -5,6 +5,7 @@
 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -346,7 +347,8 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp,
}
 
/* Mask is not allowed. Only exact matches are */
-   if ((eth_mask->type & UINT16_MAX) != UINT16_MAX) {
+   if (eth_mask->type &&
+   eth_mask->type != RTE_BE16(0xffff)) {
rte_flow_error_set(error, EINVAL,
   RTE_FLOW_ERROR_TYPE_ITEM,
   item,
@@ -372,7 +374,7 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp,
   *  RTE_LOG(ERR, PMD, "Handle this condition\n");
   * }
   */
-   if (eth_spec->type) {
+   if (eth_mask->type) {
filter->ethertype =
rte_be_to_cpu_16(eth_spec->type);
en |= use_ntuple ?
@@ -384,13 +386,15 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp,
case RTE_FLOW_ITEM_TYPE_VLAN:
vlan_spec = item->spec;
vlan_mask = item->mask;
-   if (vlan_mask->tci & 0xFFFF && !vlan_mask->tpid) {
+   if (vlan_mask->tci &&
+   vlan_mask->tci == RTE_BE16(0x0fff) &&
+   !vlan_mask->tpid) {
/* Only the VLAN ID can be matched. */
filter->l2_ovlan =
rte_be_to_cpu_16(vlan_spec->tci &
-0xFFF);
+RTE_BE16(0x0fff));
en |= EM_FLOW_ALLOC_INPUT_EN_OVLAN_VID;
-   } else {
+   } else if (vlan_mask->tci || vlan_mask->tpid) {
rte_flow_error_set(error, EINVAL,
   RTE_FLOW_ERROR_TYPE_ITEM,
   item,
-- 
2.11.0
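
For clarity, the rule the fix enforces, as a stand-alone sketch (the helper
is hypothetical, not from the bnxt driver): flow item spec/mask fields are
in network byte order, so full-match checks must compare against big-endian
constants and the value must be converted before landing in a CPU-order
filter field.

#include <rte_byteorder.h>
#include <rte_flow.h>

static int
parse_eth_type(const struct rte_flow_item_eth *spec,
	       const struct rte_flow_item_eth *mask,
	       uint16_t *ethertype) /* output in CPU byte order */
{
	if (mask->type == 0)
		return 0;                      /* field not matched at all */
	if (mask->type != RTE_BE16(0xffff))
		return -1;                     /* partial masks unsupported */
	*ethertype = rte_be_to_cpu_16(spec->type);
	return 1;
}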


[dpdk-dev] [PATCH v3 05/11] app/testpmd: fix flow completion for RSS queues

2018-04-06 Thread Adrien Mazarguil
The lack of a working completion for RSS queues was overlooked during
development; until now only "end" was displayed as a valid token.

Fixes: 05d34c6e9d2c ("app/testpmd: add queue actions to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
---
 app/test-pmd/cmdline_flow.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index a5cf84f79..9cac8e9bf 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -2663,17 +2663,15 @@ static int
 comp_vc_action_rss_queue(struct context *ctx, const struct token *token,
 unsigned int ent, char *buf, unsigned int size)
 {
-   static const char *const str[] = { "", "end", NULL };
-   unsigned int i;
-
(void)ctx;
(void)token;
-   for (i = 0; str[i] != NULL; ++i)
-   if (buf && i == ent)
-   return snprintf(buf, size, "%s", str[i]);
-   if (buf)
-   return -1;
-   return i;
+   if (!buf)
+   return nb_rxq + 1;
+   if (ent < nb_rxq)
+   return snprintf(buf, size, "%u", ent);
+   if (ent == nb_rxq)
+   return snprintf(buf, size, "end");
+   return -1;
 }
 
 /** Internal context. */
-- 
2.11.0


[dpdk-dev] [PATCH v3 07/11] app/testpmd: fix RSS flow action configuration

2018-04-06 Thread Adrien Mazarguil
Except for a list of queues, RSS configuration (hash key and fields) cannot
be specified from the flow command line and testpmd does not provide safe
defaults either.

In order to validate their implementation with testpmd, PMDs had to
interpret its NULL RSS configuration parameters somehow, however this has
never been valid to begin with.

This patch makes testpmd always provide default values.

Fixes: 05d34c6e9d2c ("app/testpmd: add queue actions to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
---
 app/test-pmd/cmdline_flow.c | 104 +
 app/test-pmd/config.c   | 140 +++
 2 files changed, 191 insertions(+), 53 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index c2cf415ef..890c36d8e 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -184,13 +184,19 @@ enum index {
 #define ITEM_RAW_SIZE \
(offsetof(struct rte_flow_item_raw, pattern) + ITEM_RAW_PATTERN_SIZE)
 
-/** Number of queue[] entries in struct rte_flow_action_rss. */
-#define ACTION_RSS_NUM 32
-
-/** Storage size for struct rte_flow_action_rss including queues. */
-#define ACTION_RSS_SIZE \
-   (offsetof(struct rte_flow_action_rss, queue) + \
-sizeof(*((struct rte_flow_action_rss *)0)->queue) * ACTION_RSS_NUM)
+/** Maximum number of queue indices in struct rte_flow_action_rss. */
+#define ACTION_RSS_QUEUE_NUM 32
+
+/** Storage for struct rte_flow_action_rss including external data. */
+union action_rss_data {
+   struct rte_flow_action_rss conf;
+   struct {
+   uint8_t conf_data[offsetof(struct rte_flow_action_rss, queue)];
+   uint16_t queue[ACTION_RSS_QUEUE_NUM];
+   struct rte_eth_rss_conf rss_conf;
+   uint8_t rss_key[RSS_HASH_KEY_LENGTH];
+   } s;
+};
 
 /** Maximum number of subsequent tokens and arguments on the stack. */
 #define CTX_STACK_SIZE 16
@@ -316,6 +322,13 @@ struct token {
.size = (sz), \
})
 
+/** Static initializer for ARGS() with arbitrary offset and size. */
+#define ARGS_ENTRY_ARB(o, s) \
+   (&(const struct arg){ \
+   .offset = (o), \
+   .size = (s), \
+   })
+
 /** Same as ARGS_ENTRY() using network byte ordering. */
 #define ARGS_ENTRY_HTON(s, f) \
(&(const struct arg){ \
@@ -650,6 +663,9 @@ static int parse_vc_spec(struct context *, const struct token *,
 const char *, unsigned int, void *, unsigned int);
 static int parse_vc_conf(struct context *, const struct token *,
 const char *, unsigned int, void *, unsigned int);
+static int parse_vc_action_rss(struct context *, const struct token *,
+  const char *, unsigned int, void *,
+  unsigned int);
 static int parse_vc_action_rss_queue(struct context *, const struct token *,
 const char *, unsigned int, void *,
 unsigned int);
@@ -1573,9 +1589,9 @@ static const struct token token_list[] = {
[ACTION_RSS] = {
.name = "rss",
.help = "spread packets among several queues",
-   .priv = PRIV_ACTION(RSS, ACTION_RSS_SIZE),
+   .priv = PRIV_ACTION(RSS, sizeof(union action_rss_data)),
.next = NEXT(action_rss),
-   .call = parse_vc,
+   .call = parse_vc_action_rss,
},
[ACTION_RSS_QUEUES] = {
.name = "queues",
@@ -2004,6 +2020,64 @@ parse_vc_conf(struct context *ctx, const struct token *token,
return len;
 }
 
+/** Parse RSS action. */
+static int
+parse_vc_action_rss(struct context *ctx, const struct token *token,
+   const char *str, unsigned int len,
+   void *buf, unsigned int size)
+{
+   struct buffer *out = buf;
+   struct rte_flow_action *action;
+   union action_rss_data *action_rss_data;
+   unsigned int i;
+   int ret;
+
+   ret = parse_vc(ctx, token, str, len, buf, size);
+   if (ret < 0)
+   return ret;
+   /* Nothing else to do if there is no buffer. */
+   if (!out)
+   return ret;
+   if (!out->args.vc.actions_n)
+   return -1;
+   action = &out->args.vc.actions[out->args.vc.actions_n - 1];
+   /* Point to selected object. */
+   ctx->object = out->args.vc.data;
+   ctx->objmask = NULL;
+   /* Set up default configuration. */
+   action_rss_data = ctx->object;
+   *action_rss_data = (union action_rss_data){
+   .conf = (struct rte_flow_action_rss){
+   .rss_conf = &action_rss_data->s.rss_conf,
+   .num = RTE_MIN(nb_rxq, ACTION_RSS_QUEUE_NUM),
+   },
+   };
+   action_rss_data->s.rss_conf = (stru

[dpdk-dev] [PATCH v3 10/11] ethdev: fix missing boolean values in flow command

2018-04-06 Thread Adrien Mazarguil
The original implementation lacks "on" and "off" among the accepted boolean
values.

This patch shows up as a fix because the missing values have been a popular
request ever since the first DPDK release that shipped the original
implementation, yet they were never addressed.
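
For context, here is a standalone model of how such a token table is
typically consumed (the entries before "false" and the parser body are
assumptions; only the added "off"/"on" pair comes from this patch): names
are stored in false/true pairs, so the parity of the matching index yields
the value and extending the table needs no parser change.

#include <stdio.h>
#include <string.h>

static const char *const boolean_name[] = {
        "0", "1",
        "false", "true",
        "no", "yes",
        "N", "Y",
        "off", "on",
        NULL,
};

static int
parse_boolean(const char *str)
{
        unsigned int i;

        for (i = 0; boolean_name[i]; ++i)
                if (!strcmp(boolean_name[i], str))
                        return i & 1; /* even index -> 0, odd index -> 1 */
        return -1; /* not a recognized boolean name */
}

int
main(void)
{
        printf("on=%d off=%d maybe=%d\n",
               parse_boolean("on"), parse_boolean("off"),
               parse_boolean("maybe"));
        return 0;
}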

Fixes: abc3d81aca1b ("app/testpmd: add item raw to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
---
 app/test-pmd/cmdline_flow.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index dbf4afebf..30450f1a4 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -2695,6 +2695,7 @@ static const char *const boolean_name[] = {
"false", "true",
"no", "yes",
"N", "Y",
+   "off", "on",
NULL,
 };
 
-- 
2.11.0


[dpdk-dev] [PATCH v3 08/11] app/testpmd: fix missing RSS fields in flow action

2018-04-06 Thread Adrien Mazarguil
Users cannot override the default RSS settings when entering an RSS action;
only a list of queues can be provided.

This patch enables them to set an RSS hash key and hash types for a flow rule.
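
As a hedged sketch of what these new tokens ultimately express at the flow
API level as it stands in this series (helper name and the chosen key, hash
types and queues are illustrative only):

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <rte_ethdev.h>
#include <rte_flow.h>

/* Build an RSS action carrying an explicit hash key and hash types through
 * rte_eth_rss_conf, plus a two-queue list. Caller frees the result. */
static struct rte_flow_action_rss *
make_rss_action(struct rte_eth_rss_conf *conf, uint8_t *key, uint8_t key_len)
{
        size_t size = offsetof(struct rte_flow_action_rss, queue) +
                      2 * sizeof(uint16_t);
        struct rte_flow_action_rss *rss = calloc(1, size);

        if (rss == NULL)
                return NULL;
        conf->rss_key = key;
        conf->rss_key_len = key_len;
        conf->rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_UDP;
        rss->rss_conf = conf;
        rss->num = 2;
        rss->queue[0] = 0;
        rss->queue[1] = 1;
        return rss;
}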

Fixes: 05d34c6e9d2c ("app/testpmd: add queue actions to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
---
 app/test-pmd/cmdline_flow.c | 133 ++-
 app/test-pmd/config.c   |  20 ++--
 app/test-pmd/testpmd.h  |  13 +++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |   8 ++
 4 files changed, 163 insertions(+), 11 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 890c36d8e..dbf4afebf 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -167,6 +167,10 @@ enum index {
ACTION_DUP,
ACTION_DUP_INDEX,
ACTION_RSS,
+   ACTION_RSS_TYPES,
+   ACTION_RSS_TYPE,
+   ACTION_RSS_KEY,
+   ACTION_RSS_KEY_LEN,
ACTION_RSS_QUEUES,
ACTION_RSS_QUEUE,
ACTION_PF,
@@ -223,6 +227,9 @@ struct context {
 struct arg {
uint32_t hton:1; /**< Use network byte ordering. */
uint32_t sign:1; /**< Value is signed. */
+   uint32_t bounded:1; /**< Value is bounded. */
+   uintmax_t min; /**< Minimum value if bounded. */
+   uintmax_t max; /**< Maximum value if bounded. */
uint32_t offset; /**< Relative offset from ctx->object. */
uint32_t size; /**< Field size. */
const uint8_t *mask; /**< Bit-mask to use instead of offset/size. */
@@ -329,6 +336,16 @@ struct token {
.size = (s), \
})
 
+/** Same as ARGS_ENTRY_ARB() with bounded values. */
+#define ARGS_ENTRY_ARB_BOUNDED(o, s, i, a) \
+   (&(const struct arg){ \
+   .bounded = 1, \
+   .min = (i), \
+   .max = (a), \
+   .offset = (o), \
+   .size = (s), \
+   })
+
 /** Same as ARGS_ENTRY() using network byte ordering. */
 #define ARGS_ENTRY_HTON(s, f) \
(&(const struct arg){ \
@@ -635,6 +652,9 @@ static const enum index action_dup[] = {
 };
 
 static const enum index action_rss[] = {
+   ACTION_RSS_TYPES,
+   ACTION_RSS_KEY,
+   ACTION_RSS_KEY_LEN,
ACTION_RSS_QUEUES,
ACTION_NEXT,
ZERO,
@@ -666,6 +686,9 @@ parse_vc_conf(struct context *, const struct token *,
 static int parse_vc_action_rss(struct context *, const struct token *,
   const char *, unsigned int, void *,
   unsigned int);
+static int parse_vc_action_rss_type(struct context *, const struct token *,
+   const char *, unsigned int, void *,
+   unsigned int);
 static int parse_vc_action_rss_queue(struct context *, const struct token *,
 const char *, unsigned int, void *,
 unsigned int);
@@ -721,6 +744,8 @@ static int comp_port(struct context *, const struct token *,
 unsigned int, char *, unsigned int);
 static int comp_rule_id(struct context *, const struct token *,
unsigned int, char *, unsigned int);
+static int comp_vc_action_rss_type(struct context *, const struct token *,
+  unsigned int, char *, unsigned int);
 static int comp_vc_action_rss_queue(struct context *, const struct token *,
unsigned int, char *, unsigned int);
 
@@ -1593,6 +1618,43 @@ static const struct token token_list[] = {
.next = NEXT(action_rss),
.call = parse_vc_action_rss,
},
+   [ACTION_RSS_TYPES] = {
+   .name = "types",
+   .help = "RSS hash types",
+   .next = NEXT(action_rss, NEXT_ENTRY(ACTION_RSS_TYPE)),
+   },
+   [ACTION_RSS_TYPE] = {
+   .name = "{type}",
+   .help = "RSS hash type",
+   .call = parse_vc_action_rss_type,
+   .comp = comp_vc_action_rss_type,
+   },
+   [ACTION_RSS_KEY] = {
+   .name = "key",
+   .help = "RSS hash key",
+   .next = NEXT(action_rss, NEXT_ENTRY(STRING)),
+   .args = ARGS(ARGS_ENTRY_ARB
+(((uintptr_t)&((union action_rss_data *)0)->
+  s.rss_conf.rss_key_len),
+ sizeof(((struct rte_eth_rss_conf *)0)->
+rss_key_len)),
+ARGS_ENTRY_ARB
+(((uintptr_t)((union action_rss_data *)0)->
+  s.rss_key),
+ RSS_HASH_KEY_LENGTH)),
+   },
+   [ACTION_RSS_KEY_LEN] = {
+   .name = "key_len",
+   .help = "RSS hash key length in bytes

[dpdk-dev] [PATCH v3 09/11] ethdev: fix shallow copy of flow API RSS action

2018-04-06 Thread Adrien Mazarguil
The rss_conf field is defined as a pointer to struct rte_eth_rss_conf.

Even assuming it is permanently allocated and a pointer copy is safe, the
pointed-to data may change and no longer reflect an applied flow rule.

This patch aligns with testpmd by making a deep copy instead.
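
To make the hazard concrete, here is a minimal standalone illustration
(simplified, assumed structures; not the rte_flow copy code itself) of how a
shallow copy keeps pointing into caller-owned memory while a deep copy does
not:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct rss_conf { uint64_t hf; };
struct action_rss { const struct rss_conf *rss_conf; };

int
main(void)
{
        struct rss_conf user_conf = { .hf = 0x1 };
        struct action_rss src = { .rss_conf = &user_conf };

        /* Shallow copy: still points into caller-owned memory. */
        struct action_rss shallow = src;

        /* Deep copy: duplicate the pointed-to data as well. */
        struct action_rss deep = src;
        struct rss_conf *dup = malloc(sizeof(*dup));

        *dup = *src.rss_conf;
        deep.rss_conf = dup;

        user_conf.hf = 0x2; /* caller reuses its buffer afterwards */
        printf("shallow copy sees %#lx, deep copy still sees %#lx\n",
               (unsigned long)shallow.rss_conf->hf,
               (unsigned long)deep.rss_conf->hf);
        free(dup);
        return 0;
}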

Fixes: 18da437b5f63 ("ethdev: add flow rule copy function")
Cc: sta...@dpdk.org
Cc: Gaetan Rivet 

Signed-off-by: Adrien Mazarguil 
Cc: Thomas Monjalon 
---
 lib/librte_ether/rte_flow.c | 145 +++
 1 file changed, 102 insertions(+), 43 deletions(-)

diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
index 38f2d27be..ba6feddee 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ether/rte_flow.c
@@ -255,60 +255,119 @@ rte_flow_error_set(struct rte_flow_error *error,
return -code;
 }
 
-/** Compute storage space needed by item specification. */
-static void
-flow_item_spec_size(const struct rte_flow_item *item,
-   size_t *size, size_t *pad)
+/** Pattern item specification types. */
+enum item_spec_type {
+   ITEM_SPEC,
+   ITEM_LAST,
+   ITEM_MASK,
+};
+
+/** Compute storage space needed by item specification and copy it. */
+static size_t
+flow_item_spec_copy(void *buf, const struct rte_flow_item *item,
+   enum item_spec_type type)
 {
-   if (!item->spec) {
-   *size = 0;
+   size_t size = 0;
+   const void *item_spec =
+   type == ITEM_SPEC ? item->spec :
+   type == ITEM_LAST ? item->last :
+   type == ITEM_MASK ? item->mask :
+   NULL;
+
+   if (!item_spec)
goto empty;
-   }
switch (item->type) {
union {
const struct rte_flow_item_raw *raw;
-   } spec;
+   } src;
+   union {
+   struct rte_flow_item_raw *raw;
+   } dst;
 
-   /* Not a fall-through */
case RTE_FLOW_ITEM_TYPE_RAW:
-   spec.raw = item->spec;
-   *size = offsetof(struct rte_flow_item_raw, pattern) +
-   spec.raw->length * sizeof(*spec.raw->pattern);
+   src.raw = item_spec;
+   dst.raw = buf;
+   size = offsetof(struct rte_flow_item_raw, pattern) +
+   src.raw->length * sizeof(*src.raw->pattern);
+   if (dst.raw)
+   memcpy(dst.raw, src.raw, size);
break;
default:
-   *size = rte_flow_desc_item[item->type].size;
+   size = rte_flow_desc_item[item->type].size;
+   if (buf)
+   memcpy(buf, item_spec, size);
break;
}
 empty:
-   *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+   return RTE_ALIGN_CEIL(size, sizeof(double));
 }
 
-/** Compute storage space needed by action configuration. */
-static void
-flow_action_conf_size(const struct rte_flow_action *action,
- size_t *size, size_t *pad)
+/** Compute storage space needed by action configuration and copy it. */
+static size_t
+flow_action_conf_copy(void *buf, const struct rte_flow_action *action)
 {
-   if (!action->conf) {
-   *size = 0;
+   size_t size = 0;
+
+   if (!action->conf)
goto empty;
-   }
switch (action->type) {
union {
const struct rte_flow_action_rss *rss;
-   } conf;
+   } src;
+   union {
+   struct rte_flow_action_rss *rss;
+   } dst;
+   size_t off;
 
-   /* Not a fall-through. */
case RTE_FLOW_ACTION_TYPE_RSS:
-   conf.rss = action->conf;
-   *size = offsetof(struct rte_flow_action_rss, queue) +
-   conf.rss->num * sizeof(*conf.rss->queue);
+   src.rss = action->conf;
+   dst.rss = buf;
+   off = 0;
+   if (dst.rss)
+   *dst.rss = (struct rte_flow_action_rss){
+   .num = src.rss->num,
+   };
+   off += offsetof(struct rte_flow_action_rss, queue);
+   if (src.rss->num) {
+   size = sizeof(*src.rss->queue) * src.rss->num;
+   if (dst.rss)
+   memcpy(dst.rss->queue, src.rss->queue, size);
+   off += size;
+   }
+   off = RTE_ALIGN_CEIL(off, sizeof(double));
+   if (dst.rss) {
+   dst.rss->rss_conf = (void *)((uintptr_t)dst.rss + off);
+   *(struct rte_eth_rss_conf *)(uintptr_t)
+   dst.rss->rss_conf = (struct rte_eth_rss_conf){
+   .rss_key_len = src.rss->rss_conf->rss_key_len,
+   .rss_hf = src.rss->rss_

[dpdk-dev] [PATCH v3 06/11] app/testpmd: fix lack of flow action configuration

2018-04-06 Thread Adrien Mazarguil
The configuration structure is not optional for flow rule actions that
expect one; its pointer is not supposed to be NULL and PMDs should not have
to verify it.

Like pattern item spec/last/mask fields, it is currently set when at least
one configuration parameter is provided on the command line. This patch
sets it as soon as an action is created instead.

Fixes: 7a91969ad35e ("app/testpmd: add various actions to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
---
 app/test-pmd/cmdline_flow.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 9cac8e9bf..c2cf415ef 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -1909,6 +1909,7 @@ parse_vc(struct context *ctx, const struct token *token,
return -1;
*action = (struct rte_flow_action){
.type = priv->type,
+   .conf = data_size ? data : NULL,
};
++out->args.vc.actions_n;
ctx->object = action;
@@ -1989,7 +1990,6 @@ parse_vc_conf(struct context *ctx, const struct token *token,
  void *buf, unsigned int size)
 {
struct buffer *out = buf;
-   struct rte_flow_action *action;
 
(void)size;
/* Token name must match. */
@@ -1998,14 +1998,9 @@ parse_vc_conf(struct context *ctx, const struct token *token,
/* Nothing else to do if there is no buffer. */
if (!out)
return len;
-   if (!out->args.vc.actions_n)
-   return -1;
-   action = &out->args.vc.actions[out->args.vc.actions_n - 1];
/* Point to selected object. */
ctx->object = out->args.vc.data;
ctx->objmask = NULL;
-   /* Update configuration pointer. */
-   action->conf = ctx->object;
return len;
 }
 
-- 
2.11.0


[dpdk-dev] [PATCH v3 11/11] ethdev: fix missing include in flow API

2018-04-06 Thread Adrien Mazarguil
Fixes: b1a4b4cbc0a8 ("ethdev: introduce generic flow API")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
---
 lib/librte_ether/rte_flow.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 13e420218..cdaaa3a5b 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -14,6 +14,8 @@
  * associated actions in hardware through flow rules.
  */
 
+#include 
+
 #include 
 #include 
 #include 
-- 
2.11.0


[dpdk-dev] [PATCH v2 02/15] ethdev: clarify flow API pattern items and actions

2018-04-06 Thread Adrien Mazarguil
Although the pattern item and action examples end with "and so on", these
lists in fact include all existing definitions and as a result must be
updated almost every time new types are added. This is cumbersome and
pointless.

This patch also synchronizes Doxygen and external API documentation wording
with a slight clarification regarding meta pattern items.

No fundamental API change.
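
For instance, the two categories read like this in application code (a
hedged illustration; the choice of items is arbitrary):

#include <rte_flow.h>

/* Protocol items stacked in the order of the layers to match, plus a meta
 * item (VOID) whose position in the list does not matter. */
static const struct rte_flow_item pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },    /* lowest protocol layer */
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP },
        { .type = RTE_FLOW_ITEM_TYPE_VOID },   /* meta item, ignored */
        { .type = RTE_FLOW_ITEM_TYPE_END },
};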

Signed-off-by: Adrien Mazarguil 
---
 doc/guides/prog_guide/rte_flow.rst | 23 +++
 lib/librte_ether/rte_flow.h| 23 ++-
 2 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/doc/guides/prog_guide/rte_flow.rst b/doc/guides/prog_guide/rte_flow.rst
index 961943dda..a11ebd617 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -186,12 +186,13 @@ Pattern item
 
 Pattern items fall in two categories:
 
-- Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4,
-  IPV6, ICMP, UDP, TCP, SCTP, VXLAN, MPLS, GRE, ESP and so on), usually
-  associated with a specification structure.
+- Matching protocol headers and packet data, usually associated with a
+  specification structure. These must be stacked in the same order as the
+  protocol layers to match inside packets, starting from the lowest.
 
-- Matching meta-data or affecting pattern processing (END, VOID, INVERT, PF,
-  VF, PORT and so on), often without a specification structure.
+- Matching meta-data or affecting pattern processing, often without a
+  specification structure. Since they do not match packet contents, their
+  position in the list is usually not relevant.
 
 Item specification structures are used to match specific values among
 protocol fields (or item properties). Documentation describes for each item
@@ -1001,15 +1002,13 @@ to a flow rule. That list is not ordered.
 
 They fall in three categories:
 
-- Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
-  processing matched packets by subsequent flow rules, unless overridden
-  with PASSTHRU.
+- Terminating actions that prevent processing matched packets by subsequent
+  flow rules, unless overridden with PASSTHRU.
 
-- Non-terminating actions (PASSTHRU, DUP) that leave matched packets up for
-  additional processing by subsequent flow rules.
+- Non-terminating actions that leave matched packets up for additional
+  processing by subsequent flow rules.
 
-- Other non-terminating meta actions that do not affect the fate of packets
-  (END, VOID, MARK, FLAG, COUNT, SECURITY).
+- Other non-terminating meta actions that do not affect the fate of packets.
 
 When several actions are combined in a flow rule, they should all have
 different types (e.g. dropping a packet twice is not possible).
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 95799fd9c..36fd38ffa 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -78,15 +78,13 @@ struct rte_flow_attr {
  *
  * Pattern items fall in two categories:
  *
- * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4,
- *   IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a
+ * - Matching protocol headers and packet data, usually associated with a
  *   specification structure. These must be stacked in the same order as the
- *   protocol layers to match, starting from the lowest.
+ *   protocol layers to match inside packets, starting from the lowest.
  *
- * - Matching meta-data or affecting pattern processing (END, VOID, INVERT,
- *   PF, VF, PORT and so on), often without a specification structure. Since
- *   they do not match packet contents, these can be specified anywhere
- *   within item lists without affecting others.
+ * - Matching meta-data or affecting pattern processing, often without a
+ *   specification structure. Since they do not match packet contents, their
+ *   position in the list is usually not relevant.
  *
  * See the description of individual types for more information. Those
  * marked with [META] fall into the second category.
@@ -865,15 +863,14 @@ struct rte_flow_item {
  *
  * They fall in three categories:
  *
- * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
- *   processing matched packets by subsequent flow rules, unless overridden
- *   with PASSTHRU.
+ * - Terminating actions that prevent processing matched packets by
+ *   subsequent flow rules, unless overridden with PASSTHRU.
  *
- * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up
- *   for additional processing by subsequent flow rules.
+ * - Non terminating actions that leave matched packets up for additional
+ *   processing by subsequent flow rules.
  *
  * - Other non terminating meta actions that do not affect the fate of
- *   packets (END, VOID, MARK, FLAG, COUNT).
+ *   packets.
  *
  * When several actions are combined in a flow rule, they should all have
  * different types (e.g. dropping a packet twice is not possible).

[dpdk-dev] [PATCH v2 00/15] Flow API overhaul for switch offloads

2018-04-06 Thread Adrien Mazarguil
As summarized in a prior RFC [1], the flow API (rte_flow) was chosen as a
means to manage switch offloads supported by many devices (usually going by
names such as E-Switch or vSwitch) through user-specified flow rules.

Combined with the need to support encap/decap actions, this requires a
change in the way flow actions are processed (in order and possibly
repeated) which modifies the behavior of some of the existing actions, thus
warranting a major ABI breakage.

Given this ABI breakage is also required by other work submitted for the
current release [2][3], this series addresses various longstanding issues
with the flow API and makes minor improvements in preparation for upcoming
features.

Changes summary:

- Additional error types.
- Clearer documentation.
- Improved C++ compatibility.
- Exhaustive RSS action.
- Consistent behavior of VLAN pattern item.
- New "transfer" attribute bringing consistency to VF/PF pattern items.
- Confusing "PORT" pattern item renamed "PHY_PORT", with new action
  counterpart.
- New "PORT_ID" pattern item and action to be used with port representors.

This series piggybacks on the major ABI update introduced by a prior
commit [4] for DPDK 18.05 and depends on several fixes [5] which must be
applied first.

[1] "[RFC] Switch device offload with DPDK"
http://dpdk.org/ml/archives/dev/2018-March/092513.html

[2] commit 676b605182a5 ("doc: announce ethdev API change for RSS
configuration")

[3] "[PATCH v1 00/21] MLX5 tunnel Rx offloading"
http://dpdk.org/ml/archives/dev/2018-March/092264.html

[4] commit 653e038efc9b ("ethdev: remove versioning of filter control
function")

[5] "[PATCH v3 00/11] Bunch of flow API-related fixes"
http://dpdk.org/ml/archives/dev/2018-April/095762.html

v2 changes:

- Squashed "ethdev: update ABI for flow API functions" in subsequent
  patches.
- Emphasized ABI impact in relevant commit logs.
- Modified documentation in "ethdev: alter behavior of flow API actions" to
  describe how terminating flow rules without any action of the fate kind
  result in undefined behavior instead of dropping traffic.
- Fixed other minor documentation formatting issues.
- Modified "ethdev: refine TPID handling in flow API" as follows:
  - Using standard macro definitions for VLAN, QinQ and E-Tag EtherTypes.
  - Fixed endian conversion in sfc.
  - Replaced a condition in VLAN pattern item processing with an assertion
check for i40e.

Adrien Mazarguil (15):
  ethdev: add error types to flow API
  ethdev: clarify flow API pattern items and actions
  doc: remove flow API migration section
  ethdev: remove DUP action from flow API
  ethdev: alter behavior of flow API actions
  ethdev: remove C99 flexible arrays from flow API
  ethdev: flatten RSS configuration in flow API
  ethdev: add hash function to RSS flow API action
  ethdev: add encap level to RSS flow API action
  ethdev: refine TPID handling in flow API
  ethdev: add transfer attribute to flow API
  ethdev: update behavior of VF/PF in flow API
  ethdev: rename physical port item in flow API
  ethdev: add physical port action to flow API
  ethdev: add port ID item and action to flow API

 app/test-pmd/cmdline_flow.c | 405 ++-
 app/test-pmd/config.c   |  78 +--
 doc/guides/nics/tap.rst |   2 +-
 doc/guides/prog_guide/rte_flow.rst  | 602 ---
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  54 +-
 drivers/net/bnxt/bnxt_filter.c  |  53 +-
 drivers/net/e1000/e1000_ethdev.h|  13 +-
 drivers/net/e1000/igb_ethdev.c  |   4 +-
 drivers/net/e1000/igb_flow.c|  83 +++-
 drivers/net/e1000/igb_rxtx.c|  55 ++-
 drivers/net/enic/enic_flow.c|  53 +-
 drivers/net/i40e/i40e_ethdev.c  |  57 ++-
 drivers/net/i40e/i40e_ethdev.h  |  15 +-
 drivers/net/i40e/i40e_flow.c| 139 --
 drivers/net/ixgbe/ixgbe_ethdev.c|   7 +-
 drivers/net/ixgbe/ixgbe_ethdev.h|  13 +-
 drivers/net/ixgbe/ixgbe_flow.c  |  91 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c  |  55 ++-
 drivers/net/mlx4/mlx4.c |   2 +-
 drivers/net/mlx4/mlx4_flow.c| 117 +++--
 drivers/net/mlx4/mlx4_flow.h|   2 +-
 drivers/net/mlx4/mlx4_rxq.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h|   2 +-
 drivers/net/mlx5/mlx5_flow.c| 319 ++--
 drivers/net/mlx5/mlx5_rxq.c |  22 +-
 drivers/net/mlx5/mlx5_rxtx.h|  26 +-
 drivers/net/mvpp2/mrvl_flow.c   |  33 +-
 drivers/net/sfc/sfc_flow.c  |  83 +++-
 drivers/net/tap/tap_flow.c  |  51 +-
 examples/ipsec-secgw/ipsec.c|  21 +-
 lib/librte_ether/rte_ethdev_version.map |  22 +-
 lib/librte_ether/rte_flow.c |  68 +--
 lib/librte_ether/rte_flow.h | 32

[dpdk-dev] [PATCH v2 01/15] ethdev: add error types to flow API

2018-04-06 Thread Adrien Mazarguil
These enable more precise reporting of objects responsible for errors.

This breaks ABI compatibility for the following public functions:

- rte_flow_create()
- rte_flow_destroy()
- rte_flow_error_set()
- rte_flow_flush()
- rte_flow_isolate()
- rte_flow_query()
- rte_flow_validate()
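
As an example of the added precision, a PMD rejecting an unsupported mask
can now point at the mask itself (a hedged sketch; the helper name and
message are hypothetical, while the function and error type come from this
patch):

#include <errno.h>
#include <rte_flow.h>

static int
reject_item_mask(const struct rte_flow_item *item,
                 struct rte_flow_error *error)
{
        /* Report exactly which object caused the failure. */
        return rte_flow_error_set(error, ENOTSUP,
                                  RTE_FLOW_ERROR_TYPE_ITEM_MASK,
                                  item->mask,
                                  "unsupported mask for this item");
}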

Signed-off-by: Adrien Mazarguil 
---
 app/test-pmd/config.c   |  4 
 lib/librte_ether/rte_ethdev_version.map | 20 +---
 lib/librte_ether/rte_flow.h |  4 
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 2058e6ec8..7ae0295f6 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1228,8 +1228,12 @@ port_flow_complain(struct rte_flow_error *error)
[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+   [RTE_FLOW_ERROR_TYPE_ITEM_SPEC] = "item specification",
+   [RTE_FLOW_ERROR_TYPE_ITEM_LAST] = "item specification range",
+   [RTE_FLOW_ERROR_TYPE_ITEM_MASK] = "item specification mask",
[RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
[RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+   [RTE_FLOW_ERROR_TYPE_ACTION_CONF] = "action configuration",
[RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
};
const char *errstr;
diff --git a/lib/librte_ether/rte_ethdev_version.map b/lib/librte_ether/rte_ethdev_version.map
index 34df6c8b5..e915e7929 100644
--- a/lib/librte_ether/rte_ethdev_version.map
+++ b/lib/librte_ether/rte_ethdev_version.map
@@ -127,11 +127,6 @@ DPDK_17.02 {
 
_rte_eth_dev_reset;
rte_eth_dev_fw_version_get;
-   rte_flow_create;
-   rte_flow_destroy;
-   rte_flow_flush;
-   rte_flow_query;
-   rte_flow_validate;
 
 } DPDK_16.07;
 
@@ -153,7 +148,6 @@ DPDK_17.08 {
_rte_eth_dev_callback_process;
rte_eth_dev_adjust_nb_rx_tx_desc;
rte_flow_copy;
-   rte_flow_isolate;
rte_tm_capabilities_get;
rte_tm_hierarchy_commit;
rte_tm_level_capabilities_get;
@@ -192,7 +186,6 @@ DPDK_17.11 {
rte_eth_dev_get_sec_ctx;
rte_eth_dev_pool_ops_supported;
rte_eth_dev_reset;
-   rte_flow_error_set;
 
 } DPDK_17.08;
 
@@ -203,6 +196,19 @@ DPDK_18.02 {
 
 } DPDK_17.11;
 
+DPDK_18.05 {
+   global:
+
+   rte_flow_create;
+   rte_flow_destroy;
+   rte_flow_error_set;
+   rte_flow_flush;
+   rte_flow_isolate;
+   rte_flow_query;
+   rte_flow_validate;
+
+} DPDK_18.02;
+
 EXPERIMENTAL {
global:
 
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index cdaaa3a5b..95799fd9c 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -1186,8 +1186,12 @@ enum rte_flow_error_type {
RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */
RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */
RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */
+   RTE_FLOW_ERROR_TYPE_ITEM_SPEC, /**< Item specification. */
+   RTE_FLOW_ERROR_TYPE_ITEM_LAST, /**< Item specification range. */
+   RTE_FLOW_ERROR_TYPE_ITEM_MASK, /**< Item specification mask. */
RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */
RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */
+   RTE_FLOW_ERROR_TYPE_ACTION_CONF, /**< Action configuration. */
RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */
 };
 
-- 
2.11.0

