[dpdk-dev] [PATCH v3 4/4] bonding: remove memcpy from burst functions

2016-06-12 Thread Bernard Iremonger
Now that the queue spinlocks have been added to the rx and
tx burst functions the memcpy of the slave data is no
longer necessary, so it has been removed.

Signed-off-by: Bernard Iremonger 
Acked-by: Konstantin Ananyev 
---
 drivers/net/bonding/rte_eth_bond_pmd.c | 71 ++
 1 file changed, 28 insertions(+), 43 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index 93043ef..ce46450 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -146,7 +146,6 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf 
**bufs,

const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
uint16_t num_rx_total = 0;  /* Total number of received packets */
-   uint8_t slaves[RTE_MAX_ETHPORTS];
uint8_t slave_count;

uint8_t collecting;  /* current slave collecting status */
@@ -159,15 +158,16 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf 
**bufs,
return num_rx_total;

slave_count = internals->active_slave_count;
-   memcpy(slaves, internals->active_slaves,
-   sizeof(internals->active_slaves[0]) * slave_count);

for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
j = num_rx_total;
-   collecting = ACTOR_STATE(_8023ad_ports[slaves[i]], 
COLLECTING);
+   collecting = ACTOR_STATE(
+   _8023ad_ports[internals->active_slaves[i]],
+   COLLECTING);

/* Read packets from this slave */
-   num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
+   num_rx_total += rte_eth_rx_burst(internals->active_slaves[i],
+   bd_rx_q->queue_id,
&bufs[num_rx_total], nb_pkts - num_rx_total);

for (k = j; k < 2 && k < num_rx_total; k++)
@@ -188,7 +188,9 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf 
**bufs,
!is_same_ether_addr(_mac, 
>d_addr {

if (hdr->ether_type == ether_type_slow_be) {
-   
bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
+   bond_mode_8023ad_handle_slow_pkt(
+   internals,
+   internals->active_slaves[i],
bufs[j]);
} else
rte_pktmbuf_free(bufs[j]);
@@ -409,8 +411,6 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct 
rte_mbuf **bufs,
uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

uint8_t num_of_slaves;
-   uint8_t slaves[RTE_MAX_ETHPORTS];
-
uint16_t num_tx_total = 0, num_tx_slave;

static int slave_idx = 0;
@@ -422,12 +422,7 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct 
rte_mbuf **bufs,
if (rte_spinlock_trylock(&bd_tx_q->lock) == 0)
return num_tx_total;

-   /* Copy slave list to protect against slave up/down changes during tx
-* bursting */
num_of_slaves = internals->active_slave_count;
-   memcpy(slaves, internals->active_slaves,
-   sizeof(internals->active_slaves[0]) * num_of_slaves);
-
if (num_of_slaves < 1) {
rte_spinlock_unlock(&bd_tx_q->lock);
return num_tx_total;
@@ -446,7 +441,9 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct 
rte_mbuf **bufs,
/* Send packet burst on each slave device */
for (i = 0; i < num_of_slaves; i++) {
if (slave_nb_pkts[i] > 0) {
-   num_tx_slave = rte_eth_tx_burst(slaves[i], 
bd_tx_q->queue_id,
+   num_tx_slave = rte_eth_tx_burst(
+   internals->active_slaves[i],
+   bd_tx_q->queue_id,
slave_bufs[i], slave_nb_pkts[i]);

/* if tx burst fails move packets to end of bufs */
@@ -721,7 +718,6 @@ bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf 
**bufs, uint16_t nb_pkts)
uint8_t i, j;

uint8_t num_of_slaves;
-   uint8_t slaves[RTE_MAX_ETHPORTS];

struct ether_hdr *ether_hdr;
struct ether_addr primary_slave_addr;
@@ -736,9 +732,6 @@ bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf 
**bufs, uint16_t nb_pkts)
return num_tx_total;
}

-   memcpy(slaves, internals->tlb_slaves_order,
-   sizeof(internals->tlb_slaves_order[0]) * 
num_of_slaves);
-
ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

if (nb_pkts > 3) {
@@ -747,7 +740,8 @@ bond_ethdev_tx_burst_tlb(void *queue, 

[dpdk-dev] [PATCH v3 3/4] bonding: take queue spinlock in rx/tx burst functions

2016-06-12 Thread Bernard Iremonger
Use rte_spinlock_trylock() in the rx/tx burst functions to
take the queue spinlock.

Signed-off-by: Bernard Iremonger 
Acked-by: Konstantin Ananyev 
---
 drivers/net/bonding/rte_eth_bond_pmd.c | 116 -
 1 file changed, 84 insertions(+), 32 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index 2e624bb..93043ef 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -92,16 +92,22 @@ bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, 
uint16_t nb_pkts)

internals = bd_rx_q->dev_private;

-
-   for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
-   /* Offset of pointer to *bufs increases as packets are received
-* from other slaves */
-   num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
-   bd_rx_q->queue_id, bufs + num_rx_total, 
nb_pkts);
-   if (num_rx_slave) {
-   num_rx_total += num_rx_slave;
-   nb_pkts -= num_rx_slave;
+   if (rte_spinlock_trylock(&bd_rx_q->lock)) {
+   for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
+   /* Offset of pointer to *bufs increases as packets
+* are received from other slaves
+*/
+   num_rx_slave = rte_eth_rx_burst(
+   internals->active_slaves[i],
+   bd_rx_q->queue_id,
+   bufs + num_rx_total,
+   nb_pkts);
+   if (num_rx_slave) {
+   num_rx_total += num_rx_slave;
+   nb_pkts -= num_rx_slave;
+   }
}
+   rte_spinlock_unlock(&bd_rx_q->lock);
}

return num_rx_total;
@@ -112,14 +118,19 @@ bond_ethdev_rx_burst_active_backup(void *queue, struct 
rte_mbuf **bufs,
uint16_t nb_pkts)
 {
struct bond_dev_private *internals;
+   uint16_t ret = 0;

/* Cast to structure, containing bonded device's port id and queue id */
struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

internals = bd_rx_q->dev_private;

-   return rte_eth_rx_burst(internals->current_primary_port,
-   bd_rx_q->queue_id, bufs, nb_pkts);
+   if (rte_spinlock_trylock(&bd_rx_q->lock)) {
+   ret = rte_eth_rx_burst(internals->current_primary_port,
+   bd_rx_q->queue_id, bufs, nb_pkts);
+   rte_spinlock_unlock(&bd_rx_q->lock);
+   }
+   return ret;
 }

 static uint16_t
@@ -143,8 +154,10 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf 
**bufs,
uint8_t i, j, k;

rte_eth_macaddr_get(internals->port_id, &bond_mac);
-   /* Copy slave list to protect against slave up/down changes during tx
-* bursting */
+
+   if (rte_spinlock_trylock(&bd_rx_q->lock) == 0)
+   return num_rx_total;
+
slave_count = internals->active_slave_count;
memcpy(slaves, internals->active_slaves,
sizeof(internals->active_slaves[0]) * slave_count);
@@ -190,7 +203,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf 
**bufs,
j++;
}
}
-
+   rte_spinlock_unlock(&bd_rx_q->lock);
return num_rx_total;
 }

@@ -406,14 +419,19 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct 
rte_mbuf **bufs,
bd_tx_q = (struct bond_tx_queue *)queue;
internals = bd_tx_q->dev_private;

+   if (rte_spinlock_trylock(&bd_tx_q->lock) == 0)
+   return num_tx_total;
+
/* Copy slave list to protect against slave up/down changes during tx
 * bursting */
num_of_slaves = internals->active_slave_count;
memcpy(slaves, internals->active_slaves,
sizeof(internals->active_slaves[0]) * num_of_slaves);

-   if (num_of_slaves < 1)
+   if (num_of_slaves < 1) {
+   rte_spinlock_unlock(&bd_tx_q->lock);
return num_tx_total;
+   }

/* Populate slaves mbuf with which packets are to be sent on it  */
for (i = 0; i < nb_pkts; i++) {
@@ -444,7 +462,7 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct 
rte_mbuf **bufs,
num_tx_total += num_tx_slave;
}
}
-
+   rte_spinlock_unlock(&bd_tx_q->lock);
return 

[dpdk-dev] [PATCH v3 2/4] bonding: grab queue spinlocks in slave add and remove

2016-06-12 Thread Bernard Iremonger
When adding or removing a slave device from the bonding device
the rx and tx queue spinlocks should be held.

Signed-off-by: Bernard Iremonger 
Acked-by: Konstantin Ananyev 
---
 drivers/net/bonding/rte_eth_bond_api.c | 52 --
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_api.c 
b/drivers/net/bonding/rte_eth_bond_api.c
index 53df9fe..006c901 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -437,8 +437,10 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t 
slave_port_id)
 {
struct rte_eth_dev *bonded_eth_dev;
struct bond_dev_private *internals;
-
+   struct bond_tx_queue *bd_tx_q;
+   struct bond_rx_queue *bd_rx_q;
int retval;
+   uint16_t i;

/* Verify that port id's are valid bonded and slave ports */
if (valid_bonded_port_id(bonded_port_id) != 0)
@@ -448,11 +450,30 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t 
slave_port_id)
internals = bonded_eth_dev->data->dev_private;

rte_spinlock_lock(&internals->lock);
+   if (bonded_eth_dev->data->dev_started) {
+   for (i = 0; i < bonded_eth_dev->data->nb_rx_queues; i++) {
+   bd_rx_q = bonded_eth_dev->data->rx_queues[i];
+   rte_spinlock_lock(&bd_rx_q->lock);
+   }
+   for (i = 0; i < bonded_eth_dev->data->nb_rx_queues; i++) {
+   bd_tx_q = bonded_eth_dev->data->tx_queues[i];
+   rte_spinlock_lock(&bd_tx_q->lock);
+   }
+   }

retval = __eth_bond_slave_add_lock_free(bonded_port_id, slave_port_id);

+   if (bonded_eth_dev->data->dev_started) {
+   for (i = 0; i < bonded_eth_dev->data->nb_rx_queues; i++) {
+   bd_rx_q = bonded_eth_dev->data->rx_queues[i];
+   rte_spinlock_unlock(&bd_rx_q->lock);
+   }
+   for (i = 0; i < bonded_eth_dev->data->nb_rx_queues; i++) {
+   bd_tx_q = bonded_eth_dev->data->tx_queues[i];
+   rte_spinlock_unlock(&bd_tx_q->lock);
+   }
+   }
rte_spinlock_unlock(&internals->lock);
-
return retval;
 }

@@ -541,7 +562,10 @@ rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t 
slave_port_id)
 {
struct rte_eth_dev *bonded_eth_dev;
struct bond_dev_private *internals;
+   struct bond_tx_queue *bd_tx_q;
+   struct bond_rx_queue *bd_rx_q;
int retval;
+   uint16_t i;

if (valid_bonded_port_id(bonded_port_id) != 0)
return -1;
@@ -550,11 +574,33 @@ rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t 
slave_port_id)
internals = bonded_eth_dev->data->dev_private;

rte_spinlock_lock(&internals->lock);
+   if (bonded_eth_dev->data->dev_started) {
+   for (i = 0; i < bonded_eth_dev->data->nb_rx_queues; i++) {
+   bd_rx_q = bonded_eth_dev->data->rx_queues[i];
+   rte_spinlock_lock(&bd_rx_q->lock);
+   }
+
+   for (i = 0; i < bonded_eth_dev->data->nb_tx_queues; i++) {
+   bd_tx_q = bonded_eth_dev->data->tx_queues[i];
+   rte_spinlock_lock(&bd_tx_q->lock);
+   }
+   }

retval = __eth_bond_slave_remove_lock_free(bonded_port_id, 
slave_port_id);

-   rte_spinlock_unlock(&internals->lock);
+   if (bonded_eth_dev->data->dev_started) {
+   for (i = 0; i < bonded_eth_dev->data->nb_tx_queues; i++) {
+   bd_tx_q = bonded_eth_dev->data->tx_queues[i];
+   rte_spinlock_unlock(&bd_tx_q->lock);
+   }

+   for (i = 0; i < bonded_eth_dev->data->nb_rx_queues; i++) {
+   bd_rx_q = bonded_eth_dev->data->rx_queues[i];
+   rte_spinlock_unlock(&bd_rx_q->lock);
+   }
+   rte_spinlock_unlock(&internals->lock);
+   }
+   rte_spinlock_unlock(&internals->lock);
return retval;
 }

-- 
2.6.3



[dpdk-dev] [PATCH v3 1/4] bonding: add spinlock to rx and tx queues

2016-06-12 Thread Bernard Iremonger
At present it is possible to add and remove slave devices from the
bonding device while traffic is running. This can result in
segmentation faults occurring in the rx and tx burst functions.
To resolve this issue spinlocks have been added to the rx and tx
queues.

Now when a slave is added or removed the rx and tx queue spinlocks
must be held.

Fixes: 2efb58cbab6e ("bond: new link bonding library")

Signed-off-by: Bernard Iremonger 
Acked-by: Konstantin Ananyev 
---
 drivers/net/bonding/rte_eth_bond_pmd.c | 4 
 drivers/net/bonding/rte_eth_bond_private.h | 4 +++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index 129f04b..2e624bb 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -1676,6 +1676,8 @@ bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, 
uint16_t rx_queue_id,
if (bd_rx_q == NULL)
return -1;

+   rte_spinlock_init(&bd_rx_q->lock);
+
bd_rx_q->queue_id = rx_queue_id;
bd_rx_q->dev_private = dev->data->dev_private;

@@ -1701,6 +1703,8 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, 
uint16_t tx_queue_id,
if (bd_tx_q == NULL)
return -1;

+   rte_spinlock_init(&bd_tx_q->lock);
+
bd_tx_q->queue_id = tx_queue_id;
bd_tx_q->dev_private = dev->data->dev_private;

diff --git a/drivers/net/bonding/rte_eth_bond_private.h 
b/drivers/net/bonding/rte_eth_bond_private.h
index 8312397..b6abcba 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -76,6 +76,7 @@ struct bond_rx_queue {
/**< Copy of RX configuration structure for queue */
struct rte_mempool *mb_pool;
/**< Reference to mbuf pool to use for RX queue */
+   rte_spinlock_t lock;
 };

 struct bond_tx_queue {
@@ -87,6 +88,7 @@ struct bond_tx_queue {
/**< Number of TX descriptors available for the queue */
struct rte_eth_txconf tx_conf;
/**< Copy of TX configuration structure for queue */
+   rte_spinlock_t lock;
 };

 /** Bonded slave devices structure */
-- 
2.6.3



[dpdk-dev] [PATCH v3 0/4] bonding: locks

2016-06-12 Thread Bernard Iremonger
Add spinlock to bonding rx and tx queues.
Take spinlock in rx and tx burst functions.
Take all spinlocks in slave add and remove functions.
With spinlocks in place remove memcpy of slaves.

Changes in v3:
Rebase to latest master.
Drop patches 4 and 5 from v2 patchset.
Update commit messages on patches.

Changes in v2:
Replace patch 1.
Add patch 2 and reorder patches.
Add spinlock to bonding rx and tx queues.
Take all spinlocks in slave add and remove functions.
Replace readlocks with spinlocks.

Bernard Iremonger (4):
  bonding: add spinlock to rx and tx queues
  bonding: grab queue spinlocks in slave add and remove
  bonding: take queue spinlock in rx/tx burst functions
  bonding: remove memcpy from burst functions

 drivers/net/bonding/rte_eth_bond_api.c |  52 +++-
 drivers/net/bonding/rte_eth_bond_pmd.c | 189 ++---
 drivers/net/bonding/rte_eth_bond_private.h |   4 +-
 3 files changed, 167 insertions(+), 78 deletions(-)

-- 
2.6.3



[dpdk-dev] [PATCH v3 1/2] ethdev: add callback to get register size in bytes

2016-06-12 Thread Zyta Szpak
Hi,
please see inline

2016-06-08 10:53 GMT+02:00 Thomas Monjalon :

> Hi Zyta,
>
> 2016-06-01 09:56, zr at semihalf.com:
> > rte_eth_dev_get_reg_length and rte_eth_dev_get_reg callbacks
> > do not provide register size to the app in any way. It is
> > needed to allocate proper number of bytes before retrieving
> > registers content with rte_eth_dev_get_reg.
>
> Yes, register size is needed.
> And I think it makes sense to register it in the struct rte_dev_reg_info.
> We already have a length field, so we could just add a width field.
>
That was my first thought to add reg_size to reg_info struct but
get_reg_length doesn't take reg_info as parameter so it would require
modification of this callback as well. This would interfere with the
author's vision. I think that adding a new one is clear and readable.

>
> > @@ -1455,6 +1458,8 @@ struct eth_dev_ops {
> >
> >   eth_get_reg_length_t get_reg_length;
> >   /**< Get # of registers */
> > + eth_get_reg_width_t get_reg_width;
> > + /**< Get # of bytes in register */
> >   eth_get_reg_t get_reg;
> >   /**< Get registers */
>
> I am not sure it is a good practice to add a new function for each
> parameter of a request.
> I would prefer having only one function rte_eth_dev_get_regs()
> which returns length and width if data is NULL.
> The first call is a parameter request before buffer allocation,
> and the second call fills the buffer.
>
> We can deprecate the old API and introduce this new one.
>
> Opinions?
>

In my opinion as it is now it works fine. Gathering all parameters in one
callback might be a good idea if the maintainer also agrees to that because
as I mentioned, it interferes.
Any other opinions?

Best regards,
Zyta Szpak


[dpdk-dev] [PATCH v3 8/9] app/testpmd: check for valid mbuf pool

2016-06-12 Thread Bernard Iremonger
Fixes: b6ea6408fbc7 ("ethdev: store numa_node per device")
Signed-off-by: Bernard Iremonger 
---
 app/test-pmd/testpmd.c | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 991457d..1e13c36 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1358,7 +1358,7 @@ start_port(portid_t pid)
if (mp == NULL) {
printf("Failed to setup RX 
queue:"
"No mempool allocation"
-   "on the socket %d\n",
+   " on the socket %d\n",
rxring_numa[pi]);
return -1;
}
@@ -1366,17 +1366,23 @@ start_port(portid_t pid)
diag = rte_eth_rx_queue_setup(pi, qi,
 nb_rxd,rxring_numa[pi],
 &(port->rx_conf),mp);
-   }
-   else
+   } else {
+   struct rte_mempool *mp =
+   mbuf_pool_find(port->socket_id);
+   if (mp == NULL) {
+   printf("Failed to setup RX 
queue:"
+   "No mempool allocation"
+   " on the socket %d\n",
+   port->socket_id);
+   return -1;
+   }
diag = rte_eth_rx_queue_setup(pi, qi,
 nb_rxd,port->socket_id,
-&(port->rx_conf),
-mbuf_pool_find(port->socket_id));
-
+&(port->rx_conf), mp);
+   }
if (diag == 0)
continue;

-
/* Fail to setup rx queue, return */
if (rte_atomic16_cmpset(&(port->port_status),
RTE_PORT_HANDLING,
-- 
2.6.3



[dpdk-dev] [PATCH v3 7/9] app/testpmd: check for valid socket id when attaching port

2016-06-12 Thread Bernard Iremonger
Fixes: edab33b1c01d ("app/testpmd: support port hotplug")
Signed-off-by: Bernard Iremonger 
---
 app/test-pmd/testpmd.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index c7ab8a0..991457d 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1525,6 +1525,7 @@ void
 attach_port(char *identifier)
 {
portid_t pi = 0;
+   unsigned int socket_id;

printf("Attaching a new port...\n");

@@ -1537,7 +1538,11 @@ attach_port(char *identifier)
return;

ports[pi].enabled = 1;
-   reconfig(pi, rte_eth_dev_socket_id(pi));
+   socket_id = (unsigned)rte_eth_dev_socket_id(pi);
+   /* if socket_id is invalid, set to 0 */
+   if (check_socket_id(socket_id) < 0)
+   socket_id = 0;
+   reconfig(pi, socket_id);
rte_eth_promiscuous_enable(pi);

nb_ports = rte_eth_dev_count();
-- 
2.6.3



[dpdk-dev] [PATCH v3 6/9] app/testpmd: move call to init_fwd_streams

2016-06-12 Thread Bernard Iremonger
Move call to init_fwd_streams from start_port function
to start_packet_forwarding function.

Signed-off-by: Bernard Iremonger 
---
 app/test-pmd/testpmd.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 6f68a18..c7ab8a0 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -992,6 +992,12 @@ start_packet_forwarding(int with_tx_first)
printf("Packet forwarding already started\n");
return;
}
+
+   if (init_fwd_streams() < 0) {
+   printf("Fail from init_fwd_streams()\n");
+   return;
+   }
+
if(dcb_test) {
for (i = 0; i < nb_fwd_ports; i++) {
pt_id = fwd_ports_ids[i];
@@ -1282,11 +1288,6 @@ start_port(portid_t pid)
if (port_id_is_invalid(pid, ENABLED_WARN))
return 0;

-   if (init_fwd_streams() < 0) {
-   printf("Fail from init_fwd_streams()\n");
-   return -1;
-   }
-
if(dcb_config)
dcb_test = 1;
FOREACH_PORT(pi, ports) {
-- 
2.6.3



[dpdk-dev] [PATCH v3 5/9] app/testpmd: add function port_is_bonding_slave

2016-06-12 Thread Bernard Iremonger
Use this function in stop_port and close_port functions.

Signed-off-by: Bernard Iremonger 
---
 app/test-pmd/testpmd.c | 18 ++
 app/test-pmd/testpmd.h |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index f22d1b6..6f68a18 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1449,6 +1449,11 @@ stop_port(portid_t pid)
continue;
}

+   if (port_is_bonding_slave(pi)) {
+   printf("Please remove port %d from bonded device.\n", 
pi);
+   continue;
+   }
+
port = &ports[pi];
if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
RTE_PORT_HANDLING) == 0)
@@ -1487,6 +1492,11 @@ close_port(portid_t pid)
continue;
}

+   if (port_is_bonding_slave(pi)) {
+   printf("Please remove port %d from bonded device.\n", 
pi);
+   continue;
+   }
+
port = &ports[pi];
if (rte_atomic16_cmpset(&(port->port_status),
RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
@@ -1824,6 +1834,14 @@ void clear_port_slave_flag(portid_t slave_pid)
port->slave_flag = 0;
 }

+uint8_t port_is_bonding_slave(portid_t slave_pid)
+{
+   struct rte_port *port;
+
+   port = &ports[slave_pid];
+   return port->slave_flag;
+}
+
 const uint16_t vlan_tags[] = {
0,  1,  2,  3,  4,  5,  6,  7,
8,  9, 10, 11,  12, 13, 14, 15,
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index aa4bdac..50f81d7 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -532,6 +532,8 @@ void dev_set_link_down(portid_t pid);
 void init_port_config(void);
 void set_port_slave_flag(portid_t slave_pid);
 void clear_port_slave_flag(portid_t slave_pid);
+uint8_t port_is_bonding_slave(portid_t slave_pid);
+
 int init_port_dcb_config(portid_t pid, enum dcb_mode_enable dcb_mode,
 enum rte_eth_nb_tcs num_tcs,
 uint8_t pfc_en);
-- 
2.6.3



[dpdk-dev] [PATCH v3 4/9] app/testpmd: remove fwd_config_setup from fwd_config_display

2016-06-12 Thread Bernard Iremonger
Remove fwd_config_setup from fwd_config_display and check that
forwarding has been setup before displaying forwarding configuration.
Add call to fwd_config_setup for corelist, coremask, nbcore setup.
Add call to fwd_config_setup for portlist, portmask, nbport setup.

Signed-off-by: Bernard Iremonger 
---
 app/test-pmd/cmdline.c | 23 ---
 app/test-pmd/config.c  |  6 --
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index fd389ac..2c2a0c7 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -2520,16 +2520,20 @@ static void cmd_set_list_parsed(void *parsed_result,
nb_item = parse_item_list(res->list_of_items, "core",
  RTE_MAX_LCORE,
  parsed_items.lcorelist, 1);
-   if (nb_item > 0)
+   if (nb_item > 0) {
set_fwd_lcores_list(parsed_items.lcorelist, nb_item);
+   fwd_config_setup();
+   }
return;
}
if (!strcmp(res->list_name, "portlist")) {
nb_item = parse_item_list(res->list_of_items, "port",
  RTE_MAX_ETHPORTS,
  parsed_items.portlist, 1);
-   if (nb_item > 0)
+   if (nb_item > 0) {
set_fwd_ports_list(parsed_items.portlist, nb_item);
+   fwd_config_setup();
+   }
}
 }

@@ -2573,10 +2577,13 @@ static void cmd_set_mask_parsed(void *parsed_result,
printf("Please stop forwarding first\n");
return;
}
-   if (!strcmp(res->mask, "coremask"))
+   if (!strcmp(res->mask, "coremask")) {
set_fwd_lcores_mask(res->hexavalue);
-   else if (!strcmp(res->mask, "portmask"))
+   fwd_config_setup();
+   } else if (!strcmp(res->mask, "portmask")) {
set_fwd_ports_mask(res->hexavalue);
+   fwd_config_setup();
+   }
 }

 cmdline_parse_token_string_t cmd_setmask_set =
@@ -2613,11 +2620,13 @@ static void cmd_set_parsed(void *parsed_result,
   __attribute__((unused)) void *data)
 {
struct cmd_set_result *res = parsed_result;
-   if (!strcmp(res->what, "nbport"))
+   if (!strcmp(res->what, "nbport")) {
set_fwd_ports_number(res->value);
-   else if (!strcmp(res->what, "nbcore"))
+   fwd_config_setup();
+   } else if (!strcmp(res->what, "nbcore")) {
set_fwd_lcores_number(res->value);
-   else if (!strcmp(res->what, "burst"))
+   fwd_config_setup();
+   } else if (!strcmp(res->what, "burst"))
set_nb_pkt_per_burst(res->value);
else if (!strcmp(res->what, "verbose"))
set_verbose_level(res->value);
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index f434999..8ef9c85 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1424,8 +1424,10 @@ pkt_fwd_config_display(struct fwd_config *cfg)
 void
 fwd_config_display(void)
 {
-   fwd_config_setup();
-   pkt_fwd_config_display(&cur_fwd_config);
+   if (cur_fwd_config.nb_fwd_ports)
+   pkt_fwd_config_display(&cur_fwd_config);
+   else
+   printf("Please set portlist first\n");
 }

 int
-- 
2.6.3



[dpdk-dev] [PATCH v3 3/9] app/testpmd: check port is not forwarding in stop_port and close_port

2016-06-12 Thread Bernard Iremonger
Add calls to port_is_forwarding function.

Signed-off-by: Bernard Iremonger 
---
 app/test-pmd/testpmd.c | 24 ++--
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 7a1e470..f22d1b6 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1279,11 +1279,6 @@ start_port(portid_t pid)
struct rte_port *port;
struct ether_addr mac_addr;

-   if (test_done == 0) {
-   printf("Please stop forwarding first\n");
-   return -1;
-   }
-
if (port_id_is_invalid(pid, ENABLED_WARN))
return 0;

@@ -1435,10 +1430,6 @@ stop_port(portid_t pid)
struct rte_port *port;
int need_check_link_status = 0;

-   if (test_done == 0) {
-   printf("Please stop forwarding first\n");
-   return;
-   }
if (dcb_test) {
dcb_test = 0;
dcb_config = 0;
@@ -1453,6 +1444,11 @@ stop_port(portid_t pid)
if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
continue;

+   if (port_is_forwarding(pi) != 0 && test_done == 0) {
+   printf("Please remove port %d from forwarding 
configuration.\n", pi);
+   continue;
+   }
+
port = &ports[pi];
if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
RTE_PORT_HANDLING) == 0)
@@ -1477,11 +1473,6 @@ close_port(portid_t pid)
portid_t pi;
struct rte_port *port;

-   if (test_done == 0) {
-   printf("Please stop forwarding first\n");
-   return;
-   }
-
if (port_id_is_invalid(pid, ENABLED_WARN))
return;

@@ -1491,6 +1482,11 @@ close_port(portid_t pid)
if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
continue;

+   if (port_is_forwarding(pi) != 0 && test_done == 0) {
+   printf("Please remove port %d from forwarding 
configuration.\n", pi);
+   continue;
+   }
+
port = &ports[pi];
if (rte_atomic16_cmpset(&(port->port_status),
RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
-- 
2.6.3



[dpdk-dev] [PATCH v3 2/9] app/testpmd: don't update fwding config when attaching/detaching a port

2016-06-12 Thread Bernard Iremonger
Remove checks on test_done variable.
Remove code to update forwarding configuration.

Fixes: edab33b1c01d ("app/testpmd: support port hotplug")

Signed-off-by: Bernard Iremonger 
---
 app/test-pmd/testpmd.c | 28 +---
 1 file changed, 1 insertion(+), 27 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index dd6b046..7a1e470 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1517,7 +1517,7 @@ close_port(portid_t pid)
 void
 attach_port(char *identifier)
 {
-   portid_t i, j, pi = 0;
+   portid_t pi = 0;

printf("Attaching a new port...\n");

@@ -1526,11 +1526,6 @@ attach_port(char *identifier)
return;
}

-   if (test_done == 0) {
-   printf("Please stop forwarding first\n");
-   return;
-   }
-
if (rte_eth_dev_attach(identifier, &pi))
return;

@@ -1540,16 +1535,6 @@ attach_port(char *identifier)

nb_ports = rte_eth_dev_count();

-   /* set_default_fwd_ports_config(); */
-   memset(fwd_ports_ids, 0, sizeof(fwd_ports_ids));
-   i = 0;
-   FOREACH_PORT(j, ports) {
-   fwd_ports_ids[i] = j;
-   i++;
-   }
-   nb_cfg_ports = nb_ports;
-   nb_fwd_ports++;
-
ports[pi].port_status = RTE_PORT_STOPPED;

printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
@@ -1559,7 +1544,6 @@ attach_port(char *identifier)
 void
 detach_port(uint8_t port_id)
 {
-   portid_t i, pi = 0;
char name[RTE_ETH_NAME_MAX_LEN];

printf("Detaching a port...\n");
@@ -1575,16 +1559,6 @@ detach_port(uint8_t port_id)
ports[port_id].enabled = 0;
nb_ports = rte_eth_dev_count();

-   /* set_default_fwd_ports_config(); */
-   memset(fwd_ports_ids, 0, sizeof(fwd_ports_ids));
-   i = 0;
-   FOREACH_PORT(pi, ports) {
-   fwd_ports_ids[i] = pi;
-   i++;
-   }
-   nb_cfg_ports = nb_ports;
-   nb_fwd_ports--;
-
printf("Port '%s' is detached. Now total ports is %d\n",
name, nb_ports);
printf("Done\n");
-- 
2.6.3



[dpdk-dev] [PATCH v3 1/9] app/testpmd: add function port_is_forwarding

2016-06-12 Thread Bernard Iremonger
Add function port_is_forwarding to check whether
a port is forwarding or not.

Signed-off-by: Bernard Iremonger 
---
 app/test-pmd/config.c  | 18 +-
 app/test-pmd/testpmd.h |  3 ++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 1c552e4..f434999 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -1565,6 +1565,22 @@ set_fwd_ports_number(uint16_t nb_pt)
   (unsigned int) nb_fwd_ports);
 }

+int
+port_is_forwarding(portid_t port_id)
+{
+   unsigned int i;
+
+   if (port_id_is_invalid(port_id, ENABLED_WARN))
+   return -1;
+
+   for (i = 0; i < nb_fwd_ports; i++) {
+   if (fwd_ports_ids[i] == port_id)
+   return 1;
+   }
+
+   return 0;
+}
+
 void
 set_nb_pkt_per_burst(uint16_t nb)
 {
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 0f72ca1..aa4bdac 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -500,6 +500,7 @@ void set_fwd_lcores_number(uint16_t nb_lc);
 void set_fwd_ports_list(unsigned int *portlist, unsigned int nb_pt);
 void set_fwd_ports_mask(uint64_t portmask);
 void set_fwd_ports_number(uint16_t nb_pt);
+int port_is_forwarding(portid_t port_id);

 void rx_vlan_strip_set(portid_t port_id, int on);
 void rx_vlan_strip_set_on_queue(portid_t port_id, uint16_t queue_id, int on);
-- 
2.6.3



[dpdk-dev] [PATCH v3 0/9] app/testpmd: forwarding

2016-06-12 Thread Bernard Iremonger
Modify testpmd to allow stop, close, detach and attach
of a port without stopping forwarding.

Changes in v3:
rebase to latest master.
added patch 9
  app/testpmd: stop forwarding on exit
reworked and renamed patch 4
  app/testpmd: remove fwd_config_setup from fwd_config_display

Changes in v2:
Added 4 more patches
  app/testpmd: add function port_is_bonding_slave
  app/testpmd: move call to init_fwd_streams
  app/testpmd: check for valid socket id when attaching port
  app/testpmd: check for valid mbuf pool

Bernard Iremonger (9):
  app/testpmd: add function port_is_forwarding
  app/testpmd: don't update fwding config when attaching/detaching a
port
  app/testpmd: check port is not forwarding in stop_port and close_port
  app/testpmd: remove fwd_config_setup from fwd_config_display
  app/testpmd: add function port_is_bonding_slave
  app/testpmd: move call to init_fwd_streams
  app/testpmd: check for valid socket id when attaching port
  app/testpmd: check for valid mbuf pool
  app/testpmd: stop forwarding on exit

 app/test-pmd/cmdline.c |  23 ++
 app/test-pmd/config.c  |  24 +--
 app/test-pmd/testpmd.c | 112 -
 app/test-pmd/testpmd.h |   5 ++-
 4 files changed, 95 insertions(+), 69 deletions(-)

-- 
2.6.3



[dpdk-dev] Can't build DPDK-16.04 on CentOS 6.8

2016-06-12 Thread Rosen, Rami
Hi Thiago,

> Any clue?

Sure. This is the reason: in dpdk-16.04/lib/librte_eal/linuxapp/igb_uio we have:

#ifndef PCI_MSIX_ENTRY_SIZE
#define PCI_MSIX_ENTRY_SIZE 16
#define  PCI_MSIX_ENTRY_LOWER_ADDR  0
#define  PCI_MSIX_ENTRY_UPPER_ADDR  4
#define  PCI_MSIX_ENTRY_DATA        8
#define  PCI_MSIX_ENTRY_VECTOR_CTRL 12
#define   PCI_MSIX_ENTRY_CTRL_MASKBIT   1
#endif
...

The root cause is that in CentOS 6.8 PCI_MSIX_ENTRY_SIZE is already defined by
the kernel headers, so the whole #ifndef block shown above is skipped and
PCI_MSIX_ENTRY_CTRL_MASKBIT is never defined.

The definition of PCI_MSIX_ENTRY_SIZE is due to the following patch, which was 
introduced in CentOS 6.8, which moved PCI_MSIX_ENTRY_SIZE from the inner 
drivers/pci/msi.h header file into include/linux/pci_regs.h,
The linux/pci_regs.h is included in the pci.h header, which is used by the 
igb_uio.c, which failed in compilation.

"[PATCH 1/8] PCI: MSI: Move MSI-X entry definition to pci_regs.h"

--- a/drivers/pci/msi.h
+++ b/drivers/pci/msi.h
@@ -6,12 +6,6 @@
#ifndef MSI_H
#define MSI_H

-#define PCI_MSIX_ENTRY_SIZE 16
...
...
...
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 455b9cc..acfc224 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -307,6 +307,13 @@
#define PCI_MSIX_FLAGS_MASKALL  (1 << 14)
#define PCI_MSIX_FLAGS_BIRMASK  (7 << 0)

+/* MSI-X entry's format */
+#define PCI_MSIX_ENTRY_SIZE 16


http://linux-pci.vger.kernel.narkive.com/cZquopIw/patch-1-8-pci-msi-move-msi-x-entry-definition-to-pci-regs-h

See also:
rpm -qp --changelog kernel-devel-2.6.32-642.el6.x86_64.rpm | grep "Move MSI-X 
entry"
warning: kernel-devel-2.6.32-642.el6.x86_64.rpm: Header V3 RSA/SHA1 Signature, 
key ID c105b9de: NOKEY
- [pci] msi: Move MSI-X entry definition to pci_regs.h (Myron Stowe) [1288629


A proper patch which checks kernel version can fix it.

Regards,
Rami Rosen
Intel Corporation



[dpdk-dev] [PATCH] virtio: fix allocating virtnet_rx not mem aligned

2016-06-12 Thread Jianfeng Tan
Compile DPDK with clang, below line in virtio_rxtx.c could be
optimized with four "VMOVAPS ymm, m256".
  memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));

This instruction requires the memory address to be 32-byte aligned;
otherwise it leads to a segfault. Although only tested with Clang 3.6.0,
it can be reproduced with any compiler that performs this aggressive
optimization, i.e., changes a memset of known length to VMOVAPS.

The fact that struct rte_mbuf is cache line aligned, can only make
sure fake_mbuf is aligned compared to the start address of struct
virtnet_rx. Unfortunately, this address is not necessarily aligned
because it's allocated by:
  rxvq = (struct virtnet_rx *)RTE_PTR_ADD(vq, sz_vq);

When sz_vq is not aligned, then rxvq cannot be allocated with an
aligned address, and then rxvq->fake_mbuf (addr of rxvq + cache line
size) is not an aligned address.

The fix is very simple: make sz_vq 32-byte aligned. Here we
make it cache line aligned for future optimization.

Fixes: a900472aedef ("virtio: split virtio Rx/Tx queue")

Signed-off-by: Jianfeng Tan 
---
 drivers/net/virtio/virtio_ethdev.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index a995520..ad0f5a6 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -337,7 +337,10 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,

snprintf(vq_name, sizeof(vq_name), "port%d_%s%d",
 dev->data->port_id, queue_names[queue_type], queue_idx);
-   sz_vq = sizeof(*vq) + vq_size * sizeof(struct vq_desc_extra);
+
+   sz_vq = RTE_ALIGN_CEIL(sizeof(*vq) +
+   vq_size * sizeof(struct vq_desc_extra),
+   RTE_CACHE_LINE_SIZE);
if (queue_type == VTNET_RQ) {
sz_q = sz_vq + sizeof(*rxvq);
} else if (queue_type == VTNET_TQ) {
-- 
2.1.4



[dpdk-dev] [PATCH v2] ip_pipeline: fix false cacheline sharing among threads

2016-06-12 Thread Jasvinder Singh
In ip_pipeline app, the structure app_thread_data needs to be aligned to
the cache line boundary as threads on different cpu cores are accessing
fields of the app->thread_data and having this structure not aligned on
cacheline boundary leads to false cacheline sharing.

Fixes: 7f64b9c004aa ("examples/ip_pipeline: rework config file syntax")

Signed-off-by: Jasvinder Singh 
Acked-by: Cristian Dumitrescu 
---
v2
- fix checkpatch error

 examples/ip_pipeline/app.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/ip_pipeline/app.h b/examples/ip_pipeline/app.h
index 848244a..7611341 100644
--- a/examples/ip_pipeline/app.h
+++ b/examples/ip_pipeline/app.h
@@ -300,7 +300,7 @@ struct app_thread_data {
uint64_t headroom_time;
uint64_t headroom_cycles;
double headroom_ratio;
-};
+} __rte_cache_aligned;

 #ifndef APP_MAX_LINKS
 #define APP_MAX_LINKS16
-- 
2.5.5



[dpdk-dev] [PATCH] virito: fix reuse index in nested loop

2016-06-12 Thread Jianfeng Tan
This patch fixes the problem of reusing the index of the outermost loop in
nested loops. This bug leads to failure when starting a multi-queue
virtio device: rx queues (except the first one) cannot be started,
especially their vq_ring cannot be initialized, so that when invoking
the rx func on these queues, a segmentation fault happens.

Fixes: a900472aedef ("virtio: split virtio Rx/Tx queue")

Signed-off-by: Jianfeng Tan 
---
 drivers/net/virtio/virtio_rxtx.c | 36 
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 2e7205b..b96d0cb 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -331,7 +331,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
 * -Allocate blank mbufs for the each rx descriptor
 *
 */
-   int i;
+   int i, j;

PMD_INIT_FUNC_TRACE();

@@ -352,15 +352,18 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
error = ENOSPC;

 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
-   if (use_simple_rxtx)
-   for (i = 0; i < vq->vq_nentries; i++) {
-   vq->vq_ring.avail->ring[i] = i;
-   vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+   if (use_simple_rxtx) {
+   uint16_t k;
+
+   for (k = 0; k < vq->vq_nentries; k++) {
+   vq->vq_ring.avail->ring[k] = k;
+   vq->vq_ring.desc[k].flags = VRING_DESC_F_WRITE;
}
+   }
 #endif
memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
-   for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
-   vq->sw_ring[vq->vq_nentries + i] = &rxvq->fake_mbuf;
+   for (j = 0; j < RTE_PMD_VIRTIO_RX_MAX_BURST; j++)
+   vq->sw_ring[vq->vq_nentries + j] = &rxvq->fake_mbuf;

while (!virtqueue_full(vq)) {
m = rte_mbuf_raw_alloc(rxvq->mpool);
@@ -399,20 +402,21 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev)
 #ifdef RTE_MACHINE_CPUFLAG_SSSE3
if (use_simple_rxtx) {
int mid_idx  = vq->vq_nentries >> 1;
-   for (i = 0; i < mid_idx; i++) {
-   vq->vq_ring.avail->ring[i] = i + mid_idx;
-   vq->vq_ring.desc[i + mid_idx].next = i;
-   vq->vq_ring.desc[i + mid_idx].addr =
+
+   for (j = 0; j < mid_idx; j++) {
+   vq->vq_ring.avail->ring[j] = j + mid_idx;
+   vq->vq_ring.desc[j + mid_idx].next = j;
+   vq->vq_ring.desc[j + mid_idx].addr =
txvq->virtio_net_hdr_mem +
offsetof(struct virtio_tx_region, 
tx_hdr);
-   vq->vq_ring.desc[i + mid_idx].len =
+   vq->vq_ring.desc[j + mid_idx].len =
vq->hw->vtnet_hdr_size;
-   vq->vq_ring.desc[i + mid_idx].flags =
+   vq->vq_ring.desc[j + mid_idx].flags =
VRING_DESC_F_NEXT;
-   vq->vq_ring.desc[i].flags = 0;
+   vq->vq_ring.desc[j].flags = 0;
}
-   for (i = mid_idx; i < vq->vq_nentries; i++)
-   vq->vq_ring.avail->ring[i] = i;
+   for (j = mid_idx; j < vq->vq_nentries; j++)
+   vq->vq_ring.avail->ring[j] = j;
}
 #endif
VIRTQUEUE_DUMP(vq);
-- 
2.1.4



[dpdk-dev] [PATCH 2/8] lib/librte_ether: defind RX/TX lock mode

2016-06-12 Thread Lu, Wenzhuo
Hi Stephen,

> -Original Message-
> From: Stephen Hemminger [mailto:stephen at networkplumber.org]
> Sent: Saturday, June 11, 2016 2:12 AM
> To: Lu, Wenzhuo
> Cc: dev at dpdk.org; Tao, Zhe
> Subject: Re: [dpdk-dev] [PATCH 2/8] lib/librte_ether: defind RX/TX lock mode
> 
> On Wed, 8 Jun 2016 07:34:43 +
> "Lu, Wenzhuo"  wrote:
> 
> > >
> > > The fact that it requires lots more locking inside each device
> > > driver implies to me this is not correct way to architect this.
> > It's a good question. This patch set doesn't follow the regular assumption 
> > of
> DPDK.
> > But it's a requirement we've got from some customers. The users want the
> driver does as much as it can. The best is the link state change is 
> transparent to
> the  users.
> > The patch set tries to provide another choice if the users don't want to 
> > stop
> their rx/tx to handle the reset event.
> 
> Then bring those uses to the development world (on users mailing list) and 
> lets
> start the discussion there.  The requirements creeping in through the backdoor
> also worries me.
Got it. Then how about we only provide a reset API and let the APP to 
stop/start the rx/tx and call the API to reset the port? Thanks.


[dpdk-dev] [PATCH 2/8] lib/librte_ether: defind RX/TX lock mode

2016-06-12 Thread Lu, Wenzhuo
Hi Olivier,

> -Original Message-
> From: Olivier Matz [mailto:olivier.matz at 6wind.com]
> Sent: Thursday, June 9, 2016 3:51 PM
> To: Lu, Wenzhuo; Stephen Hemminger
> Cc: dev at dpdk.org; Tao, Zhe
> Subject: Re: [dpdk-dev] [PATCH 2/8] lib/librte_ether: defind RX/TX lock mode
> 
> Hi,
> 
> On 06/08/2016 09:34 AM, Lu, Wenzhuo wrote:
> > Hi Stephen,
> >
> >
> >> -Original Message-
> >> From: Stephen Hemminger [mailto:stephen at networkplumber.org]
> >> Sent: Wednesday, June 8, 2016 10:16 AM
> >> To: Lu, Wenzhuo
> >> Cc: dev at dpdk.org; Tao, Zhe
> >> Subject: Re: [dpdk-dev] [PATCH 2/8] lib/librte_ether: defind RX/TX
> >> lock mode
> >>
> >> On Mon,  6 Jun 2016 13:40:47 +0800
> >> Wenzhuo Lu  wrote:
> >>
> >>> Define lock mode for RX/TX queue. Because when resetting the device
> >>> we want the resetting thread to get the lock of the RX/TX queue to
> >>> make sure the RX/TX is stopped.
> >>>
> >>> Using next ABI macro for this ABI change as it has too much impact.
> >>> 7 APIs and 1 global variable are impacted.
> >>>
> >>> Signed-off-by: Wenzhuo Lu 
> >>> Signed-off-by: Zhe Tao 
> >>
> >> Why does this patch set make a different assumption the rest of the DPDK?
> >>
> >> The rest of the DPDK operates on the principle that the application
> >> is smart enough to stop the device before making changes. There is no
> >> equivalent to the Linux kernel RTNL mutex. The API assumes
> >> application threads are well behaved and will not try and sabotage each
> other.
> >>
> >> If you restrict the reset operation to only being available when
> >> RX/TX is stopped, then no lock is needed.
> >>
> >> The fact that it requires lots more locking inside each device driver
> >> implies to me this is not correct way to architect this.
> 
> +1
> 
> I'm not sure adding locks is the proper way to do.
> This is the application responsibility to ensure that:
> - control functions are not called concurrently on the same port
> - rx/tx functions are not called when the device is stopped/reset/...
> 
> However, I do think the usage paradigms of the ethdev api should be better
> documented in rte_ethdev.h (ex: which functions can be called concurrently).
> This would be a first step.
> 
> If we really want a helper API to do that in DPDK, the _next_ step could be to
> add them in the ethdev api to achieve this. Maybe something like (the function
> names could be better):
> 
> - to be called on one control thread:
> 
>   rte_eth_stop_rxtx(port)
>   rte_eth_start_rxtx(port)
> 
>   rte_eth_get_rxtx_state(port)
>  -> return "running" if at least one core is inside the rx/tx code
>  -> return "stopped" if all cores are outside the rx/tx code
> 
> - to be called on dataplane cores:
> 
>   /* same than rte_eth_rx_burst(), but checks if rx/tx is allowed
>* first, else do nothing */
>   rte_eth_rx_burst_interruptible()
>   rte_eth_tx_burst_interruptible()
> 
> 
> The code of control thread could be:
> 
>   rte_eth_stop_rxtx(port);
>   /* wait that all dataplane cores finished their processing */
>   while (rte_eth_get_rxtx_state(port) != stopped)
>   ;
>   rte_eth_some_control_operation(port);
>   rte_eth_start_rxtx(port);
> 
> 
> I think this could be done without any lock, just with the proper memory 
> barriers
> and a per-core status.
> 
> But this API may impose a paradigm to the application, and I'm not sure the
> DPDK should do that.
I don't quite catch your point. Seems your solution still need the APP to 
change the code. I think it's more complex than just letting the APP to stop 
the rx/tx and reset the port. Our purpose of this patch set is to let APP do 
less as possible. It's not a good choice if we make it more complex.
And seems it's hard to stop and start rx/tx in rte layer. Normally APP should 
do that. To my opinion, we have to introduce lock in rte to achieve that.

> 
> Regards,
> Olivier


[dpdk-dev] [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode

2016-06-12 Thread Lu, Wenzhuo
Hi Konstantin,


> -Original Message-
> From: Ananyev, Konstantin
> Sent: Wednesday, June 8, 2016 5:20 PM
> To: Lu, Wenzhuo; Tao, Zhe; dev at dpdk.org
> Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, 
> Helin
> Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
> 
> 
> 
> >
> > Hi Konstantin,
> >
> >
> > > -Original Message-
> > > From: Ananyev, Konstantin
> > > Sent: Tuesday, June 7, 2016 5:59 PM
> > > To: Tao, Zhe; dev at dpdk.org
> > > Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming;
> > > Wu, Jingjing; Zhang, Helin
> > > Subject: RE: [PATCH v4 2/8] lib/librte_ether: defind RX/TX lock mode
> > >
> > >
> > > Hi Zhe & Wenzhuo,
> > >
> > > Please find my comments below.
> > > BTW, for clarification - is that patch for 16.11?
> > > I believe it's too late to introduce such significant change in 16.07.
> > > Thanks
> > > Konstantin
> > Thanks for the comments.
> > Honestly, our purpose is 16.07. Realizing the big impact, we use
> > NEXT_ABI to comment our change. So, I think although we want to merge it in
> 16.07 this change will become effective after we remove NEXT_ABI in 16.11.
> 
> I don't think it is achievable.
> First I think your code is not in proper shape yet, right now.
> Second, as you said, it is a significant change and I would like to hear 
> opinions
> from the rest of the community.
Agree it should have risk. I mean our target is 16.07. But surely if it can be 
achieved depends on the feedback from the community.

> 
> >
> > >
> > > > Define lock mode for RX/TX queue. Because when resetting the
> > > > device we want the resetting thread to get the lock of the RX/TX
> > > > queue to make sure the RX/TX is stopped.
> > > >
> > > > Using next ABI macro for this ABI change as it has too much
> > > > impact. 7 APIs and 1 global variable are impacted.
> > > >
> > > > Signed-off-by: Wenzhuo Lu 
> > > > Signed-off-by: Zhe Tao 
> > > > ---
> > > >  lib/librte_ether/rte_ethdev.h | 62
> > > > +++
> > > >  1 file changed, 62 insertions(+)
> > > >
> > > > diff --git a/lib/librte_ether/rte_ethdev.h
> > > > b/lib/librte_ether/rte_ethdev.h index 74e895f..4efb5e9 100644
> > > > --- a/lib/librte_ether/rte_ethdev.h
> > > > +++ b/lib/librte_ether/rte_ethdev.h
> > > > @@ -354,7 +354,12 @@ struct rte_eth_rxmode {
> > > > jumbo_frame  : 1, /**< Jumbo Frame Receipt enable. 
> > > > */
> > > > hw_strip_crc : 1, /**< Enable CRC stripping by 
> > > > hardware. */
> > > > enable_scatter   : 1, /**< Enable scatter packets rx 
> > > > handler */
> > > > +#ifndef RTE_NEXT_ABI
> > > > enable_lro   : 1; /**< Enable LRO */
> > > > +#else
> > > > +   enable_lro   : 1, /**< Enable LRO */
> > > > +   lock_mode: 1; /**< Using lock path */
> > > > +#endif
> > > >  };
> > > >
> > > >  /**
> > > > @@ -634,11 +639,68 @@ struct rte_eth_txmode {
> > > > /**< If set, reject sending out tagged pkts */
> > > > hw_vlan_reject_untagged : 1,
> > > > /**< If set, reject sending out untagged pkts */
> > > > +#ifndef RTE_NEXT_ABI
> > > > hw_vlan_insert_pvid : 1;
> > > > /**< If set, enable port based VLAN insertion */
> > > > +#else
> > > > +   hw_vlan_insert_pvid : 1,
> > > > +   /**< If set, enable port based VLAN insertion */
> > > > +   lock_mode : 1;
> > > > +   /**< If set, using lock path */ #endif
> > > >  };
> > > >
> > > >  /**
> > > > + * The macros for the RX/TX lock mode functions  */ #ifdef
> > > > +RTE_NEXT_ABI #define RX_LOCK_FUNCTION(dev, func) \
> > > > +   (dev->data->dev_conf.rxmode.lock_mode ? \
> > > > +   func ## _lock : func)
> > > > +
> > > > +#define TX_LOCK_FUNCTION(dev, func) \
> > > > +   (dev->data->dev_conf.txmode.lock_mode ? \
> > > > +   func ## _lock : func)
> > > > +#else
> > > > +#define RX_LOCK_FUNCTION(dev, func) func
> > > > +
> > > > +#define TX_LOCK_FUNCTION(dev, func) func #endif
> > > > +
> > > > +/* Add the lock RX/TX function for VF reset */ #define
> > > > +GENERATE_RX_LOCK(func, nic) \ uint16_t func ## _lock(void
> > > > +*rx_queue, \
> > > > + struct rte_mbuf **rx_pkts, \
> > > > + uint16_t nb_pkts) \
> > > > +{  \
> > > > +   struct nic ## _rx_queue *rxq = rx_queue; \
> > > > +   uint16_t nb_rx = 0; \
> > > > +   \
> > > > +   if (rte_spinlock_trylock(&rxq->rx_lock)) { \
> > > > +   nb_rx = func(rx_queue, rx_pkts, nb_pkts); \
> > > > +   rte_spinlock_unlock(&rxq->rx_lock); \
> > > > +   } \
> > > > +   \
> > > > +   return nb_rx; \
> > > > +}
> > > > +
> > > > +#define GENERATE_TX_LOCK(func, nic) \ uint16_t func ## _lock(void
> > > > +*tx_queue, \
> > > > +   

[dpdk-dev] [PATCH v4 4/8] ixgbe: implement device reset on VF

2016-06-12 Thread Lu, Wenzhuo
Hi Konstantin,

> -Original Message-
> From: Ananyev, Konstantin
> Sent: Wednesday, June 8, 2016 4:42 PM
> To: Lu, Wenzhuo; Tao, Zhe; dev at dpdk.org
> Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, 
> Helin
> Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
> 
> 
> 
> > -Original Message-
> > From: Lu, Wenzhuo
> > Sent: Wednesday, June 08, 2016 8:24 AM
> > To: Ananyev, Konstantin; Tao, Zhe; dev at dpdk.org
> > Cc: Richardson, Bruce; Chen, Jing D; Liang, Cunming; Wu, Jingjing;
> > Zhang, Helin
> > Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
> >
> > Hi Konstantin,
> >
> > > -Original Message-
> > > From: Ananyev, Konstantin
> > > Sent: Tuesday, June 7, 2016 6:03 PM
> > > To: Tao, Zhe; dev at dpdk.org
> > > Cc: Lu, Wenzhuo; Richardson, Bruce; Chen, Jing D; Liang, Cunming;
> > > Wu, Jingjing; Zhang, Helin
> > > Subject: RE: [PATCH v4 4/8] ixgbe: implement device reset on VF
> > >
> > >
> > >
> > > > -Original Message-
> > > > From: Tao, Zhe
> > > > Sent: Tuesday, June 07, 2016 7:53 AM
> > > > To: dev at dpdk.org
> > > > Cc: Lu, Wenzhuo; Tao, Zhe; Ananyev, Konstantin; Richardson, Bruce;
> > > > Chen, Jing D; Liang, Cunming; Wu, Jingjing; Zhang, Helin
> > > > Subject: [PATCH v4 4/8] ixgbe: implement device reset on VF
> > > >
> > > > Implement the device reset function.
> > > > 1, Add the fake RX/TX functions.
> > > > 2, The reset function tries to stop RX/TX by replacing
> > > >the RX/TX functions with the fake ones and getting the
> > > >locks to make sure the regular RX/TX finished.
> > > > 3, After the RX/TX stopped, reset the VF port, and then
> > > >release the locks and restore the RX/TX functions.
> > > >
> > > > Signed-off-by: Wenzhuo Lu 
> > > >
> > > >  static int
> > > > +ixgbevf_dev_reset(struct rte_eth_dev *dev) {
> > > > +   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data-
> > > >dev_private);
> > > > +   struct ixgbe_adapter *adapter =
> > > > +   (struct ixgbe_adapter *)dev->data->dev_private;
> > > > +   int diag = 0;
> > > > +   uint32_t vteiam;
> > > > +   uint16_t i;
> > > > +   struct ixgbe_rx_queue *rxq;
> > > > +   struct ixgbe_tx_queue *txq;
> > > > +
> > > > +   /* Nothing needs to be done if the device is not started. */
> > > > +   if (!dev->data->dev_started)
> > > > +   return 0;
> > > > +
> > > > +   PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
> > > > +
> > > > +   /**
> > > > +* Stop RX/TX by fake functions and locks.
> > > > +* Fake functions are used to make RX/TX lock easier.
> > > > +*/
> > > > +   adapter->rx_backup = dev->rx_pkt_burst;
> > > > +   adapter->tx_backup = dev->tx_pkt_burst;
> > > > +   dev->rx_pkt_burst = ixgbevf_recv_pkts_fake;
> > > > +   dev->tx_pkt_burst = ixgbevf_xmit_pkts_fake;
> > >
> > > If you have locking over each queue underneath, why do you still
> > > need fake functions?
> > The fake functions are used to help saving the time of waiting for the 
> > locks.
> > As you see, we want to lock every queue. If we don't use fake functions we
> have to wait for every queue.
> > But if the real functions are replaced by fake functions, ideally when
> > we're waiting for the release of the first queue's lock, the other queues 
> > will run
> into the fake functions. So we need not wait for them and get the locks 
> directly.
> 
> Well, data-path invokes only try_lock(), so it shouldn't be affected 
> significantly,
> right?
> Control path still have to spin on lock and grab it before it can proceed, if 
> it'll
> spin a bit longer I wouldn't see a big deal here.
> What I am trying to say - if we'll go that way - introduce sync 
> control/datapath
> API anyway, we don't need any additional tricks here with rx/tx function
> replacement, correct?
> So let's keep it clean and simple, after all it is a control path and not 
> need to be
> lightning fast.
> Konstantin
Agree, it's not necessary to add the fake functions. I'll remove them to make 
it simple.

> 
> >
> > >
> > > > +
> > > > +   if (dev->data->rx_queues)
> > > > +   for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > > > +   rxq = dev->data->rx_queues[i];
> > > > +   rte_spinlock_lock(&rxq->rx_lock);
> > > > +   }
> > > > +
> > > > +   if (dev->data->tx_queues)
> > > > +   for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > > > +   txq = dev->data->tx_queues[i];
> > > > +   rte_spinlock_lock(&txq->tx_lock);
> > > > +   }
> > >
> > > Probably worth to create a separate function for the lines above:
> > > lock_all_queues(), unlock_all_queues.
> > > But as I said in previous mail - I think that code better be in 
> > > rte_ethdev.
> > We're discussing it in the previous thread :)
> >
> > > >
> > > > @@ -5235,11 +5243,21 @@ ixgbevf_dev_rxtx_start(struct 

[dpdk-dev] [PATCH] examples: add a new example for link reset

2016-06-12 Thread Lu, Wenzhuo
Hi Konstantin, Thomas,

> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Wednesday, June 8, 2016 5:00 PM
> To: Ananyev, Konstantin; Lu, Wenzhuo
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] examples: add a new example for link reset
> 
> 2016-06-08 08:37, Ananyev, Konstantin:
> > > From: Ananyev, Konstantin
> > > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Wenzhuo Lu
> > > > > Add a new example to show when the PF is down and up, VF port
> > > > > can be reset and recover.
> > > >
> > > > Do we really need a totally new example for it?
> > > > Can't we put it in one of already existing ones?
> > > > Let say we have l3fwd-vf... wouldn't that suit your needs?
> > > > Konstantin
> > > I thought about just modifying an existing example. But I choose to
> > > add a new one at last. The benefit of a totally new example is we can 
> > > make it
> simple enough and focus on the reset function.
> > > So it's easier for the users to find what we want to show. And it's
> > > also easier for us as we don't need to care about if our
> > > modification will break some function of the original example :)
> >
> > I still think that adding a new example for each new feature/function in
rte_ethdev API is way too expensive.
> > If your change is not good enough and will break original example,
> > then you probably re-work your feature patch to make it stable enough.
> > After all people will use it in their existing apps, not write the new ones 
> > right?
> > BTW, why not make it work with testpmd?
> > After all it is a new PMD api, and that's what we have our testpmd for?
> 
> +1 for testpmd

I may not make myself clear. I said "function" but actually I mainly mean the 
performance impact but not the functionality. As we know l2fwd and l3fwd can be 
used to show the performance of DPDK, adding lock will break this function, 
showing the performance data. That's why I don't want to touch l2fwd and l3fwd.
Agree that testpmd can be a choice. I'll try to modify testpmd, maybe add a 
parameter, like "testpmd --lock". So by default we will not use lock mode.


[dpdk-dev] [PATCH v7 6/6] virtio-user: add a new vdev named virtio-user

2016-06-12 Thread Jianfeng Tan
Add a new virtual device named virtio-user, which can be used just like
eth_ring, eth_null, etc. To reuse the code of original virtio, we do
some adjustment in virtio_ethdev.c, such as remove key _static_ of
eth_virtio_dev_init() so that it can be reused in virtual device; and
we add some check to make sure it will not crash.

Configured parameters include:
  - queues (optional, 1 by default), number of queue pairs, multi-queue
not supported for now.
  - cq (optional, 0 by default), not supported for now.
  - mac (optional), random value will be given if not specified.
  - queue_size (optional, 256 by default), size of virtqueues.
  - path (mandatory), path of vhost, depends on the file type, vhost
user if the given path points to a unix socket; vhost-net if the
given path points to a char device.
  - ifname (optional), specify the name of backend tap device; only
valid when backend is vhost-net.

When enable CONFIG_RTE_VIRTIO_USER (enabled by default), the compiled
library can be used in both VM and container environment.

Examples:
path_vhost=/dev/vhost-net # use vhost-net as a backend
path_vhost= # use vhost-user as a backend

sudo ./examples/l2fwd/build/l2fwd -c 0x10 -n 4 \
--socket-mem 0,1024 --no-pci --file-prefix=l2fwd \
--vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost -- -p 0x1

Known issues:
 - Control queue and multi-queue are not supported yet.
 - Cannot work with --huge-unlink.
 - Cannot work with no-huge.
 - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8)
   hugepages.
 - Root privilege is a must (mainly because of sorting hugepages according
   to physical address).
 - Applications should not use file name like HUGEFILE_FMT ("%smap_%d").

Signed-off-by: Huawei Xie 
Signed-off-by: Jianfeng Tan 
Acked-by: Neil Horman 
---
 doc/guides/rel_notes/release_16_07.rst  |  11 ++
 doc/guides/sample_app_ug/vhost.rst  |  17 +++
 drivers/net/virtio/virtio_ethdev.c  |  19 ++-
 drivers/net/virtio/virtio_ethdev.h  |   2 +
 drivers/net/virtio/virtio_user_ethdev.c | 228 +++-
 5 files changed, 265 insertions(+), 12 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst 
b/doc/guides/rel_notes/release_16_07.rst
index 30e78d4..4ecca7e 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -47,6 +47,17 @@ New Features
   * Dropped specific Xen Dom0 code.
   * Dropped specific anonymous mempool code in testpmd.

+* **Virtio support for containers.**
+
+  Add a new virtual device, named virtio-user, to support virtio for 
containers.
+
+  Known limitations:
+
+  * Control queue and multi-queue are not supported yet.
+  * Cannot work with --huge-unlink.
+  * Cannot work with --no-huge.
+  * Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) 
hugepages.
+  * Root privilege is a must for sorting hugepages by physical address.

 Resolved Issues
 ---
diff --git a/doc/guides/sample_app_ug/vhost.rst 
b/doc/guides/sample_app_ug/vhost.rst
index 5f81802..a93e54d 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -833,3 +833,20 @@ For example:
 The above message indicates that device 0 has been registered with MAC address 
cc:bb:bb:bb:bb:bb and VLAN tag 1000.
 Any packets received on the NIC with these values is placed on the devices 
receive queue.
 When a virtio-net device transmits packets, the VLAN tag is added to the 
packet by the DPDK vhost sample code.
+
+Running virtio-user with vhost-switch
+-
+
+We can also use virtio-user with vhost-switch now.
+Virtio-user is a virtual device that can be run in an application (container) 
in parallel with vhost in the same OS,
+aka, there is no need to start a VM. We just run it with a different 
--file-prefix to avoid startup failure.
+
+.. code-block:: console
+
+cd ${RTE_SDK}/x86_64-native-linuxapp-gcc/app
+./testpmd -c 0x3 -n 4 --socket-mem 1024 --no-pci 
--file-prefix=virtio-user-testpmd \
+--vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost \
+-- -i --txqflags=0xf01 --disable-hw-vlan
+
+There is no difference on the vhost side.
+Please note that there are some limitations (see release note for more 
information) in the usage of virtio-user.
diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 9ccce79..4523ceb 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -59,7 +59,6 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"

-static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
 static int  virtio_dev_configure(struct rte_eth_dev *dev);
 static int  virtio_dev_start(struct rte_eth_dev *dev);
@@ -1081,7 +1080,7 @@ rx_func_get(struct rte_eth_dev *eth_dev)
  * This function is based on probe() function in virtio_pci.c
  * It returns 0 on success.
  */
-static 

[dpdk-dev] [PATCH v7 5/6] virtio-user: add new virtual pci driver for virtio

2016-06-12 Thread Jianfeng Tan
This patch implements another new instance of struct virtio_pci_ops to
drive the virtio-user virtual device. Instead of rd/wr ioport or PCI
configuration space, this virtual pci driver will rd/wr the virtual
device struct virtio_user_hw, and when necessary, invokes APIs provided
by device emulation later to start/stop the device.

  --
  | -- |
  | | virtio driver  | |> (virtio_user_ethdev.c)
  | -- |
  | |  |
  | -- | -->  virtio-user PMD
  | | device emulate | |
  | || |
  | | vhost adapter  | |
  | -- |
  --
|
|
|
   --
   | vhost backend  |
   --

Signed-off-by: Huawei Xie 
Signed-off-by: Jianfeng Tan 
Acked-by: Neil Horman 
---
 drivers/net/virtio/Makefile |   1 +
 drivers/net/virtio/virtio_pci.h |   1 +
 drivers/net/virtio/virtio_user_ethdev.c | 218 
 3 files changed, 220 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_user_ethdev.c

diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 88a634a..459260b 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -60,6 +60,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user_ethdev.c
 endif

 # this lib depends upon:
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 6c7f8d7..dd7693f 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -261,6 +261,7 @@ struct virtio_hw {
struct virtio_pci_common_cfg *common_cfg;
struct virtio_net_config *dev_cfg;
const struct virtio_pci_ops *vtpci_ops;
+   void*virtio_user_dev;
 };

 /*
diff --git a/drivers/net/virtio/virtio_user_ethdev.c 
b/drivers/net/virtio/virtio_user_ethdev.c
new file mode 100644
index 000..e390242
--- /dev/null
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -0,0 +1,218 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+
+#include "virtio_logs.h"
+#include "virtio_pci.h"
+#include "virtqueue.h"
+#include "virtio_user/virtio_user_dev.h"
+
+#define virtio_user_get_dev(hw) \
+   ((struct virtio_user_dev *)(hw)->virtio_user_dev);
+
+static void
+virtio_user_read_dev_config(struct virtio_hw *hw, uint64_t offset,
+void *dst, int length)
+{
+   int i;
+   struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+   if (offset == offsetof(struct virtio_net_config, mac) &&
+   length == ETHER_ADDR_LEN) {
+   for (i = 0; i < ETHER_ADDR_LEN; ++i)
+   ((uint8_t *)dst)[i] = dev->mac_addr[i];
+   return;
+   }
+
+   if (offset == offsetof(struct virtio_net_config, status))
+   *(uint16_t *)dst = dev->status;
+
+   if (offset == offsetof(struct virtio_net_config, max_virtqueue_pairs))
+   *(uint16_t *)dst = 

[dpdk-dev] [PATCH v7 4/6] virtio-user: add device emulation layer APIs

2016-06-12 Thread Jianfeng Tan
Four device emulation layer APIs are added for the virtio driver to call:
  - virtio_user_start_device()
  - virtio_user_stop_device()
  - virtio_user_dev_init()
  - virtio_user_dev_uninit()

These APIs will get called by virtio driver, and they call vhost adapter
layer APIs to implement the functionality. Besides, this patch defines
a struct named virtio_user_dev to help manage the data that represents this
kind of virtual device.

  --
  | -- |
  | | virtio driver  | |
  | -- |
  | |  |
  | -- | -->  virtio-user PMD
  | | device emulate |-|> (virtio_user_dev.c, virtio_user_dev.h)
  | || |
  | | vhost adapter  | |
  | -- |
  --
|
|
|
   --
   | vhost backend  |
   --

Signed-off-by: Huawei Xie 
Signed-off-by: Jianfeng Tan 
Acked-by: Neil Horman 
---
 drivers/net/virtio/Makefile  |   1 +
 drivers/net/virtio/virtio_user/virtio_user_dev.c | 263 +++
 drivers/net/virtio/virtio_user/virtio_user_dev.h |  64 ++
 3 files changed, 328 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c
 create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h

diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 892c2ae..88a634a 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -59,6 +59,7 @@ ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c
 endif

 # this lib depends upon:
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c 
b/drivers/net/virtio/virtio_user/virtio_user_dev.c
new file mode 100644
index 000..b4e53a8
--- /dev/null
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -0,0 +1,263 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vhost.h"
+#include "virtio_user_dev.h"
+#include "../virtio_ethdev.h"
+
+static int
+virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
+{
+   int callfd, kickfd;
+   struct vhost_vring_file file;
+   struct vhost_vring_state state;
+   struct vring *vring = >vrings[queue_sel];
+   struct vhost_vring_addr addr = {
+   .index = queue_sel,
+   .desc_user_addr = (uint64_t)(uintptr_t)vring->desc,
+   .avail_user_addr = (uint64_t)(uintptr_t)vring->avail,
+   .used_user_addr = (uint64_t)(uintptr_t)vring->used,
+   .log_guest_addr = 0,
+   .flags = 0, /* disable log */
+   };
+
+   /* May use invalid flag, but some backend leverages kickfd and callfd as
+* criteria to judge if dev is alive. so finally we use real event_fd.
+*/
+   callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK);
+   if (callfd < 0) {
+   PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno));
+   return -1;
+   

[dpdk-dev] [PATCH v7 3/6] virtio-user: add vhost adapter layer

2016-06-12 Thread Jianfeng Tan
This patch is to provide vhost adapter layer implementations. Instead
of relying on a hypervisor to translate between device emulation and
vhost backend, here we directly talk with vhost backend through the
vhost file. Depending on the type of vhost file,
  - vhost-user is used if the given path points to a unix socket;
  - vhost-kernel is used if the given path points to a char device.

Here three main APIs are provided to upper layer (device emulation):
  - vhost_user_setup(), to set up env to talk to a vhost user backend;
  - vhost_kernel_setup(), to set up env to talk to a vhost kernel backend.
  - vhost_call(), to provide a unified interface to communicate with
vhost backend.

  --
  | -- |
  | | virtio driver  | |
  | -- |
  | |  |
  | -- | -->  virtio-user PMD
  | | device emulate | |
  | || |
  | | vhost adapter  |-|> (vhost_user.c, vhost_kernel.c, vhost.c)
  | -- |
  --
|
| -- --> (vhost-user protocol or vhost-net ioctls)
|
   --
   | vhost backend  |
   --

Signed-off-by: Huawei Xie 
Signed-off-by: Jianfeng Tan 
Acked-by: Neil Horman 
---
 config/common_linuxapp|   1 +
 drivers/net/virtio/Makefile   |   6 +
 drivers/net/virtio/virtio_user/vhost.c| 105 +++
 drivers/net/virtio/virtio_user/vhost.h| 222 +++
 drivers/net/virtio/virtio_user/vhost_kernel.c | 254 +
 drivers/net/virtio/virtio_user/vhost_user.c   | 378 ++
 6 files changed, 966 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_user/vhost.c
 create mode 100644 drivers/net/virtio/virtio_user/vhost.h
 create mode 100644 drivers/net/virtio/virtio_user/vhost_kernel.c
 create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 7e698e2..2483dfa 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -43,3 +43,4 @@ CONFIG_RTE_LIBRTE_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
+CONFIG_RTE_VIRTIO_USER=y
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index ef84f60..892c2ae 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -55,6 +55,12 @@ ifeq ($(findstring 
RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
 endif

+ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c
+endif
+
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
diff --git a/drivers/net/virtio/virtio_user/vhost.c 
b/drivers/net/virtio/virtio_user/vhost.c
new file mode 100644
index 000..1944a97
--- /dev/null
+++ b/drivers/net/virtio/virtio_user/vhost.c
@@ -0,0 +1,105 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 

[dpdk-dev] [PATCH v7 2/6] virtio: enable use virtual address to fill desc

2016-06-12 Thread Jianfeng Tan
This patch is related to how to calculate relative address for vhost
backend.

The principle is that: based on one or multiple shared memory regions,
vhost maintains a reference system with the frontend start address,
backend start address, and length for each segment, so that each
frontend address (GPA, Guest Physical Address) can be translated into
vhost-recognizable backend address. To make the address translation
efficient, we need to maintain as few regions as possible. In the case
of VM, GPA is always locally continuous. But for some other case, like
virtio-user, we use virtual address here.

It basically means:
  a. when set_base_addr, VA address is used;
  b. when preparing RX's descriptors, VA address is used;
  c. when transmitting packets, VA is filled in TX's descriptors;
  d. in TX and CQ's header, VA is used.

Signed-off-by: Huawei Xie 
Signed-off-by: Jianfeng Tan 
Acked-by: Neil Horman 
---
 drivers/net/virtio/virtio_ethdev.c  | 43 +++--
 drivers/net/virtio/virtio_rxtx.c|  5 ++--
 drivers/net/virtio/virtio_rxtx_simple.c | 13 +-
 drivers/net/virtio/virtqueue.h  | 10 
 4 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 841949b..9ccce79 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -431,9 +431,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
rxvq->mz = mz;
*pvq = rxvq;
} else if (queue_type == VTNET_TQ) {
-   struct virtio_tx_region *txr;
-   unsigned int i;
-
txvq = (struct virtnet_tx *)RTE_PTR_ADD(vq, sz_vq);
txvq->vq = vq;
txvq->port_id = dev->data->port_id;
@@ -442,6 +439,36 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
txvq->virtio_net_hdr_mz = hdr_mz;
txvq->virtio_net_hdr_mem = hdr_mz->phys_addr;

+   *pvq = txvq;
+   } else if (queue_type == VTNET_CQ) {
+   cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq);
+   cvq->vq = vq;
+   cvq->mz = mz;
+   cvq->virtio_net_hdr_mz = hdr_mz;
+   cvq->virtio_net_hdr_mem = hdr_mz->phys_addr;
+   memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
+   *pvq = cvq;
+   }
+
+   /* For virtio-user case (that is when dev->pci_dev is NULL), we use
+* virtual address. And we need properly set _offset_, please see
+* MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
+*/
+   if (dev->pci_dev)
+   vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
+   else {
+   vq->vq_ring_mem = (uintptr_t)mz->addr;
+   vq->offset = offsetof(struct rte_mbuf, buf_addr);
+   if (queue_type == VTNET_TQ)
+   txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
+   else if (queue_type == VTNET_CQ)
+   cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
+   }
+
+   if (queue_type == VTNET_TQ) {
+   struct virtio_tx_region *txr;
+   unsigned int i;
+
txr = hdr_mz->addr;
memset(txr, 0, vq_size * sizeof(*txr));
for (i = 0; i < vq_size; i++) {
@@ -457,16 +484,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
start_dp->len = hw->vtnet_hdr_size;
start_dp->flags = VRING_DESC_F_NEXT;
}
-
-   *pvq = txvq;
-   } else if (queue_type == VTNET_CQ) {
-   cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq);
-   cvq->vq = vq;
-   cvq->mz = mz;
-   cvq->virtio_net_hdr_mz = hdr_mz;
-   cvq->virtio_net_hdr_mem = hdr_mz->phys_addr;
-   memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
-   *pvq = cvq;
}

if (hw->vtpci_ops->setup_queue(hw, vq) < 0) {
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index f371423..2e7205b 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -193,8 +193,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct 
rte_mbuf *cookie)

start_dp = vq->vq_ring.desc;
start_dp[idx].addr =
-   (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
-   - hw->vtnet_hdr_size);
+   MBUF_DATA_DMA_ADDR(cookie, vq->offset) - hw->vtnet_hdr_size;
start_dp[idx].len =
cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
start_dp[idx].flags =  VRING_DESC_F_WRITE;
@@ -266,7 +265,7 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct 
rte_mbuf *cookie,
}

do {
-   start_dp[idx].addr  = rte_mbuf_data_dma_addr(cookie);
+   

[dpdk-dev] [PATCH v7 1/6] virtio: hide phys addr check inside pci ops

2016-06-12 Thread Jianfeng Tan
This patch is to move phys addr check from virtio_dev_queue_setup
to pci ops. To make that happen, make sure virtio_ops.setup_queue
returns the result if we pass through the check.

Signed-off-by: Jianfeng Tan 
Signed-off-by: Huawei Xie 
Acked-by: Yuanhan Liu 
---
 drivers/net/virtio/virtio_ethdev.c | 17 ++---
 drivers/net/virtio/virtio_pci.c| 30 --
 drivers/net/virtio/virtio_pci.h|  2 +-
 3 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index a995520..841949b 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -385,16 +385,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
}
}

-   /*
-* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
-* and only accepts 32 bit page frame number.
-* Check if the allocated physical memory exceeds 16TB.
-*/
-   if ((mz->phys_addr + vq->vq_ring_size - 1) >> 
(VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
-   PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
-   ret = -ENOMEM;
-   goto fail_q_alloc;
-   }
memset(mz->addr, 0, sizeof(mz->len));

vq->vq_ring_mem = mz->phys_addr;
@@ -479,7 +469,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
*pvq = cvq;
}

-   hw->vtpci_ops->setup_queue(hw, vq);
+   if (hw->vtpci_ops->setup_queue(hw, vq) < 0) {
+   PMD_INIT_LOG(ERR, "setup_queue failed");
+   virtio_dev_queue_release(vq);
+   return -EINVAL;
+   }
+
vq->configured = 1;
return 0;

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index d0f2428..8d0c983 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -55,6 +55,22 @@
  */
 #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)

+static inline int
+check_vq_phys_addr_ok(struct virtqueue *vq)
+{
+   /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
+* and only accepts 32 bit page frame number.
+* Check if the allocated physical memory exceeds 16TB.
+*/
+   if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
+   (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
+   PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
+   return 0;
+   }
+
+   return 1;
+}
+
 static void
 legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
   void *dst, int length)
@@ -143,15 +159,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t 
queue_id)
return dst;
 }

-static void
+static int
 legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
uint32_t src;

+   if (!check_vq_phys_addr_ok(vq))
+   return -1;
+
rte_eal_pci_ioport_write(>io, >vq_queue_index, 2,
 VIRTIO_PCI_QUEUE_SEL);
src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
rte_eal_pci_ioport_write(>io, , 4, VIRTIO_PCI_QUEUE_PFN);
+
+   return 0;
 }

 static void
@@ -367,12 +388,15 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t 
queue_id)
return io_read16(>common_cfg->queue_size);
 }

-static void
+static int
 modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
uint64_t desc_addr, avail_addr, used_addr;
uint16_t notify_off;

+   if (!check_vq_phys_addr_ok(vq))
+   return -1;
+
desc_addr = vq->vq_ring_mem;
avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
@@ -400,6 +424,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue 
*vq)
PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
vq->notify_addr, notify_off);
+
+   return 0;
 }

 static void
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index f20468a..6c7f8d7 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -235,7 +235,7 @@ struct virtio_pci_ops {
uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec);

uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id);
-   void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq);
+   int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq);
void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq);
void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq);
 };
-- 
2.1.4



[dpdk-dev] [PATCH v7 0/6] virtio support for container

2016-06-12 Thread Jianfeng Tan
v7:
 - CONFIG_RTE_VIRTIO_VDEV -> CONFIG_RTE_VIRTIO_USER; and correspondingly,
   RTE_VIRTIO_VDEV -> RTE_VIRTIO_USER.
 - uint64_t -> uintptr_t, so that it can be compiled on 32-bit platform.
 - Rebase on latest dpdk-next-virtio branch.
 - Abandon abstracting related code into vring_hdr_desc_init(), instead,
   just move it behind setup_queue().

v6:
 - Move driver related code from driver/net/virtio/virtio-user/ to
   driver/net/virtio/ directory, inside virtio_user_ethdev.c.
 - Rename vdev to virtio_user in comments and code.
 - Merge code, which lies in virtio_user_pci.c, into virtio_user_ethdev.c.
 - Add some comments at virtio-user special handling at virtio_dev_ethdev.c.
 - Merge document update into the 7th commit where virtio-user is added.
 - Add usage with vhost-switch in vhost.rst.

v5:
 - Rename struct virtio_user_hw to struct virtio_user_dev.
 - Rename "vdev_private" to "virtio_user_dev".
 - Move special handling into virtio_ethdev.c from queue_setup().
 - Add vring in virtio_user_dev (remove rte_eth_dev_data), so that
   device does not depend on driver's data structure (rte_eth_dev_data).
 - Remove update on doc/guides/nics/overview.rst, because virtio-user has
   exact feature set with virtio.
 - Change "unsigned long int" to "uint64_t", "unsigned" to "uint32_t".
 - Remove unnecessary cast in vdev_read_dev_config().
 - Add functions in virtio_user_dev.c with prefix of "virtio_user_".
 - Rebase on virtio-next-virtio.

v4:
 - Avoid using dev_type, instead use (eth_dev->pci_device is NULL) to
   judge if it's virtual device or physical device.
 - Change the added device name to virtio-user.
 - Split into vhost_user.c, vhost_kernel.c, vhost.c, virtio_user_pci.c,
   virtio_user_dev.c.
 - Move virtio-user specific data from struct virtio_hw into struct
   virtio_user_hw.
 - Add support to send reset_owner message.
 - Change del_queue implementation. (This need more check)
 - Remove rte_panic(), and superseded with log.
 - Add reset_owner into virtio_pci_ops.reset.
 - Merge parameter "rx" and "tx" to "queues" to eliminate confusion.
 - Move get_features to after set_owner.
 - Redefine path in virtio_user_hw from char * to char [].

v3:
 - Remove --single-file option; make no change to EAL memory.
 - Remove the added API rte_eal_get_backfile_info(), instead we check all
   opened files with HUGEFILE_FMT to find hugepage files owned by DPDK.
 - Accordingly, add more restrictions at "Known issue" section.
 - Rename parameter from queue_num to queue_size to avoid confusion.
 - Rename vhost_embedded.c to rte_eth_virtio_vdev.c.
 - Move code related to the newly added vdev to rte_eth_virtio_vdev.c, to
   reuse eth_virtio_dev_init(), remove its static declaration.
 - Implement dev_uninit() for rte_eth_dev_detach().
 - WARN -> ERR, in vhost_embedded.c
 - Add more commit message for clarify the model.

v2:
 - Rebase on the patchset of virtio 1.0 support.
 - Fix cannot create non-hugepage memory.
 - Fix wrong size of memory region when "single-file" is used.
 - Fix setting of offset in virtqueue to use virtual address.
 - Fix setting TUNSETVNETHDRSZ in vhost-user's branch.
 - Add mac option to specify the mac address of this virtual device.
 - Update doc.

This patchset is to provide high performance networking interface (virtio)
for container-based DPDK applications. The way of starting DPDK apps in
containers with ownership of NIC devices exclusively is beyond the scope.
The basic idea here is to present a new virtual device (named virtio-user),
which can be discovered and initialized by DPDK. To minimize the change,
we reuse already-existing virtio PMD code (driver/net/virtio/).

Background: Previously, we usually use a virtio device in the context of
QEMU/VM as below pic shows. Virtio nic is emulated in QEMU, and usually
presented in VM as a PCI device.

  --
  |  virtio driver |  ->  VM
  --
|
| --> (over PCI bus or MMIO or Channel I/O)
|
  --
  | device emulate |
  ||  ->  QEMU
  | vhost adapter  |
  --
|
| --> (vhost-user protocol or vhost-net ioctls)
|
  --
  | vhost backend  |
  --

Compared to the QEMU/VM case, virtio support for containers requires embedding
the device framework inside the virtio PMD. So this converged driver actually
plays three roles:
  - virtio driver to drive this new kind of virtual device;
  - device emulation to present this virtual device and respond to the
virtio driver, which is originally done by QEMU;
  - and the role to communicate with vhost backend, which is also
originally by QEMU.

The code layout and functionality of each module:

  --
  | -- |
  | | virtio driver  | |> (virtio_user_ethdev.c)
  | -- |
  | |  |
  | -- | -->  virtio-user PMD
  | | device emulate |-|> (virtio_user_dev.c)
  | |