[dpdk-dev] [PATCH v3] vhost: remove vhost_net_device_ops

2016-02-19 Thread Thomas Monjalon
2016-02-19 10:10, Rich Lane:
> The indirection is unnecessary because there is only one implementation
> of the vhost common code. Removing it makes the code more readable.
> 
> Signed-off-by: Rich Lane 
> Acked-by: Yuanhan Liu 

Applied, thanks


[dpdk-dev] [PATCH v3 18/18] fm10k/base: remove unused struct element

2016-02-19 Thread Wang Xiao W
Remove the unused element request_lport_map in struct fm10k_mac_ops.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_type.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/fm10k/base/fm10k_type.h 
b/drivers/net/fm10k/base/fm10k_type.h
index f807216..3fc8f13 100644
--- a/drivers/net/fm10k/base/fm10k_type.h
+++ b/drivers/net/fm10k/base/fm10k_type.h
@@ -604,7 +604,6 @@ struct fm10k_mac_ops {
struct fm10k_dglort_cfg *);
void (*set_dma_mask)(struct fm10k_hw *, u64);
s32 (*get_fault)(struct fm10k_hw *, int, struct fm10k_fault *);
-   void (*request_lport_map)(struct fm10k_hw *);
s32 (*adjust_systime)(struct fm10k_hw *, s32 ppb);
s32 (*notify_offset)(struct fm10k_hw *, u64 offset);
u64 (*read_systime)(struct fm10k_hw *);
-- 
1.9.3



[dpdk-dev] [PATCH v3 17/18] fm10k/base: minor cleanups

2016-02-19 Thread Wang Xiao W
Some cleanups to better reflect the code that was actually pushed out to
the upstream Linux community.

Among the above cleanups, a few macros such as FM10K_RXINT_TIMER_SHIFT are
removed, but they are needed in dpdk/fm10k, so we have to put all these
necessary macros into fm10k_osdep.h.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_mbx.h   |   7 --
 drivers/net/fm10k/base/fm10k_osdep.h |  32 +
 drivers/net/fm10k/base/fm10k_pf.h|   4 --
 drivers/net/fm10k/base/fm10k_type.h  | 132 ---
 4 files changed, 32 insertions(+), 143 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_mbx.h 
b/drivers/net/fm10k/base/fm10k_mbx.h
index e642c2f..edc57df 100644
--- a/drivers/net/fm10k/base/fm10k_mbx.h
+++ b/drivers/net/fm10k/base/fm10k_mbx.h
@@ -48,7 +48,6 @@ struct fm10k_mbx_info;
 /* XOR provides means of switching from Tx to Rx FIFO */
 #define FM10K_MBMEM_PF_XOR (FM10K_MBMEM_SM(0) ^ FM10K_MBMEM_PF(0))
 #define FM10K_MBX(_n)  ((_n) + 0x18800)
-#define FM10K_MBX_OWNER0x0001
 #define FM10K_MBX_REQ  0x0002
 #define FM10K_MBX_ACK  0x0004
 #define FM10K_MBX_REQ_INTERRUPT0x0008
@@ -213,7 +212,6 @@ enum fm10k_msg_type {
 /* version number for switch manager mailboxes */
 #define FM10K_SM_MBX_VERSION   1
 #define FM10K_SM_MBX_FIFO_LEN  (FM10K_MBMEM_PF_XOR - 1)
-#define FM10K_SM_MBX_FIFO_HDR_LEN  1

 /* offsets shared between all SM FIFO headers */
 #define FM10K_MSG_SM_TAIL_SHIFT0
@@ -233,18 +231,13 @@ enum fm10k_msg_type {
  */
 #define FM10K_MBX_ERR(_n) ((_n) - 512)
 #define FM10K_MBX_ERR_NO_MBX   FM10K_MBX_ERR(0x01)
-#define FM10K_MBX_ERR_NO_MSG   FM10K_MBX_ERR(0x02)
 #define FM10K_MBX_ERR_NO_SPACE FM10K_MBX_ERR(0x03)
-#define FM10K_MBX_ERR_LOCK FM10K_MBX_ERR(0x04)
 #define FM10K_MBX_ERR_TAIL FM10K_MBX_ERR(0x05)
 #define FM10K_MBX_ERR_HEAD FM10K_MBX_ERR(0x06)
-#define FM10K_MBX_ERR_DST  FM10K_MBX_ERR(0x07)
 #define FM10K_MBX_ERR_SRC  FM10K_MBX_ERR(0x08)
 #define FM10K_MBX_ERR_TYPE FM10K_MBX_ERR(0x09)
-#define FM10K_MBX_ERR_LEN  FM10K_MBX_ERR(0x0A)
 #define FM10K_MBX_ERR_SIZE FM10K_MBX_ERR(0x0B)
 #define FM10K_MBX_ERR_BUSY FM10K_MBX_ERR(0x0C)
-#define FM10K_MBX_ERR_VALUEFM10K_MBX_ERR(0x0D)
 #define FM10K_MBX_ERR_RSVD0FM10K_MBX_ERR(0x0E)
 #define FM10K_MBX_ERR_CRC  FM10K_MBX_ERR(0x0F)

diff --git a/drivers/net/fm10k/base/fm10k_osdep.h 
b/drivers/net/fm10k/base/fm10k_osdep.h
index 6852ef0..a21daa2 100644
--- a/drivers/net/fm10k/base/fm10k_osdep.h
+++ b/drivers/net/fm10k/base/fm10k_osdep.h
@@ -150,6 +150,38 @@ typedef intbool;
 #define fm10k_read_reg FM10K_READ_REG
 #endif

+#define FM10K_INTEL_VENDOR_ID   0x8086
+#define FM10K_DMA_CTRL_MINMSS_SHIFT9
+#define FM10K_EICR_PCA_FAULT   0x0001
+#define FM10K_EICR_THI_FAULT   0x0004
+#define FM10K_EICR_FUM_FAULT   0x0020
+#define FM10K_EICR_SRAMERROR   0x0400
+#define FM10K_SRAM_IP  0x13003
+#define FM10K_RXINT_TIMER_SHIFT8
+#define FM10K_TXINT_TIMER_SHIFT8
+#define FM10K_RXD_PKTTYPE_MASK 0x03F0
+#define FM10K_RXD_PKTTYPE_SHIFT4
+
+enum fm10k_rdesc_pkt_type {
+   /* L3 type */
+   FM10K_PKTTYPE_OTHER = 0x00,
+   FM10K_PKTTYPE_IPV4  = 0x01,
+   FM10K_PKTTYPE_IPV4_EX   = 0x02,
+   FM10K_PKTTYPE_IPV6  = 0x03,
+   FM10K_PKTTYPE_IPV6_EX   = 0x04,
+
+   /* L4 type */
+   FM10K_PKTTYPE_TCP   = 0x08,
+   FM10K_PKTTYPE_UDP   = 0x10,
+   FM10K_PKTTYPE_GRE   = 0x18,
+   FM10K_PKTTYPE_VXLAN = 0x20,
+   FM10K_PKTTYPE_NVGRE = 0x28,
+   FM10K_PKTTYPE_GENEVE= 0x30
+};
+
+#define FM10K_RXD_STATUS_IPCS  0x0008 /* Indicates IPv4 csum */
+#define FM10K_RXD_STATUS_HBO   0x0400 /* header buffer overrun */
+
 #define FM10K_TSO_MINMSS \
(FM10K_DMA_CTRL_MINMSS_64 >> FM10K_DMA_CTRL_MINMSS_SHIFT)
 #define FM10K_TSO_MIN_HEADERLEN54
diff --git a/drivers/net/fm10k/base/fm10k_pf.h 
b/drivers/net/fm10k/base/fm10k_pf.h
index ee8527a..c84b1bc 100644
--- a/drivers/net/fm10k/base/fm10k_pf.h
+++ b/drivers/net/fm10k/base/fm10k_pf.h
@@ -140,10 +140,6 @@ struct fm10k_swapi_1588_clock_owner {
 #pragma pack()
 #endif /* C99 */

-#define FM10K_PF_MSG_LPORT_CREATE_HANDLER(func) \
-   FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_LPORT_CREATE, NULL, func)
-#define FM10K_PF_MSG_LPORT_DELETE_HANDLER(func) \
-   FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_LPORT_DELETE, NULL, func)
 s32 fm10k_msg_lport_map_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
 extern const struct fm10k_tlv_attr fm10k_lport_map_msg_attr[];
 #defi

[dpdk-dev] [PATCH v3 16/18] fm10k/base: move constants to the right of binary operators

2016-02-19 Thread Wang Xiao W
The upstream Linux kernel community prefers that constants be placed to the
right of binary operators.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_pf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index 456fe64..105babf 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -759,8 +759,8 @@ STATIC s32 fm10k_iov_assign_resources_pf(struct fm10k_hw 
*hw, u16 num_vfs,
FM10K_RXDCTL_WRITE_BACK_MIN_DELAY |
FM10K_RXDCTL_DROP_ON_EMPTY);
FM10K_WRITE_REG(hw, FM10K_RXQCTL(vf_q_idx),
-   FM10K_RXQCTL_VF |
-   (i << FM10K_RXQCTL_VF_SHIFT));
+   (i << FM10K_RXQCTL_VF_SHIFT) |
+   FM10K_RXQCTL_VF);

/* map queue pair to VF */
FM10K_WRITE_REG(hw, FM10K_TQMAP(qmap_idx), vf_q_idx);
@@ -1035,7 +1035,7 @@ STATIC s32 fm10k_iov_reset_resources_pf(struct fm10k_hw 
*hw,
txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) |
 (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
 FM10K_TXQCTL_VF | vf_idx;
-   rxqctl = FM10K_RXQCTL_VF | (vf_idx << FM10K_RXQCTL_VF_SHIFT);
+   rxqctl = (vf_idx << FM10K_RXQCTL_VF_SHIFT) | FM10K_RXQCTL_VF;

/* stop further DMA and reset queue ownership back to VF */
for (i = vf_q_idx; i < (queues_per_pool + vf_q_idx); i++) {
-- 
1.9.3



[dpdk-dev] [PATCH v3 15/18] fm10k/base: fix TLV structures alignment

2016-02-19 Thread Wang Xiao W
Per comments from an upstream patch, and looking at how TLV LE_STRUCT
code works, we actually want these structures to be 4byte aligned, not
1byte aligned. In practice, 1byte alignment has worked so far because
all our structures end up being a multiple of 4 bytes. But if a future TLV
structure were added that had a u8 or similar field on the end, things
would break. Fix this by using 4byte alignment which will prevent the
TLV LE_STRUCT code from breaking. Update the comment explaining that we
need 4byte alignment of our structures.

Fixes: e24fed68236a ("fm10k: pack TLV overlay structures correctly")

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_pf.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_pf.h 
b/drivers/net/fm10k/base/fm10k_pf.h
index 92e2962..ee8527a 100644
--- a/drivers/net/fm10k/base/fm10k_pf.h
+++ b/drivers/net/fm10k/base/fm10k_pf.h
@@ -91,14 +91,14 @@ enum fm10k_pf_tlv_attr_id_v1 {
 #define FM10K_MSG_UPDATE_PVID_PVID_SHIFT   16
 #define FM10K_MSG_UPDATE_PVID_PVID_SIZE16

-/* The following data structures are overlayed specifically to TLV mailbox
- * messages, and must not have gaps between their values. They must line up
- * correctly to the TLV definition.
+/* The following data structures are overlayed directly onto TLV mailbox
+ * messages, and must not break 4 byte alignment. Ensure the structures line
+ * up correctly as per their TLV definition.
  */
 #ifdef C99
-#pragma pack(push, 1)
+#pragma pack(push, 4)
 #else
-#pragma pack(1)
+#pragma pack(4)
 #endif /* C99 */

 struct fm10k_mac_update {
-- 
1.9.3



[dpdk-dev] [PATCH v3 14/18] fm10k/base: improve comment per upstream review changes

2016-02-19 Thread Wang Xiao W
The comment here was changed during review of upstream patch, and the
new wording is slightly more clear. Re-write the comment in SHARED code
based on this new wording.

Fix a number of mailbox comment issues with function header comments,
lower-case acronyms (i.e. FIFO, TLV), incorrect function names in
DEBUGFUNC(), duplicate comments and a stubbed-out header comment for
fm10k_sm_mbx_init.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_mbx.c | 61 ++
 drivers/net/fm10k/base/fm10k_mbx.h |  4 +--
 drivers/net/fm10k/base/fm10k_pf.c  | 12 
 drivers/net/fm10k/base/fm10k_tlv.c |  2 +-
 drivers/net/fm10k/base/fm10k_tlv.h |  4 +--
 5 files changed, 47 insertions(+), 36 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_mbx.c 
b/drivers/net/fm10k/base/fm10k_mbx.c
index 7d03704..2e70434 100644
--- a/drivers/net/fm10k/base/fm10k_mbx.c
+++ b/drivers/net/fm10k/base/fm10k_mbx.c
@@ -70,7 +70,7 @@ STATIC u16 fm10k_fifo_unused(struct fm10k_mbx_fifo *fifo)
 }

 /**
- *  fm10k_fifo_empty - Test to verify if fifo is empty
+ *  fm10k_fifo_empty - Test to verify if FIFO is empty
  *  @fifo: pointer to FIFO
  *
  *  This function returns true if the FIFO is empty, else false
@@ -85,7 +85,7 @@ STATIC bool fm10k_fifo_empty(struct fm10k_mbx_fifo *fifo)
  *  @fifo: pointer to FIFO
  *  @offset: offset to add to head
  *
- *  This function returns the indices into the fifo based on head + offset
+ *  This function returns the indices into the FIFO based on head + offset
  **/
 STATIC u16 fm10k_fifo_head_offset(struct fm10k_mbx_fifo *fifo, u16 offset)
 {
@@ -97,7 +97,7 @@ STATIC u16 fm10k_fifo_head_offset(struct fm10k_mbx_fifo 
*fifo, u16 offset)
  *  @fifo: pointer to FIFO
  *  @offset: offset to add to tail
  *
- *  This function returns the indices into the fifo based on tail + offset
+ *  This function returns the indices into the FIFO based on tail + offset
  **/
 STATIC u16 fm10k_fifo_tail_offset(struct fm10k_mbx_fifo *fifo, u16 offset)
 {
@@ -173,7 +173,7 @@ STATIC u16 fm10k_mbx_index_len(struct fm10k_mbx_info *mbx, 
u16 head, u16 tail)
 /**
  *  fm10k_mbx_tail_add - Determine new tail value with added offset
  *  @mbx: pointer to mailbox
- *  @offset: length to add to head offset
+ *  @offset: length to add to tail offset
  *
  *  This function takes the local tail index and recomputes it for
  *  a given length added as an offset.
@@ -189,7 +189,7 @@ STATIC u16 fm10k_mbx_tail_add(struct fm10k_mbx_info *mbx, 
u16 offset)
 /**
  *  fm10k_mbx_tail_sub - Determine new tail value with subtracted offset
  *  @mbx: pointer to mailbox
- *  @offset: length to add to head offset
+ *  @offset: length to add to tail offset
  *
  *  This function takes the local tail index and recomputes it for
  *  a given length added as an offset.
@@ -253,7 +253,7 @@ STATIC u16 fm10k_mbx_pushed_tail_len(struct fm10k_mbx_info 
*mbx)
 }

 /**
- *  fm10k_fifo_write_copy - pulls data off of msg and places it in fifo
+ *  fm10k_fifo_write_copy - pulls data off of msg and places it in FIFO
  *  @fifo: pointer to FIFO
  *  @msg: message array to populate
  *  @tail_offset: additional offset to add to tail pointer
@@ -331,7 +331,7 @@ STATIC u16 fm10k_mbx_validate_msg_size(struct 
fm10k_mbx_info *mbx, u16 len)
u16 total_len = 0, msg_len;
u32 *msg;

-   DEBUGFUNC("fm10k_mbx_validate_msg");
+   DEBUGFUNC("fm10k_mbx_validate_msg_size");

/* length should include previous amounts pushed */
len += mbx->pushed;
@@ -353,6 +353,7 @@ STATIC u16 fm10k_mbx_validate_msg_size(struct 
fm10k_mbx_info *mbx, u16 len)

 /**
  *  fm10k_mbx_write_copy - pulls data off of Tx FIFO and places it in mbmem
+ *  @hw: pointer to hardware structure
  *  @mbx: pointer to mailbox
  *
  *  This function will take a section of the Tx FIFO and copy it into the
@@ -734,7 +735,7 @@ STATIC bool fm10k_mbx_tx_complete(struct fm10k_mbx_info 
*mbx)
  *  @hw: pointer to hardware structure
  *  @mbx: pointer to mailbox
  *
- *  This function dequeues messages and hands them off to the tlv parser.
+ *  This function dequeues messages and hands them off to the TLV parser.
  *  It will return the number of messages processed when called.
  **/
 STATIC u16 fm10k_mbx_dequeue_rx(struct fm10k_hw *hw,
@@ -951,7 +952,7 @@ STATIC void fm10k_mbx_create_fake_disconnect_hdr(struct 
fm10k_mbx_info *mbx)
 }

 /**
- *  fm10k_mbx_create_error_msg - Generate a error message
+ *  fm10k_mbx_create_error_msg - Generate an error message
  *  @mbx: pointer to mailbox
  *  @err: local error encountered
  *
@@ -984,7 +985,6 @@ STATIC void fm10k_mbx_create_error_msg(struct 
fm10k_mbx_info *mbx, s32 err)
 /**
  *  fm10k_mbx_validate_msg_hdr - Validate common fields in the message header
  *  @mbx: pointer to mailbox
- *  @msg: message array to read
  *
  *  This function will parse up the fields in the mailbox header and return
  *  an error if the header contains any of a number of invalid configurations
@@ -1050,11 +1050,12

[dpdk-dev] [PATCH v3 13/18] fm10k/base: consistently use VLAN ID when referencing vid variables

2016-02-19 Thread Wang Xiao W
The vid variable name is shorthand for VLAN ID, so we should use this in
comments explaining what is happening.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_pf.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index f5cbda4..716d7f1 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -970,7 +970,7 @@ err_out:
txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
  FM10K_TXQCTL_VF | vf_idx;

-   /* assign VID */
+   /* assign VLAN ID */
for (i = 0; i < queues_per_pool; i++)
FM10K_WRITE_REG(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl);

@@ -1215,12 +1215,12 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 
**results,
 }

 /**
- * fm10k_iov_select_vid - Select correct default vid
+ * fm10k_iov_select_vid - Select correct default VLAN ID
  * @hw: Pointer to hardware structure
- * @vid: vid to correct
+ * @vid: VLAN ID to correct
  *
- * Will report an error if vid is out of range. For vid = 0, it will return
- * either the pf_vid or sw_vid depending on which one is set.
+ * Will report an error if the VLAN ID is out of range. For VID = 0, it will
+ * return either the pf_vid or sw_vid depending on which one is set.
  */
 STATIC s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
 {
@@ -1783,7 +1783,7 @@ static s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, 
u32 **results,
if (!fm10k_glort_valid_pf(hw, glort))
return FM10K_ERR_PARAM;

-   /* verify VID is valid */
+   /* verify VLAN ID is valid */
if (pvid >= FM10K_VLAN_TABLE_VID_MAX)
return FM10K_ERR_PARAM;

-- 
1.9.3



[dpdk-dev] [PATCH v3 12/18] fm10k/base: allow removal of is_slot_appropriate function

2016-02-19 Thread Wang Xiao W
The Linux Kernel provides the OS a call "pcie_get_minimum_link" which
can crawl the PCIe tree and determine the actual minimum link speed of a
device which is a more general check than provided by
is_slot_appropriate. Thus, the upstream driver does not use or want the
is_slot_appropriate function call. Add a NO_IS_SLOT_APPROPRIATE_CHECK
definition which can be defined during strip process to remove the code.
If left undefined (the default) then the code will all be active and no
driver changes should be necessary.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_api.c  | 2 ++
 drivers/net/fm10k/base/fm10k_api.h  | 2 ++
 drivers/net/fm10k/base/fm10k_pf.c   | 4 
 drivers/net/fm10k/base/fm10k_type.h | 2 ++
 drivers/net/fm10k/base/fm10k_vf.c   | 4 
 5 files changed, 14 insertions(+)

diff --git a/drivers/net/fm10k/base/fm10k_api.c 
b/drivers/net/fm10k/base/fm10k_api.c
index eb5bdaa..c49d20d 100644
--- a/drivers/net/fm10k/base/fm10k_api.c
+++ b/drivers/net/fm10k/base/fm10k_api.c
@@ -181,6 +181,7 @@ s32 fm10k_get_bus_info(struct fm10k_hw *hw)
   FM10K_NOT_IMPLEMENTED);
 }

+#ifndef NO_IS_SLOT_APPROPRIATE_CHECK
 /**
  *  fm10k_is_slot_appropriate - Indicate appropriate slot for this SKU
  *  @hw: pointer to hardware structure
@@ -195,6 +196,7 @@ bool fm10k_is_slot_appropriate(struct fm10k_hw *hw)
return true;
 }

+#endif
 /**
  *  fm10k_update_vlan - Clear VLAN ID to VLAN filter table
  *  @hw: pointer to hardware structure
diff --git a/drivers/net/fm10k/base/fm10k_api.h 
b/drivers/net/fm10k/base/fm10k_api.h
index 113aef5..2ab3149 100644
--- a/drivers/net/fm10k/base/fm10k_api.h
+++ b/drivers/net/fm10k/base/fm10k_api.h
@@ -44,7 +44,9 @@ s32 fm10k_stop_hw(struct fm10k_hw *hw);
 s32 fm10k_start_hw(struct fm10k_hw *hw);
 s32 fm10k_init_shared_code(struct fm10k_hw *hw);
 s32 fm10k_get_bus_info(struct fm10k_hw *hw);
+#ifndef NO_IS_SLOT_APPROPRIATE_CHECK
 bool fm10k_is_slot_appropriate(struct fm10k_hw *hw);
+#endif
 s32 fm10k_update_vlan(struct fm10k_hw *hw, u32 vid, u8 idx, bool set);
 s32 fm10k_read_mac_addr(struct fm10k_hw *hw);
 void fm10k_update_hw_stats(struct fm10k_hw *hw, struct fm10k_hw_stats *stats);
diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index a1469aa..f5cbda4 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -216,6 +216,7 @@ STATIC s32 fm10k_init_hw_pf(struct fm10k_hw *hw)
return FM10K_SUCCESS;
 }

+#ifndef NO_IS_SLOT_APPROPRIATE_CHECK
 /**
  *  fm10k_is_slot_appropriate_pf - Indicate appropriate slot for this SKU
  *  @hw: pointer to hardware structure
@@ -231,6 +232,7 @@ STATIC bool fm10k_is_slot_appropriate_pf(struct fm10k_hw 
*hw)
   (hw->bus.width == hw->bus_caps.width);
 }

+#endif
 /**
  *  fm10k_update_vlan_pf - Update status of VLAN ID in VLAN filter table
  *  @hw: pointer to hardware structure
@@ -2064,7 +2066,9 @@ s32 fm10k_init_ops_pf(struct fm10k_hw *hw)
mac->ops.init_hw = &fm10k_init_hw_pf;
mac->ops.start_hw = &fm10k_start_hw_generic;
mac->ops.stop_hw = &fm10k_stop_hw_generic;
+#ifndef NO_IS_SLOT_APPROPRIATE_CHECK
mac->ops.is_slot_appropriate = &fm10k_is_slot_appropriate_pf;
+#endif
mac->ops.update_vlan = &fm10k_update_vlan_pf;
mac->ops.read_mac_addr = &fm10k_read_mac_addr_pf;
mac->ops.update_uc_addr = &fm10k_update_uc_addr_pf;
diff --git a/drivers/net/fm10k/base/fm10k_type.h 
b/drivers/net/fm10k/base/fm10k_type.h
index 1e84294..c4e5450 100644
--- a/drivers/net/fm10k/base/fm10k_type.h
+++ b/drivers/net/fm10k/base/fm10k_type.h
@@ -679,7 +679,9 @@ struct fm10k_mac_ops {
s32 (*stop_hw)(struct fm10k_hw *);
s32 (*get_bus_info)(struct fm10k_hw *);
s32 (*get_host_state)(struct fm10k_hw *, bool *);
+#ifndef NO_IS_SLOT_APPROPRIATE_CHECK
bool (*is_slot_appropriate)(struct fm10k_hw *);
+#endif
s32 (*update_vlan)(struct fm10k_hw *, u32, u8, bool);
s32 (*read_mac_addr)(struct fm10k_hw *);
s32 (*update_uc_addr)(struct fm10k_hw *, u16, const u8 *,
diff --git a/drivers/net/fm10k/base/fm10k_vf.c 
b/drivers/net/fm10k/base/fm10k_vf.c
index 43eb081..efbdbd1 100644
--- a/drivers/net/fm10k/base/fm10k_vf.c
+++ b/drivers/net/fm10k/base/fm10k_vf.c
@@ -178,6 +178,7 @@ reset_max_queues:
return err;
 }

+#ifndef NO_IS_SLOT_APPROPRIATE_CHECK
 /**
  *  fm10k_is_slot_appropriate_vf - Indicate appropriate slot for this SKU
  *  @hw: pointer to hardware structure
@@ -194,6 +195,7 @@ STATIC bool fm10k_is_slot_appropriate_vf(struct fm10k_hw 
*hw)
return TRUE;
 }

+#endif
 /* This structure defines the attibutes to be parsed below */
 const struct fm10k_tlv_attr fm10k_mac_vlan_msg_attr[] = {
FM10K_TLV_ATTR_U32(FM10K_MAC_VLAN_MSG_VLAN),
@@ -648,7 +650,9 @@ s32 fm10k_init_ops_vf(struct fm10k_hw *hw)
mac->ops.init_hw = &fm10k_init_hw_vf;
mac->ops.start_hw = &fm10k_start_hw_generic;
mac->ops.stop_hw = &fm10k_stop_hw

[dpdk-dev] [PATCH v3 11/18] fm10k/base: use memcpy for mac addr copy

2016-02-19 Thread Wang Xiao W
Use memcpy instead of copying MAC address byte-by-byte.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_pf.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index 7d48210..a1469aa 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -300,7 +300,6 @@ STATIC s32 fm10k_read_mac_addr_pf(struct fm10k_hw *hw)
 {
u8 perm_addr[ETH_ALEN];
u32 serial_num;
-   int i;

DEBUGFUNC("fm10k_read_mac_addr_pf");

@@ -324,10 +323,8 @@ STATIC s32 fm10k_read_mac_addr_pf(struct fm10k_hw *hw)
perm_addr[4] = (u8)(serial_num >> 8);
perm_addr[5] = (u8)(serial_num);

-   for (i = 0; i < ETH_ALEN; i++) {
-   hw->mac.perm_addr[i] = perm_addr[i];
-   hw->mac.addr[i] = perm_addr[i];
-   }
+   memcpy(hw->mac.perm_addr, perm_addr, ETH_ALEN);
+   memcpy(hw->mac.addr, perm_addr, ETH_ALEN);

return FM10K_SUCCESS;
 }
-- 
1.9.3



[dpdk-dev] [PATCH v3 10/18] fm10k/base: do not use CamelCase

2016-02-19 Thread Wang Xiao W
The upstream Linux kernel community prefers avoiding CamelCase in
variables, function names, etc.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_type.h | 14 +++---
 drivers/net/fm10k/fm10k_ethdev.c| 24 
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_type.h 
b/drivers/net/fm10k/base/fm10k_type.h
index 98dbf6f..1e84294 100644
--- a/drivers/net/fm10k/base/fm10k_type.h
+++ b/drivers/net/fm10k/base/fm10k_type.h
@@ -531,13 +531,13 @@ struct fm10k_hw;
 #endif

 enum fm10k_int_source {
-   fm10k_int_Mailbox   = 0,
-   fm10k_int_PCIeFault = 1,
-   fm10k_int_SwitchUpDown  = 2,
-   fm10k_int_SwitchEvent   = 3,
-   fm10k_int_SRAM  = 4,
-   fm10k_int_VFLR  = 5,
-   fm10k_int_MaxHoldTime   = 6,
+   fm10k_int_mailbox   = 0,
+   fm10k_int_pcie_fault= 1,
+   fm10k_int_switch_up_down= 2,
+   fm10k_int_switch_event  = 3,
+   fm10k_int_sram  = 4,
+   fm10k_int_vflr  = 5,
+   fm10k_int_max_hold_time = 6,
fm10k_int_sources_max_pf
 };

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 6d929cb..3ac08d0 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -2117,12 +2117,12 @@ fm10k_dev_enable_intr_pf(struct rte_eth_dev *dev)
/* Bind all local non-queue interrupt to vector 0 */
int_map |= FM10K_MISC_VEC_ID;

-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_Mailbox), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_PCIeFault), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchUpDown), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchEvent), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SRAM), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_VFLR), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_mailbox), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_pcie_fault), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_switch_up_down), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_switch_event), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_sram), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_vflr), int_map);

/* Enable misc causes */
FM10K_WRITE_REG(hw, FM10K_EIMR, FM10K_EIMR_ENABLE(PCA_FAULT) |
@@ -2148,12 +2148,12 @@ fm10k_dev_disable_intr_pf(struct rte_eth_dev *dev)

int_map |= FM10K_MISC_VEC_ID;

-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_Mailbox), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_PCIeFault), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchUpDown), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SwitchEvent), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_SRAM), int_map);
-   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_VFLR), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_mailbox), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_pcie_fault), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_switch_up_down), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_switch_event), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_sram), int_map);
+   FM10K_WRITE_REG(hw, FM10K_INT_MAP(fm10k_int_vflr), int_map);

/* Disable misc causes */
FM10K_WRITE_REG(hw, FM10K_EIMR, FM10K_EIMR_DISABLE(PCA_FAULT) |
-- 
1.9.3



[dpdk-dev] [PATCH v3 09/18] fm10k/base: use BIT macro instead of open-coded bit-shifting of 1

2016-02-19 Thread Wang Xiao W
The upstream Linux kernel community prefers using the BIT macro over
bit-shifting a 1.  Similar to how this is handled in the i40e shared code,
define a macro for OSes that do not already have it and wrap all that in
LINUX_MACROS so that it can be stripped from the Linux driver.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_pf.c   | 12 ++--
 drivers/net/fm10k/base/fm10k_tlv.c  | 24 
 drivers/net/fm10k/base/fm10k_type.h | 18 --
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index 3ee88b6..7d48210 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -576,8 +576,8 @@ STATIC s32 fm10k_configure_dglort_map_pf(struct fm10k_hw 
*hw,
return FM10K_ERR_PARAM;

/* determine count of VSIs and queues */
-   queue_count = 1 << (dglort->rss_l + dglort->pc_l);
-   vsi_count = 1 << (dglort->vsi_l + dglort->queue_l);
+   queue_count = BIT(dglort->rss_l + dglort->pc_l);
+   vsi_count = BIT(dglort->vsi_l + dglort->queue_l);
glort = dglort->glort;
q_idx = dglort->queue_b;

@@ -593,8 +593,8 @@ STATIC s32 fm10k_configure_dglort_map_pf(struct fm10k_hw 
*hw,
}

/* determine count of PCs and queues */
-   queue_count = 1 << (dglort->queue_l + dglort->rss_l + dglort->vsi_l);
-   pc_count = 1 << dglort->pc_l;
+   queue_count = BIT(dglort->queue_l + dglort->rss_l + dglort->vsi_l);
+   pc_count = BIT(dglort->pc_l);

/* configure PC for Tx queues */
for (pc = 0; pc < pc_count; pc++) {
@@ -1001,7 +1001,7 @@ STATIC s32 fm10k_iov_reset_resources_pf(struct fm10k_hw 
*hw,
return FM10K_ERR_PARAM;

/* clear event notification of VF FLR */
-   FM10K_WRITE_REG(hw, FM10K_PFVFLREC(vf_idx / 32), 1 << (vf_idx % 32));
+   FM10K_WRITE_REG(hw, FM10K_PFVFLREC(vf_idx / 32), BIT(vf_idx % 32));

/* force timeout and then disconnect the mailbox */
vf_info->mbx.timeout = 0;
@@ -1417,7 +1417,7 @@ s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *hw, u32 
**results,
mode = fm10k_iov_supported_xcast_mode_pf(vf_info, mode);

/* if mode is not currently enabled, enable it */
-   if (!(FM10K_VF_FLAG_ENABLED(vf_info) & (1 << mode)))
+   if (!(FM10K_VF_FLAG_ENABLED(vf_info) & BIT(mode)))
fm10k_update_xcast_mode_pf(hw, vf_info->glort, mode);

/* swap mode back to a bit flag */
diff --git a/drivers/net/fm10k/base/fm10k_tlv.c 
b/drivers/net/fm10k/base/fm10k_tlv.c
index 4b5f684..b9000ae 100644
--- a/drivers/net/fm10k/base/fm10k_tlv.c
+++ b/drivers/net/fm10k/base/fm10k_tlv.c
@@ -249,7 +249,7 @@ s32 fm10k_tlv_attr_put_value(u32 *msg, u16 attr_id, s64 
value, u32 len)
attr = &msg[FM10K_TLV_DWORD_LEN(*msg)];

if (len < 4) {
-   attr[1] = (u32)value & ((0x1ul << (8 * len)) - 1);
+   attr[1] = (u32)value & (BIT(8 * len) - 1);
} else {
attr[1] = (u32)value;
if (len > 4)
@@ -699,29 +699,29 @@ STATIC void fm10k_tlv_msg_test_generate_data(u32 *msg, 
u32 attr_flags)
 {
DEBUGFUNC("fm10k_tlv_msg_test_generate_data");

-   if (attr_flags & (1 << FM10K_TEST_MSG_STRING))
+   if (attr_flags & BIT(FM10K_TEST_MSG_STRING))
fm10k_tlv_attr_put_null_string(msg, FM10K_TEST_MSG_STRING,
   test_str);
-   if (attr_flags & (1 << FM10K_TEST_MSG_MAC_ADDR))
+   if (attr_flags & BIT(FM10K_TEST_MSG_MAC_ADDR))
fm10k_tlv_attr_put_mac_vlan(msg, FM10K_TEST_MSG_MAC_ADDR,
test_mac, test_vlan);
-   if (attr_flags & (1 << FM10K_TEST_MSG_U8))
+   if (attr_flags & BIT(FM10K_TEST_MSG_U8))
fm10k_tlv_attr_put_u8(msg, FM10K_TEST_MSG_U8,  test_u8);
-   if (attr_flags & (1 << FM10K_TEST_MSG_U16))
+   if (attr_flags & BIT(FM10K_TEST_MSG_U16))
fm10k_tlv_attr_put_u16(msg, FM10K_TEST_MSG_U16, test_u16);
-   if (attr_flags & (1 << FM10K_TEST_MSG_U32))
+   if (attr_flags & BIT(FM10K_TEST_MSG_U32))
fm10k_tlv_attr_put_u32(msg, FM10K_TEST_MSG_U32, test_u32);
-   if (attr_flags & (1 << FM10K_TEST_MSG_U64))
+   if (attr_flags & BIT(FM10K_TEST_MSG_U64))
fm10k_tlv_attr_put_u64(msg, FM10K_TEST_MSG_U64, test_u64);
-   if (attr_flags & (1 << FM10K_TEST_MSG_S8))
+   if (attr_flags & BIT(FM10K_TEST_MSG_S8))
fm10k_tlv_attr_put_s8(msg, FM10K_TEST_MSG_S8,  test_s8);
-   if (attr_flags & (1 << FM10K_TEST_MSG_S16))
+   if (attr_flags & BIT(FM10K_TEST_MSG_S16))
fm10k_tlv_attr_put_s16(msg, FM10K_TEST_MSG_S16, test_s16);
-   if (attr_flags & (1 << FM10K_TEST_MSG_S32))
+   if (attr_flags & BIT(FM10K_TEST_MSG_S32))
fm10k_tlv_attr

[dpdk-dev] [PATCH v3 08/18] fm10k/base: cleanup useless else

2016-02-19 Thread Wang Xiao W
checkpatch warns that else is not generally useful after a break or return.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_pf.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index 8ac91d0..3ee88b6 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -1278,8 +1278,8 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 
**results,
err = fm10k_iov_select_vid(vf_info, (u16)vid);
if (err < 0)
return err;
-   else
-   vid = err;
+
+   vid = err;

/* update VSI info for VF in regards to VLAN table */
err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set);
@@ -1304,8 +1304,8 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 
**results,
err = fm10k_iov_select_vid(vf_info, vlan);
if (err < 0)
return err;
-   else
-   vlan = (u16)err;
+
+   vlan = (u16)err;

/* notify switch of request for new unicast address */
err = hw->mac.ops.update_uc_addr(hw, vf_info->glort,
@@ -1330,8 +1330,8 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 
**results,
err = fm10k_iov_select_vid(vf_info, vlan);
if (err < 0)
return err;
-   else
-   vlan = (u16)err;
+
+   vlan = (u16)err;

/* notify switch of request for new multicast address */
err = hw->mac.ops.update_mc_addr(hw, vf_info->glort,
-- 
1.9.3



[dpdk-dev] [PATCH v3 07/18] fm10k/base: cleanup lines over 80 characters

2016-02-19 Thread Wang Xiao W
checkpatch warns about lines over 80 characters.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_mbx.c | 2 +-
 drivers/net/fm10k/base/fm10k_pf.c  | 7 ---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_mbx.c 
b/drivers/net/fm10k/base/fm10k_mbx.c
index 3c9ab3a..7d03704 100644
--- a/drivers/net/fm10k/base/fm10k_mbx.c
+++ b/drivers/net/fm10k/base/fm10k_mbx.c
@@ -930,7 +930,7 @@ STATIC void fm10k_mbx_create_disconnect_hdr(struct 
fm10k_mbx_info *mbx)
 }

 /**
- *  fm10k_mbx_create_fake_disconnect_hdr - Generate a false disconnect mailbox 
header
+ *  fm10k_mbx_create_fake_disconnect_hdr - Generate a false disconnect mbox hdr
  *  @mbx: pointer to mailbox
  *
  *  This function creates a fake disconnect header for loading into remote
diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index 6de679e..8ac91d0 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -1500,9 +1500,10 @@ STATIC void fm10k_update_hw_stats_pf(struct fm10k_hw *hw,
xec = fm10k_read_hw_stats_32b(hw, FM10K_STATS_XEC, &stats->xec);
vlan_drop = fm10k_read_hw_stats_32b(hw, FM10K_STATS_VLAN_DROP,
&stats->vlan_drop);
-   loopback_drop = fm10k_read_hw_stats_32b(hw,
-   
FM10K_STATS_LOOPBACK_DROP,
-   &stats->loopback_drop);
+   loopback_drop =
+   fm10k_read_hw_stats_32b(hw,
+   FM10K_STATS_LOOPBACK_DROP,
+   &stats->loopback_drop);
nodesc_drop = fm10k_read_hw_stats_32b(hw,
  FM10K_STATS_NODESC_DROP,
  &stats->nodesc_drop);
-- 
1.9.3



[dpdk-dev] [PATCH v3 06/18] fm10k/base: document ITR scale workaround in VF TDLEN register

2016-02-19 Thread Wang Xiao W
Add comments which properly explain the undocumented use of bits in
TDLEN register prior to VF initializing it to the correct value. Note
that the mechanism is entirely software-defined and explain its purpose
to help reduce confusion in the future.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_pf.c   | 6 +-
 drivers/net/fm10k/base/fm10k_type.h | 9 +
 drivers/net/fm10k/base/fm10k_vf.c   | 9 +
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index 5b8c039..6de679e 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -958,7 +958,8 @@ STATIC s32 fm10k_iov_assign_default_mac_vlan_pf(struct 
fm10k_hw *hw,
FM10K_WRITE_REG(hw, FM10K_TDBAH(vf_q_idx), tdbah);

/* Provide the VF the ITR scale, using software-defined fields in TDLEN
-* to pass the information during VF initialization
+* to pass the information during VF initialization. See definition of
+* FM10K_TDLEN_ITR_SCALE_SHIFT for more details.
 */
FM10K_WRITE_REG(hw, FM10K_TDLEN(vf_q_idx), hw->mac.itr_scale <<
   FM10K_TDLEN_ITR_SCALE_SHIFT);
@@ -1095,6 +1096,9 @@ STATIC s32 fm10k_iov_reset_resources_pf(struct fm10k_hw 
*hw,
for (i = queues_per_pool; i--;) {
FM10K_WRITE_REG(hw, FM10K_TDBAL(vf_q_idx + i), tdbal);
FM10K_WRITE_REG(hw, FM10K_TDBAH(vf_q_idx + i), tdbah);
+   /* See definition of FM10K_TDLEN_ITR_SCALE_SHIFT for an
+* explanation of how TDLEN is used.
+*/
FM10K_WRITE_REG(hw, FM10K_TDLEN(vf_q_idx + i),
hw->mac.itr_scale <<
FM10K_TDLEN_ITR_SCALE_SHIFT);
diff --git a/drivers/net/fm10k/base/fm10k_type.h 
b/drivers/net/fm10k/base/fm10k_type.h
index 4bc0d53..4ec18fb 100644
--- a/drivers/net/fm10k/base/fm10k_type.h
+++ b/drivers/net/fm10k/base/fm10k_type.h
@@ -350,6 +350,15 @@ struct fm10k_hw;
 #define FM10K_TDBAL(_n)((0x40 * (_n)) + 0x8000)
 #define FM10K_TDBAH(_n)((0x40 * (_n)) + 0x8001)
 #define FM10K_TDLEN(_n)((0x40 * (_n)) + 0x8002)
+/* When first initialized, VFs need to know the Interrupt Throttle Rate (ITR)
+ * scale which is based on the PCIe speed but the speed information in the PCI
+ * configuration space may not be accurate. The PF already knows the ITR scale
+ * but there is no defined method to pass that information from the PF to the
+ * VF. This is accomplished during VF initialization by temporarily co-opting
+ * the yet-to-be-used TDLEN register to have the PF store the ITR shift for
+ * the VF to retrieve before the VF needs to use the TDLEN register for its
+ * intended purpose, i.e. before the Tx resources are allocated.
+ */
 #define FM10K_TDLEN_ITR_SCALE_SHIFT9
 #define FM10K_TDLEN_ITR_SCALE_MASK 0x0E00
 #define FM10K_TDLEN_ITR_SCALE_GEN1 2
diff --git a/drivers/net/fm10k/base/fm10k_vf.c 
b/drivers/net/fm10k/base/fm10k_vf.c
index 9b10ee4..43eb081 100644
--- a/drivers/net/fm10k/base/fm10k_vf.c
+++ b/drivers/net/fm10k/base/fm10k_vf.c
@@ -74,6 +74,11 @@ STATIC s32 fm10k_stop_hw_vf(struct fm10k_hw *hw)
FM10K_WRITE_REG(hw, FM10K_TDBAH(i), bah);
FM10K_WRITE_REG(hw, FM10K_RDBAL(i), bal);
FM10K_WRITE_REG(hw, FM10K_RDBAH(i), bah);
+   /* Restore ITR scale in software-defined mechanism in TDLEN
+* for next VF initialization. See definition of
+* FM10K_TDLEN_ITR_SCALE_SHIFT for more details on the use of
+* TDLEN here.
+*/
FM10K_WRITE_REG(hw, FM10K_TDLEN(i), tdlen);
}

@@ -157,6 +162,10 @@ STATIC s32 fm10k_init_hw_vf(struct fm10k_hw *hw)
/* fetch default VLAN and ITR scale */
hw->mac.default_vid = (FM10K_READ_REG(hw, FM10K_TXQCTL(0)) &
   FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT;
+   /* Read the ITR scale from TDLEN. See the definition of
+* FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is
+* used here.
+*/
hw->mac.itr_scale = (FM10K_READ_REG(hw, FM10K_TDLEN(0)) &
 FM10K_TDLEN_ITR_SCALE_MASK) >>
FM10K_TDLEN_ITR_SCALE_SHIFT;
-- 
1.9.3



[dpdk-dev] [PATCH v3 05/18] fm10k/base: reset max_queues on init_hw_vf failure

2016-02-19 Thread Wang Xiao W
VF drivers must detect how many queues are available. Previously, the
driver assumed that each VF has at minimum 1 queue. This assumption is
incorrect, since it is possible that the PF has not yet assigned the
queues to the VF by the time the VF checks. To resolve this, we added a
check first to ensure that the first queue is in fact owned by the VF at
init_hw_vf time. However, the code flow did not reset hw->mac.max_queues
to 0. In some cases, such as during reinit flows, we call init_hw_vf
without clearing the previous value of hw->mac.max_queues. Due to this,
when init_hw_vf errors out, if its error code is not properly handled
the VF driver may still believe it has queues which no longer belong to
it. Fix this by clearing the hw->mac.max_queues on exit due to errors.

Fixes: 20dc3445d100 ("fm10k: do not assume VF always has 1 queue")

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_vf.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_vf.c 
b/drivers/net/fm10k/base/fm10k_vf.c
index 39bc927..9b10ee4 100644
--- a/drivers/net/fm10k/base/fm10k_vf.c
+++ b/drivers/net/fm10k/base/fm10k_vf.c
@@ -128,8 +128,10 @@ STATIC s32 fm10k_init_hw_vf(struct fm10k_hw *hw)

/* verify we have at least 1 queue */
if (!~FM10K_READ_REG(hw, FM10K_TXQCTL(0)) ||
-   !~FM10K_READ_REG(hw, FM10K_RXQCTL(0)))
-   return FM10K_ERR_NO_RESOURCES;
+   !~FM10K_READ_REG(hw, FM10K_RXQCTL(0))) {
+   err = FM10K_ERR_NO_RESOURCES;
+   goto reset_max_queues;
+   }

/* determine how many queues we have */
for (i = 1; tqdloc0 && (i < FM10K_MAX_QUEUES_POOL); i++) {
@@ -147,7 +149,7 @@ STATIC s32 fm10k_init_hw_vf(struct fm10k_hw *hw)
/* shut down queues we own and reset DMA configuration */
err = fm10k_disable_queues_generic(hw, i);
if (err)
-   return err;
+   goto reset_max_queues;

/* record maximum queue count */
hw->mac.max_queues = i;
@@ -160,6 +162,11 @@ STATIC s32 fm10k_init_hw_vf(struct fm10k_hw *hw)
FM10K_TDLEN_ITR_SCALE_SHIFT;

return FM10K_SUCCESS;
+
+reset_max_queues:
+   hw->mac.max_queues = 0;
+
+   return err;
 }

 /**
-- 
1.9.3



[dpdk-dev] [PATCH v3 04/18] fm10k/base: use bitshift for itr_scale

2016-02-19 Thread Wang Xiao W
Upstream community wishes us to use bitshift instead of a divisor,
because this is faster, and prevents any need for a '0' check. In our
case, this even works out because default Gen3 will be 0.

Because of this, we are also able to remove the check for non-zero value
in the VF code path since that will already be the default Gen3 case.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_type.h | 6 +++---
 drivers/net/fm10k/base/fm10k_vf.c   | 4 
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_type.h 
b/drivers/net/fm10k/base/fm10k_type.h
index df1d276..4bc0d53 100644
--- a/drivers/net/fm10k/base/fm10k_type.h
+++ b/drivers/net/fm10k/base/fm10k_type.h
@@ -352,9 +352,9 @@ struct fm10k_hw;
 #define FM10K_TDLEN(_n)((0x40 * (_n)) + 0x8002)
 #define FM10K_TDLEN_ITR_SCALE_SHIFT9
 #define FM10K_TDLEN_ITR_SCALE_MASK 0x0E00
-#define FM10K_TDLEN_ITR_SCALE_GEN1 4
-#define FM10K_TDLEN_ITR_SCALE_GEN2 2
-#define FM10K_TDLEN_ITR_SCALE_GEN3 1
+#define FM10K_TDLEN_ITR_SCALE_GEN1 2
+#define FM10K_TDLEN_ITR_SCALE_GEN2 1
+#define FM10K_TDLEN_ITR_SCALE_GEN3 0
 #define FM10K_TPH_TXCTRL(_n)   ((0x40 * (_n)) + 0x8003)
 #define FM10K_TPH_TXCTRL_DESC_TPHEN0x0020
 #define FM10K_TPH_TXCTRL_DESC_RROEN0x0200
diff --git a/drivers/net/fm10k/base/fm10k_vf.c 
b/drivers/net/fm10k/base/fm10k_vf.c
index 7822ab6..39bc927 100644
--- a/drivers/net/fm10k/base/fm10k_vf.c
+++ b/drivers/net/fm10k/base/fm10k_vf.c
@@ -159,10 +159,6 @@ STATIC s32 fm10k_init_hw_vf(struct fm10k_hw *hw)
 FM10K_TDLEN_ITR_SCALE_MASK) >>
FM10K_TDLEN_ITR_SCALE_SHIFT;

-   /* ensure a non-zero itr scale */
-   if (!hw->mac.itr_scale)
-   hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN3;
-
return FM10K_SUCCESS;
 }

-- 
1.9.3



[dpdk-dev] [PATCH v3 03/18] fm10k/base: cleanup namespace pollution

2016-02-19 Thread Wang Xiao W
Make functions that are only referenced locally static.

Wrap fm10k_msg_data fm10k_iov_msg_data_pf[] in the new ifndef
NO_DEFAULT_SRIOV_MSG_HANDLERS so that drivers with custom SR-IOV
message handlers can strip it.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_pf.c  |  6 --
 drivers/net/fm10k/base/fm10k_pf.h  |  4 ++--
 drivers/net/fm10k/base/fm10k_tlv.c | 14 +++---
 drivers/net/fm10k/base/fm10k_tlv.h |  5 -
 drivers/net/fm10k/base/fm10k_vf.c  |  2 --
 5 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index 4a028f3..5b8c039 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -1457,6 +1457,7 @@ s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *hw, u32 
**results,
return err;
 }

+#ifndef NO_DEFAULT_SRIOV_MSG_HANDLERS
 const struct fm10k_msg_data fm10k_iov_msg_data_pf[] = {
FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
FM10K_VF_MSG_MSIX_HANDLER(fm10k_iov_msg_msix_pf),
@@ -1465,6 +1466,7 @@ const struct fm10k_msg_data fm10k_iov_msg_data_pf[] = {
FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
 };

+#endif
 /**
  *  fm10k_update_stats_hw_pf - Updates hardware related statistics of PF
  *  @hw: pointer to hardware structure
@@ -1754,8 +1756,8 @@ const struct fm10k_tlv_attr fm10k_update_pvid_msg_attr[] 
= {
  *
  *  This handler configures the default VLAN for the PF
  **/
-s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results,
-struct fm10k_mbx_info *mbx)
+static s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results,
+   struct fm10k_mbx_info *mbx)
 {
u16 glort, pvid;
u32 pvid_update;
diff --git a/drivers/net/fm10k/base/fm10k_pf.h 
b/drivers/net/fm10k/base/fm10k_pf.h
index 44bd193..92e2962 100644
--- a/drivers/net/fm10k/base/fm10k_pf.h
+++ b/drivers/net/fm10k/base/fm10k_pf.h
@@ -149,8 +149,6 @@ extern const struct fm10k_tlv_attr 
fm10k_lport_map_msg_attr[];
 #define FM10K_PF_MSG_LPORT_MAP_HANDLER(func) \
FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_LPORT_MAP, \
  fm10k_lport_map_msg_attr, func)
-s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *, u32 **,
-struct fm10k_mbx_info *);
 extern const struct fm10k_tlv_attr fm10k_update_pvid_msg_attr[];
 #define FM10K_PF_MSG_UPDATE_PVID_HANDLER(func) \
FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_UPDATE_PVID, \
@@ -183,7 +181,9 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **,
  struct fm10k_mbx_info *);
 s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *, u32 **,
 struct fm10k_mbx_info *);
+#ifndef NO_DEFAULT_SRIOV_MSG_HANDLERS
 extern const struct fm10k_msg_data fm10k_iov_msg_data_pf[];
+#endif

 s32 fm10k_init_ops_pf(struct fm10k_hw *hw);
 #endif /* _FM10K_PF_H */
diff --git a/drivers/net/fm10k/base/fm10k_tlv.c 
b/drivers/net/fm10k/base/fm10k_tlv.c
index 1d9d7d8..4b5f684 100644
--- a/drivers/net/fm10k/base/fm10k_tlv.c
+++ b/drivers/net/fm10k/base/fm10k_tlv.c
@@ -63,8 +63,8 @@ s32 fm10k_tlv_msg_init(u32 *msg, u16 msg_id)
  *  the attribute buffer.  It will return success if provided with a valid
  *  pointers.
  **/
-s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id,
-  const unsigned char *string)
+static s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id,
+ const unsigned char *string)
 {
u32 attr_data = 0, len = 0;
u32 *attr;
@@ -115,7 +115,7 @@ s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id,
  *  it in the array pointed by by string.  It will return success if provided
  *  with a valid pointers.
  **/
-s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
+static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
 {
u32 len;

@@ -386,7 +386,7 @@ s32 fm10k_tlv_attr_get_le_struct(u32 *attr, void 
*le_struct, u32 len)
  *  function will return NULL on failure, and a pointer to the start
  *  of the nested attributes on success.
  **/
-u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id)
+static u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id)
 {
u32 *attr;

@@ -413,7 +413,7 @@ u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id)
  *  the case of a nest within the nest this would be the outer nest pointer.
  *  This function will return success provided all pointers are valid.
  **/
-s32 fm10k_tlv_attr_nest_stop(u32 *msg)
+static s32 fm10k_tlv_attr_nest_stop(u32 *msg)
 {
u32 *attr;
u32 len;
@@ -522,8 +522,8 @@ STATIC s32 fm10k_tlv_attr_validate(u32 *attr,
  *  FM10K_NOT_IMPLEMENTED for any attribute that is outside of the array
  *  and 0 on success.
  **/
-s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
-const struct fm10k_tlv_attr *tlv_attr)
+static s32 fm10k_tlv_a

[dpdk-dev] [PATCH v3 02/18] fm10k/base: correct typecast in fm10k_update_xc_addr_pf

2016-02-19 Thread Wang Xiao W
Since the resultant data type of the mac_update.mac_upper field is u16,
it does not make sense to typecast u8 variables to u32 first.

Signed-off-by: Wang Xiao W 
---
 drivers/net/fm10k/base/fm10k_pf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/fm10k/base/fm10k_pf.c 
b/drivers/net/fm10k/base/fm10k_pf.c
index 6e6d71e..4a028f3 100644
--- a/drivers/net/fm10k/base/fm10k_pf.c
+++ b/drivers/net/fm10k/base/fm10k_pf.c
@@ -379,8 +379,8 @@ STATIC s32 fm10k_update_xc_addr_pf(struct fm10k_hw *hw, u16 
glort,
 ((u32)mac[3] << 16) |
 ((u32)mac[4] << 8) |
 ((u32)mac[5]));
-   mac_update.mac_upper = FM10K_CPU_TO_LE16(((u32)mac[0] << 8) |
-((u32)mac[1]));
+   mac_update.mac_upper = FM10K_CPU_TO_LE16(((u16)mac[0] << 8) |
+  ((u16)mac[1]));
mac_update.vlan = FM10K_CPU_TO_LE16(vid);
mac_update.glort = FM10K_CPU_TO_LE16(glort);
mac_update.action = add ? 0 : 1;
-- 
1.9.3



[dpdk-dev] [PATCH v3 01/18] fm10k: use default mailbox message handler for PF

2016-02-19 Thread Wang Xiao W
The new shared code makes the fm10k_msg_update_pvid_pf function static, so
we can no longer refer to it in fm10k_ethdev.c. The registered PF handler is
almost the same as the default PF handler, so removing it has no impact on
the mailbox.

Signed-off-by: Wang Xiao W 
Acked-by: Jing Chen 
---
 drivers/net/fm10k/fm10k_ethdev.c | 17 ++---
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 068a20c..6d929cb 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -2501,29 +2501,16 @@ static const struct fm10k_msg_data fm10k_msgdata_vf[] = 
{
FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
 };

-/* Mailbox message handler in PF */
-static const struct fm10k_msg_data fm10k_msgdata_pf[] = {
-   FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
-   FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
-   FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_msg_lport_map_pf),
-   FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
-   FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
-   FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_msg_update_pvid_pf),
-   FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
-};
-
 static int
 fm10k_setup_mbx_service(struct fm10k_hw *hw)
 {
-   int err;
+   int err = 0;

/* Initialize mailbox lock */
fm10k_mbx_initlock(hw);

/* Replace default message handler with new ones */
-   if (hw->mac.type == fm10k_mac_pf)
-   err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_pf);
-   else
+   if (hw->mac.type == fm10k_mac_vf)
err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_vf);

if (err) {
-- 
1.9.3



[dpdk-dev] [PATCH v3 00/18] fm10k: update shared code

2016-02-19 Thread Wang Xiao W
v3:
* Fixed checkpatch.pl warning about long commit message.
* Fixed the compile failure that occurred when only part of the patch set was applied.
* Split the misc-small-fixes patch into several patches.

v2:
* Put the two extra fix patches ahead of the base code patches.

This patch set has passed regression test.

Wang Xiao W (18):
  fm10k: use default mailbox message handler for PF
  fm10k/base: correct typecast in fm10k_update_xc_addr_pf
  fm10k/base: cleanup namespace pollution
  fm10k/base: use bitshift for itr_scale
  fm10k/base: reset max_queues on init_hw_vf failure
  fm10k/base: document ITR scale workaround in VF TDLEN register
  fm10k/base: cleanup lines over 80 characters
  fm10k/base: cleanup useless else
  fm10k/base: use BIT macro instead of open-coded bit-shifting of 1
  fm10k/base: do not use CamelCase
  fm10k/base: use memcpy for mac addr copy
  fm10k/base: allow removal of is_slot_appropriate function
  fm10k/base: consistently use VLAN ID when referencing vid variables
  fm10k/base: imporve comment per upstream review changes
  fm10k/base: fix TLV structures alignment
  fm10k/base: move constants to the right of binary operators
  fm10k/base: minor cleanups
  fm10k/base: remove unused struct element

 drivers/net/fm10k/base/fm10k_api.c   |   2 +
 drivers/net/fm10k/base/fm10k_api.h   |   2 +
 drivers/net/fm10k/base/fm10k_mbx.c   |  63 +++-
 drivers/net/fm10k/base/fm10k_mbx.h   |  11 +--
 drivers/net/fm10k/base/fm10k_osdep.h |  32 ++
 drivers/net/fm10k/base/fm10k_pf.c|  88 +
 drivers/net/fm10k/base/fm10k_pf.h|  18 ++--
 drivers/net/fm10k/base/fm10k_tlv.c   |  40 
 drivers/net/fm10k/base/fm10k_tlv.h   |   9 +-
 drivers/net/fm10k/base/fm10k_type.h  | 182 +++
 drivers/net/fm10k/base/fm10k_vf.c|  32 --
 drivers/net/fm10k/fm10k_ethdev.c |  41 +++-
 12 files changed, 222 insertions(+), 298 deletions(-)

-- 
1.9.3



[dpdk-dev] [PATCH v2] tools/dpdk_nic_bind: fix flake8 warnings

2016-02-19 Thread Mauricio Vasquez B
flake8 checks were run for both python 2.7 and 3.4

There were some style issues, such as:
- Line width > 79
- No double blank line before function definition
- No double blank space before inline comment
- Some other minor issues

Signed-off-by: Mauricio Vasquez B 
---
v2:
fix still existing warning
 tools/dpdk_nic_bind.py | 167 -
 1 file changed, 97 insertions(+), 70 deletions(-)

diff --git a/tools/dpdk_nic_bind.py b/tools/dpdk_nic_bind.py
index 14c5311..28eace3 100755
--- a/tools/dpdk_nic_bind.py
+++ b/tools/dpdk_nic_bind.py
@@ -32,10 +32,12 @@
 #   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #

-import sys, os, getopt, subprocess
+import sys
+import os
+import getopt
+import subprocess
 from os.path import exists, abspath, dirname, basename

-
 # The PCI device class for ETHERNET devices
 ETHERNET_CLASS = "0200"

@@ -43,7 +45,7 @@ ETHERNET_CLASS = "0200"
 # Each device within this is itself a dictionary of device properties
 devices = {}
 # list of supported DPDK drivers
-dpdk_drivers = [ "igb_uio", "vfio-pci", "uio_pci_generic" ]
+dpdk_drivers = ["igb_uio", "vfio-pci", "uio_pci_generic"]

 # command-line arg flags
 b_flag = None
@@ -51,10 +53,11 @@ status_flag = False
 force_flag = False
 args = []

+
 def usage():
 '''Print usage information for the program'''
 argv0 = basename(sys.argv[0])
-print ("""
+print("""
 Usage:
 --

@@ -78,8 +81,9 @@ Options:
 * the driver being used e.g. drv=igb_uio
 * any suitable drivers not currently using that device
 e.g. unused=igb_uio
-NOTE: if this flag is passed along with a bind/unbind option, the 
status
-display will always occur after the other operations have taken place.
+NOTE: if this flag is passed along with a bind/unbind option, the
+status display will always occur after the other operations have taken
+place.

 -b driver, --bind=driver:
 Select the driver to use or \"none\" to unbind the device
@@ -110,7 +114,8 @@ To unbind :01:00.0 from using any driver
 To bind :02:00.0 and :02:00.1 to the ixgbe kernel driver
 %(argv0)s -b ixgbe 02:00.0 02:00.1

-""" % locals()) # replace items from local variables
+""" % locals())  # replace items from local variables
+

 # This is roughly compatible with check_output function in subprocess module
 # which is only available in python 2.7.
@@ -119,6 +124,7 @@ def check_output(args, stderr=None):
 return subprocess.Popen(args, stdout=subprocess.PIPE,
 stderr=stderr).communicate()[0]

+
 def find_module(mod):
 '''find the .ko file for kernel module named mod.
 Searches the $RTE_SDK/$RTE_TARGET directory, the kernel
@@ -126,20 +132,20 @@ def find_module(mod):
 the script '''
 # check $RTE_SDK/$RTE_TARGET directory
 if 'RTE_SDK' in os.environ and 'RTE_TARGET' in os.environ:
-path = "%s/%s/kmod/%s.ko" % (os.environ['RTE_SDK'],\
+path = "%s/%s/kmod/%s.ko" % (os.environ['RTE_SDK'],
  os.environ['RTE_TARGET'], mod)
 if exists(path):
 return path

 # check using depmod
 try:
-depmod_out = check_output(["modinfo", "-n", mod], \
+depmod_out = check_output(["modinfo", "-n", mod],
   stderr=subprocess.STDOUT).lower()
 if "error" not in depmod_out:
 path = depmod_out.strip()
 if exists(path):
 return path
-except: # if modinfo can't find module, it fails, so continue
+except:  # if modinfo can't find module, it fails, so continue
 pass

 # check for a copy based off current path
@@ -147,17 +153,18 @@ def find_module(mod):
 if (tools_dir.endswith("tools")):
 base_dir = dirname(tools_dir)
 find_out = check_output(["find", base_dir, "-name", mod + ".ko"])
-if len(find_out) > 0: #something matched
+if len(find_out) > 0:  # something matched
 path = find_out.splitlines()[0]
 if exists(path):
 return path

+
 def check_modules():
 '''Checks that igb_uio is loaded'''
 global dpdk_drivers

 # list of supported modules
-mods =  [{"Name" : driver, "Found" : False} for driver in dpdk_drivers]
+mods = [{"Name": driver, "Found": False} for driver in dpdk_drivers]

 # first check if module is loaded
 try:
@@ -186,18 +193,20 @@ def check_modules():
 # check if we have at least one loaded module
 if True not in [mod["Found"] for mod in mods] and b_flag is not None:
 if b_flag in dpdk_drivers:
-print ("Error - no supported modules(DPDK driver) are loaded")
+print("Error - no supported modules(DPDK driver) are loaded")
 sys.exit(1)
 else:
-print ("Warning - no supported modules(DPDK driver) are loaded")
+print("Warning - no sup

[dpdk-dev] [PATCH v2 1/3] doc: fix keepalive sample app guide

2016-02-19 Thread Mcnamara, John
> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Harry van Haaren
> Sent: Thursday, January 21, 2016 11:05 AM
> To: Horton, Remy 
> Cc: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 1/3] doc: fix keepalive sample app guide
> 
> This patch fixes some mismatches between the keepalive code and the docs.
> Struct names, and descriptions are not in line with the codebase.
> 
> Fixes: e64833f2273a ("examples/l2fwd-keepalive: add sample application")
> 
> Signed-off-by: Harry van Haaren 

Acked-by: John McNamara 



[dpdk-dev] [PATCH v2] tools/dpdk_nic_bind: fix flake8 warnings

2016-02-19 Thread Mcnamara, John
> -Original Message-
> From: Mauricio Vasquez B
> [mailto:mauricio.vasquezbernal at studenti.polito.it]
> Sent: Friday, February 19, 2016 5:03 PM
> To: dev at dpdk.org
> Cc: Mcnamara, John 
> Subject: [PATCH v2] tools/dpdk_nic_bind: fix flake8 warnings
> 
> flake8 checks were run for both python 2.7 and 3.4
> 
> There were some style issues as:
> - Line width > 79
> - No double blank line before function definition
> - No double blank space before inline comment
> - Some other minor issues
> 
> Signed-off-by: Mauricio Vasquez B
> 

Acked-by: John McNamara 




[dpdk-dev] [PATCH v3 6/8] vhost: handle VHOST_USER_SEND_RARP request

2016-02-19 Thread Yuanhan Liu
On Fri, Feb 19, 2016 at 03:03:26PM +0800, Yuanhan Liu wrote:
> On Fri, Feb 19, 2016 at 02:11:36PM +0800, Tan, Jianfeng wrote:
> > What I suggest here is to move user_send_rarp() to rte_vhost_dequeue_burst()
> > using a flag to control, so that this arp packet can be broadcasted in its
> > own L2 network.
> 
> I have thought of that, too. It was given up because SEND_RARP request was
> handled in different thread from rte_vhost_dequeue_burst(), leading to the
> fact that the RARP packet will not be broadcasted immediately after migration
> is done: it will be broadcasted only when rte_vhost_dequeue_burst() is 
> invoked.
> 
> I was thinking the delay might be a problem. While thinking it twice, it
> doesn't look like one then. As GUEST_ANNOUNCE is also broadcasted by
> rte_vhost_dequeue_burst(); it's enqueued by guest kernel though. And
> judging that we are polling mode driver, it won't be an issue then.
> 
> So, thanks. I will give it a quick try; it should work.

It worked like a charm :) Thanks.

--yliu



[dpdk-dev] [PATCH] Correcting upstream kernel version in driver

2016-02-19 Thread Declan Doherty
On 10/02/16 23:28, John Griffin wrote:
> Fixing the version of the kernel required in the QAT documentation.
>
> Signed-off-by: John Griffin 
> ---
>   doc/guides/cryptodevs/qat.rst | 12 ++--
>   1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst
> index 1901842..23402b4 100644
> --- a/doc/guides/cryptodevs/qat.rst
>
...
>

Acked-by: Declan Doherty 



[dpdk-dev] [PATCH] mempool: fix leak when mempool creation fails

2016-02-19 Thread Thomas Monjalon
2016-02-16 15:40, Olivier Matz:
> Since commits ff909fe21f and 4e32101f9b, it is now possible to free
> memzones and rings.
> 
> The rte_mempool_create() should be modified to take advantage of this
> and not leak memory when an allocation fails.
> 
> Signed-off-by: Olivier Matz 

Applied, thanks



[dpdk-dev] [PATCH v2] vhost: remove vhost_net_device_ops

2016-02-19 Thread Thomas Monjalon
2016-02-17 20:58, Yuanhan Liu:
> On Tue, Feb 16, 2016 at 02:45:04PM -0800, Rich Lane wrote:
> > The indirection is unnecessary because there is only one implementation
> > of the vhost common code. Removing it makes the code more readable.
> > 
> > Signed-off-by: Rich Lane 
> 
> Acked-by: Yuanhan Liu 

Please Rich, may I ask a rebased v3? Thanks


[dpdk-dev] [PATCH v3] vhost: fix leak of fds and mmaps

2016-02-19 Thread Thomas Monjalon
2016-02-10 10:40, Rich Lane:
> The common vhost code only supported a single mmap per device. vhost-user
> worked around this by saving the address/length/fd of each mmap after the end
> of the rte_virtio_memory struct. This only works if the vhost-user code frees
> dev->mem, since the common code is unaware of the extra info. The
> VHOST_USER_RESET_OWNER message is one situation where the common code frees
> dev->mem and leaks the fds and mappings. This happens every time I shut down a
> VM.
> 
> The new code calls back into the implementation (vhost-user or vhost-cuse) to
> clean up these resources.
> 
> The vhost-cuse changes are only compile tested.
> 
> Signed-off-by: Rich Lane 
> Acked-by: Yuanhan Liu 

Applied, thanks



[dpdk-dev] [PATCH v3 0/8] vhost-user live migration support

2016-02-19 Thread Thomas Monjalon
2016-01-29 12:57, Yuanhan Liu:
> This patch set adds the vhost-user live migration support.
> 
> The major task behind that is to log pages we touched during
> live migration, including used vring and desc buffer. So, this
> patch set is basically about adding vhost log support, and
> using it.
> 
> Another important thing is that you need notify the switches
> about the VM location change after migration is done. GUEST_ANNOUNCE
> feature is for that, which sends an GARP message after migration.
> For older kernel (<= v3.4) without GUEST_ANNOUNCE support,
> we construct and broadcast a RARP message, with the mac address
> from VHOST_USER_SEND_RARP payload.
> 
> Patchset
> 
> - Patch 1 handles VHOST_USER_SET_LOG_BASE, which tells us where
>   the dirty memory bitmap is.
> 
> - Patch 2 introduces a vhost_log_write() helper function to log
>   pages we are gonna change.
> 
> - Patch 3 logs changes we made to used vring.
> 
> - Patch 4 logs changes we made to vring desc buffer.
> 
> - Patch 5 and 7 add some feature bits related to live migration.
> 
> - patch 6 does the RARP construction and broadcast job.

Patches 2 and 3 have been merged to avoid a compilation error.
Applied, thanks


[dpdk-dev] [PATCH v5 4/4] app/test-pmd: test tunnel filter for IP in GRE

2016-02-19 Thread Xutao Sun
This patch adds some options to the tunnel_filter command to test IP in GRE
packet classification on i40e.

Signed-off-by: Xutao Sun 
Signed-off-by: Jijiang Liu 
---
 app/test-pmd/cmdline.c | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index c707318..3e7cec8 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -302,11 +302,13 @@ static void cmd_help_long_parsed(void *parsed_result,
" a port\n\n"

"tunnel_filter add (port_id) (outer_mac) (inner_mac) 
(ip_addr) "
-   "(inner_vlan) (vxlan|nvgre) (filter_type) (tenant_id) 
(queue_id)\n"
+   "(inner_vlan) (vxlan|nvgre|ipingre) 
(imac-ivlan|imac-ivlan-tenid|"
+   "imac-tenid|imac|omac-imac-tenid|oip|iip) (tenant_id) 
(queue_id)\n"
"   add a tunnel filter of a port.\n\n"

"tunnel_filter rm (port_id) (outer_mac) (inner_mac) 
(ip_addr) "
-   "(inner_vlan) (vxlan|nvgre) (filter_type) (tenant_id) 
(queue_id)\n"
+   "(inner_vlan) (vxlan|nvgre|ipingre) 
(imac-ivlan|imac-ivlan-tenid|"
+   "imac-tenid|imac|omac-imac-tenid|oip|iip) (tenant_id) 
(queue_id)\n"
"   remove a tunnel filter of a port.\n\n"

"rx_vxlan_port add (udp_port) (port_id)\n"
@@ -6640,6 +6642,8 @@ cmd_tunnel_filter_parsed(void *parsed_result,
struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
int ret = 0;

+   memset(&tunnel_filter_conf, 0, sizeof(tunnel_filter_conf));
+
rte_memcpy(&tunnel_filter_conf.outer_mac, &res->outer_mac,
ETHER_ADDR_LEN);
rte_memcpy(&tunnel_filter_conf.inner_mac, &res->inner_mac,
@@ -6648,12 +6652,14 @@ cmd_tunnel_filter_parsed(void *parsed_result,

if (res->ip_value.family == AF_INET) {
tunnel_filter_conf.ip_addr.ipv4_addr =
-   res->ip_value.addr.ipv4.s_addr;
+   rte_be_to_cpu_32(res->ip_value.addr.ipv4.s_addr);
tunnel_filter_conf.ip_type = RTE_TUNNEL_IPTYPE_IPV4;
} else {
-   memcpy(&(tunnel_filter_conf.ip_addr.ipv6_addr),
-   &(res->ip_value.addr.ipv6),
-   sizeof(struct in6_addr));
+   int i;
+   for (i = 0; i < 4; i++) {
+   tunnel_filter_conf.ip_addr.ipv6_addr[i] =
+   rte_be_to_cpu_32(res->ip_value.addr.ipv6.s6_addr32[i]);
+   }
tunnel_filter_conf.ip_type = RTE_TUNNEL_IPTYPE_IPV6;
}

@@ -6669,6 +6675,10 @@ cmd_tunnel_filter_parsed(void *parsed_result,
else if (!strcmp(res->filter_type, "omac-imac-tenid"))
tunnel_filter_conf.filter_type =
RTE_TUNNEL_FILTER_OMAC_TENID_IMAC;
+   else if (!strcmp(res->filter_type, "oip"))
+   tunnel_filter_conf.filter_type = ETH_TUNNEL_FILTER_OIP;
+   else if (!strcmp(res->filter_type, "iip"))
+   tunnel_filter_conf.filter_type = ETH_TUNNEL_FILTER_IIP;
else {
printf("The filter type is not supported");
return;
@@ -6678,6 +6688,8 @@ cmd_tunnel_filter_parsed(void *parsed_result,
tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
else if (!strcmp(res->tunnel_type, "nvgre"))
tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_NVGRE;
+   else if (!strcmp(res->tunnel_type, "ipingre"))
+   tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_IP_IN_GRE;
else {
printf("The tunnel type %s not supported.\n", res->tunnel_type);
return;
@@ -6723,11 +6735,11 @@ cmdline_parse_token_ipaddr_t cmd_tunnel_filter_ip_value 
=
ip_value);
 cmdline_parse_token_string_t cmd_tunnel_filter_tunnel_type =
TOKEN_STRING_INITIALIZER(struct cmd_tunnel_filter_result,
-   tunnel_type, "vxlan#nvgre");
+   tunnel_type, "vxlan#nvgre#ipingre");

 cmdline_parse_token_string_t cmd_tunnel_filter_filter_type =
TOKEN_STRING_INITIALIZER(struct cmd_tunnel_filter_result,
-   filter_type, "imac-ivlan#imac-ivlan-tenid#imac-tenid#"
+   filter_type, "oip#iip#imac-ivlan#imac-ivlan-tenid#imac-tenid#"
"imac#omac-imac-tenid");
 cmdline_parse_token_num_t cmd_tunnel_filter_tenant_id =
TOKEN_NUM_INITIALIZER(struct cmd_tunnel_filter_result,
@@ -6741,8 +6753,8 @@ cmdline_parse_inst_t cmd_tunnel_filter = {
.data = (void *)0,
.help_str = "add/rm tunnel filter of a port: "
"tunnel_filter add port_id outer_mac inner_mac ip "
-   "inner_vlan tunnel_type(vxlan|nvgre) filter_type "
-   "(imac-ivlan|imac-ivlan-tenid|imac-tenid|"
+   "inner_vlan tunnel_type(vxlan|nvgre|i

[dpdk-dev] [PATCH v5 3/4] driver/i40e: implement tunnel filter for IP in GRE

2016-02-19 Thread Xutao Sun
Signed-off-by: Xutao Sun 
Signed-off-by: Jijiang Liu 
---
 drivers/net/i40e/i40e_ethdev.c | 32 
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 7c22358..a33fef5 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -5797,6 +5797,12 @@ i40e_dev_get_filter_type(uint16_t filter_type, uint16_t 
*flag)
case ETH_TUNNEL_FILTER_IMAC:
*flag = I40E_AQC_ADD_CLOUD_FILTER_IMAC;
break;
+   case ETH_TUNNEL_FILTER_OIP:
+   *flag = I40E_AQC_ADD_CLOUD_FILTER_OIP;
+   break;
+   case ETH_TUNNEL_FILTER_IIP:
+   *flag = I40E_AQC_ADD_CLOUD_FILTER_IIP;
+   break;
default:
PMD_DRV_LOG(ERR, "invalid tunnel filter type");
return -EINVAL;
@@ -5811,7 +5817,7 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
uint8_t add)
 {
uint16_t ip_type;
-   uint8_t tun_type = 0;
+   uint8_t i, tun_type = 0;
int val, ret = 0;
struct i40e_hw *hw = I40E_PF_TO_HW(pf);
struct i40e_vsi *vsi = pf->main_vsi;
@@ -5833,16 +5839,22 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
(void)rte_memcpy(&pfilter->inner_mac, &tunnel_filter->inner_mac,
ETHER_ADDR_LEN);

-   pfilter->inner_vlan = tunnel_filter->inner_vlan;
+   pfilter->inner_vlan = rte_cpu_to_le_16(tunnel_filter->inner_vlan);
if (tunnel_filter->ip_type == RTE_TUNNEL_IPTYPE_IPV4) {
ip_type = I40E_AQC_ADD_CLOUD_FLAGS_IPV4;
+   tunnel_filter->ip_addr.ipv4_addr =
+   rte_cpu_to_le_32(tunnel_filter->ip_addr.ipv4_addr);
(void)rte_memcpy(&pfilter->ipaddr.v4.data,
-   &tunnel_filter->ip_addr,
+   &tunnel_filter->ip_addr.ipv4_addr,
sizeof(pfilter->ipaddr.v4.data));
} else {
ip_type = I40E_AQC_ADD_CLOUD_FLAGS_IPV6;
+   for (i = 0; i < 4; i++) {
+   tunnel_filter->ip_addr.ipv6_addr[i] =
+   rte_cpu_to_le_32(tunnel_filter->ip_addr.ipv6_addr[i]);
+   }
(void)rte_memcpy(&pfilter->ipaddr.v6.data,
-   &tunnel_filter->ip_addr,
+   &tunnel_filter->ip_addr.ipv6_addr,
sizeof(pfilter->ipaddr.v6.data));
}

@@ -5854,6 +5866,9 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
case RTE_TUNNEL_TYPE_NVGRE:
tun_type = I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC;
break;
+   case RTE_TUNNEL_TYPE_IP_IN_GRE:
+   tun_type = I40E_AQC_ADD_CLOUD_TNL_TYPE_IP;
+   break;
default:
/* Other tunnel types is not supported. */
PMD_DRV_LOG(ERR, "tunnel type is not supported.");
@@ -5868,10 +5883,11 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
return -EINVAL;
}

-   pfilter->flags |= I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE | ip_type |
-   (tun_type << I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
-   pfilter->tenant_id = tunnel_filter->tenant_id;
-   pfilter->queue_number = tunnel_filter->queue_id;
+   pfilter->flags |= rte_cpu_to_le_16(
+   I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE
+   | ip_type | (tun_type << I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT));
+   pfilter->tenant_id = rte_cpu_to_le_32(tunnel_filter->tenant_id);
+   pfilter->queue_number = rte_cpu_to_le_16(tunnel_filter->queue_id);

if (add)
ret = i40e_aq_add_cloud_filters(hw, vsi->seid, cld_filter, 1);
-- 
1.9.3



[dpdk-dev] [PATCH v5 2/4] lib/ether: add IP in GRE type

2016-02-19 Thread Xutao Sun
Signed-off-by: Xutao Sun 
Signed-off-by: Jijiang Liu 
---
 lib/librte_ether/rte_eth_ctrl.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h
index 30cbde7..0e948a1 100644
--- a/lib/librte_ether/rte_eth_ctrl.h
+++ b/lib/librte_ether/rte_eth_ctrl.h
@@ -244,6 +244,7 @@ enum rte_eth_tunnel_type {
RTE_TUNNEL_TYPE_GENEVE,
RTE_TUNNEL_TYPE_TEREDO,
RTE_TUNNEL_TYPE_NVGRE,
+   RTE_TUNNEL_TYPE_IP_IN_GRE,
RTE_TUNNEL_TYPE_MAX,
 };

-- 
1.9.3



[dpdk-dev] [PATCH v5 1/4] lib/ether: optimize the'rte_eth_tunnel_filter_conf' structure

2016-02-19 Thread Xutao Sun
Change the fields of outer_mac and inner_mac from pointer to struct in order to 
keep the code's readability.

Signed-off-by: Xutao Sun 
Signed-off-by: Jijiang Liu 
---
 app/test-pmd/cmdline.c  |  6 +++--
 doc/guides/rel_notes/deprecation.rst|  5 -
 doc/guides/rel_notes/release_16_04.rst  |  2 ++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 35 +++--
 drivers/net/i40e/i40e_ethdev.c  | 12 +-
 lib/librte_ether/rte_eth_ctrl.h |  4 ++--
 6 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 52e9f5f..c707318 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -6640,8 +6640,10 @@ cmd_tunnel_filter_parsed(void *parsed_result,
struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
int ret = 0;

-   tunnel_filter_conf.outer_mac = &res->outer_mac;
-   tunnel_filter_conf.inner_mac = &res->inner_mac;
+   rte_memcpy(&tunnel_filter_conf.outer_mac, &res->outer_mac,
+   ETHER_ADDR_LEN);
+   rte_memcpy(&tunnel_filter_conf.inner_mac, &res->inner_mac,
+   ETHER_ADDR_LEN);
tunnel_filter_conf.inner_vlan = res->inner_vlan;

if (res->ip_value.family == AF_INET) {
diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index e94d4a2..a895364 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -32,11 +32,6 @@ Deprecation Notices
   RTE_ETH_FLOW_MAX. The release 2.2 does not contain these ABI changes,
   but release 2.3 will.

-* ABI changes are planned for rte_eth_tunnel_filter_conf. Change the fields
-  of outer_mac and inner_mac from pointer to struct in order to keep the
-  code's readability. The release 2.2 does not contain these ABI changes, but
-  release 2.3 will, and no backwards compatibility is planned.
-
 * The scheduler statistics structure will change to allow keeping track of
   RED actions.

diff --git a/doc/guides/rel_notes/release_16_04.rst 
b/doc/guides/rel_notes/release_16_04.rst
index eb1b3b2..2588225 100644
--- a/doc/guides/rel_notes/release_16_04.rst
+++ b/doc/guides/rel_notes/release_16_04.rst
@@ -104,6 +104,8 @@ ABI Changes
   the previous releases and made in this release. Use fixed width quotes for
   ``rte_function_names`` or ``rte_struct_names``. Use the past tense.

+* The fields of outer_mac and inner_mac were changed from pointer
+  to struct in order to keep the code's readability.

 Shared Library Versions
 ---
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst 
b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index a520cc5..3ee629a 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -553,7 +553,37 @@ tunnel_filter add
 Add a tunnel filter on a port::

testpmd> tunnel_filter add (port_id) (outer_mac) (inner_mac) (ip_addr) \
-(inner_vlan) (tunnel_type) (filter_type) (tenant_id) (queue_id)
+(inner_vlan) (vxlan|nvgre|ipingre) (imac-ivlan|imac-ivlan-tenid|\
+imac-tenid|imac|omac-imac-tenid|oip|iip) (tenant_id) (queue_id)
+
+The available information categories are:
+
+* ``vxlan``: Set tunnel type as VXLAN.
+
+* ``nvgre``: Set tunnel type as NVGRE.
+
+* ``ipingre``: Set tunnel type as IP-in-GRE.
+
+* ``imac-ivlan``: Set filter type as Inner MAC and VLAN.
+
+* ``imac-ivlan-tenid``: Set filter type as Inner MAC, VLAN and tenant ID.
+
+* ``imac-tenid``: Set filter type as Inner MAC and tenant ID.
+
+* ``imac``: Set filter type as Inner MAC.
+
+* ``omac-imac-tenid``: Set filter type as Outer MAC, Inner MAC and tenant ID.
+
+* ``oip``: Set filter type as Outer IP.
+
+* ``iip``: Set filter type as Inner IP.
+
+Example::
+
+   testpmd> tunnel_filter add 0 68:05:CA:28:09:82 00:00:00:00:00:00 \
+192.168.2.2 0 ipingre oip 1 1
+
+   Set an IP-in-GRE tunnel on port 0, and the filter type is Outer IP.

 tunnel_filter remove
 
@@ -561,7 +591,8 @@ tunnel_filter remove
 Remove a tunnel filter on a port::

testpmd> tunnel_filter rm (port_id) (outer_mac) (inner_mac) (ip_addr) \
-(inner_vlan) (tunnel_type) (filter_type) (tenant_id) (queue_id)
+(inner_vlan) (vxlan|nvgre|ipingre) (imac-ivlan|imac-ivlan-tenid|\
+imac-tenid|imac|omac-imac-tenid|oip|iip) (tenant_id) (queue_id)

 rx_vxlan_port add
 ~
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index ef24122..7c22358 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -5828,10 +5828,10 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
}
pfilter = cld_filter;

-   (void)rte_memcpy(&pfilter->outer_mac, tunnel_filter->outer_mac,
-   sizeof(struct ether_addr));
-   (void)rte_memcpy(&pfilter->inner_mac, tunnel_filter->inner_mac,
-

[dpdk-dev] [PATCH v5 0/4] Add tunnel filter support for IP in GRE on i40e

2016-02-19 Thread Xutao Sun
This patch set adds tunnel filter support for IP in GRE on i40e.

v2 changes:
  Fix the byte order problem.

v3 changes:
  Remove the deprecation notice and update the release notes.

v4 changes:
  Modify the mistakes in cmdline.c in the old patch.

v5 changes:
  Fix type errors and update the testpmd documentation.


Xutao Sun (4):
  lib/ether: optimize the'rte_eth_tunnel_filter_conf' structure
  lib/ether: add IP in GRE type
  driver/i40e: implement tunnel filter for IP in GRE
  app/test-pmd: test tunnel filter for IP in GRE

 app/test-pmd/cmdline.c  | 38 +
 doc/guides/rel_notes/deprecation.rst|  5 
 doc/guides/rel_notes/release_16_04.rst  |  2 ++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 35 +--
 drivers/net/i40e/i40e_ethdev.c  | 44 -
 lib/librte_ether/rte_eth_ctrl.h |  5 ++--
 6 files changed, 94 insertions(+), 35 deletions(-)

-- 
1.9.3



[dpdk-dev] [PATCH v3 2/8] vhost: introduce vhost_log_write

2016-02-19 Thread Thomas Monjalon
2016-01-29 12:57, Yuanhan Liu:
> Introduce vhost_log_write() helper function to log the dirty pages we
> touched. Page size is hard-coded to 4096 (VHOST_LOG_PAGE), and each
> log is represented by 1 bit.
> 
> Therefore, vhost_log_write() simply finds the right bit for related
> page we are gonna change, and set it to 1. dev->log_base denotes the
> start of the dirty page bitmap.
> 
> Signed-off-by: Yuanhan Liu 
> Signed-off-by: Victor Kaplansky 
> Tested-by: Pavel Fedin 
[...]
> +static inline void __attribute__((always_inline))
> +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)

lib/librte_vhost/vhost_rxtx.c:59:1: error: unused function 'vhost_log_write'

I think it's better to squash with the next patch.



[dpdk-dev] [PATCH v4] cfgfile: support looking up sections by index

2016-02-19 Thread Dumitrescu, Cristian


> -Original Message-
> From: Rich Lane [mailto:rich.lane at bigswitch.com]
> Sent: Tuesday, February 16, 2016 10:59 PM
> To: dev at dpdk.org
> Cc: Dumitrescu, Cristian ; Panu Matilainen
> 
> Subject: [PATCH v4] cfgfile: support looking up sections by index
> 
> This is useful when sections have duplicate names.
> 
> Signed-off-by: Rich Lane 
> ---
> v3->v4:
> - Added section name return value.
> - Updated API docs for other functions.
> v2->v3
> - Added check for index < 0.
> v1->v2:
> - Added new symbol to version script.
> 
>  lib/librte_cfgfile/rte_cfgfile.c   | 18 ++
>  lib/librte_cfgfile/rte_cfgfile.h   | 39
> ++
>  lib/librte_cfgfile/rte_cfgfile_version.map |  6 +
>  3 files changed, 63 insertions(+)
> 
> diff --git a/lib/librte_cfgfile/rte_cfgfile.c 
> b/lib/librte_cfgfile/rte_cfgfile.c
> index a677dad..c086fc5 100644
> --- a/lib/librte_cfgfile/rte_cfgfile.c
> +++ b/lib/librte_cfgfile/rte_cfgfile.c
> @@ -333,6 +333,24 @@ rte_cfgfile_section_entries(struct rte_cfgfile *cfg,
> const char *sectionname,
>   return i;
>  }
> 
> +int
> +rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, int index,
> + struct rte_cfgfile_entry *entries, int max_entries,
> + char *sectionname)

To be in line with the other API functions, can we please place the sectionname 
parameter after cfg and index and before entries and max_entries (i.e. as the 3rd 
parameter)? Thanks!

> +{
> + int i;
> + const struct rte_cfgfile_section *sect;
> +
> + if (index < 0 || index >= cfg->num_sections)
> + return -1;
> +
> + sect = cfg->sections[index];
> + snprintf(sectionname, CFG_NAME_LEN, "%s", sect->name);
> + for (i = 0; i < max_entries && i < sect->num_entries; i++)
> + entries[i] = *sect->entries[i];
> + return i;
> +}
> +
>  const char *
>  rte_cfgfile_get_entry(struct rte_cfgfile *cfg, const char *sectionname,
>   const char *entryname)
> diff --git a/lib/librte_cfgfile/rte_cfgfile.h 
> b/lib/librte_cfgfile/rte_cfgfile.h
> index d443782..67c9f6c 100644
> --- a/lib/librte_cfgfile/rte_cfgfile.h
> +++ b/lib/librte_cfgfile/rte_cfgfile.h
> @@ -126,6 +126,9 @@ int rte_cfgfile_has_section(struct rte_cfgfile *cfg,
> const char *sectionname);
>  /**
>  * Get number of entries in given config file section
>  *
> +* If multiple sections have the given name this function operates on the
> +* first one.
> +*
>  * @param cfg
>  *   Config file
>  * @param sectionname
> @@ -138,6 +141,9 @@ int rte_cfgfile_section_num_entries(struct rte_cfgfile
> *cfg,
> 
>  /** Get section entries as key-value pairs
>  *
> +* If multiple sections have the given name this function operates on the
> +* first one.
> +*
>  * @param cfg
>  *   Config file
>  * @param sectionname
> @@ -155,8 +161,38 @@ int rte_cfgfile_section_entries(struct rte_cfgfile
> *cfg,
>   struct rte_cfgfile_entry *entries,
>   int max_entries);
> 
> +/** Get section entries as key-value pairs
> +*
> +* The index of a section is the same as the index of its name in the
> +* result of rte_cfgfile_sections. This API can be used when there are
> +* multiple sections with the same name.
> +*
> +* @param cfg
> +*   Config file
> +* @param index
> +*   Section index
> +* @param entries
> +*   Pre-allocated array of at least max_entries entries where the section
> +*   entries are stored as key-value pair after successful invocation
> +* @param max_entries
> +*   Maximum number of section entries to be stored in entries array
> +* @param sectionname
> +*   Pre-allocated string of at least CFG_NAME_LEN characters where the
> +*   section name is stored after successful invocation.
> +* @return
> +*   Number of entries populated on success, negative error code otherwise
> +*/
> +int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
> + int index,
> + struct rte_cfgfile_entry *entries,
> + int max_entries,
> + char *sectionname);
> +
>  /** Get value of the named entry in named config file section
>  *
> +* If multiple sections have the given name this function operates on the
> +* first one.
> +*
>  * @param cfg
>  *   Config file
>  * @param sectionname
> @@ -172,6 +208,9 @@ const char *rte_cfgfile_get_entry(struct rte_cfgfile
> *cfg,
> 
>  /** Check if given entry exists in named config file section
>  *
> +* If multiple sections have the given name this function operates on the
> +* first one.
> +*
>  * @param cfg
>  *   Config file
>  * @param sectionname
> diff --git a/lib/librte_cfgfile/rte_cfgfile_version.map
> b/lib/librte_cfgfile/rte_cfgfile_version.map
> index bf6c6fd..f6a27a9 100644
> --- a/lib/librte_cfgfile/rte_cfgfile_version.map
> +++ b/lib/librte_cfgfile/rte_cfgfile_version.map
> @@ -13,3 +13,9 @@ DPDK_2.0 {
> 
>   local: *;
>  };
> +
> +DPDK_2.3 {
> + global:
> +
> + rte_cfgfile_section_entries_by_index;
> +} DPDK_2.0;
> --
> 1.9.1



[dpdk-dev] [PATCH v2] ixgbe: Fix disable interrupt twice

2016-02-19 Thread Ananyev, Konstantin
Hi Michael


> 
> On 2016/2/2 19:03, Ananyev, Konstantin wrote:
> >
> 
> [...]
> 
>  I don't think i40e misses it, because it is not the right place to disable 
>  interrupts.
>  because all interrupts are enabled in init stage.
> 
>  Actually, ixgbe enable the interrupt in init stage, but in dev_start, it 
>  disable it
>  first and re-enable, so it just the same with doing nothing about 
>  interrupt.
> 
>  Just think below:
> 
>  1. start the port.(interrupt already enabled in init stage, disable -->
>  re-enable)
>  2. stop the port.(disable interrupt)
>  3. start port again(Try to disable, but failed, already disabled)
> 
>  Would you think the code has issue?
> >>> [Zhang, Helin] in ixgbe PMD, it can be seen that uninit() calls 
> >>> dev_close(),
> >>> which calls dev_stop(). So I think the disabling can be done only in 
> >>> dev_stop().
> >>> All others can make use of dev_stop to disable the interrupt.
> >> As I said, if it is in dev_stop, it will has issue when dev_start -->
> >> dev_stop --> dev_start, this also could applied in i40e and fm10k. If
> >> you want to put it in dev_stop, better to remove enable interrupts in
> >> init stage, and only put it in dev_start.
> > We can't remove enabling interrupt at init stage and put it only in 
> > dev_start().
> > That means PF couldn't handle interrupts from VF till dev_start() will be 
> > executed on PF
> >  - which could never happen.
> > For same reason we can't disable all interrupts in dev_stop().
> > See: http://dpdk.org/ml/archives/dev/2015-November/027238.html
> 
> Hi, Konstantin
> 
> Yes, you are right.
> 
> So the only way to fix this issue is to remove it from dev_stop(), and
> leave it in the uninit() stage, which is what my patch does.
> 
> Am I right?

Yes, I think so.
PF should be able to receive MBOX interrupts  after dev_stop().
Konstantin

> 
> Thanks,
> Michael
> > Konstantin
> >
> >> Thanks,
> >> Michael
> >>> Regards,
> >>> Helin
> >>>
>  Thanks,
>  Michael
> 
> > Maybe we can follow fm10k's style.
> >
> >> On other hand, if we remove it in dev_stop, any side effect? In ixgbe
> >> start, it will always disable it first and then re-enable it, so it's 
> >> safe.
> > I think you mean we can disable intr anyway even if it has been 
> > disabled.
>  Actually, we couldn't, DPDK call VFIO ioctl to kernel to disable 
>  interrupts, and
>  if we try disable twice, it will return and error.
>  That's why I mean we need a flag to show the interrupts stats. If it 
>  already
>  disabled, we do not need call in to kernel. just return and give a 
>  warning
>  message.
> 
>  Thanks,
>  Michael
> 
> >  Sounds more like why we don't
> > need this patch :)
> >
> >> Thanks,
> >> Michael
> >



[dpdk-dev] [PATCH RFC 4/4] doc: add note about rte_vhost_enqueue_burst thread safety.

2016-02-19 Thread Yuanhan Liu
On Fri, Feb 19, 2016 at 09:32:43AM +0300, Ilya Maximets wrote:
> Signed-off-by: Ilya Maximets 
> ---
>  doc/guides/prog_guide/thread_safety_dpdk_functions.rst | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/doc/guides/prog_guide/thread_safety_dpdk_functions.rst 
> b/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
> index 403e5fc..13a6c89 100644
> --- a/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
> +++ b/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
> @@ -67,6 +67,7 @@ then locking, or some other form of mutual exclusion, is 
> necessary.
>  The ring library is based on a lockless ring-buffer algorithm that maintains 
> its original design for thread safety.
>  Moreover, it provides high performance for either multi- or 
> single-consumer/producer enqueue/dequeue operations.
>  The mempool library is based on the DPDK lockless ring library and therefore 
> is also multi-thread safe.
> +rte_vhost_enqueue_burst() is also thread safe because based on lockless 
> ring-buffer algorithm like the ring library.

FYI, Huawei meant to make rte_vhost_enqueue_burst() not thread-safe,
to align with the usage of rte_eth_tx_burst().

--yliu


[dpdk-dev] [PATCH v3 0/2] Fix CRC32c computation

2016-02-19 Thread De Lara Guarch, Pablo


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Didier Pallard
> Sent: Friday, February 19, 2016 11:00 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v3 0/2] Fix CRC32c computation
> 
> CRC32c computation is not valid when buffer length is not a multiple of 4
> bytes.
> Values returned by rte_hash_crc functions does not match the one
> computed by a trivial crc32c implementation.
> 
> First patch fixes crc hash function autotests, to outline the problem.
> Second patch fixes CRC32c computation.
> 
> Didier Pallard (2):
>   test: fix CRC hash function autotest
>   hash: fix CRC32c computation
> 
>  app/test/test_hash_functions.c |  17 +++--
>  doc/guides/rel_notes/release_16_04.rst |   5 ++
>  lib/librte_hash/rte_crc_arm64.h|  64 +
>  lib/librte_hash/rte_hash_crc.h | 125 ++-
> --
>  4 files changed, 178 insertions(+), 33 deletions(-)
> 
> --
> 2.1.4

Series-acked-by: Pablo de Lara 

Not sure if you need to include a "Fixes" line in the commit messages.
In the first commit, probably you should, the commit that you are fixing is
6298d2c55ae8 ("app/test: add new functional tests for hash functions").
In the second commit, it is a bit more difficult, as we don't know what the 
commit is:
that code was integrated a while ago, before 1.2.3, which is the first public 
release on dpdk.org.

Also, there is a typo "lengthes", in both commit messages.

You can leave the ack in both patches. Thanks!!




[dpdk-dev] [PATCH RFC 2/4] vhost: make buf vector for scatter RX local.

2016-02-19 Thread Yuanhan Liu
On Fri, Feb 19, 2016 at 09:32:41AM +0300, Ilya Maximets wrote:
> Array of buf_vector's is just an array for temporary storing information
> about available descriptors. It used only locally in virtio_dev_merge_rx()
> and there is no reason for that array to be shared.
> 
> Fix that by allocating local buf_vec inside virtio_dev_merge_rx().
> 
> Signed-off-by: Ilya Maximets 
> ---
>  lib/librte_vhost/rte_virtio_net.h |  1 -
>  lib/librte_vhost/vhost_rxtx.c | 45 
> ---
>  2 files changed, 23 insertions(+), 23 deletions(-)
> 
> diff --git a/lib/librte_vhost/rte_virtio_net.h 
> b/lib/librte_vhost/rte_virtio_net.h
> index 10dcb90..ae1e4fb 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -91,7 +91,6 @@ struct vhost_virtqueue {
>   int kickfd; /**< Currently unused 
> as polling mode is enabled. */
>   int enabled;
>   uint64_treserved[16];   /**< Reserve some 
> spaces for future extension. */
> - struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
> scatter RX. */
>  } __rte_cache_aligned;

I like this kind of cleanup, however, it breaks ABI.

--yliu


[dpdk-dev] [PATCH v3 6/8] vhost: handle VHOST_USER_SEND_RARP request

2016-02-19 Thread Yuanhan Liu
On Fri, Feb 19, 2016 at 02:11:36PM +0800, Tan, Jianfeng wrote:
> Hi Yuanhan,
> 
> On 1/29/2016 12:58 PM, Yuanhan Liu wrote:
> >While in former patch we enabled GUEST_ANNOUNCE feature, so that the
> >guest OS will broadcast a GARP message after migration to notify the
> >switch about the new location of migrated VM, the thing is that
> >GUEST_ANNOUNCE is enabled since kernel v3.5 only. For older kernel,
> >VHOST_USER_SEND_RARP request comes to rescue.
> >
> >The payload of this new request is the mac address of the migrated VM,
> >with that, we could construct a RARP message, and then broadcast it
> >to host interfaces.
> >
> >That's how this patch works:
> >
> >- list all interfaces, with the help of SIOCGIFCONF ioctl command
> >
> >- construct an RARP message and broadcast it
> >
> >Cc: Thibaut Collet 
> >Signed-off-by: Yuanhan Liu 
> >---
> ...
> >+
> >+/*
> >+ * Broadcast a RARP message to all interfaces, to update
> >+ * switch's mac table
> >+ */
> >+int
> >+user_send_rarp(struct VhostUserMsg *msg)
> >+{
> >+uint8_t *mac = (uint8_t *)&msg->payload.u64;
> >+uint8_t rarp[RARP_BUF_SIZE];
> >+struct ifconf ifc = {0, };
> >+struct ifreq *ifr;
> >+int nr = 16;
> >+int fd;
> >+uint32_t i;
> >+
> >+RTE_LOG(DEBUG, VHOST_CONFIG,
> >+":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
> >+mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
> >+
> >+make_rarp_packet(rarp, mac);
> >+
> >+/*
> >+ * Get all interfaces
> >+ */
> >+fd = socket(AF_INET, SOCK_DGRAM, 0);
> >+if (fd < 0) {
> >+perror("failed to create AF_INET socket");
> >+return -1;
> >+}
> >+
> >+again:
> >+ifc.ifc_len = sizeof(*ifr) * nr;
> >+ifc.ifc_buf = realloc(ifc.ifc_buf, ifc.ifc_len);
> >+
> >+if (ioctl(fd, SIOCGIFCONF, &ifc) < 0) {
> >+perror("failed at SIOCGIFCONF");
> >+close(fd);
> >+return -1;
> >+}
> >+
> >+if (ifc.ifc_len == (int)sizeof(struct ifreq) * nr) {
> >+/*
> >+ * current ifc_buf is not big enough to hold
> >+ * all interfaces; double it and try again.
> >+ */
> >+nr *= 2;
> >+goto again;
> >+}
> >+
> >+ifr = (struct ifreq *)ifc.ifc_buf;
> >+for (i = 0; i < ifc.ifc_len / sizeof(struct ifreq); i++)
> >+send_rarp(ifr[i].ifr_name, rarp);
> >+
> >+close(fd);
> >+
> >+return 0;
> >+}
> 
> From how you implement user_send_rarp(), if I understand it correctly, it
> broadcasts this ARP packets to all host interfaces, which I don't think it's
> appropriate. This ARP packets should be sent to it's own L2 networking. You
> should not make the hypothesis that all interfaces maintained in the kernel
> are in the same L2 networking. Even worse, this could bring problems when
> used in overlay networking, in which two VM in two different overlay
> networking, can have same MAC address.
> 
> What I suggest here is to move user_send_rarp() to rte_vhost_dequeue_burst()
> using a flag to control, so that this arp packet can be broadcasted in its
> own L2 network.

I have thought of that, too. It was given up because SEND_RARP request was
handled in different thread from rte_vhost_dequeue_burst(), leading to the
fact that the RARP packet will not be broadcasted immediately after migration
is done: it will be broadcasted only when rte_vhost_dequeue_burst() is invoked.

I was thinking the delay might be a problem. On second thought, it
doesn't look like one: GUEST_ANNOUNCE is also broadcast by
rte_vhost_dequeue_burst() (it's enqueued by the guest kernel, though). And
given that we are a polling mode driver, it won't be an issue.

So, thanks. I will give it a quick try; it should work.

--yliu


[dpdk-dev] DPDK-QoS - link sharing across classes

2016-02-19 Thread Dumitrescu, Cristian


From: sreenaath vasudevan [mailto:sreenaat...@gmail.com]
Sent: Thursday, February 18, 2016 8:01 PM
To: Dumitrescu, Cristian 
Cc: dev at dpdk.org
Subject: Re: [dpdk-dev] DPDK-QoS - link sharing across classes

Hi Cristian
Thanks for detailed response.
Your solution works so long as I have four queues in my current implementation.
[Cristian] Yes, I was relying on you saying you can actually group Q0 and Q1 
together, as well as Q2 and Q3, Q4 and Q5, Q6 and Q7.

Following are the two issues I have now
1. I have 8 queues in the current implementation. This means I need to map the 
existing 8 queues to two sets of 4 queues across two different classes  (C0 and 
C1) in DPDK-QOs right? The problem with that approach is that queue weights are 
not relative across classes. Is there a way to work around this?
[Cristian] You can, of course, change the code to implement 8 queues per pipe 
traffic class, but this is not going to be a trivial exercise. With the current 
implementation unmodified, you could simply map 4 queues to e.g. TC0 and 4 
queues to e.g. TC1, provided that it makes sense to prioritize the 4 queues of 
TC0 as higher priority than those 4 queues mapped to TC1, so basically you are 
OK with having strict priority (up to an upper limit) between the 2 sets of 4 
queues.


2. B/w redistribution -
 a) The moment I map the current implementation's 8 queues across two 
different classes (say C0 and C1), would remaining b/w be distributed across 
the two classes C0 and C1? Is it true that in the current DPDK-QoS 
implementation, unused b/w gets distributed only to the last class C3? Would 
not un-used b/w from C0 come to C1?
[Cristian] With 4 queues mapped to TC0 and 4 queues mapped to TC1, you can set 
TC0 rate to X% of pipe rate and TC1 rate to 100% of pipe rate. This is the idea 
behind strict priority: the sum of TC rates is usually bigger than 100% of pipe 
rate, but this is fine, as due to strict priority the lowest priority TC used 
(which in this case is TC1, not TC3, as you are not using TC2 and TC3 at all) 
only gets 100% of the pipe rate when no traffic from higher priority TCs 
exists. In this case, the extremes are: (1) TC0 demand is high, so TC0 uses 
everything up to X%, so TC1 can use a max of (100 - X)% and (2) TC0 demand is 
zero, in which case TC1 is free to use up to 100% of pipe rate. So the answers 
to your 3 questions are: yes/no/yes.

 b) In the current DPDK QoS implementation, if C1 has un-used b/w would that get 
used by C0? Or is it only that the lower priority class (C3, more specifically) uses 
the un-used b/w from higher priority classes (C0,C1,C2)?
[Cristian] Due to strict priority, it does not make sense to think about higher 
priority classes using whatever is not used by low priority classes: If high 
priority classes have traffic to send, they will always be picked in the 
detriment of lower ones; when the high priority classes hit their upper limit 
rate, then they are not allowed to send more, otherwise the upper limit makes 
no sense. So the bandwidth reuse concept makes sense only for low prio TCs to 
reuse whatever is not used from high prio TCs. Usually, this is configured by 
fully provisioning TC0 .. TC2 and setting TC3 to 100% of pipe rate, so TC3 can 
use its rate plus whatever gets unused by TC0 .. TC2, so TC3 rate is between: 
100% - (rate TC0 + rate TC1 + rate TC2) and 100% of pipe rate. As explained 
above, this can be applicable to e.g TC1 as well when only TC0 and TC1 are 
actually used. So the answers to your 2 questions are: no/no.


On Tue, Feb 16, 2016 at 2:46 PM, Dumitrescu, Cristian mailto:cristian.dumitrescu at intel.com>> wrote:


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On 
> Behalf Of sreenaath
> vasudevan
> Sent: Tuesday, February 2, 2016 9:09 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] DPDK-QoS - link sharing across classes
>
> Hi
> I currently have QoS implemented in hardware and I am thinking of using
> DPDK's QoS feature to move it to software.
> Currently in the hardware,Based on the 4 class per pipe and 4 queues per
> class limitation, I was thinking of using 4 classes in DPDK-QoS and spread
> out the 8 h/w queues across the 4 classes.
> Let me explain with an example. Currently, this is how the h/w queue is
> represented
> Q0 - 10% b/w
> Q1- 10%  b/w
> Q2- 15% b/w
> Q3 - 15% b/w
> Q4 - 15% b/w
> Q5 - 15% b/w
> Q6 - 10% b/w
> Q7 - 10% b/w
>
> Translating the above config to DPDK-QoS, based on my application need, Q0
> and Q1 can be logically grouped under class0 with upper b/w = 20%; Q2, Q3,
> Q4, Q5 can be logically grouped under class2 with upper b/w = 60%; Q6 and
> Q7 can be logically grouped under class 3 with upper b/w = 20%.
>
> However, in the h/w, link sharing is available across all the 8 queues.
> DPDK materials say link sharing "typically" is enabled for last class, in
> this case class2. However, I also want class 1 or class 0 to use the
> rem

[dpdk-dev] snapshot for 2.2.0 problem?

2016-02-19 Thread Thomas F Herbert
Hi,

I am not sure if anyone has noticed this yet, but is the dpdk snapshot 
bad today?

wget http://dpdk.org/browse/dpdk/snapshot/dpdk-2.2.0.tar.gz
--2016-02-19 19:35:23-- 
http://dpdk.org/browse/dpdk/snapshot/dpdk-2.2.0.tar.gz
Resolving dpdk.org (dpdk.org)... 92.243.14.124
Connecting to dpdk.org (dpdk.org)|92.243.14.124|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/x-gzip]
Saving to: 'dpdk-2.2.0.tar.gz'

 [ <=> ] 2,160,380   704KB/s   in 3.0s

2016-02-19 19:35:27 (704 KB/s) - 'dpdk-2.2.0.tar.gz' saved [2160380]

[therbert at d2fxl02 tmp]$ tar -tvzf dpdk-2.2.0.tar.gz
drwxrwxr-x root/root 0 2015-12-15 17:06 dpdk-2.2.0/
-rw-rw-r-- root/root 0 2015-12-15 17:06 dpdk-2.2.0/.gitignore
-rw-rw-r-- root/root  1826 2015-12-15 17:06 dpdk-2.2.0/GNUmakefile
-rw-rw-r-- root/root 17987 2015-12-15 17:06 dpdk-2.2.0/LICENSE.GPL
-rw-rw-r-- root/root 26530 2015-12-15 17:06 dpdk-2.2.0/LICENSE.LGPL
-rw-rw-r-- root/root 15336 2015-12-15 17:06 dpdk-2.2.0/MAINTAINERS
-rw-rw-r-- root/root  1708 2015-12-15 17:06 dpdk-2.2.0/Makefile
...
-rw-rw-r-- root/root  21510564 2015-12-15 17:06 
dpdk-2.2.0/app/test/test_lpm_routes.h

gzip: stdin: unexpected end of file
tar: Unexpected EOF in archive
tar: Error is not recoverable: exiting now

--TFH


[dpdk-dev] [PATCH v7 2/4] virtio: Introduce config RTE_VIRTIO_INC_VECTOR

2016-02-19 Thread Yuanhan Liu
On Fri, Feb 19, 2016 at 10:16:42AM +0530, Santosh Shukla wrote:
> On Tue, Feb 16, 2016 at 8:35 AM, Yuanhan Liu
>  wrote:
> > On Mon, Feb 15, 2016 at 04:48:36PM +0530, Santosh Shukla wrote:
> >> Hi Yuanhan,
> >>
> >> On Mon, Feb 15, 2016 at 4:27 PM, Yuanhan Liu
> >>  wrote:
> >> > On Mon, Feb 15, 2016 at 03:22:11PM +0530, Santosh Shukla wrote:
> >> >> Hi Yuanhan,
> >> >>
> >> >> I guess you are back from vacation.
> >> >>
> >> >> Can you pl. review this patch, Except this patch, rest of patches
> >> >> received ack-by:
> >> >
> >> > I had a quick glimpse of the comments from Thomas: he made a good point.
> >> > I will have a deeper thought tomorrow, to see what I can do to fix it.
> >> >
> >>
> >> I agree to what Thomas pointed out about runtime mode switch (vectored
> >> vs non-vectored). I have a proposal in mind and would like to know your
> >> opinion:
> >>
> >> - need for apis like is_arch_support_vec().
> >>
> >> if (is_arch_support_vec())
> >>  simple_ = 1 /* Switch code path to vector mode */
> >> else
> >>  simple_ = 0  /* Switch code path to non-vector mode */
> >>
> >> That api should reside to arch file. i.e.. arch like i686/arm{for
> >> implementation not exist so say no supported} will return 0 and for
> >> x86_64 = 1
> >
> > I was thinking that Thomas meant to something like below (like what
> > we did at rte_memcpy.h):
> >
> > #ifdef RTE_MACHINE_CPUFLAG_SSE (or whatever)
> >
> > /* with vec here */
> >
> > #else
> >
> > /* without vec here */
> >
> > #endif
> >
> > I mean, you have to bypass the build first; otherwise, you can't
> > go that further to runtime, right?
> >
> 
> I meant: move virtio_recv_pkt_vec() implementation in
> lib/libeal_rte/xx/include/arch/xx/virtio_vec.h. virtio driver to check
> for CPUFLAG supported or not and then use _recv_pkt() call back
> function from arch files. This approach will avoid #ifdef ARCH
> clutter.

Moving virtio stuff to eal looks wrong to me.

Huawei, please comment on this.

--yliu


[dpdk-dev] [PATCH 6/6] mempool: add in the RTE_NEXT_ABI protection for ABI breakages

2016-02-19 Thread Olivier MATZ
Hi David,

On 02/16/2016 03:48 PM, David Hunt wrote:
> v2: Kept all the NEXT_ABI defs to this patch so as to make the
> previous patches easier to read, and also to make it clear what
> code is necessary to keep ABI compatibility when NEXT_ABI is
> disabled.
> 
> Signed-off-by: David Hunt 
> ---
>  app/test/Makefile|   2 +
>  app/test/test_mempool_perf.c |   3 +
>  lib/librte_mbuf/rte_mbuf.c   |   7 ++
>  lib/librte_mempool/Makefile  |   2 +
>  lib/librte_mempool/rte_mempool.c | 240 
> ++-
>  lib/librte_mempool/rte_mempool.h |  68 ++-
>  6 files changed, 320 insertions(+), 2 deletions(-)

Given the size of this patch, I don't think it's worth adding the
NEXT ABI in that case.

Regards,
Olivier



[dpdk-dev] [PATCH 2/6] mempool: add stack (lifo) based external mempool handler

2016-02-19 Thread Olivier MATZ
Hi David,

On 02/16/2016 03:48 PM, David Hunt wrote:
> adds a simple stack based mempool handler
> 
> Signed-off-by: David Hunt 
> ---
>  lib/librte_mempool/Makefile|   2 +-
>  lib/librte_mempool/rte_mempool.c   |   4 +-
>  lib/librte_mempool/rte_mempool.h   |   1 +
>  lib/librte_mempool/rte_mempool_stack.c | 164 
> +
>  4 files changed, 169 insertions(+), 2 deletions(-)
>  create mode 100644 lib/librte_mempool/rte_mempool_stack.c
> 

I don't get what is the purpose of this handler. Is it an example
or is it something that could be useful for dpdk applications?

If it's an example, we should find a way to put the code outside
the librte_mempool library, maybe in the test program. I see there
is also a "custom handler". Do we really need to have both?


Regards,
Olivier




[dpdk-dev] [PATCH 1/6] mempool: add external mempool manager support

2016-02-19 Thread Olivier MATZ
Hi David,

On 02/16/2016 03:48 PM, David Hunt wrote:
> Adds the new rte_mempool_create_ext api and callback mechanism for
> external mempool handlers
> 
> Modifies the existing rte_mempool_create to set up the handler_idx to
> the relevant mempool handler based on the handler name:
> ring_sp_sc
> ring_mp_mc
> ring_sp_mc
> ring_mp_sc
> 
> v2: merges the duplicated code in rte_mempool_xmem_create and
> rte_mempool_create_ext into one common function. The old functions
> now call the new common function with the relevant parameters.
> 
> Signed-off-by: David Hunt 

I think the refactoring of rte_mempool_create() (adding of
mempool_create()) should go in another commit. It will make the
patches much easier to read.

Also, I'm sorry but it seems that several comments or questions I've made
in http://dpdk.org/ml/archives/dev/2016-February/032706.html are
not addressed.

Examples:
- putting some part of the patch in separate commits
- meaning of "rt_pool"
- put_pool_bulk unclear comment
- should we also have get_pool_bulk stats?
- missing _MEMPOOL_STAT_ADD() in mempool_bulk()
- why internal in rte_mempool_internal.h?
- why default in rte_mempool_default.c?
- remaining references to stack handler (in a comment)
- ...?

As you know, doing a proper code review takes a lot of time. If I
have to re-check all of my previous comments, it will take even
more. I'm not saying all my comments require a code change, but in case
you don't agree, please at least explain your opinion so we can debate
on the list.

Regards,
Olivier


[dpdk-dev] [PATCH 0/6] external mempool manager

2016-02-19 Thread Olivier MATZ
Hi,

On 02/16/2016 03:48 PM, David Hunt wrote:
> Hi list.
> 
> Here's the v2 version of a proposed patch for an external mempool manager

Just to notice the "v2" is missing in the title, it would help
to have it for next versions of the series.

Regards,
Olivier



[dpdk-dev] [PATCH v3 6/8] vhost: handle VHOST_USER_SEND_RARP request

2016-02-19 Thread Tan, Jianfeng
Hi Yuanhan,

On 1/29/2016 12:58 PM, Yuanhan Liu wrote:
> While in former patch we enabled GUEST_ANNOUNCE feature, so that the
> guest OS will broadcast a GARP message after migration to notify the
> switch about the new location of migrated VM, the thing is that
> GUEST_ANNOUNCE is enabled since kernel v3.5 only. For older kernel,
> VHOST_USER_SEND_RARP request comes to rescue.
>
> The payload of this new request is the mac address of the migrated VM,
> with that, we could construct a RARP message, and then broadcast it
> to host interfaces.
>
> That's how this patch works:
>
> - list all interfaces, with the help of SIOCGIFCONF ioctl command
>
> - construct an RARP message and broadcast it
>
> Cc: Thibaut Collet 
> Signed-off-by: Yuanhan Liu 
> ---
...
> +
> +/*
> + * Broadcast a RARP message to all interfaces, to update
> + * switch's mac table
> + */
> +int
> +user_send_rarp(struct VhostUserMsg *msg)
> +{
> + uint8_t *mac = (uint8_t *)&msg->payload.u64;
> + uint8_t rarp[RARP_BUF_SIZE];
> + struct ifconf ifc = {0, };
> + struct ifreq *ifr;
> + int nr = 16;
> + int fd;
> + uint32_t i;
> +
> + RTE_LOG(DEBUG, VHOST_CONFIG,
> + ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
> + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
> +
> + make_rarp_packet(rarp, mac);
> +
> + /*
> +  * Get all interfaces
> +  */
> + fd = socket(AF_INET, SOCK_DGRAM, 0);
> + if (fd < 0) {
> + perror("failed to create AF_INET socket");
> + return -1;
> + }
> +
> +again:
> + ifc.ifc_len = sizeof(*ifr) * nr;
> + ifc.ifc_buf = realloc(ifc.ifc_buf, ifc.ifc_len);
> +
> + if (ioctl(fd, SIOCGIFCONF, &ifc) < 0) {
> + perror("failed at SIOCGIFCONF");
> + close(fd);
> + return -1;
> + }
> +
> + if (ifc.ifc_len == (int)sizeof(struct ifreq) * nr) {
> + /*
> +  * current ifc_buf is not big enough to hold
> +  * all interfaces; double it and try again.
> +  */
> + nr *= 2;
> + goto again;
> + }
> +
> + ifr = (struct ifreq *)ifc.ifc_buf;
> + for (i = 0; i < ifc.ifc_len / sizeof(struct ifreq); i++)
> + send_rarp(ifr[i].ifr_name, rarp);
> +
> + close(fd);
> +
> + return 0;
> +}

 From how you implement user_send_rarp(), if I understand it correctly, 
it broadcasts these ARP packets to all host interfaces, which I don't 
think is appropriate. These ARP packets should be sent to the VM's own L2 
network. You should not assume that all interfaces 
maintained in the kernel are in the same L2 network. Even worse, this 
could cause problems when used in overlay networking, in which two VMs in 
two different overlay networks can have the same MAC address.

What I suggest here is to move user_send_rarp() to 
rte_vhost_dequeue_burst() using a flag to control, so that this arp 
packet can be broadcasted in its own L2 network.

Thanks,
Jianfeng




[dpdk-dev] [PATCH] tools/dpdk_nic_bind: fix flake8 warnings

2016-02-19 Thread Mcnamara, John
> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Mauricio Vasquez B
> Sent: Thursday, February 18, 2016 10:33 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH] tools/dpdk_nic_bind: fix flake8 warnings
> 
> flake8 checks were run for both python 2.7 and 3.4
> 
> There were some style issues as:
> - Line width > 79
> - No double blank line before function definition
> - No double blank space before inline comment
> - Some other minor issues
> 
> Signed-off-by: Mauricio Vasquez B
> 

Hi,

Thanks for that. A lot of these issues were introduced with patch:

16c1814c802c ("tools: support Python 3 in bind script")

I meant to comment on the previous patch but it was merged before I
got a chance.

>  dev_lines = check_output(["lspci", "-Dvmmn"]).splitlines()
>  for dev_line in dev_lines:
>  if (len(dev_line) == 0):
>  if dev["Class"] == ETHERNET_CLASS:
>  #convert device and vendor ids to numbers, then add to

There is still one pep8 issue reported here:

   $ pep8 tools/dpdk_nic_bind.py 
   tools/dpdk_nic_bind.py:252:17: E265 block comment should start with '# '

Perhaps you can submit a v2 with that minor change. Otherwise:

Acked-by: John McNamara 



[dpdk-dev] [PATCH RFC v2 3/3] vhost: avoid reordering of used->idx and last_used_idx updating.

2016-02-19 Thread Ilya Maximets
Calling rte_vhost_enqueue_burst() simultaneously from different threads
for the same queue_id requires additional SMP memory barrier to avoid
reordering of used->idx and last_used_idx updates.

In the case of virtio_dev_rx(), the memory barrier rte_mb() is simply moved
one instruction higher.

Signed-off-by: Ilya Maximets 
---
 lib/librte_vhost/vhost_rxtx.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 9095fb1..a03f687 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -281,10 +281,13 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
rte_pause();

*(volatile uint16_t *)&vq->used->idx += count;
-   vq->last_used_idx = res_end_idx;

-   /* flush used->idx update before we read avail->flags. */
+   /*
+* Flush used->idx update to make it visible to virtio and all other
+* threads before allowing to modify it.
+*/
rte_mb();
+   vq->last_used_idx = res_end_idx;

/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
@@ -586,19 +589,24 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
rte_pause();

*(volatile uint16_t *)&vq->used->idx += entry_success;
+   /*
+* Flush used->idx update to make it visible to all
+* other threads before allowing to modify it.
+*/
+   rte_smp_wmb();
+
vq->last_used_idx = res_cur_idx;
}

 merge_rx_exit:
if (likely(pkt_idx)) {
-   /* flush used->idx update before we read avail->flags. */
+   /* Flush used->idx update to make it visible to virtio. */
rte_mb();

/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
eventfd_write(vq->callfd, (eventfd_t)1);
}
-
return pkt_idx;
 }

-- 
2.5.0



[dpdk-dev] [PATCH RFC v2 2/3] vhost: make buf vector for scatter RX local.

2016-02-19 Thread Ilya Maximets
The array of buf_vectors is just an array for temporarily storing information
about available descriptors. It is used only locally in virtio_dev_merge_rx()
and there is no reason for that array to be shared.

Fix that by allocating local buf_vec inside virtio_dev_merge_rx().
buf_vec field of struct vhost_virtqueue marked as deprecated.

Signed-off-by: Ilya Maximets 
---
 doc/guides/rel_notes/deprecation.rst |  1 +
 lib/librte_vhost/rte_virtio_net.h|  2 +-
 lib/librte_vhost/vhost_rxtx.c| 45 ++--
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index e94d4a2..40f350d 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -7,6 +7,7 @@ API and ABI deprecation notices are to be posted here.

 Deprecation Notices
 ---
+* Field buf_vec of struct vhost_virtqueue have been deprecated.

 * The following fields have been deprecated in rte_eth_stats:
   ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 10dcb90..db05d68 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -91,7 +91,7 @@ struct vhost_virtqueue {
int kickfd; /**< Currently unused 
as polling mode is enabled. */
int enabled;
uint64_treserved[16];   /**< Reserve some 
spaces for future extension. */
-   struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
scatter RX. */
+   struct buf_vector   buf_vec[BUF_VECTOR_MAX] __rte_deprecated;   
 /**< @deprecated Buffer for scatter RX. */
 } __rte_cache_aligned;


diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 411dd95..9095fb1 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -295,7 +295,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 static inline uint32_t __attribute__((always_inline))
 copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t queue_id,
uint16_t res_base_idx, uint16_t res_end_idx,
-   struct rte_mbuf *pkt)
+   struct rte_mbuf *pkt, struct buf_vector *buf_vec)
 {
uint32_t vec_idx = 0;
uint32_t entry_success = 0;
@@ -325,7 +325,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,
 */
vq = dev->virtqueue[queue_id];

-   vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+   vb_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
vb_hdr_addr = vb_addr;

/* Prefetch buffer address. */
@@ -345,19 +345,19 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,

seg_avail = rte_pktmbuf_data_len(pkt);
vb_offset = vq->vhost_hlen;
-   vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+   vb_avail = buf_vec[vec_idx].buf_len - vq->vhost_hlen;

entry_len = vq->vhost_hlen;

if (vb_avail == 0) {
uint32_t desc_idx =
-   vq->buf_vec[vec_idx].desc_idx;
+   buf_vec[vec_idx].desc_idx;

if ((vq->desc[desc_idx].flags
& VRING_DESC_F_NEXT) == 0) {
/* Update used ring with desc information */
vq->used->ring[cur_idx & (vq->size - 1)].id
-   = vq->buf_vec[vec_idx].desc_idx;
+   = buf_vec[vec_idx].desc_idx;
vq->used->ring[cur_idx & (vq->size - 1)].len
= entry_len;

@@ -367,12 +367,12 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,
}

vec_idx++;
-   vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+   vb_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);

/* Prefetch buffer address. */
rte_prefetch0((void *)(uintptr_t)vb_addr);
vb_offset = 0;
-   vb_avail = vq->buf_vec[vec_idx].buf_len;
+   vb_avail = buf_vec[vec_idx].buf_len;
}

cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -399,11 +399,11 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,
 * entry reach to its end.
 * But the segment doesn't complete.
 */
-   if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
+   if ((vq->desc[buf_vec[vec_idx].desc_idx].flags &
VRING_DESC_F_NEXT) == 0) {
/* Update used ring with desc information */
vq->used->ring[cur_idx & (vq->size - 1)].id
-

[dpdk-dev] [PATCH RFC v2 1/3] vhost: use SMP barriers instead of compiler ones.

2016-02-19 Thread Ilya Maximets
Since commit 4c02e453cc62 ("eal: introduce SMP memory barriers") virtio
uses architecture dependent SMP barriers. vHost should use them too.

Fixes: 4c02e453cc62 ("eal: introduce SMP memory barriers")

Signed-off-by: Ilya Maximets 
---
 lib/librte_vhost/vhost_rxtx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 5e7e5b1..411dd95 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -274,7 +274,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
}
}

-   rte_compiler_barrier();
+   rte_smp_wmb();

/* Wait until it's our turn to add our buffer to the used ring. */
while (unlikely(vq->last_used_idx != res_base_idx))
@@ -575,7 +575,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
entry_success = copy_from_mbuf_to_vring(dev, queue_id,
res_base_idx, res_cur_idx, pkts[pkt_idx]);

-   rte_compiler_barrier();
+   rte_smp_wmb();

/*
 * Wait until it's our turn to add our buffer
@@ -917,7 +917,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
entry_success++;
}

-   rte_compiler_barrier();
+   rte_smp_rmb();
vq->used->idx += entry_success;
/* Kick guest if required. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-- 
2.5.0



[dpdk-dev] [PATCH RFC v2 0/3] Thread safe rte_vhost_enqueue_burst().

2016-02-19 Thread Ilya Maximets
The implementation of rte_vhost_enqueue_burst() is based on a lockless ring-buffer
algorithm and contains almost everything needed to be thread-safe, but it's not.

This set adds required changes.

The first patch in the set is a standalone patch that fixes a much-discussed
issue with barriers on different architectures.

The second and third patches add fixes to make rte_vhost_enqueue_burst thread safe.

version 2:
* Documentation patch dropped. Other patches of series still
  may be merged to fix existing issues and keep code in
  consistent state for the future.
* buf_vec field of struct vhost_virtqueue marked as deprecated.

Ilya Maximets (3):
  vhost: use SMP barriers instead of compiler ones.
  vhost: make buf vector for scatter RX local.
  vhost: avoid reordering of used->idx and last_used_idx updating.

 doc/guides/rel_notes/deprecation.rst |  1 +
 lib/librte_vhost/rte_virtio_net.h|  2 +-
 lib/librte_vhost/vhost_rxtx.c| 67 
 3 files changed, 40 insertions(+), 30 deletions(-)

-- 
2.5.0



[dpdk-dev] TX performance regression caused by the mbuf cachline split

2016-02-19 Thread Olivier MATZ
Hi Paul,

On 02/15/2016 08:15 PM, Paul Emmerich wrote:
> The bulk_alloc patch is great and helps. I'd love to see such a function
> in DPDK.
> 

A patch has been submitted by Huawei. I guess it will be integrated
soon.
See http://dpdk.org/dev/patchwork/patch/10122/


Regards,
Olivier


[dpdk-dev] [PATCH RFC 4/4] doc: add note about rte_vhost_enqueue_burst thread safety.

2016-02-19 Thread Ilya Maximets
On 19.02.2016 11:36, Xie, Huawei wrote:
> On 2/19/2016 3:10 PM, Yuanhan Liu wrote:
>> On Fri, Feb 19, 2016 at 09:32:43AM +0300, Ilya Maximets wrote:
>>> Signed-off-by: Ilya Maximets 
>>> ---
>>>  doc/guides/prog_guide/thread_safety_dpdk_functions.rst | 1 +
>>>  1 file changed, 1 insertion(+)
>>>
>>> diff --git a/doc/guides/prog_guide/thread_safety_dpdk_functions.rst 
>>> b/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
>>> index 403e5fc..13a6c89 100644
>>> --- a/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
>>> +++ b/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
>>> @@ -67,6 +67,7 @@ then locking, or some other form of mutual exclusion, is 
>>> necessary.
>>>  The ring library is based on a lockless ring-buffer algorithm that 
>>> maintains its original design for thread safety.
>>>  Moreover, it provides high performance for either multi- or 
>>> single-consumer/producer enqueue/dequeue operations.
>>>  The mempool library is based on the DPDK lockless ring library and 
>>> therefore is also multi-thread safe.
>>> +rte_vhost_enqueue_burst() is also thread safe because based on lockless 
>>> ring-buffer algorithm like the ring library.
>> FYI, Huawei meant to make rte_vhost_enqueue_burst() not be thread-safe,
>> to align with the usage of rte_eth_tx_burst().
>>
>>  --yliu
> 
> I have a patch to remove the lockless enqueue. Unless there is strong
> reason, i prefer vhost PMD to behave like other PMDs, with no internal
> lockless algorithm. In future, for people who really need it, we could
> have dynamic/static switch to enable it.

OK, got it. So, I think, this documentation patch may be dropped.
Other patches of series still may be merged to fix existing issues and
keep code in consistent state for the future.
Am I right?

Best regards, Ilya Maximets.


[dpdk-dev] [PATCH v3 2/2] hash: fix CRC32c computation

2016-02-19 Thread Didier Pallard
Fix crc32c hash functions to return a valid crc32c value for
data lengths that are not a multiple of 4 bytes.
ARM code is not tested.

Signed-off-by: Didier Pallard 
Acked-by: David Marchand 
---
 doc/guides/rel_notes/release_16_04.rst |   5 ++
 lib/librte_hash/rte_crc_arm64.h|  64 +
 lib/librte_hash/rte_hash_crc.h | 125 ++---
 3 files changed, 167 insertions(+), 27 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_04.rst 
b/doc/guides/rel_notes/release_16_04.rst
index eb1b3b2..45b35dd 100644
--- a/doc/guides/rel_notes/release_16_04.rst
+++ b/doc/guides/rel_notes/release_16_04.rst
@@ -68,6 +68,11 @@ Drivers
 Libraries
 ~

+* **hash: Fixed CRC32c hash computation for non multiple of 4 bytes sizes.**
+
+  Fix crc32c hash functions to return a valid crc32c value for data lengthes
+  not multiple of 4 bytes.
+

 Examples
 
diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
index 02e26bc..7dd6334 100644
--- a/lib/librte_hash/rte_crc_arm64.h
+++ b/lib/librte_hash/rte_crc_arm64.h
@@ -50,6 +50,28 @@ extern "C" {
 #include 

 static inline uint32_t
+crc32c_arm64_u8(uint8_t data, uint32_t init_val)
+{
+   asm(".arch armv8-a+crc");
+   __asm__ volatile(
+   "crc32cb %w[crc], %w[crc], %w[value]"
+   : [crc] "+r" (init_val)
+   : [value] "r" (data));
+   return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u16(uint16_t data, uint32_t init_val)
+{
+   asm(".arch armv8-a+crc");
+   __asm__ volatile(
+   "crc32ch %w[crc], %w[crc], %w[value]"
+   : [crc] "+r" (init_val)
+   : [value] "r" (data));
+   return init_val;
+}
+
+static inline uint32_t
 crc32c_arm64_u32(uint32_t data, uint32_t init_val)
 {
asm(".arch armv8-a+crc");
@@ -103,6 +125,48 @@ rte_hash_crc_init_alg(void)
 }

 /**
+ * Use single crc32 instruction to perform a hash on a 1 byte value.
+ * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+   if (likely(crc32_alg & CRC32_ARM64))
+   return crc32c_arm64_u8(data, init_val);
+
+   return crc32c_1byte(data, init_val);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 2 bytes value.
+ * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
+{
+   if (likely(crc32_alg & CRC32_ARM64))
+   return crc32c_arm64_u16(data, init_val);
+
+   return crc32c_2bytes(data, init_val);
+}
+
+/**
  * Use single crc32 instruction to perform a hash on a 4 byte value.
  * Fall back to software crc32 implementation in case arm64 crc intrinsics is
  * not supported
diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index 78a34b7..63e74aa 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -328,6 +328,28 @@ static const uint32_t crc32c_tables[8][256] = {{
 crc32c_tables[(n)-1][((crc) >> 8) & 0xFF])

 static inline uint32_t
+crc32c_1byte(uint8_t data, uint32_t init_val)
+{
+   uint32_t crc;
+   crc = init_val;
+   crc ^= data;
+
+   return crc32c_tables[0][crc & 0xff] ^ (crc >> 8);
+}
+
+static inline uint32_t
+crc32c_2bytes(uint16_t data, uint32_t init_val)
+{
+   uint32_t crc;
+   crc = init_val;
+   crc ^= data;
+
+   crc = CRC32_UPD(crc, 1) ^ (crc >> 16);
+
+   return crc;
+}
+
+static inline uint32_t
 crc32c_1word(uint32_t data, uint32_t init_val)
 {
uint32_t crc, term1, term2;
@@ -367,6 +389,26 @@ crc32c_2words(uint64_t data, uint32_t init_val)

 #if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)
 static inline uint32_t
+crc32c_sse42_u8(uint8_t data, uint32_t init_val)
+{
+   __asm__ volatile(
+   "crc32b %[data], %[init_val];"
+   : [init_val] "+r" (init_val)
+   : [data] "rm" (data));
+   return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u16(uint16_t data, uint32_t init_val)
+{
+   __asm__ volatile(
+   "crc32w %[data], %[init_val];"
+   : [init_val] "+r" (init_val)
+   : [data] "rm" (data));
+   return init_val;
+}
+
+static inline uint32_t
 crc32c_sse42_u32(uint32_t data, uint32_t init_val)
 {
__asm__ volatile(
@@ -453,6 +495,52 @@ rte_hash_crc_init_alg(void)
 }

 /**
+ * Use single crc32 inst

[dpdk-dev] [PATCH v3 1/2] test: fix CRC hash function autotest

2016-02-19 Thread Didier Pallard
Add some small key lengths (below 4 bytes), and fix the expected return
values for odd key lengths so that they match real CRC values.

Signed-off-by: Didier Pallard 
---
 app/test/test_hash_functions.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 3ad6d80..f767a48 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -54,26 +54,30 @@
  * e.g.: key size = 4, key = 0x03020100
  *   key size = 8, key = 0x0706050403020100
  */
-static uint32_t hash_values_jhash[2][10] = {{
+static uint32_t hash_values_jhash[2][12] = {{
+   0x8ba9414b, 0xdf0d39c9,
0xe4cf1d42, 0xd4ccb93c, 0x5e84eafc, 0x21362cfe,
0x2f4775ab, 0x9ff036cc, 0xeca51474, 0xbc9d6816,
0x12926a31, 0x1c9fa888
 },
 {
+   0x5c62c303, 0x1b8cf784,
0x8270ac65, 0x05fa6668, 0x762df861, 0xda088f2f,
0x59614cd4, 0x7a94f690, 0xdc1e4993, 0x30825494,
0x91d0e462, 0x768087fc
 }
 };
-static uint32_t hash_values_crc[2][10] = {{
+static uint32_t hash_values_crc[2][12] = {{
+   0x, 0xf26b8303,
0x91545164, 0x06040eb1, 0x9bb99201, 0xcc4c4fe4,
-   0x14a90993, 0xf8a5dd8c, 0xc62beb31, 0x32bf340e,
-   0x72f9d22b, 0x4a11475e
+   0x14a90993, 0xf8a5dd8c, 0xcaa1ad0b, 0x7ac1e03e,
+   0x43f44466, 0x4a11475e
 },
 {
+   0xbdfd3980, 0x70204542,
0x98cd4c70, 0xd52c702f, 0x41fc0e1c, 0x3905f65c,
-   0x94bff47f, 0x1bab102d, 0xd2911ed7, 0xe8faa813,
-   0x6bea184b, 0x53028d3e
+   0x94bff47f, 0x1bab102d, 0xf4a2c645, 0xbf441539,
+   0x789c104f, 0x53028d3e
 }
 };

@@ -89,6 +93,7 @@ static uint32_t hash_values_crc[2][10] = {{
 static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
 static uint32_t hashtest_initvals[] = {0, 0xdeadbeef};
 static uint32_t hashtest_key_lens[] = {
+   1, 2, /* Unusual key sizes */
4, 8, 16, 32, 48, 64, /* standard key sizes */
9,/* IPv4 SRC + DST + protocol, unpadded */
13,   /* IPv4 5-tuple, unpadded */
-- 
2.1.4



[dpdk-dev] [PATCH v3 0/2] Fix CRC32c computation

2016-02-19 Thread Didier Pallard
CRC32c computation is not valid when buffer length is not a multiple of 4 bytes.
Values returned by rte_hash_crc functions do not match the ones
computed by a trivial crc32c implementation.

First patch fixes crc hash function autotests, to outline the problem.
Second patch fixes CRC32c computation.

Didier Pallard (2):
  test: fix CRC hash function autotest
  hash: fix CRC32c computation

 app/test/test_hash_functions.c |  17 +++--
 doc/guides/rel_notes/release_16_04.rst |   5 ++
 lib/librte_hash/rte_crc_arm64.h|  64 +
 lib/librte_hash/rte_hash_crc.h | 125 ++---
 4 files changed, 178 insertions(+), 33 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH] vhost: add missing build dependency on librte_net

2016-02-19 Thread Thomas Monjalon
2016-02-19 09:56, Yuanhan Liu:
> On Thu, Feb 18, 2016 at 04:07:52PM +0200, Panu Matilainen wrote:
> > So where do the CC's vanish?
> 
> No idea. I also have met this issue __many__ times before: I made a
> group reply, with lots of people CC'ed, later I then received a copy
> (from the mailing list) with all cc list being vanished -- only
> dev at dpdk.org is left. However, I found the CC list was there while
> I checked the sent box.
> 
> I was firstly thinking it might be an issue of my email client. However,
> I also found same phenomenon from other's reply. Just not sure whether
> they removed the cc list on purpose or not, though. IIRC, this also
> happened to Bruce (CC'ed).
> 
> Anyway, since you have met similar issue just now, I guess it's time to 
> shout out and let this issue get noticed, or fixed if there is indeed
> an issue. The mailing list is with high chance being the culprit, IMO.
> Hence, Thomas is CC'ed.

Yes this is due to the "no duplicates" option of mailman:
https://bugs.launchpad.net/mailman/+bug/1216960

I am checking how to disable it.


[dpdk-dev] [PATCH v2 6/6] bond: do not activate slave twice

2016-02-19 Thread Eric Kinzie
From: Eric Kinzie 

The current code for detecting link during slave addition can cause a
slave interface to be activated twice -- once during slave_configure()
and again at the end of __eth_bond_slave_add_lock_free().  This will
either cause the active slave count to be incorrect or will cause the
802.3ad activation function to panic.  Ensure that the interface is not
activated more than once.

Signed-off-by: Eric Kinzie 
Signed-off-by: Stephen Hemminger 
Acked-by: Declan Doherty 
---
 drivers/net/bonding/rte_eth_bond_api.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/rte_eth_bond_api.c 
b/drivers/net/bonding/rte_eth_bond_api.c
index 630a461..def22d0 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -432,7 +432,11 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, 
uint8_t slave_port_id)
!internals->user_defined_primary_port)
bond_ethdev_primary_set(internals,
slave_port_id);
-   activate_slave(bonded_eth_dev, slave_port_id);
+
+   if (find_slave_by_id(internals->active_slaves,
+internals->active_slave_count,
+slave_port_id) == 
internals->active_slave_count)
+   activate_slave(bonded_eth_dev, slave_port_id);
}
}
return 0;
-- 
1.7.10.4



[dpdk-dev] [PATCH v2 5/6] bond: active slaves with no primary

2016-02-19 Thread Eric Kinzie
From: Eric Kinzie 

If the link state of a slave is "up" when added, it is added to the list
of active slaves but, even if it is the only slave, is not selected as
the primary interface.  Generally, handling of link state interrupts
selects an interface to be primary, but only if the active count is zero.
This change avoids the situation where there are active slaves but
no primary.

Signed-off-by: Eric Kinzie 
Signed-off-by: Stephen Hemminger 
Acked-by: Declan Doherty 
---
 drivers/net/bonding/rte_eth_bond_api.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/rte_eth_bond_api.c 
b/drivers/net/bonding/rte_eth_bond_api.c
index 8a000c8..630a461 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -427,8 +427,13 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, 
uint8_t slave_port_id)
if (bonded_eth_dev->data->dev_started) {
rte_eth_link_get_nowait(slave_port_id, &link_props);

-if (link_props.link_status == 1)
+if (link_props.link_status == 1) {
+   if (internals->active_slave_count == 0 &&
+   !internals->user_defined_primary_port)
+   bond_ethdev_primary_set(internals,
+   slave_port_id);
activate_slave(bonded_eth_dev, slave_port_id);
+   }
}
return 0;

-- 
1.7.10.4



[dpdk-dev] [PATCH v2 4/6] bond mode 4: allow external state machine

2016-02-19 Thread Eric Kinzie
From: Eric Kinzie 

Provide functions to allow an external 802.3ad state machine to transmit
and receive LACPDUs and to set the collection/distribution flags on
slave interfaces.

Signed-off-by: Eric Kinzie 
Signed-off-by: Stephen Hemminger 
Acked-by: Declan Doherty 
---
 drivers/net/bonding/rte_eth_bond_8023ad.c |  173 +
 drivers/net/bonding/rte_eth_bond_8023ad.h |   44 ++
 drivers/net/bonding/rte_eth_bond_8023ad_private.h |2 +
 drivers/net/bonding/rte_eth_bond_version.map  |6 +
 4 files changed, 225 insertions(+)

diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c 
b/drivers/net/bonding/rte_eth_bond_8023ad.c
index 1b7e93a..a260e06 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -42,6 +42,8 @@

 #include "rte_eth_bond_private.h"

+static void bond_mode_8023ad_ext_periodic_cb(void *arg);
+
 #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
 #define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
bond_dbg_get_time_diff_ms(), slave_id, \
@@ -1020,6 +1022,7 @@ bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
conf->update_timeout_ms = mode4->update_timeout_us / 1000;
conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
+   conf->slowrx_cb = mode4->slowrx_cb;
 }

 void
@@ -1041,8 +1044,11 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev,
conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
+   conf->slowrx_cb = NULL;
}

+   bond_mode_8023ad_stop(dev);
+
mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
@@ -1051,6 +1057,10 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev,
mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
mode4->update_timeout_us = conf->update_timeout_ms * 1000;
+   mode4->slowrx_cb = conf->slowrx_cb;
+
+   if (dev->data->dev_started)
+   bond_mode_8023ad_start(dev);
 }

 int
@@ -1068,6 +1078,13 @@ bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
 int
 bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
 {
+   struct bond_dev_private *internals = bond_dev->data->dev_private;
+   struct mode8023ad_private *mode4 = &internals->mode4;
+
+   if (mode4->slowrx_cb)
+   return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 
1000,
+   &bond_mode_8023ad_ext_periodic_cb, bond_dev);
+
return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
&bond_mode_8023ad_periodic_cb, bond_dev);
 }
@@ -1075,6 +1092,13 @@ bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
 void
 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
 {
+   struct bond_dev_private *internals = bond_dev->data->dev_private;
+   struct mode8023ad_private *mode4 = &internals->mode4;
+
+   if (mode4->slowrx_cb) {
+   rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb, 
bond_dev);
+   return;
+   }
rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
 }

@@ -1221,3 +1245,152 @@ rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t 
slave_id,
info->agg_port_id = port->aggregator_port_id;
return 0;
 }
+
+int
+rte_eth_bond_8023ad_ext_collect(uint8_t port_id, uint8_t slave_id, int enabled)
+{
+   struct rte_eth_dev *bond_dev;
+   struct bond_dev_private *internals;
+   struct mode8023ad_private *mode4;
+   struct port *port;
+
+   if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
+   return -EINVAL;
+
+   bond_dev = &rte_eth_devices[port_id];
+
+   if (!bond_dev->data->dev_started)
+   return -EINVAL;
+
+   internals = bond_dev->data->dev_private;
+   if (find_slave_by_id(internals->active_slaves,
+   internals->active_slave_count, slave_id) ==
+   internals->active_slave_count)
+   return -EINVAL;
+
+   mode4 = &internals->mode4;
+   if (mode4->slowrx_cb == NULL)
+   return -EINVAL;
+
+   port = &mode_8023ad_ports[slave_id];
+
+   if (enabled)
+   ACTOR_STATE_SET(port, COLLECTING);
+   else
+   ACTOR_STATE_CLR(port, COLLECTING);
+
+   return 0;
+}
+
+int
+rte_eth_bond_8023ad_ext_distrib(uint8_t port_id, uint8_t slave_id, int enabled)
+{
+   struct rte_eth_dev *bond_dev;
+   struct bond_dev_private *internals;
+   struct mode8023ad_private *mode4;
+   struct 

[dpdk-dev] [PATCH v2 3/6] bond mode 4: do not ignore multicast

2016-02-19 Thread Eric Kinzie
From: Eric Kinzie 

The bonding PMD in mode 4 puts all enslaved interfaces into promiscuous
mode in order to receive LACPDUs and must filter unwanted packets
after the traffic has been "collected".  Allow broadcast and multicast
through so that ARP and IPv6 neighbor discovery continue to work.

Fixes: 46fb43683679 ("bond: add mode 4")

Signed-off-by: Eric Kinzie 
Signed-off-by: Stephen Hemminger 
Acked-by: Declan Doherty 
---
 app/test/test_link_bonding_mode4.c |7 +--
 drivers/net/bonding/rte_eth_bond_pmd.c |1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/app/test/test_link_bonding_mode4.c 
b/app/test/test_link_bonding_mode4.c
index 713368d..31640cd 100644
--- a/app/test/test_link_bonding_mode4.c
+++ b/app/test/test_link_bonding_mode4.c
@@ -747,8 +747,11 @@ test_mode4_rx(void)
rte_eth_macaddr_get(test_params.bonded_port_id, &bonded_mac);
ether_addr_copy(&bonded_mac, &dst_mac);

-   /* Assert that dst address is not bonding address */
-   dst_mac.addr_bytes[0]++;
+   /* Assert that dst address is not bonding address.  Do not set the
+* least significant bit of the zero byte as this would create a
+* multicast address.
+*/
+   dst_mac.addr_bytes[0] += 2;

/* First try with promiscuous mode enabled.
 * Add 2 packets to each slave. First with bonding MAC address, second 
with
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index 2f193db..b938a68 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -171,6 +171,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf 
**bufs,
 * mode and packet address does not match. */
if (unlikely(hdr->ether_type == ether_type_slow_be ||
!collecting || (!promisc &&
+   !is_multicast_ether_addr(&hdr->d_addr) 
&&
!is_same_ether_addr(&bond_mac, 
&hdr->d_addr {

if (hdr->ether_type == ether_type_slow_be) {
-- 
1.7.10.4



[dpdk-dev] [PATCH v2 2/6] bond mode 4: copy entire config structure

2016-02-19 Thread Eric Kinzie
From: Eric Kinzie 

Copy all needed fields from the mode8023ad_private structure in
bond_mode_8023ad_conf_get().  This helps ensure that a subsequent call
to rte_eth_bond_8023ad_setup() is not passed uninitialized data that
would result in either incorrect behavior or a failed sanity check.

Fixes: 46fb43683679 ("bond: add mode 4")

Signed-off-by: Eric Kinzie 
Signed-off-by: Stephen Hemminger 
Acked-by: Declan Doherty 
---
 drivers/net/bonding/rte_eth_bond_8023ad.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c 
b/drivers/net/bonding/rte_eth_bond_8023ad.c
index b3b30f6..1b7e93a 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -1019,6 +1019,7 @@ bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / 
ms_ticks;
conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
conf->update_timeout_ms = mode4->update_timeout_us / 1000;
+   conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
 }

 void
-- 
1.7.10.4



[dpdk-dev] [PATCH v2 1/6] bond: use existing enslaved device queues

2016-02-19 Thread Eric Kinzie
This solves issues when an active device is added to a bond.

If a device to be enslaved already has transmit and/or receive queues
allocated, use those and then create any additional queues that are
necessary.

Signed-off-by: Eric Kinzie 
Signed-off-by: Stephen Hemminger 
Acked-by: Declan Doherty 
---
 drivers/net/bonding/rte_eth_bond_pmd.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index b63c886..2f193db 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -1344,7 +1344,9 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
}

/* Setup Rx Queues */
-   for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
+   /* Use existing queues, if any */
+   for (q_id = slave_eth_dev->data->nb_rx_queues;
+q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
bd_rx_q = (struct bond_rx_queue 
*)bonded_eth_dev->data->rx_queues[q_id];

errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, 
q_id,
@@ -1360,7 +1362,9 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
}

/* Setup Tx Queues */
-   for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
+   /* Use existing queues, if any */
+   for (q_id = slave_eth_dev->data->nb_tx_queues;
+q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
bd_tx_q = (struct bond_tx_queue 
*)bonded_eth_dev->data->tx_queues[q_id];

errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, 
q_id,
-- 
1.7.10.4



[dpdk-dev] [PATCH v2 0/6] bonding: fixes and enhancements

2016-02-19 Thread Eric Kinzie
These are bug fixes and some small enhancements to allow bonding
to work with external control (teamd). Please consider integrating
these into DPDK 2.2

Changes in v2:
- remove "bond: handle slaves with fewer queues than bonding device"
- remove "bond: per-slave intermediate rx ring"

Eric Kinzie (6):
  bond: use existing enslaved device queues
  bond mode 4: copy entire config structure
  bond mode 4: do not ignore multicast
  bond mode 4: allow external state machine
  bond: active slaves with no primary
  bond: do not activate slave twice

 app/test/test_link_bonding_mode4.c|7 +-
 drivers/net/bonding/rte_eth_bond_8023ad.c |  174 +
 drivers/net/bonding/rte_eth_bond_8023ad.h |   44 ++
 drivers/net/bonding/rte_eth_bond_8023ad_private.h |2 +
 drivers/net/bonding/rte_eth_bond_api.c|   13 +-
 drivers/net/bonding/rte_eth_bond_pmd.c|9 +-
 drivers/net/bonding/rte_eth_bond_version.map  |6 +
 7 files changed, 249 insertions(+), 6 deletions(-)

-- 
1.7.10.4



[dpdk-dev] [PATCH v2 2/2] cryptodev: change burst API to be crypto op oriented

2016-02-19 Thread Declan Doherty
This patch modifies the crypto burst enqueue/dequeue APIs to operate on bursts
of rte_crypto_op's rather than the current implementation which operates on
rte_mbuf bursts. This simplifies the burst processing in the crypto PMDs and the
use of crypto operations in general.

This change set also continues the separation of the symmetric operation
parameters from the more general operation parameters. This will simplify the
integration of asymmetric crypto operations in the future.

As well as the changes to the crypto APIs this patch adds functions for managing
rte_crypto_op pools to the cryptodev API. It modifies the existing PMDs, unit
tests and sample application to work with the modified APIs.

Finally this change set removes the now unused rte_mbuf_offload library.

Signed-off-by: Declan Doherty 
---
 MAINTAINERS|   4 -
 app/test/test_cryptodev.c  | 800 +++--
 app/test/test_cryptodev.h  |   9 +-
 app/test/test_cryptodev_perf.c | 253 +++
 config/common_bsdapp   |   7 -
 config/common_linuxapp |  11 +-
 doc/api/doxy-api-index.md  |   1 -
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 171 +++--
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c |  12 +-
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h |   2 +-
 drivers/crypto/qat/qat_crypto.c| 125 ++--
 drivers/crypto/qat/qat_crypto.h|  12 +-
 drivers/crypto/qat/rte_qat_cryptodev.c |   4 +-
 examples/l2fwd-crypto/main.c   | 264 ---
 lib/Makefile   |   1 -
 lib/librte_cryptodev/rte_crypto.h  | 345 +
 lib/librte_cryptodev/rte_crypto_sym.h  | 377 +-
 lib/librte_cryptodev/rte_cryptodev.c   |  74 ++
 lib/librte_cryptodev/rte_cryptodev.h   | 107 +--
 lib/librte_cryptodev/rte_cryptodev_version.map |   1 +
 lib/librte_mbuf/rte_mbuf.h |   6 -
 lib/librte_mbuf_offload/Makefile   |  52 --
 lib/librte_mbuf_offload/rte_mbuf_offload.c | 100 ---
 lib/librte_mbuf_offload/rte_mbuf_offload.h | 307 
 .../rte_mbuf_offload_version.map   |   7 -
 25 files changed, 1543 insertions(+), 1509 deletions(-)
 delete mode 100644 lib/librte_mbuf_offload/Makefile
 delete mode 100644 lib/librte_mbuf_offload/rte_mbuf_offload.c
 delete mode 100644 lib/librte_mbuf_offload/rte_mbuf_offload.h
 delete mode 100644 lib/librte_mbuf_offload/rte_mbuf_offload_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 628bc05..8d84dda 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -222,10 +222,6 @@ F: lib/librte_mbuf/
 F: doc/guides/prog_guide/mbuf_lib.rst
 F: app/test/test_mbuf.c

-Packet buffer offload - EXPERIMENTAL
-M: Declan Doherty 
-F: lib/librte_mbuf_offload/
-
 Ethernet API
 M: Thomas Monjalon 
 F: lib/librte_ether/
diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 951b443..29e4b29 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -35,7 +35,6 @@
 #include 
 #include 
 #include 
-#include 

 #include 
 #include 
@@ -48,7 +47,7 @@ static enum rte_cryptodev_type gbl_cryptodev_type;

 struct crypto_testsuite_params {
struct rte_mempool *mbuf_pool;
-   struct rte_mempool *mbuf_ol_pool;
+   struct rte_mempool *op_mpool;
struct rte_cryptodev_config conf;
struct rte_cryptodev_qp_conf qp_conf;

@@ -62,8 +61,7 @@ struct crypto_unittest_params {

struct rte_cryptodev_sym_session *sess;

-   struct rte_mbuf_offload *ol;
-   struct rte_crypto_sym_op *op;
+   struct rte_crypto_op *op;

struct rte_mbuf *obuf, *ibuf;

@@ -104,7 +102,7 @@ setup_test_string(struct rte_mempool *mpool,
return m;
 }

-#if HEX_DUMP
+#ifdef HEX_DUMP
 static void
 hexdump_mbuf_data(FILE *f, const char *title, struct rte_mbuf *m)
 {
@@ -112,27 +110,29 @@ hexdump_mbuf_data(FILE *f, const char *title, struct 
rte_mbuf *m)
 }
 #endif

-static struct rte_mbuf *
-process_crypto_request(uint8_t dev_id, struct rte_mbuf *ibuf)
+static struct rte_crypto_op *
+process_crypto_request(uint8_t dev_id, struct rte_crypto_op *op)
 {
-   struct rte_mbuf *obuf = NULL;
-#if HEX_DUMP
+#ifdef HEX_DUMP
hexdump_mbuf_data(stdout, "Enqueued Packet", ibuf);
 #endif

-   if (rte_cryptodev_enqueue_burst(dev_id, 0, &ibuf, 1) != 1) {
+   if (rte_cryptodev_enqueue_burst(dev_id, 0, &op, 1) != 1) {
printf("Error sending packet for encryption");
return NULL;
}
-   while (rte_cryptodev_dequeue_burst(dev_id, 0, &obuf, 1) == 0)
+
+   op = NULL;
+
+   while (rte_cryptodev_dequeue_burst(dev_id, 0, &op, 1) == 0)
rte_pause();

-#if HEX_DUMP
+#ifdef HEX_DUMP
if (obuf)
hexdump_mbuf_

[dpdk-dev] [PATCH v2 1/2] cryptodev: API tidy and changes to support future extensions

2016-02-19 Thread Declan Doherty
From: Fiona Trahe 

This patch splits symmetric specific definitions and functions away from the
common crypto APIs to facilitate the future extension and expansion of the
cryptodev framework, in order to allow asymmetric crypto operations to be
introduced at a later date, as well as to clean the logical structure of the
public includes. The patch also introduces the _sym prefix to symmetric
specific structure and functions to improve clarity in the API.

Signed-off-by: Fiona Trahe 
Signed-off-by: Declan Doherty 
---
 app/test/test_cryptodev.c  | 164 +++---
 app/test/test_cryptodev_perf.c |  79 +--
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c |  44 +-
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c |   6 +-
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h |   4 +-
 drivers/crypto/qat/qat_crypto.c|  51 +-
 drivers/crypto/qat/qat_crypto.h|  10 +-
 drivers/crypto/qat/rte_qat_cryptodev.c |   8 +-
 examples/l2fwd-crypto/main.c   |  33 +-
 lib/librte_cryptodev/Makefile  |   1 +
 lib/librte_cryptodev/rte_crypto.h  | 563 +--
 lib/librte_cryptodev/rte_crypto_sym.h  | 613 +
 lib/librte_cryptodev/rte_cryptodev.c   |  39 +-
 lib/librte_cryptodev/rte_cryptodev.h   |  80 ++-
 lib/librte_cryptodev/rte_cryptodev_pmd.h   |  32 +-
 lib/librte_mbuf_offload/rte_mbuf_offload.h |  22 +-
 16 files changed, 912 insertions(+), 837 deletions(-)
 create mode 100644 lib/librte_cryptodev/rte_crypto_sym.h

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 62f8fb0..951b443 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -57,13 +57,13 @@ struct crypto_testsuite_params {
 };

 struct crypto_unittest_params {
-   struct rte_crypto_xform cipher_xform;
-   struct rte_crypto_xform auth_xform;
+   struct rte_crypto_sym_xform cipher_xform;
+   struct rte_crypto_sym_xform auth_xform;

-   struct rte_cryptodev_session *sess;
+   struct rte_cryptodev_sym_session *sess;

struct rte_mbuf_offload *ol;
-   struct rte_crypto_op *op;
+   struct rte_crypto_sym_op *op;

struct rte_mbuf *obuf, *ibuf;

@@ -78,7 +78,7 @@ test_AES_CBC_HMAC_SHA512_decrypt_create_session_params(
struct crypto_unittest_params *ut_params);

 static int
-test_AES_CBC_HMAC_SHA512_decrypt_perform(struct rte_cryptodev_session *sess,
+test_AES_CBC_HMAC_SHA512_decrypt_perform(struct rte_cryptodev_sym_session 
*sess,
struct crypto_unittest_params *ut_params,
struct crypto_testsuite_params *ts_param);

@@ -165,7 +165,8 @@ testsuite_setup(void)
ts_params->mbuf_ol_pool = rte_pktmbuf_offload_pool_create(
"MBUF_OFFLOAD_POOL",
NUM_MBUFS, MBUF_CACHE_SIZE,
-   DEFAULT_NUM_XFORMS * sizeof(struct rte_crypto_xform),
+   DEFAULT_NUM_XFORMS *
+   sizeof(struct rte_crypto_sym_xform),
rte_socket_id());
if (ts_params->mbuf_ol_pool == NULL) {
RTE_LOG(ERR, USER1, "Can't create CRYPTO_OP_POOL\n");
@@ -220,7 +221,7 @@ testsuite_setup(void)

ts_params->conf.nb_queue_pairs = info.max_nb_queue_pairs;
ts_params->conf.socket_id = SOCKET_ID_ANY;
-   ts_params->conf.session_mp.nb_objs = info.max_nb_sessions;
+   ts_params->conf.session_mp.nb_objs = info.sym.max_nb_sessions;

TEST_ASSERT_SUCCESS(rte_cryptodev_configure(dev_id,
&ts_params->conf),
@@ -275,7 +276,7 @@ ut_setup(void)
ts_params->conf.nb_queue_pairs = DEFAULT_NUM_QPS_PER_QAT_DEVICE;
ts_params->conf.socket_id = SOCKET_ID_ANY;
ts_params->conf.session_mp.nb_objs =
-   (gbl_cryptodev_type == RTE_CRYPTODEV_QAT_PMD) ?
+   (gbl_cryptodev_type == RTE_CRYPTODEV_QAT_SYM_PMD) ?
DEFAULT_NUM_OPS_INFLIGHT :
DEFAULT_NUM_OPS_INFLIGHT;

@@ -319,7 +320,7 @@ ut_teardown(void)

/* free crypto session structure */
if (ut_params->sess) {
-   rte_cryptodev_session_free(ts_params->valid_devs[0],
+   rte_cryptodev_sym_session_free(ts_params->valid_devs[0],
ut_params->sess);
ut_params->sess = NULL;
}
@@ -464,7 +465,7 @@ test_queue_pair_descriptor_setup(

[dpdk-dev] [PATCH v2 0/2] cryptodev API changes

2016-02-19 Thread Declan Doherty
This patch set separates the symmetric crypto operations from generic operations
and then modifies the cryptodev burst API to accept bursts of rte_crypto_op
rather than rte_mbufs.

This patch set is dependent on the following bug fixes patches:

aesni_mb: strict-aliasing rule compilation fix
(http://dpdk.org/ml/archives/dev/2016-February/033193.html)

qat:fix build on 32-bit systems
(http://dpdk.org/ml/archives/dev/2016-February/033442.html)

aesni_mb: fix wrong return value
(http://dpdk.org/ml/archives/dev/2016-February/033193.html)

Various fixes for L2fwd-crypto

Declan Doherty (1):
  cryptodev: change burst API to be crypto op oriented

Fiona Trahe (1):
  cryptodev: API tidy and changes to support future extensions

 MAINTAINERS|   4 -
 app/test/test_cryptodev.c  | 890 +++--
 app/test/test_cryptodev.h  |   9 +-
 app/test/test_cryptodev_perf.c | 270 ---
 config/common_bsdapp   |   7 -
 config/common_linuxapp |  11 +-
 doc/api/doxy-api-index.md  |   1 -
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 199 ++---
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c |  18 +-
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h |   6 +-
 drivers/crypto/qat/qat_crypto.c| 154 ++--
 drivers/crypto/qat/qat_crypto.h|  14 +-
 drivers/crypto/qat/rte_qat_cryptodev.c |   8 +-
 examples/l2fwd-crypto/main.c   | 281 ---
 lib/Makefile   |   1 -
 lib/librte_cryptodev/Makefile  |   1 +
 lib/librte_cryptodev/rte_crypto.h  | 802 +++
 lib/librte_cryptodev/rte_crypto_sym.h  | 642 +++
 lib/librte_cryptodev/rte_cryptodev.c   | 113 ++-
 lib/librte_cryptodev/rte_cryptodev.h   | 183 ++---
 lib/librte_cryptodev/rte_cryptodev_pmd.h   |  32 +-
 lib/librte_cryptodev/rte_cryptodev_version.map |   1 +
 lib/librte_mbuf/rte_mbuf.h |   6 -
 lib/librte_mbuf_offload/Makefile   |  52 --
 lib/librte_mbuf_offload/rte_mbuf_offload.c | 100 ---
 lib/librte_mbuf_offload/rte_mbuf_offload.h | 307 ---
 .../rte_mbuf_offload_version.map   |   7 -
 27 files changed, 2114 insertions(+), 2005 deletions(-)
 create mode 100644 lib/librte_cryptodev/rte_crypto_sym.h
 delete mode 100644 lib/librte_mbuf_offload/Makefile
 delete mode 100644 lib/librte_mbuf_offload/rte_mbuf_offload.c
 delete mode 100644 lib/librte_mbuf_offload/rte_mbuf_offload.h
 delete mode 100644 lib/librte_mbuf_offload/rte_mbuf_offload_version.map

-- 
2.5.0



[dpdk-dev] [PATCH RFC 2/4] vhost: make buf vector for scatter RX local.

2016-02-19 Thread Ilya Maximets
On 19.02.2016 10:06, Yuanhan Liu wrote:
> On Fri, Feb 19, 2016 at 09:32:41AM +0300, Ilya Maximets wrote:
>> Array of buf_vector's is just an array for temporarily storing information
>> about available descriptors. It is used only locally in virtio_dev_merge_rx()
>> and there is no reason for that array to be shared.
>>
>> Fix that by allocating local buf_vec inside virtio_dev_merge_rx().
>>
>> Signed-off-by: Ilya Maximets 
>> ---
>>  lib/librte_vhost/rte_virtio_net.h |  1 -
>>  lib/librte_vhost/vhost_rxtx.c | 45 
>> ---
>>  2 files changed, 23 insertions(+), 23 deletions(-)
>>
>> diff --git a/lib/librte_vhost/rte_virtio_net.h 
>> b/lib/librte_vhost/rte_virtio_net.h
>> index 10dcb90..ae1e4fb 100644
>> --- a/lib/librte_vhost/rte_virtio_net.h
>> +++ b/lib/librte_vhost/rte_virtio_net.h
>> @@ -91,7 +91,6 @@ struct vhost_virtqueue {
>>  int kickfd; /**< Currently unused 
>> as polling mode is enabled. */
>>  int enabled;
>>  uint64_treserved[16];   /**< Reserve some 
>> spaces for future extension. */
>> -struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
>> scatter RX. */
>>  } __rte_cache_aligned;
> 
> I like this kind of cleanup, however, it breaks ABI.

Should I prepare version of this patch with field above marked as
deprecated and add note to doc/guides/rel_notes/release_16_04.rst
about future deletion?

Best regards, Ilya Maximets.


[dpdk-dev] [PATCH v7 2/4] virtio: Introduce config RTE_VIRTIO_INC_VECTOR

2016-02-19 Thread Santosh Shukla
On Tue, Feb 16, 2016 at 8:35 AM, Yuanhan Liu
 wrote:
> On Mon, Feb 15, 2016 at 04:48:36PM +0530, Santosh Shukla wrote:
>> Hi Yuanhan,
>>
>> On Mon, Feb 15, 2016 at 4:27 PM, Yuanhan Liu
>>  wrote:
>> > On Mon, Feb 15, 2016 at 03:22:11PM +0530, Santosh Shukla wrote:
>> >> Hi Yuanhan,
>> >>
>> >> I guess you are back from vacation.
>> >>
>> >> Can you pl. review this patch, Except this patch, rest of patches
>> >> received ack-by:
>> >
>> > I had a quick glimpse of the comments from Thomas: he made a good point.
>> > I will have a deeper thought tomorrow, to see what I can do to fix it.
>> >
>>
>> I agree to what Thomas pointed out about runtime mode switch (vectored
>> vs non-vectored). I have a proposal in my mind and Like to know you
>> opinion:
>>
>> - need for apis like is_arch_support_vec().
>>
>> if (is_arch_support_vec())
>>  simple_ = 1 /* Switch code path to vector mode */
>> else
>>  simple_ = 0  /* Switch code path to non-vector mode */
>>
>> That api should reside to arch file. i.e.. arch like i686/arm{for
>> implementation not exist so say no supported} will return 0 and for
>> x86_64 = 1
>
> I was thinking that Thomas meant to something like below (like what
> we did at rte_memcpy.h):
>
> #ifdef RTE_MACHINE_CPUFLAG_SSE (or whatever)
>
> /* with vec here */
>
> #else
>
> /* without vec here */
>
> #endif
>
> I mean, you have to bypass the build first; otherwise, you can't
> go that further to runtime, right?
>

I meant: move virtio_recv_pkt_vec() implementation in
lib/libeal_rte/xx/include/arch/xx/virtio_vec.h. virtio driver to check
for CPUFLAG supported or not and then use _recv_pkt() call back
function from arch files. This approach will avoid #ifdef ARCH
clutter.

This patch is blocking virtio-for-arm entry which is floating close to
month or so, if no taker for this topic then pl. let me know, I'll
propose a patch. Thanks!

>
> Huawei, since it's your patch introduced such issue, mind to fix
> it?
>
> --yliu
>>
>> Does this make sense?
>>
>> Thanks
>> > --yliu
>> >>
>> >> Thanks
>> >>
>> >> On Mon, Feb 8, 2016 at 11:15 AM, Santosh Shukla  
>> >> wrote:
>> >> > On Mon, Feb 8, 2016 at 2:55 AM, Thomas Monjalon
>> >> >  wrote:
>> >> >> 2016-02-07 19:21, Santosh Shukla:
>> >> >>> - virtio_recv_pkts_vec and other virtio vector friend apis are 
>> >> >>> written for
>> >> >>>   sse/avx instructions. For arm64 in particular, virtio vector 
>> >> >>> implementation
>> >> >>>   does not exist(todo).
>> >> >>>
>> >> >>> So virtio pmd driver wont build for targets like i686, arm64.  By 
>> >> >>> making
>> >> >>> RTE_VIRTIO_INC_VECTOR=n, Driver can build for non-sse/avx targets and 
>> >> >>> will work
>> >> >>> in non-vectored virtio mode.
>> >> >>>
>> >> >>> Disabling RTE_VIRTIO_INC_VECTOR config for :
>> >> >>>
>> >> >>> - i686 arch as i686 target config says:
>> >> >>>   config/defconfig_i686-native-linuxapp-gcc says "Vectorized PMD is 
>> >> >>> not
>> >> >>>   supported on 32-bit".
>> >> >>>
>> >> >>> - armv7/v8 arch.
>> >> >>
>> >> >> Yes it can be useful to disable vector optimizations, but it should be
>> >> >> done
>> >> >> at runtime, not a compilation option. I know it is already wrongly 
>> >> >> configured
>> >> >> at compilation for other drivers, we should fix them.
>> >> >>
>> >> >
>> >> > Can't we consider this separate topic. My intent is virtio works for 
>> >> > arm.
>> >> >
>> >> >> Here, you want to avoid SSE/AVX code on ARM. So we should just add the
>> >> >> appropriate ifdefs. Adding a compilation option does not prevent from 
>> >> >> enabling
>> >> >> it on ARM or old x86 which do not support these instructions.
>> >> >>
>> >> >
>> >> > By disabling VIRTIO_INC_VEC, compiler wont build
>> >> > virtio_recv_pkts_vec(), so wont generate SSE/AVX code. Adding ifdef
>> >> > for other arch example arm, is next step. Vector instruction for arm
>> >> > are not fully supported, Its a todolist (Pl. refer my early v1/2
>> >> > cover-letter), We'll add that after virtio functionally works for arm.
>> >> >
>> >> >> Please virtio maintainers, we need to fix this code. Thanks


[dpdk-dev] [PATCH v3] vhost: remove vhost_net_device_ops

2016-02-19 Thread Rich Lane
The indirection is unnecessary because there is only one implementation
of the vhost common code. Removing it makes the code more readable.

Signed-off-by: Rich Lane 
Acked-by: Yuanhan Liu 
---
v2->v3:
- Rebased.
v1->v2:
- Fix long lines.

 examples/vhost_xen/virtio-net.h   |  2 -
 lib/librte_vhost/vhost-net.h  | 42 +---
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c  | 27 
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c |  4 +-
 lib/librte_vhost/vhost_user/vhost-net-user.c  | 23 +++
 lib/librte_vhost/vhost_user/virtio-net-user.c |  6 +-
 lib/librte_vhost/virtio-net.c | 94 +--
 7 files changed, 73 insertions(+), 125 deletions(-)

diff --git a/examples/vhost_xen/virtio-net.h b/examples/vhost_xen/virtio-net.h
index c8c5a7a..ab69726 100644
--- a/examples/vhost_xen/virtio-net.h
+++ b/examples/vhost_xen/virtio-net.h
@@ -110,6 +110,4 @@ struct virtio_net_device_ops {
void (* destroy_device) (volatile struct virtio_net *); /* Remove 
device. */
 };

-struct vhost_net_device_ops const * get_virtio_net_callbacks(void);
-
 #endif
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index affbd1a..f193a1f 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -43,8 +43,6 @@

 #include "rte_virtio_net.h"

-extern struct vhost_net_device_ops const *ops;
-
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
 #define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
@@ -85,36 +83,28 @@ struct vhost_device_ctx {
uint64_tfh; /* Populated with fi->fh to track the device 
index. */
 };

-/*
- * Structure contains function pointers to be defined in virtio-net.c. These
- * functions are called in CUSE context and are used to configure devices.
- */
-struct vhost_net_device_ops {
-   int (*new_device)(struct vhost_device_ctx);
-   void (*destroy_device)(struct vhost_device_ctx);
-
-   void (*set_ifname)(struct vhost_device_ctx,
-   const char *if_name, unsigned int if_len);
-
-   int (*get_features)(struct vhost_device_ctx, uint64_t *);
-   int (*set_features)(struct vhost_device_ctx, uint64_t *);
+int vhost_new_device(struct vhost_device_ctx);
+void vhost_destroy_device(struct vhost_device_ctx);

-   int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state 
*);
-   int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr 
*);
-   int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state 
*);
-   int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct 
vhost_vring_state *);
+void vhost_set_ifname(struct vhost_device_ctx,
+   const char *if_name, unsigned int if_len);

-   int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file 
*);
-   int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file 
*);
+int vhost_get_features(struct vhost_device_ctx, uint64_t *);
+int vhost_set_features(struct vhost_device_ctx, uint64_t *);

-   int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
+int vhost_set_vring_num(struct vhost_device_ctx, struct vhost_vring_state *);
+int vhost_set_vring_addr(struct vhost_device_ctx, struct vhost_vring_addr *);
+int vhost_set_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
+int vhost_get_vring_base(struct vhost_device_ctx,
+   uint32_t, struct vhost_vring_state *);

-   int (*set_owner)(struct vhost_device_ctx);
-   int (*reset_owner)(struct vhost_device_ctx);
-};
+int vhost_set_vring_kick(struct vhost_device_ctx, struct vhost_vring_file *);
+int vhost_set_vring_call(struct vhost_device_ctx, struct vhost_vring_file *);

+int vhost_set_backend(struct vhost_device_ctx, struct vhost_vring_file *);

-struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
+int vhost_set_owner(struct vhost_device_ctx);
+int vhost_reset_owner(struct vhost_device_ctx);

 /*
  * Backend-specific cleanup. Defined by vhost-cuse and vhost-user.
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c 
b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index ae7ad8d..c613e68 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -58,7 +58,6 @@ static const char cuse_device_name[] = "/dev/cuse";
 static const char default_cdev[] = "vhost-net";

 static struct fuse_session *session;
-struct vhost_net_device_ops const *ops;

 /*
  * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
@@ -86,7 +85,7 @@ vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
int err = 0;

-   err = ops->new_device(ctx);
+   err = vhost_new_device(ctx);
if (err == -1) {
fuse_reply_err(req, EPERM);
return;
@@ -108,7 +107,7 @@ vhost_net_release(fuse_req_t req, struct fuse

[dpdk-dev] [PATCH] vhost: add missing build dependency on librte_net

2016-02-19 Thread Yuanhan Liu
On Thu, Feb 18, 2016 at 04:07:52PM +0200, Panu Matilainen wrote:
> >I didn't see the author was cc'ed from my email client:
> >
> > Date: Thu, 18 Feb 2016 11:47:43 +0200
> > From: Panu Matilainen 
> > To: dev at dpdk.org
> > Subject: [dpdk-dev] [PATCH] vhost: add missing build dependency on 
> > librte_net
> 
> Hmm, indeed. But this is what git told me (happened to have the whole thing
> in scrollback buffer):
> 
> [pmatilai at sopuli dpdk]$ git send-email --cc="jijiang.liu at intel.com"
> --cc="huawei.xie at intel.com" -1
> /tmp/ZAW8ErlHWe/0001-vhost-add-missing-build-dependency-on-librte_net.patch
> 
> From: Panu Matilainen 
> To: dev at dpdk.org
> Cc: jijiang.liu at intel.com,
>   huawei.xie at intel.com
> Subject: [PATCH] vhost: add missing build dependency on librte_net
> Date: Thu, 18 Feb 2016 11:47:43 +0200
> Message-Id:
>  redhat.com>
> X-Mailer: git-send-email 2.5.0
> 
> Send this email? ([y]es|[n]o|[q]uit|[a]ll): a
> OK. Log says:
> Server: smtp.corp.redhat.com
> MAIL FROM:
> RCPT TO:
> RCPT TO:
> RCPT TO:
> From: Panu Matilainen 
> To: dev at dpdk.org
> Cc: jijiang.liu at intel.com,
>   huawei.xie at intel.com
> Subject: [PATCH] vhost: add missing build dependency on librte_net
> Date: Thu, 18 Feb 2016 11:47:43 +0200
> Message-Id:
>  redhat.com>
> X-Mailer: git-send-email 2.5.0
> 
> So where do the CC's vanish?

No idea. I also have met this issue __many__ times before: I made a
group reply, with lots of people CC'ed, later I then received a copy
(from the mailing list) with all cc list being vanished -- only
dev at dpdk.org is left. However, I found the CC list was there while
I checked the sent box.

I was firstly thinking it might be an issue of my email client. However,
I also found same phenomenon from other's reply. Just not sure whether
they removed the cc list on purpose or not, though. IIRC, this also
happened to Bruce (CC'ed).

Anyway, since you have met similar issue just now, I guess it's time to 
shout out and let this issue get noticed, or fixed if there is indeed
an issue. The mailing list is with high chance being the culprit, IMO.
Hence, Thomas is CC'ed.

BTW, I have never met this issue with git send-email.

--yliu


[dpdk-dev] [dpdk-dev, v3] Implement memcmp using Intel SIMD instrinsics.

2016-02-19 Thread Ravi Kerur
On Wed, Jan 27, 2016 at 7:08 PM, Zhihong Wang 
wrote:

> > diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib
> > /librte_eal/common/include/arch/x86/rte_memcmp.h
>
> [...]
>
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +/**
> > + * Compare bytes between two locations. The locations must not overlap.
> > + *
>
> Parameter names should be kept consistent as they are in function body.
>
> > + * @param src_1
> > + *   Pointer to the first source of the data.
> > + * @param src_2
> > + *   Pointer to the second source of the data.
> > + * @param n
> > + *   Number of bytes to compare.
> > + * @return
> > + *   zero if src_1 equal src_2
> > + *   -ve if src_1 less than src_2
> > + *   +ve if src_1 greater than src_2
> > + */
> > +static inline int
> > +rte_memcmp(const void *src_1, const void *src,
> > + size_t n) __attribute__((always_inline));
> > +
> > +/**
> > + * Find the first different bit for comparison.
> > + */
> > +static inline int
> > +rte_cmpffd (uint32_t x, uint32_t y)
> > +{
> > + int i;
> > + int pos = x ^ y;
> > + for (i = 0; i < 32; i++)
> > + if (pos & (1<
> Coding style check :-)
> BTW, does the bsf instruction provide this check?
>
> > + return i;
> > + return -1;
> > +}
> > +
>
> [...]
>
> > +/**
> > + * Compare 48 bytes between two locations.
> > + * Locations should not overlap.
> > + */
> > +static inline int
> > +rte_cmp48(const void *src_1, const void *src_2)
>
> Guess this is not used.
>

I had left _unused_ with the assumption that it might be needed when actual
performance tests are done on high end servers.

>
> [...]
>
> > +/**
> > + * Compare 256 bytes between two locations.
> > + * Locations should not overlap.
> > + */
> > +static inline int
> > +rte_cmp256(const void *src_1, const void *src_2)
> > +{
> > + int ret;
> > +
> > + ret = rte_cmp64((const uint8_t *)src_1 + 0 * 64,
> > + (const uint8_t *)src_2 + 0 * 64);
>
> Why not just use rte_cmp128?
>
>
> [...]
>
> > +static inline int
> > +rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
> > +{
> > + const uint8_t *src_1 = (const uint8_t *)_src_1;
> > + const uint8_t *src_2 = (const uint8_t *)_src_2;
> > + int ret = 0;
> > +
> > + if (n < 16)
> > + return rte_memcmp_regular(src_1, src_2, n);
> > +
> > + if (n <= 32) {
> > + ret = rte_cmp16(src_1, src_2);
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
> > + }
> > +
>
> Too many conditions here may harm the overall performance.
> It's a trade-off thing, all about balancing the overhead.
> Just make sure this is tuned based on actual test numbers.
>
>
> > + if (n <= 48) {
> > + ret = rte_cmp32(src_1, src_2);
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
> > + }
> > +
> > + if (n <= 64) {
> > + ret = rte_cmp32(src_1, src_2);
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + ret = rte_cmp16(src_1 + 32, src_2 + 32);
> > +
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
> > + }
> > +
> > + if (n <= 96) {
> > + ret = rte_cmp64(src_1, src_2);
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + ret = rte_cmp16(src_1 + 64, src_2 + 64);
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
> > + }
> > +
> > + if (n <= 128) {
> > + ret = rte_cmp64(src_1, src_2);
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + ret = rte_cmp32(src_1 + 64, src_2 + 64);
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + ret = rte_cmp16(src_1 + 96, src_2 + 96);
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
> > + }
>
> [...]
>
> > +/**
> > + * Compare 48 bytes between two locations.
> > + * Locations should not overlap.
> > + */
> > +static inline int
> > +rte_cmp48(const void *src_1, const void *src_2)
>
> Not used.
>
> > +{
> > + int ret;
> > +
> > + ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16,
> > + (const uint8_t *)src_2 + 0 * 16);
> > +
> > + if (unlikely(ret != 0))
> > + return ret;
> > +
> > + ret = rte_cmp16((const uint8_t *)src_1 + 1 * 16,
> > + (const uint8_t *)src_2 + 1 * 16);
> > +
> > + if (unlikely(ret != 0))
> > + retur

[dpdk-dev] [dpdk-dev,v2] Clean up rte_memcpy.h file

2016-02-19 Thread Ravi Kerur
On Wed, Jan 27, 2016 at 8:18 PM, Zhihong Wang 
wrote:

> > Remove unnecessary type casting in functions.
> >
> > Tested on Ubuntu (14.04 x86_64) with "make test".
> > "make test" results match the results with baseline.
> > "Memcpy perf" results match the results with baseline.
> >
> > Signed-off-by: Ravi Kerur 
> > Acked-by: Stephen Hemminger 
> >
> > ---
> > .../common/include/arch/x86/rte_memcpy.h   | 340
> +++--
> >  1 file changed, 175 insertions(+), 165 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
> b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
> > index 6a57426..839d4ec 100644
> > --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
> > +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
>
> [...]
>
> >  /**
> > @@ -150,13 +150,16 @@ rte_mov64blocks(uint8_t *dst, const uint8_t *src,
> size_t n)
> >   __m256i ymm0, ymm1;
> >
> >   while (n >= 64) {
> > - ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t
> *)src + 0 * 32));
> > +
> > + ymm0 = _mm256_loadu_si256((const __m256i *)(src + 0 * 32));
> > + ymm1 = _mm256_loadu_si256((const __m256i *)(src + 1 * 32));
> > +
> > + _mm256_storeu_si256((__m256i *)(dst + 0 * 32), ymm0);
> > + _mm256_storeu_si256((__m256i *)(dst + 1 * 32), ymm1);
> > +
>
> Any particular reason to change the order of the statements here? :)
> Overall this patch looks good.
>

Sorry for the late response. Let me double check and get back to you, it's
been a while since I did the changes.


> >   n -= 64;
> > - ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t
> *)src + 1 * 32));
> > - src = (const uint8_t *)src + 64;
> > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32),
> ymm0);
> > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32),
> ymm1);
> > - dst = (uint8_t *)dst + 64;
> > + src = src + 64;
> > + dst = dst + 64;
> >   }
> >  }
> >
>
>


[dpdk-dev] [PATCH] aesni_mb: fix build clean

2016-02-19 Thread Declan Doherty
On 18/02/16 19:21, Thomas Monjalon wrote:
> The variable AESNI_MULTI_BUFFER_LIB_PATH is not required for
>   make clean
>
> Signed-off-by: Thomas Monjalon 
> ---
>   drivers/crypto/aesni_mb/Makefile | 2 ++
>   1 file changed, 2 insertions(+)
>
> diff --git a/drivers/crypto/aesni_mb/Makefile 
> b/drivers/crypto/aesni_mb/Makefile
> index 3bf83d1..ec65291 100644
> --- a/drivers/crypto/aesni_mb/Makefile
> +++ b/drivers/crypto/aesni_mb/Makefile
> @@ -30,9 +30,11 @@
>
>   include $(RTE_SDK)/mk/rte.vars.mk
>
> +ifneq ($(MAKECMDGOALS),clean)
>   ifeq ($(AESNI_MULTI_BUFFER_LIB_PATH),)
>   $(error "Please define AESNI_MULTI_BUFFER_LIB_PATH environment variable")
>   endif
> +endif
>
>   # library name
>   LIB = librte_pmd_aesni_mb.a
>

Acked-by: Declan Doherty 


[dpdk-dev] [PATCH RFC 4/4] doc: add note about rte_vhost_enqueue_burst thread safety.

2016-02-19 Thread Ilya Maximets
Signed-off-by: Ilya Maximets 
---
 doc/guides/prog_guide/thread_safety_dpdk_functions.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/guides/prog_guide/thread_safety_dpdk_functions.rst 
b/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
index 403e5fc..13a6c89 100644
--- a/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
+++ b/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
@@ -67,6 +67,7 @@ then locking, or some other form of mutual exclusion, is 
necessary.
 The ring library is based on a lockless ring-buffer algorithm that maintains 
its original design for thread safety.
 Moreover, it provides high performance for either multi- or 
single-consumer/producer enqueue/dequeue operations.
 The mempool library is based on the DPDK lockless ring library and therefore 
is also multi-thread safe.
+rte_vhost_enqueue_burst() is also thread safe because based on lockless 
ring-buffer algorithm like the ring library.

 Performance Insensitive API
 ---
-- 
2.5.0



[dpdk-dev] [PATCH RFC 3/4] vhost: avoid reordering of used->idx and last_used_idx updating.

2016-02-19 Thread Ilya Maximets
Calling rte_vhost_enqueue_burst() simultaneously from different threads
for the same queue_id requires additional SMP memory barrier to avoid
reordering of used->idx and last_used_idx updates.

In case of virtio_dev_rx() memory barrier rte_mb() simply moved one
instruction higher.

Signed-off-by: Ilya Maximets 
---
 lib/librte_vhost/vhost_rxtx.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 9095fb1..a03f687 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -281,10 +281,13 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
rte_pause();

*(volatile uint16_t *)&vq->used->idx += count;
-   vq->last_used_idx = res_end_idx;

-   /* flush used->idx update before we read avail->flags. */
+   /*
+* Flush used->idx update to make it visible to virtio and all other
+* threads before allowing to modify it.
+*/
rte_mb();
+   vq->last_used_idx = res_end_idx;

/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
@@ -586,19 +589,24 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
rte_pause();

*(volatile uint16_t *)&vq->used->idx += entry_success;
+   /*
+* Flush used->idx update to make it visible to all
+* other threads before allowing to modify it.
+*/
+   rte_smp_wmb();
+
vq->last_used_idx = res_cur_idx;
}

 merge_rx_exit:
if (likely(pkt_idx)) {
-   /* flush used->idx update before we read avail->flags. */
+   /* Flush used->idx update to make it visible to virtio. */
rte_mb();

/* Kick the guest if necessary. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
eventfd_write(vq->callfd, (eventfd_t)1);
}
-
return pkt_idx;
 }

-- 
2.5.0



[dpdk-dev] [PATCH RFC 2/4] vhost: make buf vector for scatter RX local.

2016-02-19 Thread Ilya Maximets
The array of buf_vector's is just an array for temporarily storing information
about available descriptors. It is used only locally in virtio_dev_merge_rx()
and there is no reason for that array to be shared.

Fix that by allocating local buf_vec inside virtio_dev_merge_rx().

Signed-off-by: Ilya Maximets 
---
 lib/librte_vhost/rte_virtio_net.h |  1 -
 lib/librte_vhost/vhost_rxtx.c | 45 ---
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 10dcb90..ae1e4fb 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -91,7 +91,6 @@ struct vhost_virtqueue {
int kickfd; /**< Currently unused 
as polling mode is enabled. */
int enabled;
uint64_treserved[16];   /**< Reserve some 
spaces for future extension. */
-   struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
scatter RX. */
 } __rte_cache_aligned;


diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 411dd95..9095fb1 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -295,7 +295,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 static inline uint32_t __attribute__((always_inline))
 copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t queue_id,
uint16_t res_base_idx, uint16_t res_end_idx,
-   struct rte_mbuf *pkt)
+   struct rte_mbuf *pkt, struct buf_vector *buf_vec)
 {
uint32_t vec_idx = 0;
uint32_t entry_success = 0;
@@ -325,7 +325,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,
 */
vq = dev->virtqueue[queue_id];

-   vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+   vb_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
vb_hdr_addr = vb_addr;

/* Prefetch buffer address. */
@@ -345,19 +345,19 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,

seg_avail = rte_pktmbuf_data_len(pkt);
vb_offset = vq->vhost_hlen;
-   vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+   vb_avail = buf_vec[vec_idx].buf_len - vq->vhost_hlen;

entry_len = vq->vhost_hlen;

if (vb_avail == 0) {
uint32_t desc_idx =
-   vq->buf_vec[vec_idx].desc_idx;
+   buf_vec[vec_idx].desc_idx;

if ((vq->desc[desc_idx].flags
& VRING_DESC_F_NEXT) == 0) {
/* Update used ring with desc information */
vq->used->ring[cur_idx & (vq->size - 1)].id
-   = vq->buf_vec[vec_idx].desc_idx;
+   = buf_vec[vec_idx].desc_idx;
vq->used->ring[cur_idx & (vq->size - 1)].len
= entry_len;

@@ -367,12 +367,12 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,
}

vec_idx++;
-   vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+   vb_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);

/* Prefetch buffer address. */
rte_prefetch0((void *)(uintptr_t)vb_addr);
vb_offset = 0;
-   vb_avail = vq->buf_vec[vec_idx].buf_len;
+   vb_avail = buf_vec[vec_idx].buf_len;
}

cpy_len = RTE_MIN(vb_avail, seg_avail);
@@ -399,11 +399,11 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,
 * entry reach to its end.
 * But the segment doesn't complete.
 */
-   if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
+   if ((vq->desc[buf_vec[vec_idx].desc_idx].flags &
VRING_DESC_F_NEXT) == 0) {
/* Update used ring with desc information */
vq->used->ring[cur_idx & (vq->size - 1)].id
-   = vq->buf_vec[vec_idx].desc_idx;
+   = buf_vec[vec_idx].desc_idx;
vq->used->ring[cur_idx & (vq->size - 1)].len
= entry_len;
entry_len = 0;
@@ -413,9 +413,9 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,

vec_idx++;
vb_addr = gpa_to_vva(dev,
-   vq->buf_vec[vec_idx].buf_addr);
+   buf_vec[vec_idx].buf_addr);
vb_offset = 0;
-   vb_avail = vq->buf_vec[vec_idx].buf_len;
+   vb_avail = buf_vec[vec_i

[dpdk-dev] [PATCH RFC 1/4] vhost: use SMP barriers instead of compiler ones.

2016-02-19 Thread Ilya Maximets
Since commit 4c02e453cc62 ("eal: introduce SMP memory barriers") virtio
uses architecture dependent SMP barriers. vHost should use them too.

Fixes: 4c02e453cc62 ("eal: introduce SMP memory barriers")

Signed-off-by: Ilya Maximets 
---
 lib/librte_vhost/vhost_rxtx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 5e7e5b1..411dd95 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -274,7 +274,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
}
}

-   rte_compiler_barrier();
+   rte_smp_wmb();

/* Wait until it's our turn to add our buffer to the used ring. */
while (unlikely(vq->last_used_idx != res_base_idx))
@@ -575,7 +575,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
entry_success = copy_from_mbuf_to_vring(dev, queue_id,
res_base_idx, res_cur_idx, pkts[pkt_idx]);

-   rte_compiler_barrier();
+   rte_smp_wmb();

/*
 * Wait until it's our turn to add our buffer
@@ -917,7 +917,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
entry_success++;
}

-   rte_compiler_barrier();
+   rte_smp_rmb();
vq->used->idx += entry_success;
/* Kick guest if required. */
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-- 
2.5.0



[dpdk-dev] [PATCH RFC 0/4] Thread safe rte_vhost_enqueue_burst().

2016-02-19 Thread Ilya Maximets
The implementation of rte_vhost_enqueue_burst() is based on a lockless
ring-buffer algorithm and contains almost everything needed to be thread-safe,
but it is not.

This set adds required changes.

The first patch in the set is a standalone patch that fixes the
many-times-discussed issue with barriers on different architectures.

The second and third add fixes to make rte_vhost_enqueue_burst thread safe.
The last is a documentation fix.

Ilya Maximets (4):
  vhost: use SMP barriers instead of compiler ones.
  vhost: make buf vector for scatter RX local.
  vhost: avoid reordering of used->idx and last_used_idx updating.
  doc: add note about rte_vhost_enqueue_burst thread safety.

 .../prog_guide/thread_safety_dpdk_functions.rst|  1 +
 lib/librte_vhost/rte_virtio_net.h  |  1 -
 lib/librte_vhost/vhost_rxtx.c  | 67 --
 3 files changed, 39 insertions(+), 30 deletions(-)

-- 
2.5.0



[dpdk-dev] Future Direction for rte_eth_stats_get()

2016-02-19 Thread Tahhan, Maryam
> From: David Harton (dharton) [mailto:dharton at cisco.com]
> Sent: Friday, February 5, 2016 9:16 PM
> To: Van Haaren, Harry ; Thomas
> Monjalon 
> Cc: dev at dpdk.org; Tahhan, Maryam ;
> Mcnamara, John 
> Subject: RE: [dpdk-dev] Future Direction for rte_eth_stats_get()
> 
> 
> > From: Van Haaren, Harry [mailto:harry.van.haaren at intel.com]
> >
> > > From: David Harton
> > > Subject: RE: [dpdk-dev] Future Direction for rte_eth_stats_get()
> > >
> > > Hi folks,
> > >
> > > I didn't see any follow up to this response.
> >
> > I think you may have missed one:
> > http://dpdk.org/ml/archives/dev/2016-January/032211.html
> 
> Apologies Harry!  I didn't see your original post because the IT gods had
> decided your response was "Junk" mail and it didn't make it to my
> dev_dpdk.org mail folder. :(
> 
> A colleague actually pointed me to this post separately today.  I've made
> the Junk mailer a little smarter now...hopefully.
> 
> 
> >
> > I'm looking at the enum thinking it will grow out of control.
> > Have you thought about adding metadata for RX / TX, PF / VF?
> 
> Yes, after thinking about it more I think it could get crazy.
> 
> >
> > If metadata info is added, it would make retrieving a set of
> > statistics based on a certain mask much easier. Do you think this may
> be of use?
> 
> Actually, I put a fair bit of thought into things and then realized, why re-
> invent the wheel?
> Why not follow the ethtool stats model?
> 
> struct rte_eth_xstats_name {
> char name[RTE_ETH_XSTATS_NAME_SIZE]; };
> 
> extern int rte_eth_xtats_count(uint8_t port_id, unsigned *count); extern
> int rte_eth_xtats_strings(uint8_t port_id, unsigned count, struct
> rte_eth_xtats_name *names); extern int rte_eth_xtats_values(uint8_t
> port_id, unsigned count, uint64_t *values);
> 
> The existing API could be left in-place and these could be added for folks
> that don't want to grab the strings all the time.
> 
> The cons compared to providing an enum or extending struct
> rte_eth_stats are:
>  - you have to perform a query immediately after the device is attached
>  - doesn't require conformity...which has pros and cons
> 
> I'm actually testing the changes above if folks think this would be a
> reasonable compromise I can patch them up.
> 

I think this is a reasonable compromise. 

> I still feel the feedback myself and others gave about rte_eth_stats_get()
> being closer to a standard MIB should get some consideration.
 +1

> Applications that run with a number of different drivers/device types
> likely want to avoid having to create "xstats mapping tables" every time
> a new device pops out just so they can debug problems.
> 
> Thanks,
> Dave



[dpdk-dev] [PATCH RFC 4/4] doc: add note about rte_vhost_enqueue_burst thread safety.

2016-02-19 Thread Xie, Huawei
On 2/19/2016 3:10 PM, Yuanhan Liu wrote:
> On Fri, Feb 19, 2016 at 09:32:43AM +0300, Ilya Maximets wrote:
>> Signed-off-by: Ilya Maximets 
>> ---
>>  doc/guides/prog_guide/thread_safety_dpdk_functions.rst | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/doc/guides/prog_guide/thread_safety_dpdk_functions.rst 
>> b/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
>> index 403e5fc..13a6c89 100644
>> --- a/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
>> +++ b/doc/guides/prog_guide/thread_safety_dpdk_functions.rst
>> @@ -67,6 +67,7 @@ then locking, or some other form of mutual exclusion, is 
>> necessary.
>>  The ring library is based on a lockless ring-buffer algorithm that 
>> maintains its original design for thread safety.
>>  Moreover, it provides high performance for either multi- or 
>> single-consumer/producer enqueue/dequeue operations.
>>  The mempool library is based on the DPDK lockless ring library and 
>> therefore is also multi-thread safe.
>> +rte_vhost_enqueue_burst() is also thread safe because based on lockless 
>> ring-buffer algorithm like the ring library.
> FYI, Huawei meant to make rte_vhost_enqueue_burst() not be thread-safe,
> to align with the usage of rte_eth_tx_burst().
>
>   --yliu

I have a patch to remove the lockless enqueue. Unless there is a strong
reason, I prefer the vhost PMD to behave like other PMDs, with no internal
lockless algorithm. In the future, for people who really need it, we could
have a dynamic/static switch to enable it.




[dpdk-dev] [PATCH RFC 2/4] vhost: make buf vector for scatter RX local.

2016-02-19 Thread Xie, Huawei
On 2/19/2016 3:31 PM, Ilya Maximets wrote:
> On 19.02.2016 10:06, Yuanhan Liu wrote:
>> On Fri, Feb 19, 2016 at 09:32:41AM +0300, Ilya Maximets wrote:
>>> Array of buf_vector's is just an array for temporary storing information
>>> about available descriptors. It used only locally in virtio_dev_merge_rx()
>>> and there is no reason for that array to be shared.
>>>
>>> Fix that by allocating local buf_vec inside virtio_dev_merge_rx().
>>>
>>> Signed-off-by: Ilya Maximets 
>>> ---
>>>  lib/librte_vhost/rte_virtio_net.h |  1 -
>>>  lib/librte_vhost/vhost_rxtx.c | 45 
>>> ---
>>>  2 files changed, 23 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/lib/librte_vhost/rte_virtio_net.h 
>>> b/lib/librte_vhost/rte_virtio_net.h
>>> index 10dcb90..ae1e4fb 100644
>>> --- a/lib/librte_vhost/rte_virtio_net.h
>>> +++ b/lib/librte_vhost/rte_virtio_net.h
>>> @@ -91,7 +91,6 @@ struct vhost_virtqueue {
>>> int kickfd; /**< Currently unused 
>>> as polling mode is enabled. */
>>> int enabled;
>>> uint64_treserved[16];   /**< Reserve some 
>>> spaces for future extension. */
>>> -   struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
>>> scatter RX. */
>>>  } __rte_cache_aligned;
>> I like this kind of cleanup, however, it breaks ABI.
> Should I prepare version of this patch with field above marked as
> deprecated and add note to doc/guides/rel_notes/release_16_04.rst
> about future deletion?

Ilya, you could follow the ABI process:
http://dpdk.org/doc/guides/contributing/versioning.html

>
> Best regards, Ilya Maximets.
>



[dpdk-dev] [PATCH v2] ixgbe: Fix disable interrupt twice

2016-02-19 Thread Qiu, Michael
On 2016/2/2 19:03, Ananyev, Konstantin wrote:
>

[...]

 I don't think i40e miss it, because it not the right please to disable 
 interrupt.
 because all interrupts are enabled in init stage.

 Actually, ixgbe enable the interrupt in init stage, but in dev_start, it 
 disable it
 first and re-enable, so it just the same with doing nothing about 
 interrupt.

 Just think below:

 1. start the port.(interrupt already enabled in init stage, disable -->
 re-enable)
 2. stop the port.(disable interrupt)
 3. start port again(Try to disable, but failed, already disabled)

 Would you think the code has issue?
>>> [Zhang, Helin] in ixgbe PMD, it can be seen that uninit() calls dev_close(),
>>> which calls dev_stop(). So I think the disabling can be done only in 
>>> dev_stop().
>>> All others can make use of dev_stop to disable the interrupt.
>> As I said, if it is in dev_stop, it will has issue when dev_start -->
>> dev_stop --> dev_start, this also could applied in i40e and fm10k. If
>> you want to put it in dev_stop, better to remove enable interrupts in
>> init stage, and only put it in dev_start.
> We can't remove enabling interrupt at init stage and put it only in 
> dev_start().
> That means PF couldn't handle interrupts from VF till dev_start() will be 
> executed on PF
>  - which could never happen.
> For same reason we can't disable all interrupts in dev_stop().
> See: http://dpdk.org/ml/archives/dev/2015-November/027238.html

Hi, Konstantin

Yes, you are right.

So the only way to fix this issue is to remove it from dev_stop() and
leave it in the uninit() stage, which is what my patch does.

Am I right?

Thanks,
Michael
> Konstantin
>
>> Thanks,
>> Michael
>>> Regards,
>>> Helin
>>>
 Thanks,
 Michael

> Maybe we can follow fm10k's style.
>
>> On other hand, if we remove it in dev_stop, any side effect? In ixgbe
>> start, it will always disable it first and then re-enable it, so it's 
>> safe.
> I think you mean we can disable intr anyway even if it has been disabled.
 Actually, we couldn't, DPDK call VFIO ioctl to kernel to disable 
 interrupts, and
 if we try disable twice, it will return and error.
 That's why I mean we need a flag to show the interrupts stats. If it 
 already
 disabled, we do not need call in to kernel. just return and give a warning
 message.

 Thanks,
 Michael

>  Sounds more like why we don't
> need this patch :)
>
>> Thanks,
>> Michael
>



[dpdk-dev] [PATCH v4 0/3] add lpm support for NEON

2016-02-19 Thread Jerin Jacob
On Thu, Feb 18, 2016 at 10:26:44AM +, Kobylinski, MichalX wrote:
> 
> 
> > -Original Message-
> > From: Jerin Jacob [mailto:jerin.jacob at caviumnetworks.com]
> > Sent: Tuesday, February 16, 2016 5:44 PM
> > To: Kobylinski, MichalX 
> > Cc: dev at dpdk.org; viktorin at rehivetech.com
> > Subject: Re: [dpdk-dev] [PATCH v4 0/3] add lpm support for NEON
> > Importance: High
> > 
> > On Tue, Feb 16, 2016 at 01:27:02PM +, Kobylinski, MichalX wrote:
> > >
> > >
> > > > -Original Message-
> > > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Jerin Jacob
> > > > Sent: Friday, February 12, 2016 1:29 PM
> > > > To: dev at dpdk.org
> > > > Cc: viktorin at rehivetech.com
> > > > Subject: [dpdk-dev] [PATCH v4 0/3] add lpm support for NEON
> > > >
> > [snip]
> > > >
> > > > Jerin Jacob (3):
> > > >   lpm: make rte_lpm_lookupx4 API definition architecture agnostic
> > > >   lpm: add support for NEON
> > > >   maintainers: claim responsibility for arm64 specific files of hash and
> > > > lpm
> > > >
> > > >  MAINTAINERS|   3 +
> > > >  app/test/test_lpm.c|  21 ++--
> > > >  app/test/test_xmmt_ops.h   |  67 +
> > > >  config/defconfig_arm-armv7a-linuxapp-gcc   |   3 -
> > > >  config/defconfig_arm64-armv8a-linuxapp-gcc |   3 -
> > > >  lib/librte_lpm/Makefile|   6 ++
> > > >  lib/librte_lpm/rte_lpm.h   |  99 ++-
> > > >  lib/librte_lpm/rte_lpm_neon.h  | 148
> > +
> > > >  lib/librte_lpm/rte_lpm_sse.h   | 143
> > 
> > > >  9 files changed, 386 insertions(+), 107 deletions(-)  create mode
> > > > 100644 app/test/test_xmmt_ops.h  create mode 100644
> > > > lib/librte_lpm/rte_lpm_neon.h create mode 100644
> > > > lib/librte_lpm/rte_lpm_sse.h
> > > >
> > > > --
> > > > 2.1.0
> > >
> > > Hi Jerin,
> > 
> > Hi Michal,
> > 
> > > Are you planning increase next_hop field for ARM? I extended next_hop 
> > > field
> > from 8 bits to 24 bits and created structure to configure LPM for x86.
> > 
> > Yes, I am planning to increase next_hop field for ARM as a separate patch.  
> > Let
> > this base patchset get merges.
> > 
> > I will make  ARM specific changes for your new feature in 
> > 'rte_lpm_lookupx4' as
> > a separate patch on top of your series.
> > So that in case if I want to go back to 8 bit then I can do it
> > 
> > Jerin
> 
> Thank you for your answer.
> Do you prepare separate patch with changes for ARM architecture on the top my 
> series?
> If you want I can support you with prepare new patch.
Yes, Can you rebase your patch with this patch(add lpm support for NEON).

I can fill in the ARM-specific changes of 'rte_lpm_lookupx4' as a separate
patch on top of it.

Jerin

> 
> Michal
> 
> > 
> > > Please look at my patchset with proposal increase next_hop field and 
> > > structure
> > to configure.
> > >
> > > http://patchwork.dpdk.org/dev/patchwork/patch/10249/
> > > http://patchwork.dpdk.org/dev/patchwork/patch/10250/
> > >
> > > Best Regards,
> > > Michal
> > >


[dpdk-dev] [PATCH v2 2/2] i40evf: support interrupt based pf reset request

2016-02-19 Thread Wu, Jingjing
> I reported an issue on ixgbe.
Yes, thanks, we also notice such issue on ixgbe.

> What you provide here is a workaround for i40e.
> I am not even sure this can be applied to ixgbe.
>
Yes, not just workaround, also a basic one, without the patch, DPDK VF even
doesn't know the pf reset happened. I think ixgbe also need to know that.

> Does it mean that anytime we have a problem with drivers, workarounds
> should be applied to ethdev / eal ... so that you don't have to handle
> anything in the drivers ?

Currently, as I understand it, the DPDK PMD driver is part of the DPDK library.
Even the driver loading happens in a thread which is created by the application.
From this side, there is no task which is managed by the driver internally. In
fact, we would also like the reset process to be done automatically, or at least
to provide a simple API to the application to help it recover easily. Maybe the
latter one follows the current DPDK framework. Otherwise, would we need a thread
for each driver?

Back to this patch: it just makes it possible for the i40e VF PMD driver to
receive the PF reset interrupt. It doesn't change ethdev/eal.
I don't think you have objection to it, right?

About how to process the reset event, we can raise another thread to discuss?

> This is not the first time I complain about this kind of design issues.
> 
> 
> > If we need to support driver recovery automatically, we'd better to find a 
> > way to do that.
> > Do you have any idea?
> 
> First, list those "lots of resources" that "are managed by application".
> If your driver needs to keep track of those, this is i40e driver job
> to do this internally without requiring ethdev to be modified.
>
Agree about the resource listing. But again, about the "internally", can you 
share your idea about it?
As you know, the PMD driver doesn't even have an internal thread.

> If this proves to be generic enough, maybe moving part of this to
> ethdev will then make sense.
>
We can discuss, I think most NICs may have such issue. We need to make 
agreement on that.

Thanks
Jingjing
> 
> Thanks.
> 
> --
> David Marchand


[dpdk-dev] [PATCH] i40e: add VEB switching support for i40e

2016-02-19 Thread Wu, Jingjing


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Zhe Tao
> Sent: Thursday, January 21, 2016 2:50 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH] i40e: add VEB switching support for i40e
> 
> VEB switching feature for i40e is used to enable the switching between the
>  VSIs connect to the virtual bridge. The old implementation is setting the
>  virtual bridge mode as VEPA which is port aggregation. Enable the switching
>  ability by setting the loop back mode for the specific VSIs which connect to 
> PF
>  or VFs.

As far as I know, there is a known issue with the VEB switch on older NVM
versions. It would be better to add an NVM version check: if version > xx.xx,
then enable it?

Thanks
Jingjing

> Signed-off-by: Zhe Tao 
> ---
>  drivers/net/i40e/i40e_ethdev.c | 48 
> +++---
>  1 file changed, 40 insertions(+), 8 deletions(-)
> 



[dpdk-dev] [PATCH v3 00/30] i40e base driver update

2016-02-19 Thread Wu, Jingjing


> -Original Message-
> From: Zhang, Helin
> Sent: Thursday, February 18, 2016 10:34 PM
> To: dev at dpdk.org
> Cc: Wu, Jingjing; Zhang, Helin
> Subject: [PATCH v3 00/30] i40e base driver update
> 
> i40e base driver is updated, to support new X722 device IDs, and
> use rx control AQ commands to read/write rx control registers.
> Of course, fixes and enhancements are also added, as listed below.
> 
> v3:
>  - As release_2_3.rst has been renamed to release_16_04.rst, then
>all modifications in release_2_3.rst should be moved into
>release_16_04.rst.
> 
> v2:
>  - Used i40e_set_mac_type() in base driver to replace the similar
>in PMD source files, in order to support newly added X722 VF
>device IDs.
>  - Used small letter in all commit log titles.
> 

Acked-by: Jingjing Wu 
> --
> 2.5.0



[dpdk-dev] [PATCH v2 2/2] kdp: add virtual PMD for kernel slow data path communication

2016-02-19 Thread Ferruh Yigit
This patch provides slow data path communication to the Linux kernel.
Patch is based on librte_kni, and heavily re-uses it.

The main difference is librte_kni library converted into a PMD, to
provide ease of use for applications.

Now any application can use slow path communication without any update
in application, because of existing eal support for virtual PMD.

This PMD also supports two methods to send packets to Linux: the first
is a custom FIFO implementation with the help of the KDP kernel module, the
second is the Linux in-kernel tun/tap support. The PMD first checks for the KDP
kernel module; if that fails, it tries to create and use a tap interface.

With FIFO method: PMD's rx_pkt_burst() get packets from FIFO,
and tx_pkt_burst() puts packet to the FIFO.
The corresponding Linux virtual network device driver code
also gets/puts packets from FIFO as they are coming from hardware.

With tun/tap method: no external kernel module required, PMD reads from
and writes packets to the tap interface file descriptor. Tap interface
has performance penalty against FIFO implementation.

Signed-off-by: Ferruh Yigit 
---

v2:
* Use rtnetlink to create interfaces
---
 MAINTAINERS |   1 +
 config/common_linuxapp  |   1 +
 doc/guides/nics/pcap_ring.rst   | 125 ++-
 doc/guides/rel_notes/release_16_04.rst  |   6 +
 drivers/net/Makefile|   3 +-
 drivers/net/kdp/Makefile|  61 +++
 drivers/net/kdp/rte_eth_kdp.c   | 501 +
 drivers/net/kdp/rte_kdp.c   | 633 
 drivers/net/kdp/rte_kdp.h   | 116 ++
 drivers/net/kdp/rte_kdp_fifo.h  |  91 +
 drivers/net/kdp/rte_kdp_tap.c   | 101 +
 drivers/net/kdp/rte_pmd_kdp_version.map |   4 +
 lib/librte_eal/common/include/rte_log.h |   3 +-
 mk/rte.app.mk   |   3 +-
 14 files changed, 1643 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/kdp/Makefile
 create mode 100644 drivers/net/kdp/rte_eth_kdp.c
 create mode 100644 drivers/net/kdp/rte_kdp.c
 create mode 100644 drivers/net/kdp/rte_kdp.h
 create mode 100644 drivers/net/kdp/rte_kdp_fifo.h
 create mode 100644 drivers/net/kdp/rte_kdp_tap.c
 create mode 100644 drivers/net/kdp/rte_pmd_kdp_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 05ffe26..deaeea3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -260,6 +260,7 @@ F: doc/guides/sample_app_ug/kernel_nic_interface.rst
 Linux KDP
 M: Ferruh Yigit 
 F: lib/librte_eal/linuxapp/kdp/
+F: drivers/net/kdp/

 Linux AF_PACKET
 M: John W. Linville 
diff --git a/config/common_linuxapp b/config/common_linuxapp
index e1b5032..aa13719 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -316,6 +316,7 @@ CONFIG_RTE_LIBRTE_PMD_NULL=y
 #
 # Compile KDP PMD
 #
+CONFIG_RTE_LIBRTE_PMD_KDP=y
 CONFIG_RTE_KDP_KMOD=y
 CONFIG_RTE_KDP_PREEMPT_DEFAULT=y

diff --git a/doc/guides/nics/pcap_ring.rst b/doc/guides/nics/pcap_ring.rst
index aa48d33..b602e65 100644
--- a/doc/guides/nics/pcap_ring.rst
+++ b/doc/guides/nics/pcap_ring.rst
@@ -28,11 +28,11 @@
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-Libpcap and Ring Based Poll Mode Drivers
-
+Software Poll Mode Drivers
+==

 In addition to Poll Mode Drivers (PMDs) for physical and virtual hardware,
-the DPDK also includes two pure-software PMDs. These two drivers are:
+the DPDK also includes pure-software PMDs. These drivers are:

 *   A libpcap -based PMD (librte_pmd_pcap) that reads and writes packets using 
libpcap,
 - both from files on disk, as well as from physical NIC devices using 
standard Linux kernel drivers.
@@ -40,6 +40,10 @@ the DPDK also includes two pure-software PMDs. These two 
drivers are:
 *   A ring-based PMD (librte_pmd_ring) that allows a set of software FIFOs 
(that is, rte_ring)
 to be accessed using the PMD APIs, as though they were physical NICs.

+*   A slow data path PMD (librte_pmd_kdp) that allows send/get packets to/from 
OS network
+stack as it is a physical NIC.
+
+
 .. note::

 The libpcap -based PMD is disabled by default in the build configuration 
files,
@@ -211,6 +215,121 @@ Multiple devices may be specified, separated by commas.
 Done.


+Kernel Data Path PMD
+
+
+Kernel Data Path (KDP) PMD is to communicate with OS network stack easily by 
application.
+
+.. code-block:: console
+
+./testpmd --vdev eth_kdp0 --vdev eth_kdp1 -- -i
+...
+Configuring Port 0 (socket 0)
+Port 0: 00:00:00:00:00:00
+Configuring Port 1 (socket 0)
+Port 1: 00:00:00:00:00:00
+Checking link statuses...
+Port 0 Link Up - speed 1 Mbps - full-duplex
+Port 1 Link Up - speed 1 Mbps - full-duplex
+Done
+
+KDP PMD supports two type of commu

[dpdk-dev] [PATCH v2 1/2] kdp: add kernel data path kernel module

2016-02-19 Thread Ferruh Yigit
This kernel module is based on the KNI module, but it is a stripped-down
version of it that handles only data messages; no control functionality is
provided.

FIFO implementation of the KNI is kept exact same, but ethtool related
code removed and virtual network management related code simplified.

This module contains kernel support to create network devices and
this module has a simple driver for virtual network device, the driver
simply puts/gets packets to/from FIFO instead of real hardware.

The FIFO is created and owned by the userspace application, which in this case
is the KDP PMD.

In the long term this patch intends to replace the KNI, and KNI will be
deprecated.

Signed-off-by: Ferruh Yigit 
---

v2:
* Use rtnetlink to create interfaces
* include modules.h to prevent compile error in old kernels
---
 MAINTAINERS|   4 +
 config/common_linuxapp |   8 +-
 lib/librte_eal/linuxapp/Makefile   |   5 +-
 lib/librte_eal/linuxapp/eal/Makefile   |   3 +-
 .../linuxapp/eal/include/exec-env/rte_kdp_common.h | 139 
 lib/librte_eal/linuxapp/kdp/Makefile   |  55 ++
 lib/librte_eal/linuxapp/kdp/kdp_dev.h  |  78 ++
 lib/librte_eal/linuxapp/kdp/kdp_fifo.h |  91 +++
 lib/librte_eal/linuxapp/kdp/kdp_net.c  | 862 +
 9 files changed, 1242 insertions(+), 3 deletions(-)
 create mode 100644 
lib/librte_eal/linuxapp/eal/include/exec-env/rte_kdp_common.h
 create mode 100644 lib/librte_eal/linuxapp/kdp/Makefile
 create mode 100644 lib/librte_eal/linuxapp/kdp/kdp_dev.h
 create mode 100644 lib/librte_eal/linuxapp/kdp/kdp_fifo.h
 create mode 100644 lib/librte_eal/linuxapp/kdp/kdp_net.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 628bc05..05ffe26 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -257,6 +257,10 @@ F: app/test/test_kni.c
 F: examples/kni/
 F: doc/guides/sample_app_ug/kernel_nic_interface.rst

+Linux KDP
+M: Ferruh Yigit 
+F: lib/librte_eal/linuxapp/kdp/
+
 Linux AF_PACKET
 M: John W. Linville 
 F: drivers/net/af_packet/
diff --git a/config/common_linuxapp b/config/common_linuxapp
index f1638db..e1b5032 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -314,6 +314,12 @@ CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
 CONFIG_RTE_LIBRTE_PMD_NULL=y

 #
+# Compile KDP PMD
+#
+CONFIG_RTE_KDP_KMOD=y
+CONFIG_RTE_KDP_PREEMPT_DEFAULT=y
+
+#
 # Do prefetch of packet data within PMD driver receive function
 #
 CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index d9c5233..e3f91a7 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -38,6 +38,9 @@ DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal
 ifeq ($(CONFIG_RTE_KNI_KMOD),y)
 DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += kni
 endif
+ifeq ($(CONFIG_RTE_KDP_KMOD),y)
+DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += kdp
+endif
 ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
 DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_dom0
 endif
diff --git a/lib/librte_eal/linuxapp/eal/Makefile 
b/lib/librte_eal/linuxapp/eal/Makefile
index 6e26250..a70b793 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -121,6 +121,7 @@ CFLAGS_eal_thread.o += -Wno-return-type
 endif

 INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
+INC += rte_kdp_common.h

 SYMLINK-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP)-include/exec-env := \
$(addprefix include/exec-env/,$(INC))
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kdp_common.h 
b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kdp_common.h
new file mode 100644
index 000..0334876
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kdp_common.h
@@ -0,0 +1,139 @@
+/*-
+ *   This file is provided under a dual BSD/LGPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2.1 of the GNU Lesse

[dpdk-dev] [PATCH v2 0/2] slow data path communication between DPDK port and Linux

2016-02-19 Thread Ferruh Yigit
This is slow data path communication implementation based on existing KNI.

Difference is: librte_kni converted into a PMD, kdp kernel module is almost
same except all control path functionality removed and some simplification done.

Motivation is to simplify slow path data communication.
Now any application can use this new PMD to send/get data to Linux kernel.

PMD supports two communication methods:

1) KDP kernel module
PMD initialization functions handles creating virtual interfaces (with help of
kdp kernel module) and created FIFO. FIFO is used to share data between
userspace and kernelspace. This is default method.

2) tun/tap module
When KDP module is not inserted, PMD creates tap interface and transfers
packets using tap interface.

In the long term this patch intends to replace the KNI, and KNI will be
deprecated.

v2:
* Use rtnetlink to create interfaces
* include modules.h to prevent compile error in old kernels


Sample usage:
1) Transfer any packet received from NIC that bound to DPDK, to the Linux kernel

a) insert kdp kernel module
insmod build/kmod/rte_kdp.ko

b) bind NIC to the DPDK using dpdk_nic_bind.py

c) ./testpmd --vdev eth_kdp0

c1) testpmd show two ports, one of them physical, other virtual
...
Configuring Port 0 (socket 0)
Port 0: 00:00:00:00:00:00
Configuring Port 1 (socket 0)
...
Checking link statuses...
Port 0 Link Up - speed 1 Mbps - full-duplex
Port 1 Link Up - speed 1 Mbps - full-duplex
Done

c2) This will create "kdp0" Linux interface
$ ip l show kdp0
21: kdp0:  mtu 1500 qdisc noop state DOWN mode DEFAULT 
group default qlen 1000
link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

d) Linux port can be used for data

d1)
$ ifconfig kdp0 1.0.0.2
$ ping 1.0.0.1
PING 1.0.0.1 (1.0.0.1) 56(84) bytes of data.
64 bytes from 1.0.0.1: icmp_seq=1 ttl=64 time=0.789 ms
64 bytes from 1.0.0.1: icmp_seq=2 ttl=64 time=0.881 ms

d2)
$ tcpdump -nn -i kdp0
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on kdp0, link-type EN10MB (Ethernet), capture size 262144 bytes
15:01:22.407506 IP 1.0.0.1 > 1.0.0.2: ICMP echo request, id 40016, seq 18, 
length 64
15:01:22.408521 IP 1.0.0.2 > 1.0.0.1: ICMP echo reply, id 40016, seq 18, length 
64



2) Data travelling between virtual Linux interfaces passes through the DPDK
application, so the application can alter the data

a) insert kdp kernel module
insmod build/kmod/rte_kdp.ko

b) No physical NIC involved

c) ./testpmd --vdev eth_kdp0 --vdev eth_kdp1

c1) testpmd show two ports, both of them are virtual
...
Configuring Port 0 (socket 0)
Port 0: 00:00:00:00:00:00
Configuring Port 1 (socket 0)
Port 1: 00:00:00:00:00:00
Checking link statuses...
Port 0 Link Up - speed 1 Mbps - full-duplex
Port 1 Link Up - speed 1 Mbps - full-duplex
Done

c2) This will create "kdp0"  and "kdp1" Linux interfaces
$ ip l show kdp0; ip l show kdp1
22: kdp0:  mtu 1500 qdisc noop state DOWN mode DEFAULT 
group default qlen 1000
link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
23: kdp1:  mtu 1500 qdisc noop state DOWN mode DEFAULT 
group default qlen 1000
link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

d) Data travel between virtual ports pass from DPDK application
$ifconfig kdp0 1.0.0.1
$ifconfig kdp1 1.0.0.2

d1)
$ ping 1.0.0.1
PING 1.0.0.1 (1.0.0.1) 56(84) bytes of data.
64 bytes from 1.0.0.1: icmp_seq=1 ttl=64 time=3.57 ms
64 bytes from 1.0.0.1: icmp_seq=2 ttl=64 time=1.85 ms
64 bytes from 1.0.0.1: icmp_seq=3 ttl=64 time=1.89 ms

d2)
$ tcpdump -nn -i kdp0
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on kdp0, link-type EN10MB (Ethernet), capture size 262144 bytes
15:20:51.908543 IP 1.0.0.2 > 1.0.0.1: ICMP echo request, id 41234, seq 1, 
length 64
15:20:51.909570 IP 1.0.0.1 > 1.0.0.2: ICMP echo reply, id 41234, seq 1, length 
64
15:20:52.909551 IP 1.0.0.2 > 1.0.0.1: ICMP echo request, id 41234, seq 2, 
length 64
15:20:52.910577 IP 1.0.0.1 > 1.0.0.2: ICMP echo reply, id 41234, seq 2, length 
64



3) tun/tap interface usage

a) No external module required, tun/tap support in kernel required

b) ./testpmd --vdev eth_kdp0 --vdev eth_kdp1

b1) This will create "tap_kdp0"  and "tap_kdp1" Linux interfaces
$ ip l show tap_kdp0; ip l show tap_kdp1
25: tap_kdp0:  mtu 1500 qdisc noop state DOWN mode DEFAULT 
group default qlen 500
link/ether 56:47:97:9c:03:8e brd ff:ff:ff:ff:ff:ff
26: tap_kdp1:  mtu 1500 qdisc noop state DOWN mode DEFAULT 
group default qlen 500
link/ether 5e:15:22:b0:52:42 brd ff:ff:ff:ff:ff:ff

Ferruh Yigit (2):
  kdp: add kernel data path kernel module
  kdp: add virtual PMD for kernel slow data path communication

 MAINTAINERS|   5 +
 config/common_linuxapp |   9 +-
 doc/guides/nics/pcap_ring.rst  | 125 ++-
 doc/guides/rel_notes/release_16_04.rst |   6 +
 drivers/net/Makefile   |   3 +-
 drivers/net/kdp/Makefile   |  61 ++
 d

[dpdk-dev] [PATCH v4 4/4] app/test-pmd: test tunnel filter for IP in GRE

2016-02-19 Thread Sun, Xutao
Hi Pablo,

> -Original Message-
> From: De Lara Guarch, Pablo
> Sent: Thursday, February 18, 2016 8:17 PM
> To: Sun, Xutao ; dev at dpdk.org
> Cc: Zhang, Helin ; Wu, Jingjing
> ; Liu, Jijiang 
> Subject: RE: [PATCH v4 4/4] app/test-pmd: test tunnel filter for IP in GRE
> 
> Hi Xutao,
> 
> > -Original Message-
> > From: Sun, Xutao
> > Sent: Thursday, February 18, 2016 9:58 AM
> > To: dev at dpdk.org
> > Cc: Zhang, Helin; Wu, Jingjing; De Lara Guarch, Pablo; Sun, Xutao;
> > Liu, Jijiang
> > Subject: [PATCH v4 4/4] app/test-pmd: test tunnel filter for IP in GRE
> >
> > This patch add some options in tunnel_filter command to test IP in GRE
> > packet classification on i40e.
> >
> > Signed-off-by: Xutao Sun 
> > Signed-off-by: Jijiang Liu 
> 
> Sorry, I see your point now, that the tunnel/filter types are not being
> specified in the testpmd documentation.
> But actually, I think it would be a good idea to enumerate the different types
> in that document as well (and maybe add a description like "set fwd").
> 
> Plus, see one extra comment below.
> 
> > ---
> >  app/test-pmd/cmdline.c | 36 
> >  1 file changed, 24 insertions(+), 12 deletions(-)
> >
> > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > c707318..6a5cd9f 100644
> > --- a/app/test-pmd/cmdline.c
> > +++ b/app/test-pmd/cmdline.c
> > @@ -301,12 +301,14 @@ static void cmd_help_long_parsed(void
> > *parsed_result,
> > "Set the outer VLAN TPID for Packet Filtering on"
> > " a port\n\n"
> >
> > -   "tunnel_filter add (port_id) (outer_mac) (inner_mac)
> > (ip_addr) "
> > -   "(inner_vlan) (vxlan|nvgre) (filter_type) (tenant_id)
> > (queue_id)\n"
> > +   "tunnel_filter add (port_id) (outer_mac) (inner_mac)
> > (ip_addr)"
> > +   "(inner_vlan) (vxlan|nvgre|iningre) (filter_type)"
> 
> Should be ipingre?
> 
> > +   "(tenant_id) (queue_id)\n"
> > "   add a tunnel filter of a port.\n\n"
> >
> > -   "tunnel_filter rm (port_id) (outer_mac) (inner_mac)
> > (ip_addr) "
> > -   "(inner_vlan) (vxlan|nvgre) (filter_type) (tenant_id)
> > (queue_id)\n"
> > +   "tunnel_filter rm (port_id) (outer_mac) (inner_mac)
> > (ip_addr)"
> > +   "(inner_vlan) (vxlan|nvgre|ipingre) (filter_type)"
> > +   "(tenant_id) (queue_id)\n"
> > "   remove a tunnel filter of a port.\n\n"
> >

Thanks for your advice, I will update the document.

Regards,
Xutao