[PATCH RFC 08/11] net/mlx5e: XDP fast RX drop bpf programs support

2016-09-07 Thread Saeed Mahameed
From: Rana Shahout <ra...@mellanox.com>

Add support for the BPF_PROG_TYPE_PHYS_DEV hook in mlx5e driver.

When XDP is on, we make sure to change the channels' RQ type to
MLX5_WQ_TYPE_LINKED_LIST rather than the "striding RQ" type, to
ensure "page per packet".

On XDP set, we fail if HW LRO is enabled and ask the user to turn it
off.  Since on ConnectX4-LX HW LRO is always on by default, this will
be annoying, but we prefer not to enforce LRO off from the XDP set
function.

Full channels reset (close/open) is required only when setting XDP
on/off.

When XDP set is called just to exchange programs, we update each RQ's
xdp program on the fly.  To synchronize with the current RX data path
activity of that RQ, we temporarily disable the RQ, make sure the RX
path is not running, then quickly update and re-enable it (a sketch
follows the list):
- rq.state = disabled
- napi_synchronize
- xchg(rq->xdp_prog)
- rq.state = enabled
- napi_schedule // Just in case we've missed an IRQ
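
As an illustration only, the update path could look roughly like this
(the RQ state flag and helper flow are assumptions made for the
sketch, not the exact driver code):

static void mlx5e_rq_swap_xdp_prog(struct mlx5e_rq *rq,
                                   struct bpf_prog *prog)
{
        struct bpf_prog *old;

        /* MLX5E_RQ_STATE_ENABLED is an illustrative flag name */
        clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
        napi_synchronize(&rq->channel->napi); /* wait out in-flight poll */

        old = xchg(&rq->xdp_prog, prog);      /* atomic program swap */
        if (old)
                bpf_prog_put(old);

        set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
        napi_schedule(&rq->channel->napi);    /* in case we missed an IRQ */
}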

Packet rate performance testing was done with pktgen sending 64B
packets on the TX side, and TC drop action on the RX side compared to
XDP fast drop.

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz

Comparison is done between:
1. Baseline, Before this patch with TC drop action
2. This patch with TC drop action
3. This patch with XDP RX fast drop

Streams    Baseline (TC drop)    TC drop     XDP fast Drop
----------------------------------------------------------
1          5.51Mpps              5.14Mpps    13.5Mpps
2          11.5Mpps              10.0Mpps    25.1Mpps
4          16.3Mpps              17.2Mpps    35.4Mpps
8          29.6Mpps              28.2Mpps    45.8Mpps*
16         34.0Mpps              30.1Mpps    45.8Mpps*

It seems that there is around a ~5% degradation between the baseline
and this patch with a single stream when comparing packet rate with
TC drop; it might be related to XDP code overhead or new cache misses
added by the XDP code.

*My xmitter was limited to 45Mpps, so for 8/16 streams the xmitter is
the bottleneck and it seems that XDP drop can handle more.

Signed-off-by: Rana Shahout <ra...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 100 -
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c|  26 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |   4 +
 4 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 7dfb34e..729bae8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -334,6 +334,7 @@ struct mlx5e_rq {
int    ix;
 
struct mlx5e_rx_am am; /* Adaptive Moderation */
+   struct bpf_prog   *xdp_prog;
 
/* control */
struct mlx5_wq_ctrl    wq_ctrl;
@@ -627,6 +628,7 @@ struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_sq    **txq_to_sq_map;
int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
+   struct bpf_prog *xdp_prog;
/* priv data path fields - end */
 
unsigned long  state;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a6a2e60..dab8486 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include <linux/bpf.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -104,7 +105,8 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv 
*priv, u8 rq_type)
 
 static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv)
 {
-   u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) ?
+   u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) &&
+   !priv->xdp_prog ?
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
MLX5_WQ_TYPE_LINKED_LIST;
mlx5e_set_rq_type_params(priv, rq_type);
@@ -177,6 +179,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv 
*priv)
s->rx_csum_none += rq_stats->csum_none;
s->rx_csum_complete += rq_stats->csum_complete;
s->rx_csum_unnecessary_inner += 
rq_stats->csum_unnecessary_inner;
+   s->rx_xdp_drop += rq_stats->xdp_drop;
s->rx_wqe_err   += rq_stats->wqe_err;
s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
@@ -476,6 +479,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
rq->channel = c;
   

[PATCH RFC 05/11] net/mlx5e: Union RQ RX info per RQ type

2016-09-07 Thread Saeed Mahameed
We have two types of RX RQs, and they use two separate sets of
info arrays and structures in the RX data path.  Today those
structures are mutually exclusive per RQ type, hence one kind is
allocated on RQ creation according to the RQ type.

For better cache locality and to minimize the
sizeof(struct mlx5e_rq), in this patch we define them as a union.

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 14 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 32 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 10 +++
 3 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a346112..7dfb34e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -305,9 +305,14 @@ struct mlx5e_rq {
/* data path */
struct mlx5_wq_ll  wq;
 
-   struct mlx5e_dma_info *dma_info;
-   struct mlx5e_mpw_info *wqe_info;
-   void  *mtt_no_align;
+   union {
+   struct mlx5e_dma_info *dma_info;
+   struct {
+   struct mlx5e_mpw_info *info;
+   void  *mtt_no_align;
+   u32    mtt_offset;
+   } mpwqe;
+   };
struct {
u8 page_order;
u32    wqe_sz; /* wqe data buffer size */
@@ -327,7 +332,6 @@ struct mlx5e_rq {
 
unsigned long  state;
int    ix;
-   u32    mpwqe_mtt_offset;
 
struct mlx5e_rx_am am; /* Adaptive Moderation */
 
@@ -804,7 +808,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
 
 static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
 {
-   return rq->mpwqe_mtt_offset +
+   return rq->mpwqe.mtt_offset +
wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c9f1dea..9f0f5f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -317,7 +317,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, 
struct mlx5e_sq *sq,
struct mlx5_wqe_ctrl_seg  *cseg = &wqe->ctrl;
struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
struct mlx5_wqe_data_seg  *dseg = &wqe->data;
-   struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+   struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
 
@@ -345,21 +345,21 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
int i;
 
-   rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info),
-   GFP_KERNEL, cpu_to_node(c->cpu));
-   if (!rq->wqe_info)
+   rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
+ GFP_KERNEL, cpu_to_node(c->cpu));
+   if (!rq->mpwqe.info)
goto err_out;
 
/* We allocate more than mtt_sz as we will align the pointer */
-   rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+   rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
cpu_to_node(c->cpu));
-   if (unlikely(!rq->mtt_no_align))
+   if (unlikely(!rq->mpwqe.mtt_no_align))
goto err_free_wqe_info;
 
for (i = 0; i < wq_sz; i++) {
-   struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+   struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
 
-   wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc,
+   wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc,
MLX5_UMR_ALIGN);
wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz,
  PCI_DMA_TODEVICE);
@@ -373,14 +373,14 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 
 err_unmap_mtts:
while (--i >= 0) {
-   struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+   struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
 
dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz,
 PCI_DMA_TODEVICE);
}
-   kfree(rq->mtt_no_align);
+   kfree(rq->mpwqe.mtt_no_align);
 err_free_wqe_info:
-   kfree(rq->wqe_info);
+   kfree(rq->mpwqe.info);
 
 err_out:
return -ENOMEM;
@@ -39

Re: [PATCH RFC 08/11] net/mlx5e: XDP fast RX drop bpf programs support

2016-09-07 Thread Saeed Mahameed
On Wed, Sep 7, 2016 at 4:32 PM, Or Gerlitz <gerlitz...@gmail.com> wrote:
> On Wed, Sep 7, 2016 at 3:42 PM, Saeed Mahameed <sae...@mellanox.com> wrote:
>
>> Packet rate performance testing was done with pktgen sending 64B
>> packets on the TX side, and TC drop action on the RX side compared
>> to XDP fast drop.
>>
>> CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
>>
>> Comparison is done between:
>> 1. Baseline, Before this patch with TC drop action
>> 2. This patch with TC drop action
>> 3. This patch with XDP RX fast drop
>>
>> Streams    Baseline (TC drop)    TC drop     XDP fast Drop
>> ----------------------------------------------------------
>> 1          5.51Mpps              5.14Mpps    13.5Mpps
>> 2          11.5Mpps              10.0Mpps    25.1Mpps
>> 4          16.3Mpps              17.2Mpps    35.4Mpps
>> 8          29.6Mpps              28.2Mpps    45.8Mpps*
>> 16         34.0Mpps              30.1Mpps    45.8Mpps*
>
> Rana, Guys, congrat!!
>
> When you say X streams, does each stream mapped by RSS to different RX ring?
> or we're on the same RX ring for all rows of the above table?

Yes, I will make this more clear in the actual submission.
Here we are talking about different RSS core rings.

>
> In the CX3 work, we had X sender "streams" that all mapped to the same RX 
> ring,
> I don't think we went beyond one RX ring.

Here we did.  The first row is what you are describing; the other
rows are the same test with an increasing number of RSS receiving
cores.  The xmit side is sending as many streams as possible, spread
as uniformly as possible across the different RSS cores on the
receiver.

>
> Here, I guess you want to 1st get an initial max for N pktgen TX
> threads all sending
> the same stream so you land on single RX ring, and then move to M * N pktgen 
> TX
> threads to max that further.
>
> I don't see how the current Linux stack would be able to happily drive 34M PPS
> (== allocate SKB, etc, you know...) on a single CPU, Jesper?
>
> Or.


Re: [iovisor-dev] [PATCH RFC 11/11] net/mlx5e: XDP TX xmit more

2016-09-07 Thread Saeed Mahameed
On Wed, Sep 7, 2016 at 4:44 PM, John Fastabend via iovisor-dev
<iovisor-...@lists.iovisor.org> wrote:
> On 16-09-07 05:42 AM, Saeed Mahameed wrote:
>> Previously we rang XDP SQ doorbell on every forwarded XDP packet.
>>
>> Here we introduce an xmit-more-like mechanism that will queue up more
>> than one packet into SQ (up to RX napi budget) w/o notifying the hardware.
>>
>> Once RX napi budget is consumed and we exit napi RX loop, we will
>> flush (doorbell) all XDP looped packets in case there are such.
>>
>> XDP forward packet rate:
>>
>> Comparing XDP with and w/o xmit more (bulk transmit):
>>
>> Streams XDP TX   XDP TX (xmit more)
>> ---
>> 1   4.90Mpps  7.50Mpps
>> 2   9.50Mpps  14.8Mpps
>> 4   16.5Mpps  25.1Mpps
>> 8   21.5Mpps  27.5Mpps*
>> 16  24.1Mpps  27.5Mpps*
>>
>
> Hi Saeed,
>
> How many cores are you using with these numbers? Just a single
> core? Or are streams being RSS'd across cores somehow.
>

Hi John,

Right, I should have been clearer here: the number of streams refers
to the active RSS cores.
We just manipulate the number of rings with ethtool -L to test this.

>> *It seems we hit a wall of 27.5Mpps, for 8 and 16 streams,
>> we will be working on the analysis and will publish the conclusions
>> later.
>>
>
> Thanks,
> John


Re: [PATCH RFC 11/11] net/mlx5e: XDP TX xmit more

2016-09-07 Thread Saeed Mahameed
On Wed, Sep 7, 2016 at 5:41 PM, Eric Dumazet <eric.duma...@gmail.com> wrote:
> On Wed, 2016-09-07 at 15:42 +0300, Saeed Mahameed wrote:
>> Previously we rang XDP SQ doorbell on every forwarded XDP packet.
>>
>> Here we introduce an xmit-more-like mechanism that will queue up more
>> than one packet into SQ (up to RX napi budget) w/o notifying the hardware.
>>
>> Once RX napi budget is consumed and we exit napi RX loop, we will
>> flush (doorbell) all XDP looped packets in case there are such.
>
> Why is this idea depends on XDP ?
>
> It looks like we could apply it to any driver having one IRQ servicing
> one RX and one TX, without XDP being involved.
>

Yes, but it is more complicated than the XDP case, where the RX ring
posts the TX descriptors and, once done, hits the doorbell once for
all the TX descriptors it posted; that is the only possible place to
hit a doorbell for the XDP TX ring.  A minimal sketch follows.
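
As an illustration, the batching described above could look roughly
like this (mlx5e_xdp_post_wqe(), mlx5e_ring_doorbell() and the
db_pending flag are made-up names for the sketch, not actual driver
symbols):

/* RX napi loop: post a TX WQE per looped packet, no doorbell yet */
static void mlx5e_xdp_tx_queue(struct mlx5e_sq *sq, dma_addr_t dma,
                               u32 len)
{
        mlx5e_xdp_post_wqe(sq, dma, len); /* write the descriptor only */
        sq->db_pending = true;
}

/* called once when the RX napi budget is consumed */
static void mlx5e_xdp_tx_flush(struct mlx5e_sq *sq)
{
        if (!sq->db_pending)
                return;
        wmb();                    /* descriptors visible before doorbell */
        mlx5e_ring_doorbell(sq);  /* one MMIO write for the whole batch */
        sq->db_pending = false;
}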

For a regular TX and RX ring sharing the same IRQ, there is no such
simple connection between them, and hitting a doorbell from the RX
ring napi would race with the xmit ndo function of the TX ring.

How do you synchronize in such a case?
Isn't the existing xmit_more mechanism sufficient?  Maybe we could
have a fence from the napi RX function that holds the xmit queue
until done, and then flushes the TX queue with the right xmit_more
flags set, without explicitly intervening in the TX flow (hitting the
doorbell).


[PATCH net 2/5] net/mlx5e: Move an_disable_cap bit to a new position

2016-09-07 Thread Saeed Mahameed
From: Bodong Wang <bod...@mellanox.com>

The previous an_disable_cap position, bit 31, is deprecated and is
not to be used by the driver with newer firmware.  New firmware will
advertise the same capability in bit 29.

The old capability didn't allow setting more than one protocol for a
specific speed when autoneg is off, while newer firmware will allow
this, and that is indicated in the new capability location.
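
For reference, a driver reading the relocated capability through the
mlx5_ifc accessors would do something like this (a sketch; 'out' is
assumed to already hold a queried PTYS register):

        u32 out[MLX5_ST_SZ_DW(ptys_reg)];
        u8 an_disable_cap, an_disable_admin;

        /* ... query MLX5_REG_PTYS into 'out' ... */
        an_disable_cap   = MLX5_GET(ptys_reg, out, an_disable_cap);
        an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);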

Signed-off-by: Bodong Wang <bod...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 21bc455..d1f9a58 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -6710,9 +6710,10 @@ struct mlx5_ifc_pude_reg_bits {
 };
 
 struct mlx5_ifc_ptys_reg_bits {
-   u8 an_disable_cap[0x1];
+   u8 reserved_at_0[0x1];
u8 an_disable_admin[0x1];
-   u8 reserved_at_2[0x6];
+   u8 an_disable_cap[0x1];
+   u8 reserved_at_3[0x5];
u8 local_port[0x8];
u8 reserved_at_10[0xd];
u8 proto_mask[0x3];
-- 
2.7.4



[PATCH net 3/5] net/mlx5e: Prevent casting overflow

2016-09-07 Thread Saeed Mahameed
From: Gal Pressman <g...@mellanox.com>

On 64-bit architectures unsigned long is longer than u32, so treating
a u32's address as an unsigned long bitmap makes the bitmap helpers
read past the variable.  We need to first declare an unsigned long
variable, then assign the wanted value to it.
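
As a generic illustration of this bug class (not the driver code
itself): the bitmap walkers dereference whole unsigned long words, so
handing them a u32's address over-reads on 64-bit.

static void dump_caps(u32 caps)
{
        unsigned long tmp = caps; /* widen into a real unsigned long */
        int bit;

        /* for_each_set_bit() reads unsigned long words; pointing it
         * at &caps would read 8 bytes on 64-bit, 4 of them garbage
         */
        for_each_set_bit(bit, &tmp, 32)
                pr_info("capability bit %d is set\n", bit);
}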

Fixes: 665bc53969d7 ('net/mlx5e: Use new ethtool get/set link ksettings API')
Signed-off-by: Gal Pressman <g...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index d0cf8fa..98e1a4a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -659,9 +659,10 @@ out:
 static void ptys2ethtool_supported_link(unsigned long *supported_modes,
u32 eth_proto_cap)
 {
+   unsigned long proto_cap = eth_proto_cap;
int proto;
 
-   for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER)
+   for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
bitmap_or(supported_modes, supported_modes,
  ptys2ethtool_table[proto].supported,
  __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -670,9 +671,10 @@ static void ptys2ethtool_supported_link(unsigned long 
*supported_modes,
 static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
u32 eth_proto_cap)
 {
+   unsigned long proto_cap = eth_proto_cap;
int proto;
 
-   for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER)
+   for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
bitmap_or(advertising_modes, advertising_modes,
  ptys2ethtool_table[proto].advertised,
  __ETHTOOL_LINK_MODE_MASK_NBITS);
-- 
2.7.4



[PATCH net 1/5] net/mlx5e: Fix xmit_more counter race issue

2016-09-07 Thread Saeed Mahameed
From: Tariq Toukan <tar...@mellanox.com>

Update the xmit_more counter before notifying the HW, to prevent a
possible use-after-free of the skb: once the doorbell is rung, the HW
may send the packet and the completion path may free the skb, so
skb->xmit_more must not be read after that point.

Fixes: c8cf78fe100b ("net/mlx5e: Add ethtool counter for TX xmit_more")
Signed-off-by: Tariq Toukan <tar...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 988eca9..eb0e725 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -356,6 +356,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, 
struct sk_buff *skb)
sq->stats.stopped++;
}
 
+   sq->stats.xmit_more += skb->xmit_more;
if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) {
int bf_sz = 0;
 
@@ -375,7 +376,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, 
struct sk_buff *skb)
 
sq->stats.packets++;
sq->stats.bytes += num_bytes;
-   sq->stats.xmit_more += skb->xmit_more;
return NETDEV_TX_OK;
 
 dma_unmap_wqe_err:
-- 
2.7.4



[PATCH net 4/5] net/mlx5e: Fix global PFC counters replication

2016-09-07 Thread Saeed Mahameed
From: Gal Pressman <g...@mellanox.com>

Currently, when reading global PFC statistics, we leave the counter
iterator out of the equation and end up reading the same counter over
and over again.

Instead of reading the counter at index 0 on every iteration, we now
read the counter at index (i).

Fixes: e989d5a532ce ('net/mlx5e: Expose flow control counters to ethtool')
Signed-off-by: Gal Pressman <g...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 98e1a4a..7a346bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -331,7 +331,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
if (mlx5e_query_global_pause_combined(priv)) {
for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
-                                 pport_per_prio_pfc_stats_desc, 0);
+                                 pport_per_prio_pfc_stats_desc, i);
}
}
 
-- 
2.7.4



[PATCH net 0/5] Mellanox 100G mlx5 fixes 2016-09-07

2016-09-07 Thread Saeed Mahameed
Hi Dave,

The following series contains bug fixes for the mlx5e driver.

from Gal,
- Static code checker cleanup (casting overflow)
- Fix global PFC counter statistics reading
- Fix HW LRO when vlan stripping is off

From Bodong,
- Deprecate old autoneg capability bit and use new one.

From Tariq,
- Fix xmit more counter race condition


For -stable:
('net/mlx5e: Fix parsing of vlan packets when updating lro header')

No conflicts are introduced with the mlx5 ongoing net-next submission.

Thanks,
Saeed.

Bodong Wang (1):
  net/mlx5e: Move an_disable_cap bit to a new position

Gal Pressman (3):
  net/mlx5e: Prevent casting overflow
  net/mlx5e: Fix global PFC counters replication
  net/mlx5e: Fix parsing of vlan packets when updating lro header

Tariq Toukan (1):
  net/mlx5e: Fix xmit_more counter race issue

 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  8 +---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c| 22 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c|  2 +-
 include/linux/mlx5/mlx5_ifc.h  |  5 +++--
 4 files changed, 24 insertions(+), 13 deletions(-)

-- 
2.7.4



[PATCH net 5/5] net/mlx5e: Fix parsing of vlan packets when updating lro header

2016-09-07 Thread Saeed Mahameed
From: Gal Pressman <g...@mellanox.com>

Currently, vlan tagged packets are not parsed correctly and are
assumed to be regular IPv4/IPv6 packets.
We should check for 802.1Q/802.1ad tags and update the lro header
accordingly.
This fixes the use case where LRO is on and rxvlan is off
(vlan stripping is off).
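
For reference, __vlan_get_protocol() walks any stacked 802.1Q/802.1ad
tags and returns the inner L3 protocol plus the header depth it
skipped; a minimal usage sketch (assuming skb->mac_len was set to
ETH_HLEN beforehand, as this patch does):

        int network_depth = 0;
        __be16 proto;

        /* network_depth ends up ETH_HLEN for untagged frames, plus
         * 4 bytes per VLAN tag otherwise
         */
        proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
        if (proto == htons(ETH_P_IP))
                ; /* IPv4 header starts at skb->data + network_depth */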

Fixes: e586b3b0baee ('net/mlx5: Ethernet Datapath files')
Signed-off-by: Gal Pressman <g...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index b6f8ebb..e7c969d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -637,24 +637,32 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
 u32 cqe_bcnt)
 {
-   struct ethhdr   *eth= (struct ethhdr *)(skb->data);
-   struct iphdr*ipv4   = (struct iphdr *)(skb->data + ETH_HLEN);
-   struct ipv6hdr  *ipv6   = (struct ipv6hdr *)(skb->data + ETH_HLEN);
+   struct ethhdr   *eth = (struct ethhdr *)(skb->data);
+   struct iphdr*ipv4;
+   struct ipv6hdr  *ipv6;
struct tcphdr   *tcp;
+   int network_depth = 0;
+   __be16 proto;
+   u16 tot_len;
 
u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA  == l4_hdr_type) ||
   (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
 
-   u16 tot_len = cqe_bcnt - ETH_HLEN;
+   skb->mac_len = ETH_HLEN;
+   proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
 
-   if (eth->h_proto == htons(ETH_P_IP)) {
-   tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
+   ipv4 = (struct iphdr *)(skb->data + network_depth);
+   ipv6 = (struct ipv6hdr *)(skb->data + network_depth);
+   tot_len = cqe_bcnt - network_depth;
+
+   if (proto == htons(ETH_P_IP)) {
+   tcp = (struct tcphdr *)(skb->data + network_depth +
sizeof(struct iphdr));
ipv6 = NULL;
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
} else {
-   tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
+   tcp = (struct tcphdr *)(skb->data + network_depth +
sizeof(struct ipv6hdr));
ipv4 = NULL;
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
-- 
2.7.4



Re: [PATCH net-next V2 6/6] net/mlx5: Add handling for port module event

2016-09-07 Thread Saeed Mahameed
On Tue, Sep 6, 2016 at 7:15 PM, Joe Perches <j...@perches.com> wrote:
> On Tue, 2016-09-06 at 19:04 +0300, Saeed Mahameed wrote:
>> From: Huy Nguyen <h...@mellanox.com>
>
> []
>
>> +void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
>> +{
>> > +   struct mlx5_eqe_port_module *module_event_eqe;
>> > +   u8 module_status;
>> > +   u8 module_num;
>> > +   u8 error_type;
>> +
>> > +   module_event_eqe = &eqe->data.port_module;
>> > +   module_num = module_event_eqe->module;
>> > +   module_status = module_event_eqe->module_status &
>> > +   PORT_MODULE_EVENT_MODULE_STATUS_MASK;
>> > +   error_type = module_event_eqe->error_type &
>> > +PORT_MODULE_EVENT_ERROR_TYPE_MASK;
>> +
>> > +   switch (module_status) {
>> > +   case MLX5_MODULE_STATUS_PLUGGED:
>> + mlx5_core_info(dev, "Module %u, status: plugged", module_num);
>
>
> Missing format '\n' line terminations

Right, will fix this.

Thanks

>
>> + break;
>> +
>> > +   case MLX5_MODULE_STATUS_UNPLUGGED:
>> > +   mlx5_core_info(dev, "Module %u, status: unplugged", 
>> > module_num);
>> > +   break;
>> +
>> > +   case MLX5_MODULE_STATUS_ERROR:
>> > +   mlx5_core_info(dev, "Module %u, status: error, %s", module_num,
>> > +  
>> > mlx5_port_event_error_type_to_string(error_type));
>> > +   break;
>> +
>> > +   default:
>> > +   mlx5_core_info(dev, "Module %u, unknown module status %x",
>> > +  module_num, module_status);
>> + }
>
> Should any of these be ratelimited?
>

Huy is checking with the HW/FW guys.  They shouldn't flood the driver
by design, but to be on the safe side, we will double-check.

Thanks,
Saeed.


Re: [iovisor-dev] [PATCH RFC 08/11] net/mlx5e: XDP fast RX drop bpf programs support

2016-09-07 Thread Saeed Mahameed
On Wed, Sep 7, 2016 at 11:55 PM, Or Gerlitz via iovisor-dev
<iovisor-...@lists.iovisor.org> wrote:
> On Wed, Sep 7, 2016 at 3:42 PM, Saeed Mahameed <sae...@mellanox.com> wrote:
>> From: Rana Shahout <ra...@mellanox.com>
>>
>> Add support for the BPF_PROG_TYPE_PHYS_DEV hook in mlx5e driver.
>>
>> When XDP is on, we make sure to change the channels' RQ type to
>> MLX5_WQ_TYPE_LINKED_LIST rather than the "striding RQ" type, to
>> ensure "page per packet".
>>
>> On XDP set, we fail if HW LRO is enabled and ask the user to turn
>> it off.  Since on ConnectX4-LX HW LRO is always on by default,
>> this will be annoying, but we prefer not to enforce LRO off from
>> the XDP set function.
>>
>> Full channels reset (close/open) is required only when setting XDP
>> on/off.
>>
>> When XDP set is called just to exchange programs, we update each
>> RQ's xdp program on the fly.  To synchronize with the current RX
>> data path activity of that RQ, we temporarily disable the RQ, make
>> sure the RX path is not running, then quickly update and re-enable
>> it:
>> - rq.state = disabled
>> - napi_synchronize
>> - xchg(rq->xdp_prog)
>> - rq.state = enabled
>> - napi_schedule // Just in case we've missed an IRQ
>>
>> Packet rate performance testing was done with pktgen sending 64B
>> packets on the TX side, and TC drop action on the RX side compared
>> to XDP fast drop.
>>
>> CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
>>
>> Comparison is done between:
>> 1. Baseline, Before this patch with TC drop action
>> 2. This patch with TC drop action
>> 3. This patch with XDP RX fast drop
>>
>> Streams    Baseline (TC drop)    TC drop     XDP fast Drop
>> ----------------------------------------------------------
>> 1          5.51Mpps              5.14Mpps    13.5Mpps
>
> This (13.5 M PPS) is less than 50% of the result we presented @ the
> XDP summit which was obtained by Rana. Please see if/how much does
> this grows if you use more sender threads, but all of them to xmit the
> same stream/flows, so we're on one ring. That (XDP with single RX ring
> getting packets from N remote TX rings) would be your canonical
> base-line for any further numbers.
>

I used N TX senders sending 48Mpps to a single RX core.
The single RX core could handle only 13.5Mpps.

The implementation here is different from the one we presented at the
summit: before, it was with striding RQ; now it is a regular
linked-list RQ (a striding RQ ring can handle 32K 64B packets, while
regular RQ rings handle only 1K).

In striding RQ we register only 16 HW descriptors for every 32K
packets, i.e. for every 32K packets we access the HW only 16 times
(roughly one descriptor write per 2K packets).  On the other hand, a
regular RQ will access the HW (register descriptors) once per packet,
i.e. we write to the HW 1K times for 1K packets.  I think this
explains the difference.

The catch here is that we can't use striding RQ for XDP, bummer!

As I said, we will have the full and final performance results in V1.
This is just an RFC with quick and dirty testing so far.




Re: [PATCH net-next V2 0/6] Mellanox 100G mlx5 DCBX CEE and firmware support

2016-09-07 Thread Saeed Mahameed
On Tue, Sep 6, 2016 at 7:04 PM, Saeed Mahameed <sae...@mellanox.com> wrote:
> Hi Dave,
>

Hi Dave, sorry to bother you, but I would like to drop this series
for now.  Huy is working to define and come up with a better
mechanism to enable/disable the new DCBX hybrid mode he is adding.

Thanks for understanding,
Saeed.

> This series from Huy provides mlx5 DCBX updates to support DCBX CEE
> API and DCBX firmware/host modes support.
>
> 1st patch ensures the dcbnl_rtnl_ops is published only when the qos 
> capability bit is on.
>
> 2nd patch adds the support for CEE interfaces into mlx5 dcbnl_rtnl_ops.
>
> 3rd patch refactors ETS query to read ETS configuration directly from 
> firmware rather
> than having a software shadow to it. The existing IEEE interfaces stays the 
> same.
>
> 4th patch adds the support for MLX5_REG_DCBX_PARAM and MLX5_REG_DCBX_APP 
> firmware
> commands to manipulate mlx5 DCBX mode.
>
> 5th patch adds the driver support for the new DCBX firmware.
> This ensures the backward compatibility versus the old and new firmware.
> With the new DCBX firmware, qos settings can be controlled by either firmware
> or software depending on the DCBX mode.
>
> 6th patch adds support for module events log.
>
> Changes since V1:
> 1. Add qos capability check
> 2. In port module events eqe structure, change rsvd_n to reserved_at_n to be 
> consistent with mlx5_ifc.h
> 3. Improve commit messages
> 4. Drop DCBX private flags patch
> 5. Add patch to check for qos capability bit check before exposing dcb 
> interfaces
> 6. Replace memset with static array initialization
>
> Thanks,
> Saeed.
>
> Huy Nguyen (6):
>   net/mlx5e: Add qos capability check
>   net/mlx5e: Support DCBX CEE API
>   net/mlx5e: Read ETS settings directly from firmware
>   net/mlx5: Add DCBX firmware commands support
>   net/mlx5e: ConnectX-4 firmware support for DCBX
>   net/mlx5: Add handling for port module event
>
>  drivers/net/ethernet/mellanox/mlx5/core/en.h   |  36 +-
>  drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 483 
> -
>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  27 +-
>  drivers/net/ethernet/mellanox/mlx5/core/eq.c   |  12 +
>  .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|   1 +
>  drivers/net/ethernet/mellanox/mlx5/core/port.c | 148 +++
>  include/linux/mlx5/device.h|  11 +
>  include/linux/mlx5/driver.h|   7 +
>  include/linux/mlx5/mlx5_ifc.h  |   3 +-
>  include/linux/mlx5/port.h  |   6 +
>  10 files changed, 698 insertions(+), 36 deletions(-)
>
> --
> 2.7.4
>


Re: [PATCH RFC 11/11] net/mlx5e: XDP TX xmit more

2016-09-07 Thread Saeed Mahameed
On Wed, Sep 7, 2016 at 9:19 PM, Eric Dumazet <eric.duma...@gmail.com> wrote:
> On Wed, 2016-09-07 at 19:57 +0300, Saeed Mahameed wrote:
>
>> Jesper has a similar idea: make the qdisc think it is under
>> pressure when the device TX ring is idle most of the time.  I
>> think his idea can come in handy here; I am not fully involved in
>> the details, maybe he can elaborate more.
>>
>> But if it works, it will be transparent to napi, and xmit more
>> will happen by design.
>
> I do not think qdisc is relevant here.
>
> Right now, skb->xmit_more is set only by qdisc layer (and pktgen tool),
> because only this layer can know if more packets are to come.
>
>
> What I am saying is that regardless of skb->xmit_more being set or not,
> (for example if no qdisc is even used)
> a NAPI driver can arm a bit asking the doorbell being sent at the end of
> NAPI.
>
> I am not saying this must be done, only that the idea could be extended
> to non XDP world, if we care enough.
>

Yes, and I am just trying to suggest ideas that do not require
communication between RX (NAPI) and TX.

The problem here is the synchronization (TX doorbell from RX), which
is not as simple as an atomic operation for some drivers.

How about RX bulking?  It could also help here: in the forwarding
case, the forwarding path would be able to process a bulk of RX SKBs
and bulk-xmit the portion of SKBs that will be forwarded.

As Jesper suggested, let's talk at Netdev 1.2 in Jesper's session
(if you are joining, of course).

Thanks
Saeed.


Re: [PATCH RFC 11/11] net/mlx5e: XDP TX xmit more

2016-09-07 Thread Saeed Mahameed
On Wed, Sep 7, 2016 at 6:32 PM, Eric Dumazet <eric.duma...@gmail.com> wrote:
> On Wed, 2016-09-07 at 18:08 +0300, Saeed Mahameed wrote:
>> On Wed, Sep 7, 2016 at 5:41 PM, Eric Dumazet <eric.duma...@gmail.com> wrote:
>> > On Wed, 2016-09-07 at 15:42 +0300, Saeed Mahameed wrote:
>> >> Previously we rang XDP SQ doorbell on every forwarded XDP packet.
>> >>
>> >> Here we introduce an xmit-more-like mechanism that will queue up more
>> >> than one packet into SQ (up to RX napi budget) w/o notifying the hardware.
>> >>
>> >> Once RX napi budget is consumed and we exit napi RX loop, we will
>> >> flush (doorbell) all XDP looped packets in case there are such.
>> >
>> > Why is this idea depends on XDP ?
>> >
>> > It looks like we could apply it to any driver having one IRQ servicing
>> > one RX and one TX, without XDP being involved.
>> >
>>
>> Yes but it is more complicated than XDP case, where the RX ring posts
>> the TX descriptors and once done
>> the RX ring hits the doorbell once for all the TX descriptors it
>> posted, and it is the only possible place to hit a doorbell
>> for XDP TX ring.
>>
>> For regular TX and RX ring sharing the same IRQ, there is no such
>> simple connection between them, and hitting a doorbell
>> from RX ring napi would race with xmit ndo function of the TX ring.
>>
>> How do you synchronize in such case ?
>> isn't the existing xmit more mechanism sufficient enough ?
>
> Only if a qdisc is present and pressure is high enough.
>
> But in a forwarding setup, we likely receive at a lower rate than the
> NIC can transmit.
>

Jesper has a similar idea: make the qdisc think it is under pressure
when the device TX ring is idle most of the time.  I think his idea
can come in handy here; I am not fully involved in the details, maybe
he can elaborate more.

But if it works, it will be transparent to napi, and xmit more will
happen by design.

> A simple cmpxchg could be used to synchronize the thing, if we really
> cared about doorbell cost. (Ie if the cost of this cmpxchg() is way
> smaller than doorbell one)
>


Re: [PATCH RFC 08/11] net/mlx5e: XDP fast RX drop bpf programs support

2016-09-07 Thread Saeed Mahameed
On Wed, Sep 7, 2016 at 7:54 PM, Tom Herbert <t...@herbertland.com> wrote:
> On Wed, Sep 7, 2016 at 7:48 AM, Saeed Mahameed
> <sae...@dev.mellanox.co.il> wrote:
>> On Wed, Sep 7, 2016 at 4:32 PM, Or Gerlitz <gerlitz...@gmail.com> wrote:
>>> On Wed, Sep 7, 2016 at 3:42 PM, Saeed Mahameed <sae...@mellanox.com> wrote:
>>>
>>>> Packet rate performance testing was done with pktgen sending 64B
>>>> packets on the TX side, and TC drop action on the RX side
>>>> compared to XDP fast drop.
>>>>
>>>> CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
>>>>
>>>> Comparison is done between:
>>>> 1. Baseline, Before this patch with TC drop action
>>>> 2. This patch with TC drop action
>>>> 3. This patch with XDP RX fast drop
>>>>
>>>> Streams    Baseline (TC drop)    TC drop     XDP fast Drop
>>>> ----------------------------------------------------------
>>>> 1          5.51Mpps              5.14Mpps    13.5Mpps
>>>> 2          11.5Mpps              10.0Mpps    25.1Mpps
>>>> 4          16.3Mpps              17.2Mpps    35.4Mpps
>>>> 8          29.6Mpps              28.2Mpps    45.8Mpps*
>>>> 16         34.0Mpps              30.1Mpps    45.8Mpps*
>>>
>>> Rana, Guys, congrat!!
>>>
>>> When you say X streams, does each stream mapped by RSS to different RX ring?
>>> or we're on the same RX ring for all rows of the above table?
>>
>> Yes, I will make this more clear in the actual submission.
>> Here we are talking about different RSS core rings.
>>
>>>
>>> In the CX3 work, we had X sender "streams" that all mapped to the same RX 
>>> ring,
>>> I don't think we went beyond one RX ring.
>>
>> Here we did.  The first row is what you are describing; the other
>> rows are the same test with an increasing number of RSS receiving
>> cores.  The xmit side is sending as many streams as possible,
>> spread as uniformly as possible across the different RSS cores on
>> the receiver.
>>
> Hi Saeed,
>
> Please report CPU utilization also. The expectation is that
> performance should scale linearly with increasing number of CPUs (i.e.
> pps/CPU_utilization should be constant).
>

Hi Tom

That was my expectation too.

We didn't do the full analysis yet; it could be that RSS was not
spreading the workload on all the cores evenly.
Those numbers are from my humble machine with quick and dirty
testing; the idea of this submission is to let folks look at the code
while we continue testing and analyzing those patches.

Anyway we will share more accurate results when we have them, with CPU
utilization statistics as well.

Thanks,
Saeed.

> Tom
>


[PATCH net-next 5/6] net/mlx5e: Add DCBX control interface

2016-08-30 Thread Saeed Mahameed
From: Huy Nguyen <h...@mellanox.com>

1. Use the setdcbx interface to set the DCBX mode to firmware or OS.
   If setdcbx is called with a mode value of zero, the DCBX mode
   is set to firmware.

2. Add the private ethtool flag "qos_with_dcbx_by_fw".  When this
   flag is "On", DCBX is forced to firmware mode.

Signed-off-by: Huy Nguyen <h...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 24 -
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 25 ++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 806f5e8..501b1e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -158,10 +158,12 @@ struct mlx5e_umr_wqe {
 
 static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
"rx_cqe_moder",
+   "qos_with_dcbx_by_fw",
 };
 
 enum mlx5e_priv_flag {
MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
+   MLX5E_PFLAG_QOS_WITH_DCBX_BY_FW = (1 << 1),
 };
 
 #define MLX5E_SET_PRIV_FLAG(priv, pflag, enable)\
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 5d1b402..6c739c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -54,6 +54,9 @@ static inline bool mlx5e_dcbnl_is_allowed(struct mlx5e_priv 
*priv)
if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
return true;
 
+   if (priv->pflags & MLX5E_PFLAG_QOS_WITH_DCBX_BY_FW)
+   return true;
+
if (mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST))
return false;
 
@@ -283,13 +286,32 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
 
 static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
 {
-   return DCB_CAP_DCBX_HOST |
+   struct mlx5e_priv *priv = netdev_priv(dev);
+   struct mlx5e_dcbx *dcbx = &priv->dcbx;
+   u8 mode = 0;
+
+   if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+   mode = DCB_CAP_DCBX_HOST;
+
+   return mode |
   DCB_CAP_DCBX_VER_IEEE |
   DCB_CAP_DCBX_VER_CEE;
 }
 
 static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 {
+   struct mlx5e_priv *priv = netdev_priv(dev);
+   struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+   if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
+   /* set dcbx to fw controlled */
+   if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+   if (!mlx5e_dcbnl_set_dcbx_mode(priv, 
MLX5E_DCBX_PARAM_VER_OPER_AUTO))
+   dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+
+   return 0;
+   }
+
if (!mlx5e_dcbnl_is_allowed(netdev_priv(dev)))
return 1;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index d1cd156..9bc26cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1460,6 +1460,25 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
return 0;
 }
 
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static int qos_with_dcbx_by_fw_handler(struct net_device *netdev, bool enable)
+{
+   struct mlx5e_priv *priv = netdev_priv(netdev);
+
+   if (!MLX5_CAP_GEN(priv->mdev, dcbx))
+   return -EPERM;
+
+   if (!enable)
+   return 0;
+
+   /* Not allow to turn on the flag if the dcbx mode is host */
+   if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+   return -EPERM;
+
+   return 0;
+}
+#endif
+
 static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
 {
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1471,6 +1490,12 @@ static int mlx5e_set_priv_flags(struct net_device 
*netdev, u32 pflags)
 MLX5E_PFLAG_RX_CQE_BASED_MODER,
 set_pflag_rx_cqe_based_moder);
 
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+   err  = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_QOS_WITH_DCBX_BY_FW,
+ qos_with_dcbx_by_fw_handler);
+#endif
+
mutex_unlock(&priv->state_lock);
return err ? -EINVAL : 0;
 }
-- 
2.7.4



[PATCH net-next 6/6] net/mlx5: Add handling for port module event

2016-08-30 Thread Saeed Mahameed
From: Huy Nguyen <h...@mellanox.com>

Add dmesg log for asynchronous port module event.

Signed-off-by: Huy Nguyen <h...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c   | 12 +++
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  1 +
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 85 ++
 include/linux/mlx5/device.h| 11 +++
 include/linux/mlx5/mlx5_ifc.h  |  3 +-
 5 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index aaca090..d775fea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -139,6 +139,8 @@ static const char *eqe_type_str(u8 type)
return "MLX5_EVENT_TYPE_PORT_CHANGE";
case MLX5_EVENT_TYPE_GPIO_EVENT:
return "MLX5_EVENT_TYPE_GPIO_EVENT";
+   case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+   return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
case MLX5_EVENT_TYPE_REMOTE_CONFIG:
return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
@@ -285,6 +287,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct 
mlx5_eq *eq)
mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
break;
 #endif
+
+   case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+   mlx5_port_module_event(dev, eqe);
+   break;
+
default:
mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
   eqe->type, eq->eqn);
@@ -480,6 +487,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
mlx5_core_is_pf(dev))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
 
+   if (MLX5_CAP_GEN(dev, port_module_event))
+   async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
+   else
+   mlx5_core_dbg(dev, "port_module_event is not set\n");
+
err = mlx5_create_map_eq(dev, >cmd_eq, MLX5_EQ_VEC_CMD,
 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 "mlx5_cmd_eq", >priv.uuari.uars[0]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 714b71b..d023d05 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -87,6 +87,7 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
 void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 unsigned long param);
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
 int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c 
b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 8a66595..e5f62bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -36,6 +36,25 @@
 #include 
 #include "mlx5_core.h"
 
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF
+enum {
+   MLX5_MODULE_STATUS_PLUGGED  = 0x1,
+   MLX5_MODULE_STATUS_UNPLUGGED  = 0x2,
+   MLX5_MODULE_STATUS_ERROR  = 0x3,
+};
+
+enum {
+   MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED  = 0x0,
+   MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE  = 0x1,
+   MLX5_MODULE_EVENT_ERROR_BUS_STUCK  = 0x2,
+   MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT  = 0x3,
+   MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST  = 0x4,
+   MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER  = 0x5,
+   MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE  = 0x6,
+   MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7,
+};
+
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 int size_in, void *data_out, int size_out,
 u16 reg_id, int arg, int write)
@@ -811,3 +830,69 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool 
*supported,
*supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap));
*enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
 }
+
+static const char *mlx5_port_event_error_type_to_string(u8 error_type)
+{
+   switch (error_type) {
+   case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
+   return "Power Budget Exceeded";
+
+   cas

[PATCH net-next 1/6] net/mlx5e: Support DCBX CEE API

2016-08-30 Thread Saeed Mahameed
From: Huy Nguyen <h...@mellanox.com>

Add DCBX CEE API interface for CX4. Configurations are stored in a
temporary structure and are applied to the card's firmware when the
CEE's setall callback function is called.

Note:
  priority group in CEE is equivalent to traffic class in ConnectX-4
  hardware spec.

  bw allocation per priority in CEE is not supported because CX4
  only supports bw allocation per traffic class.

  user priority in CEE does not have an equivalent term in CX4.
  Therefore, user priority to priority mapping in CEE is not supported.

Test: see DCBX_LinuxDriverCX4 document section 6.4
Signed-off-by: Huy Nguyen <h...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  24 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 301 -
 drivers/net/ethernet/mellanox/mlx5/core/port.c |  43 +++
 include/linux/mlx5/port.h  |   4 +
 4 files changed, 370 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9699560..6919e3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -209,6 +209,26 @@ struct mlx5e_params {
bool rx_am_enabled;
 };
 
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+struct mlx5e_cee_config {
+   /* bw pct for priority group */
+   u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
+   u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+   bool   pfc_setting[CEE_DCBX_MAX_PRIO];
+   bool   pfc_enable;
+};
+
+enum {
+   MLX5_DCB_CHG_RESET,
+   MLX5_DCB_NO_CHG,
+   MLX5_DCB_CHG_NO_RESET,
+};
+
+struct mlx5e_dcbx {
+   struct mlx5e_cee_config    cee_cfg; /* pending configuration */
+};
+#endif
+
 struct mlx5e_tstamp {
rwlock_t   lock;
struct cyclecounter    cycles;
@@ -650,6 +670,10 @@ struct mlx5e_priv {
struct mlx5e_stats stats;
struct mlx5e_tstamp    tstamp;
u16 q_counter;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+   struct mlx5e_dcbx  dcbx;
+#endif
+
const struct mlx5e_profile *profile;
void  *ppriv;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 762af16..b161dd9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -38,6 +38,9 @@
#define MLX5E_100MB (100000)
#define MLX5E_1GB   (1000000)
 
+#define MLX5E_CEE_STATE_UP    1
+#define MLX5E_CEE_STATE_DOWN  0
+
 static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
   struct ieee_ets *ets)
 {
@@ -222,13 +225,15 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
 
 static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
 {
-   return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+   return DCB_CAP_DCBX_HOST |
+  DCB_CAP_DCBX_VER_IEEE |
+  DCB_CAP_DCBX_VER_CEE;
 }
 
 static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 {
if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
-   (mode & DCB_CAP_DCBX_VER_CEE) ||
+   !(mode & DCB_CAP_DCBX_VER_CEE) ||
!(mode & DCB_CAP_DCBX_VER_IEEE) ||
!(mode & DCB_CAP_DCBX_HOST))
return 1;
@@ -304,6 +309,281 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device 
*netdev,
return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
 }
 
+static u8 mlx5e_dcbnl_setall(struct net_device *netdev)
+{
+   struct mlx5e_priv *priv = netdev_priv(netdev);
+   struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+   struct mlx5_core_dev *mdev = priv->mdev;
+   struct ieee_ets ets;
+   struct ieee_pfc pfc;
+   int err;
+   int i;
+
+   memset(&ets, 0, sizeof(ets));
+   memset(&pfc, 0, sizeof(pfc));
+
+   ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
+   for (i = 0; i < CEE_DCBX_MAX_PGS; i++) {
+   ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i];
+   ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i];
+   ets.tc_tsa[i]   = IEEE_8021QAZ_TSA_ETS;
+   ets.prio_tc[i]  = cee_cfg->prio_to_pg_map[i];
+   }
+
+   err = mlx5e_dbcnl_validate_ets(netdev, &ets);
+   if (err) {
+   netdev_err(netdev,
+  "%s, Failed to validate ETS: %d\n", __func__, err);
+   goto out;
+   }
+
+   err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+   if (err) {
+   netdev_err(netdev,
+  "%s, Failed to set ETS: %d\n", __func__, err);
+   goto out;
+   }
+
+   /* Set PFC */
+   pfc.pfc_cap = mlx5_max_tc(mdev) + 1;
+   if (!cee_cfg->

[PATCH net-next 2/6] net/mlx5e: Read ETS settings directly from firmware

2016-08-30 Thread Saeed Mahameed
From: Huy Nguyen <h...@mellanox.com>

Current implementation does not read the setting
directly from FW when ieee_getets is called.

Solution:
1. Read the ETS settings directly from firmware.
2. For tc_tsa:
   a. Initialize tc_tsa to vendor IEEE_8021QAZ_TSA_VENDOR at netdev
  creation.
   b. When reading ETS setting from FW, if the traffic class bandwidth
  is less than 100, set tc_tsa to IEEE_8021QAZ_TSA_ETS. This
  implementation solves the scenarios when the DCBX is in FW control
  and willing bit is on which means the ETS setting is dictated
  by remote switch.

Signed-off-by: Huy Nguyen <h...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  6 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 35 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 26 
 3 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 6919e3c..0d41287 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -203,9 +203,6 @@ struct mlx5e_params {
u8  toeplitz_hash_key[40];
u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
bool vlan_strip_disable;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-   struct ieee_ets ets;
-#endif
bool rx_am_enabled;
 };
 
@@ -226,6 +223,9 @@ enum {
 
 struct mlx5e_dcbx {
struct mlx5e_cee_config    cee_cfg; /* pending configuration */
+
+   /* The only setting that cannot be read from FW */
+   u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
 };
 #endif
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index b161dd9..1c10f9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -45,12 +45,31 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device 
*netdev,
   struct ieee_ets *ets)
 {
struct mlx5e_priv *priv = netdev_priv(netdev);
+   struct mlx5_core_dev *mdev = priv->mdev;
+   int i;
+   int err = 0;
 
if (!MLX5_CAP_GEN(priv->mdev, ets))
return -ENOTSUPP;
 
-   memcpy(ets, &priv->params.ets, sizeof(*ets));
-   return 0;
+   ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
+   for (i = 0; i < ets->ets_cap; i++) {
err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]);
+   if (err)
+   return err;
+   }
+
+   for (i = 0; i < ets->ets_cap; i++) {
err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]);
+   if (err)
+   return err;
+   if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC)
+   priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+   }
+
+   memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa));
+
+   return err;
 }
 
 enum {
@@ -127,7 +146,14 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, 
struct ieee_ets *ets)
if (err)
return err;
 
-   return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
+   err = mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
+
+   if (err)
+   return err;
+
+   memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa));
+
+   return err;
 }
 
 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
@@ -181,9 +207,6 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device 
*netdev,
if (err)
return err;
 
-   memcpy(&priv->params.ets, ets, sizeof(*ets));
-   priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
-
return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 03586ee..8f17928 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2875,17 +2875,23 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
 static void mlx5e_ets_init(struct mlx5e_priv *priv)
 {
int i;
+   struct ieee_ets ets;
 
-   priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
-   for (i = 0; i < priv->params.ets.ets_cap; i++) {
-   priv->params.ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
-   priv->params.ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
-   priv->params.ets.prio_tc[i] = i;
+   memset(&ets, 0, sizeof(ets));
+   ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
+   for (i = 0; i < ets.ets_cap; i++) {
+   ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+   ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+   ets.prio_tc[i] = i;
}
 
+   memcpy(pri

[PATCH net-next 4/6] net/mlx5e: ConnectX-4 firmware support for DCBX

2016-08-30 Thread Saeed Mahameed
From: Huy Nguyen <h...@mellanox.com>

DCBX by default is controlled by firmware.  In this mode, firmware is
responsible for reading/sending the TLV packets from/to the remote
partner.  When the driver is loaded, it can leave DCBX in firmware
controlled mode or switch DCBX back to host controlled mode.

This patch sets up the infrastructure to support changing
DCBX control mode.

Signed-off-by: Huy Nguyen <h...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   6 +
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 147 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  26 +---
 3 files changed, 154 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0d41287..806f5e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -222,6 +222,7 @@ enum {
 };
 
 struct mlx5e_dcbx {
+   enum mlx5_dcbx_oper_mode   mode;
struct mlx5e_cee_config    cee_cfg; /* pending configuration */
 
/* The only setting that cannot be read from FW */
@@ -810,6 +811,11 @@ extern const struct ethtool_ops mlx5e_ethtool_ops;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
 int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets 
*ets);
+int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv,
+ enum mlx5_dcbx_oper_mode mode);
+void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
+enum mlx5_dcbx_oper_mode *mode);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
 #endif
 
 #ifndef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 1c10f9c..5d1b402 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -41,6 +41,26 @@
#define MLX5E_CEE_STATE_UP    1
 #define MLX5E_CEE_STATE_DOWN  0
 
+/* If dcbx mode is non-host and qos_with_dcbx_by_fw is off, set the
+ * dcbx mode to host.
+ */
+static inline bool mlx5e_dcbnl_is_allowed(struct mlx5e_priv *priv)
+{
+   struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+   if (!MLX5_CAP_GEN(priv->mdev, dcbx))
+   return true;
+
+   if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+   return true;
+
+   if (mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST))
+   return false;
+
+   dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+   return true;
+}
+
 static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
   struct ieee_ets *ets)
 {
@@ -52,6 +72,9 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
if (!MLX5_CAP_GEN(priv->mdev, ets))
return -ENOTSUPP;
 
+   if (!mlx5e_dcbnl_is_allowed(priv))
+   return -EPERM;
+
ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
for (i = 0; i < ets->ets_cap; i++) {
err = mlx5_query_port_prio_tc(mdev, i, >prio_tc[i]);
@@ -199,6 +222,12 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device 
*netdev,
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
 
+   if (!MLX5_CAP_GEN(priv->mdev, ets))
+   return -ENOTSUPP;
+
+   if (!mlx5e_dcbnl_is_allowed(priv))
+   return -EPERM;
+
err = mlx5e_dbcnl_validate_ets(netdev, ets);
if (err)
return err;
@@ -218,6 +247,9 @@ static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev,
struct mlx5e_pport_stats *pstats = >stats.pport;
int i;
 
+   if (!mlx5e_dcbnl_is_allowed(priv))
+   return -EPERM;
+
pfc->pfc_cap = mlx5_max_tc(mdev) + 1;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause);
@@ -235,6 +267,9 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
u8 curr_pfc_en;
int ret;
 
+   if (!mlx5e_dcbnl_is_allowed(priv))
+   return -EPERM;
+
mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL);
 
if (pfc->pfc_en == curr_pfc_en)
@@ -255,6 +290,9 @@ static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
 
 static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 {
+   if (!mlx5e_dcbnl_is_allowed(netdev_priv(dev)))
+   return 1;
+
if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
!(mode & DCB_CAP_DCBX_VER_CEE) ||
!(mode & DCB_CAP_DCBX_VER_IEEE) ||
@@ -274,6 +312,9 @@ static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device 
*netdev,
int err;
int i;
 
+   if (!mlx5e_dcbnl_is_allowed(priv))
+   return -EPERM;

[PATCH net-next 0/6] Mellanox 100G mlx5 DCBX CEE and firmware support

2016-08-30 Thread Saeed Mahameed
Hi Dave,

This series from Huy provides mlx5 DCBX updates to support DCBX CEE
API and DCBX firmware/host modes support.

1st patch adds the support for CEE interfaces into mlx5 dcbnl_rtnl_ops.

2nd patch refactors the ETS query to read the ETS configuration directly from
firmware rather than keeping a software shadow of it.

3rd patch adds the support for MLX5_REG_DCBX_PARAM and MLX5_REG_DCBX_APP
firmware commands to manipulate mlx5 DCBX mode.

4th patch adds driver support for moving mlx5 DCBX mode between firmware
and host.

5th patch adds a private flag for setting special hybrid DCBX firmware/host
mode "qos_with_dcbx_by_fw".

6th patch adds support for module events log.

Thanks,
Saeed.

Huy Nguyen (6):
  net/mlx5e: Support DCBX CEE API
  net/mlx5e: Read ETS settings directly from firmware
  net/mlx5: Add DCBX firmware commands support
  net/mlx5e: ConnectX-4 firmware support for DCBX
  net/mlx5e: Add DCBX control interface
  net/mlx5: Add handling for port module event

 drivers/net/ethernet/mellanox/mlx5/core/en.h   |  38 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 505 -
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  25 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  24 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c   |  12 +
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|   1 +
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 150 ++
 include/linux/mlx5/device.h|  11 +
 include/linux/mlx5/driver.h|   7 +
 include/linux/mlx5/mlx5_ifc.h  |   3 +-
 include/linux/mlx5/port.h  |   6 +
 11 files changed, 747 insertions(+), 35 deletions(-)

-- 
2.7.4



[PATCH net-next 3/6] net/mlx5: Add DCBX firmware commands support

2016-08-30 Thread Saeed Mahameed
From: Huy Nguyen <h...@mellanox.com>

Add set/query commands for DCBX_PARAM register

Signed-off-by: Huy Nguyen <h...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 22 ++
 include/linux/mlx5/driver.h|  7 +++
 include/linux/mlx5/port.h  |  2 ++
 3 files changed, 31 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c 
b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 2f75f86..8a66595 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -548,6 +548,28 @@ int mlx5_max_tc(struct mlx5_core_dev *mdev)
return num_tc - 1;
 }
 
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out)
+{
+   u32 in[MLX5_ST_SZ_DW(dcbx_param)];
+
+   memset(in, 0, sizeof(in));
+
+   MLX5_SET(dcbx_param, in, port_number, 1);
+
+   return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+   sizeof(in), MLX5_REG_DCBX_PARAM, 0, 0);
+}
+
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in)
+{
+   u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+   MLX5_SET(dcbx_param, in, port_number, 1);
+
+   return mlx5_core_access_reg(mdev, in, sizeof(out), out,
+   sizeof(out), MLX5_REG_DCBX_PARAM, 0, 1);
+}
+
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
 {
u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5cb9fa7..b53f19c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -104,6 +104,8 @@ enum {
 enum {
MLX5_REG_QETCR   = 0x4005,
MLX5_REG_QTCT= 0x400a,
+   MLX5_REG_DCBX_PARAM  = 0x4020,
+   MLX5_REG_DCBX_APP= 0x4021,
MLX5_REG_PCAP= 0x5001,
MLX5_REG_PMTU= 0x5003,
MLX5_REG_PTYS= 0x5004,
@@ -123,6 +125,11 @@ enum {
MLX5_REG_MLCR= 0x902b,
 };
 
+enum mlx5_dcbx_oper_mode {
+   MLX5E_DCBX_PARAM_VER_OPER_HOST  = 0x0,
+   MLX5E_DCBX_PARAM_VER_OPER_AUTO  = 0x3,
+};
+
 enum {
MLX5_ATOMIC_OPS_CMP_SWAP= 1 << 0,
MLX5_ATOMIC_OPS_FETCH_ADD   = 1 << 1,
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index ddad24d..62e2259 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -159,4 +159,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool 
*supported,
 int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
 u16 offset, u16 size, u8 *data);
 
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out);
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in);
 #endif /* __MLX5_PORT_H__ */
-- 
2.7.4



Re: [PATCH] net/mlx4_en: protect ring->xdp_prog with rcu_read_lock

2016-08-30 Thread Saeed Mahameed
On Mon, Aug 29, 2016 at 8:46 PM, Tom Herbert  wrote:
> On Mon, Aug 29, 2016 at 8:55 AM, Brenden Blanco  wrote:
>> On Mon, Aug 29, 2016 at 05:59:26PM +0300, Tariq Toukan wrote:
>>> Hi Brenden,
>>>
>>> The solution direction should be XDP specific that does not hurt the
>>> regular flow.
>> An rcu_read_lock is _already_ taken for _every_ packet. This is 1/64th of

In other words "let's add a new small speed bump; we already have
plenty ahead, so why not slow down now anyway".

Every single new instruction hurts performance. In this case maybe you
are right and we won't feel any performance
impact, but that doesn't mean it is ok to do this.


>> that.
>>>
>>> On 26/08/2016 11:38 PM, Brenden Blanco wrote:
>>> >Depending on the preempt mode, the bpf_prog stored in xdp_prog may be
>>> >freed despite the use of call_rcu inside bpf_prog_put. The situation is
>>> >possible when running in PREEMPT_RCU=y mode, for instance, since the rcu
>>> >callback for destroying the bpf prog can run even during the bh handling
>>> >in the mlx4 rx path.
>>> >
>>> >Several options were considered before this patch was settled on:
>>> >
>>> >Add a napi_synchronize loop in mlx4_xdp_set, which would occur after all
>>> >of the rings are updated with the new program.
>>> >This approach has the disadvantage that as the number of rings
>>> >increases, the speed of update will slow down significantly due to
>>> >napi_synchronize's msleep(1).
>>> I prefer this option as it doesn't hurt the data path. A delay in a
>>> control command can be tolerated.
>>> >Add a new rcu_head in bpf_prog_aux, to be used by a new bpf_prog_put_bh.
>>> >The action of the bpf_prog_put_bh would be to then call bpf_prog_put
>>> >later. Those drivers that consume a bpf prog in a bh context (like mlx4)
>>> >would then use the bpf_prog_put_bh instead when the ring is up. This has
>>> >the problem of complexity, in maintaining proper refcnts and rcu lists,
>>> >and would likely be harder to review. In addition, this approach to
>>> >freeing must be exclusive with other frees of the bpf prog, for instance
>>> >a _bh prog must not be referenced from a prog array that is consumed by
>>> >a non-_bh prog.
>>> >
>>> >The placement of rcu_read_lock in this patch is functionally the same as
>>> >putting an rcu_read_lock in napi_poll. Actually doing so could be a
>>> >potentially controversial change, but would bring the implementation in
>>> >line with sk_busy_loop (though of course the nature of those two paths
>>> >is substantially different), and would also avoid future copy/paste
>>> >problems with future supporters of XDP. Still, this patch does not take
>>> >that opinionated option.
>>> So you decided to add a lock for all non-XDP flows, which are 99% of
>>> the cases.
>>> We should avoid this.
>> The whole point of rcu_read_lock architecture is to be taken in the fast
>> path. There won't be a performance impact from this patch.
>
> +1, this is nothing at all like a spinlock and really this should be
> just like any other rcu like access.
>
> Brenden, tracking down how the structure is freed needed a few steps,
> please make sure the RCU requirements are well documented. Also, I'm
> still not a fan of using xchg to set the program, seems that a lock
> could be used in that path.
>
> Thanks,
> Tom

Sorry folks, I am with Tariq on this. You can't just add a single
instruction which is only valid/needed for 1% of the use cases
to the driver's general data path, even if it were as cheap as one cpu cycle!

Let me try to suggest something:
instead of taking the rcu_read_lock for the whole
mlx4_en_process_rx_cq, we can minimize it to the XDP code path only
by double checking xdp_prog (an unprotected check followed by a
protected check inside the mlx4 XDP critical path).

i.e. instead of:

rcu_read_lock();

xdp_prog = ring->xdp_prog;

//__Do lots of non-XDP related stuff__

if (xdp_prog) {
//Do XDP magic ..
}
//__Do more of non-XDP related stuff__

rcu_read_unlock();


We can minimize it to XDP critical path only:

//Non-protected xdp_prog dereference.
if (xdp_prog) {
 rcu_read_lock();
 //Protected dereference to ring->xdp_prog
 xdp_prog = ring->xdp_prog;
 if (unlikely(!xdp_prog))
 goto unlock;
 //Do XDP magic ..

unlock:
 rcu_read_unlock();
}
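
A more complete sketch of that double-check pattern (hedged: helpers such
as mlx4_xdp_run() are illustrative placeholders, not the actual mlx4
symbols):

    /* Keep rcu_read_lock() off the common non-XDP path: take it only
     * after an unprotected peek says an XDP program is likely attached.
     */
    static bool mlx4_en_try_xdp(struct mlx4_en_rx_ring *ring, void *frame)
    {
            struct bpf_prog *xdp_prog;
            bool handled = false;

            /* unprotected peek; may race with program removal */
            if (!READ_ONCE(ring->xdp_prog))
                    return false;

            rcu_read_lock();
            /* protected re-read, stable for the critical section */
            xdp_prog = rcu_dereference(ring->xdp_prog);
            if (likely(xdp_prog))
                    handled = mlx4_xdp_run(xdp_prog, frame); /* placeholder */
            rcu_read_unlock();

            return handled;
    }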


Re: [PATCH net-next 3/6] net/mlx5: Add DCBX firmware commands support

2016-08-31 Thread Saeed Mahameed
On Wed, Aug 31, 2016 at 2:24 PM, zhuyj  wrote:
> +   u32 in[MLX5_ST_SZ_DW(dcbx_param)];
> +
> +   memset(in, 0, sizeof(in));
>
> can we replace the above with "u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0};"?

yes, we will change it in V2.

Thanks.
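
For reference, the suggested change is the idiomatic kernel
zero-initializer, e.g.:

    /* before: separate memset */
    u32 in[MLX5_ST_SZ_DW(dcbx_param)];

    memset(in, 0, sizeof(in));

    /* after (V2): zeroed at definition */
    u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0};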


[PATCH net-next 11/11] net/mlx5: Organize device list API in one place

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Hide the exposed (external) mlx5_dev_list and mlx5_intf_mutex and expose
an organized, modular API to manage and manipulate the mlx5 devices list.

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/dev.c  | 345 +
 drivers/net/ethernet/mellanox/mlx5/core/lag.c  |  24 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 270 
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  17 +-
 5 files changed, 362 insertions(+), 296 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/dev.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index dad326c..0343725 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-   fs_counters.o rl.o lag.o
+   fs_counters.o rl.o lag.o dev.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c 
b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
new file mode 100644
index 000..a9dbc28
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+static LIST_HEAD(intf_list);
+static LIST_HEAD(mlx5_dev_list);
+/* intf dev list mutex */
+static DEFINE_MUTEX(mlx5_intf_mutex);
+
+struct mlx5_device_context {
+   struct list_headlist;
+   struct mlx5_interface  *intf;
+   void   *context;
+   unsigned long   state;
+};
+
+enum {
+   MLX5_INTERFACE_ADDED,
+   MLX5_INTERFACE_ATTACHED,
+};
+
+void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+   struct mlx5_device_context *dev_ctx;
+   struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, 
priv);
+
+   if (!mlx5_lag_intf_add(intf, priv))
+   return;
+
+   dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
+   if (!dev_ctx)
+   return;
+
+   dev_ctx->intf = intf;
+   dev_ctx->context = intf->add(dev);
+   set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+   if (intf->attach)
+   set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+
+   if (dev_ctx->context) {
+   spin_lock_irq(&priv->ctx_lock);
+   list_add_tail(&dev_ctx->list, &priv->ctx_list);
+   spin_unlock_irq(&priv->ctx_lock);
+   } else {
+   kfree(dev_ctx);
+   }
+}
+
+static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
+  struct mlx5_priv *priv)
+{
+   struct mlx5_device_context *dev_ctx;
+
+   list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+   if (dev_ctx->intf == intf)
+   return dev_ctx;
+   return NULL;
+}
+
+void mlx5_remove_device(struct mlx5_i

[PATCH net-next 09/11] net/mlx5e: Implement mlx5e interface attach/detach callbacks

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Needed to support seamless and lightweight PCI/Internal error recovery.
Implement the attach/detach interface callbacks.
In the attach callback we only allocate HW resources.
In the detach callback we only deallocate HW resources.
All SW/kernel object initialization/destruction is kept in the add/remove
callbacks.

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   7 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 200 --
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  |  39 -
 3 files changed, 183 insertions(+), 63 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9699560..a9358cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -844,9 +844,12 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
 int mlx5e_close(struct net_device *netdev);
 int mlx5e_open(struct net_device *netdev);
 void mlx5e_update_stats_work(struct work_struct *work);
-void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
- const struct mlx5e_profile *profile, void *ppriv);
+struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+  const struct mlx5e_profile *profile,
+  void *ppriv);
 void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
+int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
+void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device 
*netdev);
 struct rtnl_link_stats64 *
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 03586ee..af4c61e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1883,6 +1883,9 @@ int mlx5e_close(struct net_device *netdev)
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
 
+   if (!netif_device_present(netdev))
+   return -ENODEV;
+
mutex_lock(&priv->state_lock);
err = mlx5e_close_locked(netdev);
mutex_unlock(&priv->state_lock);
@@ -3401,13 +3404,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.max_tc= MLX5E_MAX_NUM_TC,
 };
 
-void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
- const struct mlx5e_profile *profile, void *ppriv)
+struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+  const struct mlx5e_profile *profile,
+  void *ppriv)
 {
+   int nch = profile->max_nch(mdev);
struct net_device *netdev;
struct mlx5e_priv *priv;
-   int nch = profile->max_nch(mdev);
-   int err;
 
netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
nch * profile->max_tc,
@@ -3425,12 +3428,31 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 
priv->wq = create_singlethread_workqueue("mlx5e");
if (!priv->wq)
-   goto err_free_netdev;
+   goto err_cleanup_nic;
+
+   return netdev;
+
+err_cleanup_nic:
+   profile->cleanup(priv);
+   free_netdev(netdev);
+
+   return NULL;
+}
+
+int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
+{
+   const struct mlx5e_profile *profile;
+   struct mlx5e_priv *priv;
+   int err;
+
+   priv = netdev_priv(netdev);
+   profile = priv->profile;
+   clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
 
err = mlx5e_create_umr_mkey(priv);
if (err) {
mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
-   goto err_destroy_wq;
+   goto out;
}
 
err = profile->init_tx(priv);
@@ -3453,20 +3475,16 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 
mlx5e_set_dev_port_mtu(netdev);
 
-   err = register_netdev(netdev);
-   if (err) {
-   mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
-   goto err_dealloc_q_counters;
-   }
-
if (profile->enable)
profile->enable(priv);
 
-   return priv;
+   rtnl_lock();
+   if (netif_running(netdev))
+   mlx5e_open(netdev);
+   netif_device_attach(netdev);
+   rtnl_unlock();
 
-err_dealloc_q_counters:
-   mlx5e_destroy_q_counter(priv);
-   profile->cleanup_rx(priv);
+   return 0;
 
 err_close_drop_rq:
mlx5e_close_drop_rq(priv);
@@ -3477,13 +3495,8 @@ err_cleanup_tx:

[PATCH net-next 03/11] net/mlx5: Introduce attach/detach to interface API

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Add attach/detach callbacks to interface API.
This is crucial for implementing a seamless reset flow which releases the
hardware and its resources upon detach, while keeping software
structures and state (e.g. the netdev), then resets and reallocates the
needed hardware resources upon attach.
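
As a rough sketch, an interface client opting in to the lightweight path
would fill in the new callbacks next to the existing add/remove pair
(the callback symbols here are illustrative, not the exact names used
later in this series):

    static struct mlx5_interface mlx5e_interface = {
            .add    = mlx5e_add,    /* create SW state once, on register */
            .remove = mlx5e_remove, /* destroy SW state once, on unregister */
            .attach = mlx5e_attach, /* (re)allocate HW resources on reset */
            .detach = mlx5e_detach, /* release HW resources on error/reset */
    };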

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 149 +
 include/linux/mlx5/driver.h|   2 +
 2 files changed, 131 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index baba53f..108d8f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -81,6 +81,7 @@ struct mlx5_device_context {
struct list_headlist;
struct mlx5_interface  *intf;
void   *context;
+   unsigned long   state;
 };
 
 enum {
@@ -778,6 +779,11 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
return -ENOTSUPP;
 }
 
+enum {
+   MLX5_INTERFACE_ADDED,
+   MLX5_INTERFACE_ATTACHED,
+};
+
 static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv 
*priv)
 {
struct mlx5_device_context *dev_ctx;
@@ -786,12 +792,15 @@ static void mlx5_add_device(struct mlx5_interface *intf, 
struct mlx5_priv *priv)
if (!mlx5_lag_intf_add(intf, priv))
return;
 
-   dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL);
+   dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
if (!dev_ctx)
return;
 
-   dev_ctx->intf= intf;
+   dev_ctx->intf = intf;
dev_ctx->context = intf->add(dev);
+   set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+   if (intf->attach)
+   set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
 
if (dev_ctx->context) {
spin_lock_irq(&priv->ctx_lock);
@@ -802,21 +811,114 @@ static void mlx5_add_device(struct mlx5_interface *intf, 
struct mlx5_priv *priv)
}
 }
 
+static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
+  struct mlx5_priv *priv)
+{
+   struct mlx5_device_context *dev_ctx;
+
+   list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+   if (dev_ctx->intf == intf)
+   return dev_ctx;
+   return NULL;
+}
+
 static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv 
*priv)
 {
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, 
priv);
 
-   list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-   if (dev_ctx->intf == intf) {
-   spin_lock_irq(&priv->ctx_lock);
-   list_del(&dev_ctx->list);
-   spin_unlock_irq(&priv->ctx_lock);
+   dev_ctx = mlx5_get_device(intf, priv);
+   if (!dev_ctx)
+   return;
+
+   spin_lock_irq(&priv->ctx_lock);
+   list_del(&dev_ctx->list);
+   spin_unlock_irq(&priv->ctx_lock);
+
+   if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+   intf->remove(dev, dev_ctx->context);
 
-   intf->remove(dev, dev_ctx->context);
-   kfree(dev_ctx);
+   kfree(dev_ctx);
+}
+
+static void mlx5_attach_interface(struct mlx5_interface *intf, struct 
mlx5_priv *priv)
+{
+   struct mlx5_device_context *dev_ctx;
+   struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, 
priv);
+
+   dev_ctx = mlx5_get_device(intf, priv);
+   if (!dev_ctx)
+   return;
+
+   if (intf->attach) {
+   if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
return;
-   }
+   intf->attach(dev, dev_ctx->context);
+   set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+   } else {
+   if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+   return;
+   dev_ctx->context = intf->add(dev);
+   set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+   }
+}
+
+static void mlx5_attach_device(struct mlx5_core_dev *dev)
+{
+   struct mlx5_priv *priv = &dev->priv;
+   struct mlx5_interface *intf;
+
+   mutex_lock(&mlx5_intf_mutex);
+   list_for_each_entry(intf, &intf_list, list)
+   mlx5_attach_interface(intf, priv);
+   mutex_unlock(&mlx5_intf_mutex);
+}
+
+static void mlx5_detach_interface(struct mlx5_interface *intf, struct 
mlx5_priv *priv)
+{
+   struct mlx5_device_context *dev_ctx;
+   struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, 
priv);
+
+   dev_ctx = mlx5_get_devic

[PATCH net-next 04/11] net/mlx5: Split the load/unload flow into hardware and software flows

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Gather all software context creation/destruction into one function and call
it once, on the first load and on the last unload.
The load/unload functions will now receive an indication of whether we need
to create/destroy the software contexts.
On internal/PCI error, do the unload/load flows without releasing the
software objects.
In this way we preserve the SW core state, which helps us restore the old
driver state after a PCI error/shutdown.
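
Schematically, the split looks like this (a simplified sketch; setup_hw()
and teardown_hw() are hypothetical stand-ins for the real HW bring-up and
teardown sequences):

    static int mlx5_load_one(struct mlx5_core_dev *dev,
                             struct mlx5_priv *priv, bool boot)
    {
            int err;

            err = setup_hw(dev);            /* runs on every load */
            if (err)
                    return err;

            /* SW contexts are created on the first (boot) load only
             * and survive error-recovery reloads untouched.
             */
            if (boot) {
                    err = mlx5_init_once(dev, priv);
                    if (err) {
                            teardown_hw(dev);
                            return err;
                    }
            }

            return 0;
    }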

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 171 -
 1 file changed, 107 insertions(+), 64 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 108d8f2..966647f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1093,8 +1093,76 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv)
debugfs_remove(priv->dbg_root);
 }
 
-#define MLX5_IB_MOD "mlx5_ib"
-static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+   struct pci_dev *pdev = dev->pdev;
+   int err;
+
+   err = mlx5_query_hca_caps(dev);
+   if (err) {
+   dev_err(&pdev->dev, "query hca failed\n");
+   goto out;
+   }
+
+   err = mlx5_query_board_id(dev);
+   if (err) {
+   dev_err(&pdev->dev, "query board id failed\n");
+   goto out;
+   }
+
+   err = mlx5_eq_init(dev);
+   if (err) {
+   dev_err(&pdev->dev, "failed to initialize eq\n");
+   goto out;
+   }
+
+   MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
+
+   err = mlx5_init_cq_table(dev);
+   if (err) {
+   dev_err(&pdev->dev, "failed to initialize cq table\n");
+   goto err_eq_cleanup;
+   }
+
+   mlx5_init_qp_table(dev);
+
+   mlx5_init_srq_table(dev);
+
+   mlx5_init_mkey_table(dev);
+
+   err = mlx5_init_rl_table(dev);
+   if (err) {
+   dev_err(&pdev->dev, "Failed to init rate limiting\n");
+   goto err_tables_cleanup;
+   }
+
+   return 0;
+
+err_tables_cleanup:
+   mlx5_cleanup_mkey_table(dev);
+   mlx5_cleanup_srq_table(dev);
+   mlx5_cleanup_qp_table(dev);
+   mlx5_cleanup_cq_table(dev);
+
+err_eq_cleanup:
+   mlx5_eq_cleanup(dev);
+
+out:
+   return err;
+}
+
+static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
+{
+   mlx5_cleanup_rl_table(dev);
+   mlx5_cleanup_mkey_table(dev);
+   mlx5_cleanup_srq_table(dev);
+   mlx5_cleanup_qp_table(dev);
+   mlx5_cleanup_cq_table(dev);
+   mlx5_eq_cleanup(dev);
+}
+
+static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
+bool boot)
 {
struct pci_dev *pdev = dev->pdev;
int err;
@@ -1127,12 +1195,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv)
goto out_err;
}
 
-   mlx5_pagealloc_init(dev);
-
err = mlx5_core_enable_hca(dev, 0);
if (err) {
dev_err(>dev, "enable hca failed\n");
-   goto err_pagealloc_cleanup;
+   goto err_cmd_cleanup;
}
 
err = mlx5_core_set_issi(dev);
@@ -1185,34 +1251,21 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv)
 
mlx5_start_health_poll(dev);
 
-   err = mlx5_query_hca_caps(dev);
-   if (err) {
-   dev_err(>dev, "query hca failed\n");
-   goto err_stop_poll;
-   }
-
-   err = mlx5_query_board_id(dev);
-   if (err) {
-   dev_err(>dev, "query board id failed\n");
+   if (boot && mlx5_init_once(dev, priv)) {
+   dev_err(>dev, "sw objs init failed\n");
goto err_stop_poll;
}
 
err = mlx5_enable_msix(dev);
if (err) {
dev_err(>dev, "enable msix failed\n");
-   goto err_stop_poll;
-   }
-
-   err = mlx5_eq_init(dev);
-   if (err) {
-   dev_err(>dev, "failed to initialize eq\n");
-   goto disable_msix;
+   goto err_cleanup_once;
}
 
err = mlx5_alloc_uuars(dev, &priv->uuari);
if (err) {
dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
-   goto err_eq_cleanup;
+   goto err_disable_msix;
}
 
err = mlx5_start_eqs(dev);
@@ -1228,15 +1281,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv)
}

[PATCH net-next 07/11] net/mlx5: Align sriov/eswitch modules with the new load/unload flow.

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Init/cleanup sriov/eswitch in the core software context init/cleanup
flows.
Attach/detach sriov/eswitch in the core load/unload flows.

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c |  2 --
 drivers/net/ethernet/mellanox/mlx5/core/main.c| 42 +--
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c   |  4 +--
 3 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 24058894..015f1bfe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1654,7 +1654,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->enabled_vports = 0;
esw->mode = SRIOV_NONE;
 
-   mlx5_eswitch_attach(esw);
dev->priv.eswitch = esw;
return 0;
 abort:
@@ -1675,7 +1674,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
esw_info(esw->dev, "cleanup\n");
 
-   mlx5_eswitch_detach(esw);
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
kfree(esw->l2_table.bitmap);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 966647f..16660cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1136,8 +1136,30 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv)
goto err_tables_cleanup;
}
 
+#ifdef CONFIG_MLX5_CORE_EN
+   err = mlx5_eswitch_init(dev);
+   if (err) {
+   dev_err(>dev, "Failed to init eswitch %d\n", err);
+   goto err_rl_cleanup;
+   }
+#endif
+
+   err = mlx5_sriov_init(dev);
+   if (err) {
+   dev_err(>dev, "Failed to init sriov %d\n", err);
+   goto err_eswitch_cleanup;
+   }
+
return 0;
 
+err_eswitch_cleanup:
+#ifdef CONFIG_MLX5_CORE_EN
+   mlx5_eswitch_cleanup(dev->priv.eswitch);
+
+err_rl_cleanup:
+#endif
+   mlx5_cleanup_rl_table(dev);
+
 err_tables_cleanup:
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
@@ -1153,6 +1175,10 @@ out:
 
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
+   mlx5_sriov_cleanup(dev);
+#ifdef CONFIG_MLX5_CORE_EN
+   mlx5_eswitch_cleanup(dev->priv.eswitch);
+#endif
mlx5_cleanup_rl_table(dev);
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
@@ -1293,14 +1319,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv,
}
 
 #ifdef CONFIG_MLX5_CORE_EN
-   err = mlx5_eswitch_init(dev);
-   if (err) {
-   dev_err(>dev, "eswitch init failed %d\n", err);
-   goto err_reg_dev;
-   }
+   mlx5_eswitch_attach(dev->priv.eswitch);
 #endif
 
-   err = mlx5_sriov_init(dev);
+   err = mlx5_sriov_attach(dev);
if (err) {
dev_err(>dev, "sriov init failed %d\n", err);
goto err_sriov;
@@ -1324,11 +1346,11 @@ out:
return 0;
 
 err_reg_dev:
-   mlx5_sriov_cleanup(dev);
+   mlx5_sriov_detach(dev);
 
 err_sriov:
 #ifdef CONFIG_MLX5_CORE_EN
-   mlx5_eswitch_cleanup(dev->priv.eswitch);
+   mlx5_eswitch_detach(dev->priv.eswitch);
 #endif
mlx5_cleanup_fs(dev);
 
@@ -1394,9 +1416,9 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv,
if (mlx5_device_registered(dev))
mlx5_detach_device(dev);
 
-   mlx5_sriov_cleanup(dev);
+   mlx5_sriov_detach(dev);
 #ifdef CONFIG_MLX5_CORE_EN
-   mlx5_eswitch_cleanup(dev->priv.eswitch);
+   mlx5_eswitch_detach(dev->priv.eswitch);
 #endif
mlx5_cleanup_fs(dev);
mlx5_irq_clear_affinity_hints(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c 
b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index f4f02b6..e086277 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -222,7 +222,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
if (!sriov->vfs_ctx)
return -ENOMEM;
 
-   return mlx5_sriov_attach(dev);
+   return 0;
 }
 
 void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
@@ -231,6 +231,6 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
 
if (!mlx5_core_is_pf(dev))
return;
-   mlx5_sriov_detach(dev);
+
kfree(sriov->vfs_ctx);
 }
-- 
2.7.4



[PATCH net-next 06/11] net/mlx5: Implement eswitch attach/detach flows

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Needed for lightweight and modular internal/pci error handling.
Implement eswitch attach function which allocates/starts hw related
resources.
Implement eswitch detach function which releases/stops hw related
resources.
The init/cleanup functions only handle eswitch software context allocation
and destruction.

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 24 ---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  2 ++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 1014305..24058894 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1559,6 +1559,25 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
 }
 
+void mlx5_eswitch_attach(struct mlx5_eswitch *esw)
+{
+   if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+   MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+   return;
+
+   esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+   /* VF Vports will be enabled when SRIOV is enabled */
+}
+
+void mlx5_eswitch_detach(struct mlx5_eswitch *esw)
+{
+   if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+   MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+   return;
+
+   esw_disable_vport(esw, 0);
+}
+
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 {
int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
@@ -1635,9 +1654,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->enabled_vports = 0;
esw->mode = SRIOV_NONE;
 
+   mlx5_eswitch_attach(esw);
dev->priv.eswitch = esw;
-   esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
-   /* VF Vports will be enabled when SRIOV is enabled */
return 0;
 abort:
if (esw->work_queue)
@@ -1656,8 +1674,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
return;
 
esw_info(esw->dev, "cleanup\n");
-   esw_disable_vport(esw, 0);
 
+   mlx5_eswitch_detach(esw);
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
kfree(esw->l2_table.bitmap);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index a961409..48c273d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -204,6 +204,8 @@ struct mlx5_eswitch {
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
+void mlx5_eswitch_attach(struct mlx5_eswitch *esw);
+void mlx5_eswitch_detach(struct mlx5_eswitch *esw);
 void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
-- 
2.7.4



[PATCH net-next 05/11] net/mlx5: Implement SRIOV attach/detach flows

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Needed for lightweight and modular internal/pci error handling.
Implement the sriov attach function, which enables the pre-saved number of
VFs on the device side.
Implement the sriov detach function, which disables the current VFs on the
device side.
The init/cleanup functions only handle sriov software context allocation and
destruction.

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c| 29 --
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 7dd14cf..04b719a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -91,6 +91,8 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
 int mlx5_sriov_init(struct mlx5_core_dev *dev);
 void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
+int mlx5_sriov_attach(struct mlx5_core_dev *dev);
+void mlx5_sriov_detach(struct mlx5_core_dev *dev);
 int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
 bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c 
b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 72a8215..f4f02b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -188,6 +188,25 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int 
num_vfs)
return err ? err : num_vfs;
 }
 
+int mlx5_sriov_attach(struct mlx5_core_dev *dev)
+{
+   struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+   if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
+   return 0;
+
+   /* If sriov VFs exist in PCI level, enable them in device level */
+   return mlx5_device_enable_sriov(dev, sriov->num_vfs);
+}
+
+void mlx5_sriov_detach(struct mlx5_core_dev *dev)
+{
+   if (!mlx5_core_is_pf(dev))
+   return;
+
+   mlx5_device_disable_sriov(dev);
+}
+
 int mlx5_sriov_init(struct mlx5_core_dev *dev)
 {
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
@@ -203,12 +222,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
if (!sriov->vfs_ctx)
return -ENOMEM;
 
-   /* If sriov VFs exist in PCI level, enable them in device level */
-   if (!sriov->num_vfs)
-   return 0;
-
-   mlx5_device_enable_sriov(dev, sriov->num_vfs);
-   return 0;
+   return mlx5_sriov_attach(dev);
 }
 
 void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
@@ -217,7 +231,6 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
 
if (!mlx5_core_is_pf(dev))
return;
-
-   mlx5_device_disable_sriov(dev);
+   mlx5_sriov_detach(dev);
kfree(sriov->vfs_ctx);
 }
-- 
2.7.4



[PATCH net-next 00/11] Mellanox 100G mlx5 seamless error recovery

2016-09-09 Thread Saeed Mahameed
Hi Dave,

This series from Mohamad improves the driver load/unload flows
to seamlessly handle pci errors and device internal errors recovery
reset flows.

Current pci and internal error handling is too heavy and is done
with a full restart of the driver by unregistering mlx5 interfaces
(mlx5e netdevs and mlx5_ib), which will cause losing all the current
interfaces and mlx5 core configurations.

To improve this, we add new callback functions of mlx5 interface 
object (attach/detach) to be called upon reset flows when errors are
detected rather than calling register and unregister interfaces.

On their side, interfaces such as mlx5e and mlx5_ib can choose to implement
those callbacks; if not, the old heavy reset will be used for that interface.

For non-interface mlx5 modules such as sriov and eswitch, we refactored
and reorganized the code in a way that the software state objects are created
only once on driver load. Those software state objects are kept upon reset
recovery flows and only freed once on driver unload. On seamless soft reset
flows, only hardware resources are released on stop and re-allocated on start
according to the current soft state.

In this series only the mlx5e interface implements attach/detach callbacks,
so the netdevice is kept alive across resets. On detach only hardware
resources are released and the netdevice is marked as detached to the stack.
Once attached again it re-allocates the hardware resources according to the
current netdevice state, and all the configurations and the software state
are kept or restored after recovery.
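
The recovery-time dispatch then boils down to something like this sketch
(simplified from the actual series; see patch 03/11 for the real code):

    static void mlx5_attach_interface(struct mlx5_interface *intf,
                                      struct mlx5_device_context *dev_ctx,
                                      struct mlx5_core_dev *dev)
    {
            if (intf->attach)
                    intf->attach(dev, dev_ctx->context);  /* lightweight */
            else
                    dev_ctx->context = intf->add(dev);    /* old heavy path */
    }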

Note: I will be out of the office all next week. In case any updates
or a V2 are required, Tariq will post the new series; I hope that is ok.

Thanks,
Saeed.

Mohamad Haj Yahia (11):
  net/mlx5: Skip waiting for vf pages in internal error
  net/mlx5: SRIOV core code refactoring
  net/mlx5: Introduce attach/detach to interface API
  net/mlx5: Split the load/unload flow into hardware and software flows
  net/mlx5: Implement SRIOV attach/detach flows
  net/mlx5: Implement eswitch attach/detach flows
  net/mlx5: Align sriov/eswitch modules with the new load/unload flow.
  net/mlx5: Implement vports admin state backup/restore
  net/mlx5e: Implement mlx5e interface attach/detach callbacks
  net/mlx5e: Restore vlan filter after seamless reset
  net/mlx5: Organize device list API in one place

 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/dev.c  | 345 +
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   7 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|  38 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 200 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  39 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 271 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  18 +-
 drivers/net/ethernet/mellanox/mlx5/core/lag.c  |  24 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 416 -
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  21 +-
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c|  13 +-
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c| 217 +--
 include/linux/mlx5/driver.h|   4 +-
 14 files changed, 1000 insertions(+), 615 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/dev.c

-- 
2.7.4



[PATCH net-next 01/11] net/mlx5: Skip waiting for vf pages in internal error

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

In case the device is in an internal error state there is no need to wait for
VF pages since they will be reclaimed manually later in the unload flow.

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 673a7c9..d458515 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -326,6 +326,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 {
struct fw_page *fwp;
struct rb_node *p;
+   u32 func_id;
u32 npages;
u32 i = 0;
 
@@ -334,12 +335,16 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 
/* No hard feelings, we want our pages back! */
npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+   func_id = MLX5_GET(manage_pages_in, in, function_id);
 
p = rb_first(&dev->priv.page_root);
while (p && i < npages) {
fwp = rb_entry(p, struct fw_page, rb_node);
-   MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr);
p = rb_next(p);
+   if (fwp->func_id != func_id)
+   continue;
+
+   MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr);
i++;
}
 
@@ -540,6 +545,12 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
unsigned long end = jiffies + 
msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
int prev_vfs_pages = dev->priv.vfs_pages;
 
+   /* In case of internal error we will free the pages manually later */
+   if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+   mlx5_core_warn(dev, "Skipping wait for vf pages stage");
+   return 0;
+   }
+
mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
  dev->priv.name);
while (dev->priv.vfs_pages) {
-- 
2.7.4



[PATCH net-next 02/11] net/mlx5: SRIOV core code refactoring

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Simplify the code and make it look modular and symmetric.
Split SRIOV enable/disable into two levels: device level and PCI level.
When the user enables/disables SRIOV (via the sriov_configure driver
callback) we will enable/disable both device and PCI SRIOV.
On driver load/unload we will enable/disable (on demand) only device
SRIOV, while keeping the PCI SRIOV enabled for the next driver load.
On internal/PCI error, VFs will be kept enabled on PCI and the reset
is done only at the device level.
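
In other words, the user-driven path touches both levels while driver
load/unload touches only the device level, roughly (a sketch; the wrapper
name is illustrative):

    static int sriov_user_enable(struct mlx5_core_dev *dev, int num_vfs)
    {
            int err;

            /* device level: enable the VF HCAs */
            err = mlx5_device_enable_sriov(dev, num_vfs);
            if (err)
                    return err;

            /* PCI level: stays enabled across driver reloads */
            return pci_enable_sriov(dev->pdev, num_vfs);
    }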

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  12 +-
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|   2 +
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c| 216 +
 include/linux/mlx5/driver.h|   2 -
 4 files changed, 101 insertions(+), 131 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c132ef1..baba53f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1180,8 +1180,7 @@ out:
return 0;
 
 err_sriov:
-   if (mlx5_sriov_cleanup(dev))
-   dev_err(>pdev->dev, "sriov cleanup failed\n");
+   mlx5_sriov_cleanup(dev);
 
 #ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -1241,19 +1240,14 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv)
 {
int err = 0;
 
-   err = mlx5_sriov_cleanup(dev);
-   if (err) {
-   dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n",
-__func__);
-   return err;
-   }
-
mutex_lock(&dev->intf_state_mutex);
if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
 __func__);
goto out;
}
+
+   mlx5_sriov_cleanup(dev);
mlx5_unregister_device(dev);
 #ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 714b71b..7dd14cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -89,6 +89,8 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum 
mlx5_dev_event event,
 unsigned long param);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
+int mlx5_sriov_init(struct mlx5_core_dev *dev);
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
 int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
 bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c 
b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 78e7892..72a8215 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -44,108 +44,132 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
return !!sriov->num_vfs;
 }
 
-static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs)
+static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 {
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int vf;
 
-   for (vf = 1; vf <= num_vfs; vf++) {
-   err = mlx5_core_enable_hca(dev, vf);
+   if (sriov->enabled_vfs) {
+   mlx5_core_warn(dev,
+  "failed to enable SRIOV on device, already 
enabled with %d vfs\n",
+  sriov->enabled_vfs);
+   return -EBUSY;
+   }
+
+#ifdef CONFIG_MLX5_CORE_EN
+   err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, 
SRIOV_LEGACY);
+   if (err) {
+   mlx5_core_warn(dev,
+  "failed to enable eswitch SRIOV (%d)\n", err);
+   return err;
+   }
+#endif
+
+   for (vf = 0; vf < num_vfs; vf++) {
+   err = mlx5_core_enable_hca(dev, vf + 1);
if (err) {
-   mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1);
-   } else {
-   sriov->vfs_ctx[vf - 1].enabled = 1;
-   mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 
1);
+   mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", 
vf, err);
+   continue;
}
+   sriov->vfs_ctx[vf].enabled = 1;
+

[PATCH net-next 08/11] net/mlx5: Implement vports admin state backup/restore

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

Save the user configuration in the vport struct.
Restore the saved configuration upon vport enable.

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 249 ++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  16 +-
 2 files changed, 124 insertions(+), 141 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 015f1bfe..654b76f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -116,57 +116,6 @@ static int arm_vport_context_events_cmd(struct 
mlx5_core_dev *dev, u16 vport,
 }
 
 /* E-Switch vport context HW commands */
-static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport,
-  u32 *out, int outlen)
-{
-   u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {0};
-
-   MLX5_SET(query_nic_vport_context_in, in, opcode,
-MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
-   MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
-   if (vport)
-   MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
-   return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
-}
-
-static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
-u16 *vlan, u8 *qos)
-{
-   u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {0};
-   int err;
-   bool cvlan_strip;
-   bool cvlan_insert;
-
-   *vlan = 0;
-   *qos = 0;
-
-   if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
-   !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
-   return -ENOTSUPP;
-
-   err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out));
-   if (err)
-   goto out;
-
-   cvlan_strip = MLX5_GET(query_esw_vport_context_out, out,
-  esw_vport_context.vport_cvlan_strip);
-
-   cvlan_insert = MLX5_GET(query_esw_vport_context_out, out,
-   esw_vport_context.vport_cvlan_insert);
-
-   if (cvlan_strip || cvlan_insert) {
-   *vlan = MLX5_GET(query_esw_vport_context_out, out,
-esw_vport_context.cvlan_id);
-   *qos = MLX5_GET(query_esw_vport_context_out, out,
-   esw_vport_context.cvlan_pcp);
-   }
-
-   esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n",
- vport, *vlan, *qos);
-out:
-   return err;
-}
-
 static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
void *in, int inlen)
 {
@@ -921,7 +870,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch 
*esw, u32 vport_num)
esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, 
all_multi=%d\n",
  vport_num, promisc_all, promisc_mc);
 
-   if (!vport->trusted || !vport->enabled) {
+   if (!vport->info.trusted || !vport->enabled) {
promisc_uc = 0;
promisc_mc = 0;
promisc_all = 0;
@@ -1257,30 +1206,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch 
*esw,
struct mlx5_vport *vport)
 {
struct mlx5_flow_spec *spec;
-   u8 smac[ETH_ALEN];
int err = 0;
u8 *smac_v;
 
-   if (vport->spoofchk) {
-   err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, 
smac);
-   if (err) {
-   esw_warn(esw->dev,
-"vport[%d] configure ingress rules failed, 
query smac failed, err(%d)\n",
-vport->vport, err);
-   return err;
-   }
+   if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
+   mlx5_core_warn(esw->dev,
+  "vport[%d] configure ingress rules failed, 
illegal mac with spoofchk\n",
+  vport->vport);
+   return -EPERM;
 
-   if (!is_valid_ether_addr(smac)) {
-   mlx5_core_warn(esw->dev,
-  "vport[%d] configure ingress rules 
failed, illegal mac with spoofchk\n",
-  vport->vport);
-   return -EPERM;
-   }
}
 
esw_vport_cleanup_ingress_rules(esw, vport);
 
-   if (!vport->vlan && !vport->qos && !vport->spoofchk) {
+   if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {

[PATCH net-next 10/11] net/mlx5e: Restore vlan filter after seamless reset

2016-09-09 Thread Saeed Mahameed
From: Mohamad Haj Yahia <moha...@mellanox.com>

When detaching the mlx5e interface clear all the vlans rules from the
vlan flow table.
When attaching it back restore all the active vlans rules to the HW.

Signed-off-by: Mohamad Haj Yahia <moha...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 38 +
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 1587a9f..36fbc6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -294,6 +294,36 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, 
__always_unused __be16 proto,
return 0;
 }
 
+static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
+{
+   int i;
+
+   mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+   for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
+   mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
+   }
+
+   if (priv->fs.vlan.filter_disabled &&
+   !(priv->netdev->flags & IFF_PROMISC))
+   mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
+}
+
+static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
+{
+   int i;
+
+   mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+   for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
+   mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
+   }
+
+   if (priv->fs.vlan.filter_disabled &&
+   !(priv->netdev->flags & IFF_PROMISC))
+   mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
+}
+
 #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
@@ -1024,14 +1054,10 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv 
*priv)
if (err)
goto err_free_g;
 
-   err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
-   if (err)
-   goto err_destroy_vlan_flow_groups;
+   mlx5e_add_vlan_rules(priv);
 
return 0;
 
-err_destroy_vlan_flow_groups:
-   mlx5e_destroy_groups(ft);
 err_free_g:
kfree(ft->g);
 err_destroy_vlan_table:
@@ -1043,6 +1069,7 @@ err_destroy_vlan_table:
 
 static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
 {
+   mlx5e_del_vlan_rules(priv);
mlx5e_destroy_flow_table(&priv->fs.vlan.ft);
 }
 
@@ -1100,7 +1127,6 @@ err_destroy_arfs_tables:
 
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
 {
-   mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
mlx5e_destroy_vlan_table(priv);
mlx5e_destroy_l2_table(priv);
mlx5e_destroy_ttc_table(priv);
-- 
2.7.4



Re: [iovisor-dev] README: [PATCH RFC 11/11] net/mlx5e: XDP TX xmit more

2016-09-09 Thread Saeed Mahameed
On Fri, Sep 9, 2016 at 6:22 AM, Alexei Starovoitov via iovisor-dev
<iovisor-...@lists.iovisor.org> wrote:
> On Thu, Sep 08, 2016 at 10:11:47AM +0200, Jesper Dangaard Brouer wrote:
>>
>> I'm sorry but I have a problem with this patch!
>
> is it because the variable is called 'xdp_doorbell'?
> Frankly I see nothing scary in this patch.
> It extends existing code by adding a flag to ring doorbell or not.
> The end of rx napi is used as an obvious heuristic to flush the pipe.
> Looks pretty generic to me.
> The same code can be used for non-xdp as well once we figure out
> good algorithm for xmit_more in the stack.
>
>> Looking at this patch, I want to bring up a fundamental architectural
>> concern with the development direction of XDP transmit.
>>
>>
>> What you are trying to implement, with delaying the doorbell, is
>> basically TX bulking for TX_XDP.
>>
>>  Why not implement a TX bulking interface directly instead?!?
>>
>> Yes, the tailptr/doorbell is the most costly operation, but why not
>> also take advantage of the benefits of bulking for other parts of the
>> code? (benefit is smaller, by every cycles counts in this area)
>>
>> This whole XDP exercise is about avoiding having a transaction cost per
>> packet, that reads "bulking" or "bundling" of packets, where possible.
>>
>>  Lets do bundling/bulking from the start!

Jesper, what we did here is also bulking: instead of bulking in a
temporary list in the driver,
we list the packets in the HW and once done we transmit all at once via the
xdp_doorbell indication.

I agree with you that we can take advantage and improve the icache by
bulking first in software and then queueing all at once in the hw and
ringing one doorbell.

but I also agree with Alexei that this will introduce extra
pointer/list handling
in the driver and we need to do the comparison between both approaches
before we decide which is better.

this must be marked as future work, not required from the start.
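
For the record, the deferral itself is tiny; roughly (a sketch with
illustrative names, not the exact mlx5e symbols):

    struct xdp_sq {
            bool db_pending;
            /* HW queue state elided */
    };

    static void xdp_tx_frame(struct xdp_sq *sq, void *frame)
    {
            post_wqe(sq, frame);       /* queue descriptor, no doorbell yet */
            sq->db_pending = true;
    }

    static void rx_napi_complete(struct xdp_sq *sq)
    {
            if (sq->db_pending) {
                    ring_doorbell(sq); /* one MMIO write per NAPI poll */
                    sq->db_pending = false;
            }
    }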

>
> mlx4 already does bulking and this proposed mlx5 set of patches
> does bulking as well.
> See nothing wrong about it. RX side processes the packets and
> when it's done it tells TX to xmit whatever it collected.
>
>> The reason behind the xmit_more API is that we could not change the
>> API of all the drivers.  And we found that calling an explicit NDO
>> flush came at a cost (only approx 7 ns IIRC), but it is still a cost that
>> would hit the common single packet use-case.
>>
>> It should be really easy to build a bundle of packets that need XDP_TX
>> action, especially given you only have a single destination "port".
>> And then you XDP_TX send this bundle before mlx5_cqwq_update_db_record.
>
> not sure what are you proposing here?
> Sounds like you want to extend it to multi port in the future?
> Sure. The proposed code is easily extendable.
>
> Or you want to see something like a link list of packets
> or an array of packets that RX side is preparing and then
> send the whole array/list to TX port?
> I don't think that would be efficient, since it would mean
> unnecessary copy of pointers.
>
>> In the future, XDP need to support XDP_FWD forwarding of packets/pages
>> out other interfaces.  I also want bulk transmit from day-1 here.  It
>> is slightly more tricky to sort packets for multiple outgoing
>> interfaces efficiently in the pool loop.
>
> I don't think so. Multi port is natural extension to this set of patches.
> With multi port the end of RX will tell multiple ports (that were
> used to tx) to ring the bell. Pretty trivial and doesn't involve any
> extra arrays or link lists.
>
>> But the mSwitch[1] article actually already solved this destination
>> sorting.  Please read[1] section 3.3 "Switch Fabric Algorithm" for
>> understanding the next steps, for a smarter data structure, when
>> starting to have more TX "ports".  And perhaps align your single
>> XDP_TX destination data structure to this future development.
>>
>> [1] http://info.iet.unipi.it/~luigi/papers/20150617-mswitch-paper.pdf
>
> I don't see how this particular paper applies to the existing kernel code.
> It's great to take ideas from research papers, but real code is different.
>
>> --Jesper
>> (top post)
>
> since when it's ok to top post?
>
>> On Wed,  7 Sep 2016 15:42:32 +0300 Saeed Mahameed <sae...@mellanox.com> 
>> wrote:
>>
>> > Previously we rang XDP SQ doorbell on every forwarded XDP packet.
>> >
>> > Here we introduce a xmit more like mechanism that will queue up more
>> > than one packet into SQ (up to RX napi budget) w/o notifyin

Re: [PATCH RFC 00/11] mlx5 RX refactoring and XDP support

2016-09-09 Thread Saeed Mahameed
On Wed, Sep 7, 2016 at 3:42 PM, Saeed Mahameed <sae...@mellanox.com> wrote:
> Hi All,
>
> This patch set introduces some important data path RX refactoring
> addressing mlx5e memory allocation/management improvements and XDP support.
>
> Submitting as RFC since we would like to get an early feedback, while we
> continue reviewing testing and complete the performance analysis in house.
>

Hi,

I am going to be out of the office for the whole of next week with only
sporadic mail access.
I will do my best to be as active as possible, but in the meanwhile
Tariq and Or will handle any questions regarding this series or mlx5
in general while I am away.

Thanks,
Saeed.


Re: [PATCH] mlx5: Add ndo_poll_controller() implementation

2016-09-24 Thread Saeed Mahameed
On Fri, Sep 23, 2016 at 11:13 PM, Calvin Owens  wrote:
> This implements ndo_poll_controller in net_device_ops for mlx5, which is
> necessary to use netconsole with this driver.
>
> Signed-off-by: Calvin Owens 
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 20 
>  1 file changed, 20 insertions(+)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
> b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 2459c7f..439476f 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -2786,6 +2786,20 @@ static void mlx5e_tx_timeout(struct net_device *dev)
> schedule_work(&priv->tx_timeout_work);
>  }
>
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> +/* Fake "interrupt" called by netpoll (eg netconsole) to send skbs without
> + * reenabling interrupts.
> + */
> +static void mlx5e_netpoll(struct net_device *dev)
> +{
> +   struct mlx5e_priv *priv = netdev_priv(dev);
> +   int i, nr_sq = priv->params.num_channels * priv->params.num_tc;
> +
> +   for (i = 0; i < nr_sq; i++)
> +   napi_schedule(priv->txq_to_sq_map[i]->cq.napi);

Hi Calvin,

Basically all CQs on the same channel share the same napi, so here
you will end up calling napi_schedule more than once for the same
napi (channel).
Iterating over the SQ map is unnecessary here; all you need to do is
iterate over the channels:

 for (i = 0; i < priv->params.num_channels; i++)
         napi_schedule(&priv->channel[i]->napi);


Thanks,
Saeed.

> +}
> +#endif
> +
>  static const struct net_device_ops mlx5e_netdev_ops_basic = {
> .ndo_open= mlx5e_open,
> .ndo_stop= mlx5e_close,
> @@ -2805,6 +2819,9 @@ static const struct net_device_ops 
> mlx5e_netdev_ops_basic = {
> .ndo_rx_flow_steer   = mlx5e_rx_flow_steer,
>  #endif
> .ndo_tx_timeout  = mlx5e_tx_timeout,
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> +   .ndo_poll_controller = mlx5e_netpoll,
> +#endif
>  };
>
>  static const struct net_device_ops mlx5e_netdev_ops_sriov = {
> @@ -2836,6 +2853,9 @@ static const struct net_device_ops 
> mlx5e_netdev_ops_sriov = {
> .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
> .ndo_get_vf_stats= mlx5e_get_vf_stats,
> .ndo_tx_timeout  = mlx5e_tx_timeout,
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> +   .ndo_poll_controller = mlx5e_netpoll,
> +#endif
>  };
>
>  static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
> --
> 2.9.3
>
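
Putting the fix together, the corrected callback would look roughly
like this (a sketch built from the snippet above; the final version
may differ):

#ifdef CONFIG_NET_POLL_CONTROLLER
/* Schedule each channel's napi exactly once; all SQs/CQs of a channel
 * share that channel's napi context.
 */
static void mlx5e_netpoll(struct net_device *dev)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	int i;

	for (i = 0; i < priv->params.num_channels; i++)
		napi_schedule(&priv->channel[i]->napi);
}
#endif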


[PATCH net-next 3/9] net/mlx5: E-Switch, Set vport representor fields explicitly on registration

2016-09-22 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

The structure we use for the eswitch vport representor (mlx5_eswitch_rep)
has some fields which are set by upper layers in the driver when they
register the rep. Set them explicitly at registration time and avoid the
global memcpy. This patch doesn't add new functionality.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 5 +++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 +++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 6d8c5a2..ebfcde0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -178,11 +178,12 @@ struct mlx5_eswitch_rep {
void   (*unload)(struct mlx5_eswitch *esw,
 struct mlx5_eswitch_rep *rep);
u16vport;
-   struct mlx5_flow_rule *vport_rx_rule;
+   u8 hw_id[ETH_ALEN];
void  *priv_data;
+
+   struct mlx5_flow_rule *vport_rx_rule;
struct list_head   vport_sqs_list;
bool   valid;
-   u8 hw_id[ETH_ALEN];
 };
 
 struct mlx5_esw_offload {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a73721b..b901cd4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -616,7 +616,13 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch 
*esw,
 
rep = &offloads->vport_reps[vport_index];
 
-   memcpy(rep, __rep, sizeof(struct mlx5_eswitch_rep));
+   memset(rep, 0, sizeof(*rep));
+
+   rep->load   = __rep->load;
+   rep->unload = __rep->unload;
+   rep->vport  = __rep->vport;
+   rep->priv_data = __rep->priv_data;
+   ether_addr_copy(rep->hw_id, __rep->hw_id);
 
INIT_LIST_HEAD(&rep->vport_sqs_list);
rep->valid = true;
-- 
2.7.4



[PATCH net-next 8/9] net/mlx5e: Add TC vlan action for SRIOV offloads

2016-09-22 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Parse TC vlan actions and set the required elements to allow offloading.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 43 ++---
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3eb319b..e61bd52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -119,17 +119,27 @@ static struct mlx5_flow_rule 
*mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr 
*attr)
 {
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+   int err;
+
+   err = mlx5_eswitch_add_vlan_action(esw, attr);
+   if (err)
+   return ERR_PTR(err);
 
return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 }
 
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
- struct mlx5_flow_rule *rule)
+ struct mlx5_flow_rule *rule,
+ struct mlx5_esw_flow_attr *attr)
 {
+   struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_fc *counter = NULL;
 
counter = mlx5_flow_rule_counter(rule);
 
+   if (esw && esw->mode == SRIOV_OFFLOADS)
+   mlx5_eswitch_del_vlan_action(esw, attr);
+
mlx5_del_flow_rule(rule);
 
mlx5_fc_destroy(priv->mdev, counter);
@@ -369,13 +379,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, 
struct tcf_exts *exts,
 
tcf_exts_to_list(exts, &actions);
list_for_each_entry(a, &actions, list) {
-   /* Only support a single action per rule */
-   if (attr->action)
-   return -EINVAL;
-
if (is_tcf_gact_shot(a)) {
-   attr->action = MLX5_FLOW_CONTEXT_ACTION_DROP |
-  MLX5_FLOW_CONTEXT_ACTION_COUNT;
+   attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+   MLX5_FLOW_CONTEXT_ACTION_COUNT;
continue;
}
 
@@ -392,12 +398,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, 
struct tcf_exts *exts,
return -EINVAL;
}
 
-   attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+   attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
out_priv = netdev_priv(out_dev);
attr->out_rep = out_priv->ppriv;
continue;
}
 
+   if (is_tcf_vlan(a)) {
+   if (tcf_vlan_action(a) == VLAN_F_POP) {
+   attr->action |= 
MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+   } else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
+   if (tcf_vlan_push_proto(a) != 
htons(ETH_P_8021Q))
+   return -EOPNOTSUPP;
+
+   attr->action |= 
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+   attr->vlan = tcf_vlan_push_vid(a);
+   }
+   continue;
+   }
+
return -EINVAL;
}
return 0;
@@ -413,6 +432,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 
protocol,
struct mlx5e_tc_flow *flow;
struct mlx5_flow_spec *spec;
struct mlx5_flow_rule *old = NULL;
+   struct mlx5_esw_flow_attr *old_attr;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
if (esw && esw->mode == SRIOV_OFFLOADS)
@@ -422,6 +442,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 
protocol,
  tc->ht_params);
if (flow) {
old = flow->rule;
+   old_attr = flow->attr;
} else {
if (fdb_flow)
flow = kzalloc(sizeof(*flow) + sizeof(struct 
mlx5_esw_flow_attr),
@@ -466,7 +487,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 
protocol,
goto err_del_rule;
 
if (old)
-   mlx5e_tc_del_flow(priv, old);
+   mlx5e_tc_del_flow(priv, old, old_attr);
 
goto out;
 
@@ -494,7 +515,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
 
rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
 
-   mlx5e_tc_del_flow(priv, flow->rule);
+   mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
 
kfree(flow);
 
@@ -551,7 +572,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg)
   

[PATCH net-next 1/9] net_sched: act_vlan: add helper inlines to access tcf_vlan info

2016-09-22 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Needed, e.g., for offloading drivers to pick the relevant attributes.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 include/net/tc_act/tc_vlan.h | 25 +
 1 file changed, 25 insertions(+)

diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index 6b83588..48cca32 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -11,6 +11,7 @@
 #define __NET_TC_VLAN_H
 
 #include 
+#include 
 
 #define VLAN_F_POP 0x1
 #define VLAN_F_PUSH0x2
@@ -24,4 +25,28 @@ struct tcf_vlan {
 };
 #define to_vlan(a) ((struct tcf_vlan *)a)
 
+static inline bool is_tcf_vlan(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+   if (a->ops && a->ops->type == TCA_ACT_VLAN)
+   return true;
+#endif
+   return false;
+}
+
+static inline u32 tcf_vlan_action(const struct tc_action *a)
+{
+   return to_vlan(a)->tcfv_action;
+}
+
+static inline u16 tcf_vlan_push_vid(const struct tc_action *a)
+{
+   return to_vlan(a)->tcfv_push_vid;
+}
+
+static inline __be16 tcf_vlan_push_proto(const struct tc_action *a)
+{
+   return to_vlan(a)->tcfv_push_proto;
+}
+
 #endif /* __NET_TC_VLAN_H */
-- 
2.7.4
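
How an offloading driver is expected to consume these inlines (a
sketch; patch 8/9 in this series does essentially this in mlx5e,
error handling trimmed):

	/* inside the driver's action-parsing loop over a tcf_exts list */
	list_for_each_entry(a, &actions, list) {
		if (!is_tcf_vlan(a))
			continue;

		if (tcf_vlan_action(a) == VLAN_F_POP) {
			/* program HW to pop the outer vlan tag */
		} else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
			if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
				return -EOPNOTSUPP; /* 802.1Q only */
			/* program HW to push tcf_vlan_push_vid(a) */
		}
	}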



[PATCH net-next 7/9] net/mlx5: E-Switch, Support VLAN actions in the offloads mode

2016-09-22 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Many virtualization systems use a policy under which a vlan tag is
pushed to packets sent by guests, and popped before the packet is
forwarded to the VM.

The current generation of the mlx5 HW doesn't fully support that on
a per flow level. As such, we are addressing the above common use
case with the SRIOV e-Switch abilities to push vlan into packets
sent by VFs and pop vlan from packets forwarded to VFs.

The HW can match on the correct vlan being present in packets
forwarded to VFs (eSwitch steering is done before stripping
the tag), so this part is offloaded as is.

A common practice for vlans is to avoid both push vlan and pop vlan
for inter-host VM/VM (east-west) communication because in this case,
push on egress cancels out with pop on ingress.

For supporting that, we use a global eswitch vlan pop policy, hence
allowing guest A to communicate with both remote VM B and local VM C.
This works since the HW pops the vlan only if it exists (e.g. for
C --> A packets but not for B --> A packets).

On the slow path, when a VF vport has an offloaded flow which involves
pushing vlans, whereas another flow is not currently offloaded, the
packets from the 2nd flow seen by the VF representor on the host carry
a vlan tag. The VF rep driver removes such a vlan tag before calling
into the host networking stack.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  21 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c|  33 
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  15 ++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 180 +
 5 files changed, 249 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3460154..460363b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -869,6 +869,7 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
 int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b309e7c..c127923 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -446,6 +446,16 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
kfree(rq->mpwqe.info);
 }
 
+static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+{
+   struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+
+   if (rep && rep->vport != FDB_UPLINK_VPORT)
+   return true;
+
+   return false;
+}
+
 static int mlx5e_create_rq(struct mlx5e_channel *c,
   struct mlx5e_rq_param *param,
   struct mlx5e_rq *rq)
@@ -487,6 +497,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 
switch (priv->params.rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+   if (mlx5e_is_vf_vport_rep(priv)) {
+   err = -EINVAL;
+   goto err_rq_wq_destroy;
+   }
+
rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
@@ -512,7 +527,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
 
-   rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
+   if (mlx5e_is_vf_vport_rep(priv))
+   rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep;
+   else
+   rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
+
rq->alloc_wqe = mlx5e_alloc_rx_wqe;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index e836e47..c6de6fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -36,6 +36,7 @@
 #include 
 #include "en.h"
 #include "en_tc.h"
+#include "eswitch.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 {
@@ -803,6 +804,38 @@ wq_ll_pop:
&wqe->next.next_wqe_index);
 }
 
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struc
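
(The hunk is truncated in the archive. Conceptually, the rep variant
completes the CQE as the regular handler does and additionally strips
a vlan tag that an offloaded push rule may have added, before handing
the skb to the host stack. A rough sketch, not the literal patch body:)

void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	u16 wqe_counter = be16_to_cpu(cqe->wqe_counter);
	u32 cqe_bcnt    = be32_to_cpu(cqe->byte_cnt);
	struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
	struct sk_buff *skb;

	skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
	if (!skb)
		goto wq_ll_pop;

	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

	/* slow-path vlan removal described in the changelog; the exact
	 * check used by the real patch may differ
	 */
	if (skb->protocol == htons(ETH_P_8021Q))
		skb_vlan_pop(skb);

	napi_gro_receive(rq->cq.napi, skb);

wq_ll_pop:
	mlx5_wq_ll_pop(&rq->wq, cqe->wqe_counter,
		       &wqe->next.next_wqe_index);
}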

[PATCH net-next 2/9] net/mlx5: E-Switch, Set the vport when registering the uplink rep

2016-09-22 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Set the vport value in the PF entry to be that of the uplink so
we can use it blindly over the tc / eswitch offload code without
translating it each time we deal with the uplink representor.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  6 ++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 10 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  3 ++-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 27 +++---
 4 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a9fc9d4..b309e7c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3726,9 +3726,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id);
rep.load = mlx5e_nic_rep_load;
rep.unload = mlx5e_nic_rep_unload;
-   rep.vport = 0;
+   rep.vport = FDB_UPLINK_VPORT;
rep.priv_data = priv;
-   mlx5_eswitch_register_vport_rep(esw, &rep);
+   mlx5_eswitch_register_vport_rep(esw, 0, &rep);
}
 }
 
@@ -3867,7 +3867,7 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev 
*mdev)
rep.unload = mlx5e_vport_rep_unload;
rep.vport = vport;
ether_addr_copy(rep.hw_id, mac);
-   mlx5_eswitch_register_vport_rep(esw, &rep);
+   mlx5_eswitch_register_vport_rep(esw, vport, &rep);
}
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 22cfc4a..783e122 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -120,10 +120,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct 
mlx5e_priv *priv,
struct mlx5_eswitch_rep *rep = priv->ppriv;
u32 src_vport;
 
-   if (rep->vport) /* set source vport for the flow */
-   src_vport = rep->vport;
-   else
-   src_vport = FDB_UPLINK_VPORT;
+   src_vport = rep->vport;
 
return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, 
dst_vport);
 }
@@ -399,10 +396,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, 
struct tcf_exts *exts,
 
out_priv = netdev_priv(out_dev);
out_rep  = out_priv->ppriv;
-   if (out_rep->vport == 0)
-   *dest_vport = FDB_UPLINK_VPORT;
-   else
-   *dest_vport = out_rep->vport;
+   *dest_vport = out_rep->vport;
*action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
continue;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b96e8c9..6d8c5a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -254,9 +254,10 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+int vport_index,
 struct mlx5_eswitch_rep *rep);
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-  int vport);
+  int vport_index);
 
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 3dc83a9..a73721b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -144,16 +144,12 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
 {
struct mlx5_flow_rule *flow_rule;
struct mlx5_esw_sq *esw_sq;
-   int vport;
int err;
int i;
 
if (esw->mode != SRIOV_OFFLOADS)
return 0;
 
-   vport = rep->vport == 0 ?
-   FDB_UPLINK_VPORT : rep->vport;
-
for (i = 0; i < sqns_num; i++) {
esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL);
if (!esw_sq) {
@@ -163,7 +159,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
 
/* Add re-inject rule to the PF/representor sqs */
 

[PATCH net-next 6/9] net/mlx5e: Refactor retrieval of skb from rx completion element (cqe)

2016-09-22 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Factor the relevant code out into a static inline helper (skb_from_cqe).

Move the call to napi_gro_receive to be carried out just
after mlx5e_complete_rx_cqe returns.

Both changes are to be used for the VF representor as well
in the next commit.

This patch doesn't change any functionality.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 41 +
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 0a81bd3..e836e47 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -629,7 +629,6 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq 
*rq,
rq->stats.packets++;
rq->stats.bytes += cqe_bcnt;
mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
-   napi_gro_receive(rq->cq.napi, skb);
 }
 
 static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq)
@@ -733,20 +732,15 @@ static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
}
 }
 
-void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+static inline
+struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+u16 wqe_counter, u32 cqe_bcnt)
 {
struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog);
struct mlx5e_dma_info *di;
-   struct mlx5e_rx_wqe *wqe;
-   __be16 wqe_counter_be;
struct sk_buff *skb;
-   u16 wqe_counter;
void *va, *data;
-   u32 cqe_bcnt;
 
-   wqe_counter_be = cqe->wqe_counter;
-   wqe_counter= be16_to_cpu(wqe_counter_be);
-   wqe= mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
di = &rq->dma_info[wqe_counter];
va = page_address(di->page);
data   = va + MLX5_RX_HEADROOM;
@@ -757,22 +751,21 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct 
mlx5_cqe64 *cqe)
  rq->buff.wqe_sz,
  DMA_FROM_DEVICE);
prefetch(data);
-   cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
rq->stats.wqe_err++;
mlx5e_page_release(rq, di, true);
-   goto wq_ll_pop;
+   return NULL;
}
 
if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt))
-   goto wq_ll_pop; /* page/packet was consumed by XDP */
+   return NULL; /* page/packet was consumed by XDP */
 
skb = build_skb(va, RQ_PAGE_SIZE(rq));
if (unlikely(!skb)) {
rq->stats.buff_alloc_err++;
mlx5e_page_release(rq, di, true);
-   goto wq_ll_pop;
+   return NULL;
}
 
/* queue up for recycling ..*/
@@ -782,7 +775,28 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct 
mlx5_cqe64 *cqe)
skb_reserve(skb, MLX5_RX_HEADROOM);
skb_put(skb, cqe_bcnt);
 
+   return skb;
+}
+
+void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+   struct mlx5e_rx_wqe *wqe;
+   __be16 wqe_counter_be;
+   struct sk_buff *skb;
+   u16 wqe_counter;
+   u32 cqe_bcnt;
+
+   wqe_counter_be = cqe->wqe_counter;
+   wqe_counter= be16_to_cpu(wqe_counter_be);
wqe= mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+   cqe_bcnt   = be32_to_cpu(cqe->byte_cnt);
+
+   skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
+   if (!skb)
+   goto wq_ll_pop;
+
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+   napi_gro_receive(rq->cq.napi, skb);
 
 wq_ll_pop:
mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
   &wqe->next.next_wqe_index);
@@ -861,6 +875,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct 
mlx5_cqe64 *cqe)
 
mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb);
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+   napi_gro_receive(rq->cq.napi, skb);
 
 mpwrq_cqe_out:
if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
-- 
2.7.4



[PATCH net-next 0/9] Mellanox 100G SRIOV offloads vlan push/pop

2016-09-22 Thread Saeed Mahameed
Hi Dave,

From Or Gerlitz:

This series further enhances the SRIOV TC offloads of mlx5 to handle
the TC vlan push and pop actions. This serves a common use-case in
virtualization systems where the virtual switch add (push) vlan tags
to packets sent from VMs and removes (pop) vlan tags before the packet
is received by the VM. We use the new E-Switch switchdev mode and the
TC vlan action to achieve that also in SW defined SRIOV environments by
offloading TC rules that contain this action along with forwarding
(TC mirred/redirect action) the packet.

In the first patch we add some helpers to access the TC vlan action info
by offloading drivers. The next five patches don't add any new functionality,
they do some refactoring and cleanups in the current code to be used next.

The seventh patch deals with supporting vlans by the mlx5 e-switch in switchdev
mode. The eighth patch does the vlan action offload from TC and the last patch
adds matching for vlans as typically required by TC flows that involve vlan
pop action.

The series was applied on top of commit 524605e "cxgb4: Convert to use 
simple_open()"

Thanks.

Or Gerlitz (9):
  net_sched: act_vlan: add helper inlines to access tcf_vlan info
  net/mlx5: E-Switch, Set the vport when registering the uplink rep
  net/mlx5: E-Switch, Set vport representor fields explicitly on
registration
  net/mlx5: E-Switch, Allow fine tuning of eswitch vport push/pop vlan
  net/mlx5: Put elements related to offloaded TC rule in one struct
  net/mlx5e: Refactor retrieval of skb from rx completion element (cqe)
  net/mlx5: E-Switch, Support VLAN actions in the offloads mode
  net/mlx5e: Add TC vlan action for SRIOV offloads
  net/mlx5e: Add TC vlan match parsing

 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  27 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c|  74 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 109 ++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  33 ++-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  38 +++-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 222 +++--
 include/net/tc_act/tc_vlan.h   |  25 +++
 8 files changed, 446 insertions(+), 83 deletions(-)

-- 
2.7.4



[PATCH net-next 4/9] net/mlx5: E-Switch, Allow fine tuning of eswitch vport push/pop vlan

2016-09-22 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

The HW can be programmed to push vlan, pop vlan or both.

A factorization step towards using the push/pop capabilities in the
eswitch offloads mode. This patch doesn't add new functionality.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 33 +++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  5 
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 4927494..6605453 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -127,7 +127,7 @@ static int modify_esw_vport_context_cmd(struct 
mlx5_core_dev *dev, u16 vport,
 }
 
 static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
- u16 vlan, u8 qos, bool set)
+ u16 vlan, u8 qos, u8 set_flags)
 {
u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
 
@@ -135,14 +135,18 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev 
*dev, u32 vport,
!MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
return -ENOTSUPP;
 
-   esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n",
- vport, vlan, qos, set);
-   if (set) {
+   esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n",
+ vport, vlan, qos, set_flags);
+
+   if (set_flags & SET_VLAN_STRIP)
MLX5_SET(modify_esw_vport_context_in, in,
 esw_vport_context.vport_cvlan_strip, 1);
+
+   if (set_flags & SET_VLAN_INSERT) {
/* insert only if no vlan in packet */
MLX5_SET(modify_esw_vport_context_in, in,
 esw_vport_context.vport_cvlan_insert, 1);
+
MLX5_SET(modify_esw_vport_context_in, in,
 esw_vport_context.cvlan_pcp, qos);
MLX5_SET(modify_esw_vport_context_in, in,
@@ -1777,25 +1781,21 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch 
*esw,
return 0;
 }
 
-int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
-   int vport, u16 vlan, u8 qos)
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos, u8 set_flags)
 {
struct mlx5_vport *evport;
int err = 0;
-   int set = 0;
 
if (!ESW_ALLOWED(esw))
return -EPERM;
if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7))
return -EINVAL;
 
-   if (vlan || qos)
-   set = 1;
-
mutex_lock(&esw->state_lock);
evport = &esw->vports[vport];
 
-   err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set);
+   err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
if (err)
goto unlock;
 
@@ -1813,6 +1813,17 @@ unlock:
return err;
 }
 
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+   int vport, u16 vlan, u8 qos)
+{
+   u8 set_flags = 0;
+
+   if (vlan || qos)
+   set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
+
+   return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+}
+
 int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
int vport, bool spoofchk)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index ebfcde0..4f5391a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -246,6 +246,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 struct mlx5_flow_rule *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 
tirn);
 
+enum {
+   SET_VLAN_STRIP  = BIT(0),
+   SET_VLAN_INSERT = BIT(1)
+};
+
 int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
 struct mlx5_eswitch_rep *rep,
 u16 *sqns_array, int sqns_num);
-- 
2.7.4
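
With the capability flags split out, each behavior can be requested
independently. A usage sketch based on the signatures in the diff
above (not code from the patch itself):

	/* legacy ndo path: keep strip + insert coupled, as the
	 * mlx5_eswitch_set_vport_vlan() wrapper above does
	 */
	err = __mlx5_eswitch_set_vport_vlan(esw, vport, vid, qos,
					    SET_VLAN_STRIP | SET_VLAN_INSERT);

	/* offloads mode (later in the series): pop-only policy,
	 * nothing is inserted into traffic sent by the VF
	 */
	err = __mlx5_eswitch_set_vport_vlan(esw, vport, 0, 0, SET_VLAN_STRIP);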



[PATCH net-next 9/9] net/mlx5e: Add TC vlan match parsing

2016-09-22 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Enhance the parsing of offloaded TC rules matches to handle vlans.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index e61bd52..a350b71 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -164,6 +164,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct 
mlx5_flow_spec *spec
~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
  BIT(FLOW_DISSECTOR_KEY_BASIC) |
  BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
  BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
  BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
  BIT(FLOW_DISSECTOR_KEY_PORTS))) {
@@ -227,6 +228,24 @@ static int parse_cls_flower(struct mlx5e_priv *priv, 
struct mlx5_flow_spec *spec
key->src);
}
 
+   if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+   struct flow_dissector_key_vlan *key =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->key);
+   struct flow_dissector_key_vlan *mask =
+   skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->mask);
+   if (mask->vlan_id) {
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1);
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1);
+
+   MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, 
mask->vlan_id);
+   MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, 
key->vlan_id);
+   }
+   }
+
if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
struct flow_dissector_key_ipv4_addrs *key =
skb_flow_dissector_target(f->dissector,
-- 
2.7.4



[PATCH net-next 5/9] net/mlx5: Put elements related to offloaded TC rule in one struct

2016-09-22 Thread Saeed Mahameed
From: Or Gerlitz <ogerl...@mellanox.com>

Put the representors related to the source and dest vports and the
action in struct mlx5_esw_flow_attr which is used while setting the FDB rule.

This patch doesn't change any functionality.

Signed-off-by: Or Gerlitz <ogerl...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 51 --
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 10 -
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  9 ++--
 3 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 783e122..3eb319b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -47,6 +48,7 @@ struct mlx5e_tc_flow {
struct rhash_head   node;
u64 cookie;
struct mlx5_flow_rule   *rule;
+   struct mlx5_esw_flow_attr *attr;
 };
 
 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
@@ -114,15 +116,11 @@ err_create_ft:
 
 static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
-   u32 action, u32 dst_vport)
+   struct mlx5_esw_flow_attr 
*attr)
 {
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-   struct mlx5_eswitch_rep *rep = priv->ppriv;
-   u32 src_vport;
 
-   src_vport = rep->vport;
-
-   return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, 
dst_vport);
+   return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 }
 
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
@@ -358,7 +356,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, 
struct tcf_exts *exts,
 }
 
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-   u32 *action, u32 *dest_vport)
+   struct mlx5_esw_flow_attr *attr)
 {
const struct tc_action *a;
LIST_HEAD(actions);
@@ -366,17 +364,18 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, 
struct tcf_exts *exts,
if (tc_no_actions(exts))
return -EINVAL;
 
-   *action = 0;
+   memset(attr, 0, sizeof(*attr));
+   attr->in_rep = priv->ppriv;
 
tcf_exts_to_list(exts, &actions);
list_for_each_entry(a, &actions, list) {
/* Only support a single action per rule */
-   if (*action)
+   if (attr->action)
return -EINVAL;
 
if (is_tcf_gact_shot(a)) {
-   *action = MLX5_FLOW_CONTEXT_ACTION_DROP |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
+   attr->action = MLX5_FLOW_CONTEXT_ACTION_DROP |
+  MLX5_FLOW_CONTEXT_ACTION_COUNT;
continue;
}
 
@@ -384,7 +383,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, 
struct tcf_exts *exts,
int ifindex = tcf_mirred_ifindex(a);
struct net_device *out_dev;
struct mlx5e_priv *out_priv;
-   struct mlx5_eswitch_rep *out_rep;
 
out_dev = __dev_get_by_index(dev_net(priv->netdev), 
ifindex);
 
@@ -394,10 +392,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, 
struct tcf_exts *exts,
return -EINVAL;
}
 
+   attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
out_priv = netdev_priv(out_dev);
-   out_rep  = out_priv->ppriv;
-   *dest_vport = out_rep->vport;
-   *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+   attr->out_rep = out_priv->ppriv;
continue;
}
 
@@ -411,18 +408,27 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, 
__be16 protocol,
 {
struct mlx5e_tc_table *tc = &priv->fs.tc;
int err = 0;
-   u32 flow_tag, action, dest_vport = 0;
+   bool fdb_flow = false;
+   u32 flow_tag, action;
struct mlx5e_tc_flow *flow;
struct mlx5_flow_spec *spec;
struct mlx5_flow_rule *old = NULL;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
+   if (esw && esw->mode == SRIOV_OFFLOADS)
+   fdb_flow = true;
+
flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
  tc->ht_params);
-  

[PATCH net 7/9] net/mlx5e: Fix memory leak if refreshing TIRs fails

2016-08-28 Thread Saeed Mahameed
From: Kamal Heib <kam...@mellanox.com>

Free 'in' command object also when mlx5_core_modify_tir fails.

Fixes: 724b2aa15126 ("net/mlx5e: TIRs management refactoring")
Signed-off-by: Kamal Heib <kam...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 673043c..9cce153 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -139,7 +139,7 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct 
mlx5_core_dev *mdev)
struct mlx5e_tir *tir;
void *in;
int inlen;
-   int err;
+   int err = 0;
 
inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
in = mlx5_vzalloc(inlen);
@@ -151,10 +151,11 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct 
mlx5_core_dev *mdev)
list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen);
if (err)
-   return err;
+   goto out;
}
 
+out:
kvfree(in);
 
-   return 0;
+   return err;
 }
-- 
2.7.4



[PATCH net 3/9] net/mlx5e: Don't post fragmented MPWQE when RQ is disabled

2016-08-28 Thread Saeed Mahameed
ICO (Internal control operations) SQ (Send Queue) is closed/disabled
after RQ (Receive Queue).  After RQ is closed an ICO SQ completion
might post a fragmented MPWQE (Multi Packet Work Queue Element) into
that RQ.

As on regular RQ post, check if we are allowed to post to that
RQ (RQ is enabled). Cleanup in-progress UMR MPWQE on mlx5e_free_rx_descs
if needed.

Fixes: bc77b240b3c5 ('net/mlx5e: Add fragmented memory support for RX multi 
packet WQE')
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 6 ++
 2 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2463eba..e259eaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -534,6 +534,10 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
__be16 wqe_ix_be;
u16 wqe_ix;
 
+   /* UMR WQE (if in progress) is always at wq->head */
+   if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
+   mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]);
+
while (!mlx5_wq_ll_is_empty(wq)) {
wqe_ix_be = *wq->tail_next;
wqe_ix= be16_to_cpu(wqe_ix_be);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index fee1e47..b6f8ebb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -506,6 +506,12 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq)
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
 
clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
+
+   if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) {
+   mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]);
+   return;
+   }
+
mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
rq->stats.mpwqe_frag++;
 
-- 
2.7.4



[PATCH net 6/9] net/mlx5e: Add ethtool counter for TX xmit_more

2016-08-28 Thread Saeed Mahameed
From: Tariq Toukan <tar...@mellanox.com>

Add a counter in ethtool for the number of times that
TX xmit_more was used.

Signed-off-by: Tariq Toukan <tar...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c| 1 +
 3 files changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 297781a..2459c7f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -155,6 +155,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv 
*priv)
s->tx_queue_stopped += sq_stats->stopped;
s->tx_queue_wake+= sq_stats->wake;
s->tx_queue_dropped += sq_stats->dropped;
+   s->tx_xmit_more += sq_stats->xmit_more;
s->tx_csum_partial_inner += 
sq_stats->csum_partial_inner;
tx_offload_none += sq_stats->csum_none;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 7b9d8a9..499487c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -70,6 +70,7 @@ struct mlx5e_sw_stats {
u64 tx_queue_stopped;
u64 tx_queue_wake;
u64 tx_queue_dropped;
+   u64 tx_xmit_more;
u64 rx_wqe_err;
u64 rx_mpwqe_filler;
u64 rx_mpwqe_frag;
@@ -101,6 +102,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
+   { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) },
@@ -298,6 +300,7 @@ struct mlx5e_sq_stats {
/* commonly accessed in data path */
u64 packets;
u64 bytes;
+   u64 xmit_more;
u64 tso_packets;
u64 tso_bytes;
u64 tso_inner_packets;
@@ -324,6 +327,7 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
+   { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
 };
 
 #define NUM_SW_COUNTERSARRAY_SIZE(sw_stats_desc)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 5f209ad..988eca9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -375,6 +375,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, 
struct sk_buff *skb)
 
sq->stats.packets++;
sq->stats.bytes += num_bytes;
+   sq->stats.xmit_more += skb->xmit_more;
return NETDEV_TX_OK;
 
 dma_unmap_wqe_err:
-- 
2.7.4



[PATCH net 2/9] net/mlx5e: Don't wait for RQ completions on close

2016-08-28 Thread Saeed Mahameed
This will significantly reduce receive queue flush time on interface
down.

Instead of asking the firmware to flush the RQ (Receive Queue) via
asynchronous completions when moved to error, we handle RQ flush
manually (mlx5e_free_rx_descs) same as we did when RQ flush got timed
out.

This will reduce RQs flush time and speedup interface down procedure
(ifconfig down) from 6 sec to 0.3 sec on a 48 cores system.

Moved mlx5e_free_rx_descs to en_main.c where it is needed, to keep
en_rx.c free from non-critical data path code for better code locality.

Fixes: 6cd392a082de ('net/mlx5e: Handle RQ flush in error cases')
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  4 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 37 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 23 ++
 3 files changed, 22 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d63a1b8..26a7ec7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -223,9 +223,8 @@ struct mlx5e_tstamp {
 };
 
 enum {
-   MLX5E_RQ_STATE_POST_WQES_ENABLE,
+   MLX5E_RQ_STATE_FLUSH,
MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
-   MLX5E_RQ_STATE_FLUSH_TIMEOUT,
MLX5E_RQ_STATE_AM,
 };
 
@@ -703,7 +702,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
 void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
-void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
 
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 65360b1..2463eba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -431,7 +431,6 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct 
mlx5e_rq_param *param)
 
MLX5_SET(rqc,  rqc, cqn,rq->cq.mcq.cqn);
MLX5_SET(rqc,  rqc, state,  MLX5_RQC_STATE_RST);
-   MLX5_SET(rqc,  rqc, flush_in_error_en,  1);
MLX5_SET(rqc,  rqc, vsd, priv->params.vlan_strip_disable);
MLX5_SET(wq,   wq,  log_wq_pg_sz,   rq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
@@ -528,6 +527,23 @@ static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
return -ETIMEDOUT;
 }
 
+static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+   struct mlx5_wq_ll *wq = &rq->wq;
+   struct mlx5e_rx_wqe *wqe;
+   __be16 wqe_ix_be;
+   u16 wqe_ix;
+
+   while (!mlx5_wq_ll_is_empty(wq)) {
+   wqe_ix_be = *wq->tail_next;
+   wqe_ix= be16_to_cpu(wqe_ix_be);
+   wqe   = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
+   rq->dealloc_wqe(rq, wqe_ix);
+   mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
+  &wqe->next.next_wqe_index);
+   }
+}
+
 static int mlx5e_open_rq(struct mlx5e_channel *c,
 struct mlx5e_rq_param *param,
 struct mlx5e_rq *rq)
@@ -551,8 +567,6 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
if (param->am_enabled)
set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
 
-   set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, >state);
-
sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP;
sq->ico_wqe_info[pi].num_wqebbs = 1;
mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */
@@ -569,23 +583,8 @@ err_destroy_rq:
 
 static void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
-   int tout = 0;
-   int err;
-
-   clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
+   set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state);
napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
-
-   err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
-   while (!mlx5_wq_ll_is_empty(&rq->wq) && !err &&
-  tout++ < MLX5_EN_QP_FLUSH_MAX_ITER)
-   msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
-
-   if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER)
-   set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);
-
-   /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
-   napi_synchronize(&rq->channel->napi);
-
cancel_work_sync(&rq->am.work);
 
mlx5e_disable_rq(rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index bdc9e33..fee1e47 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethern

[PATCH net 4/9] net/mlx5e: Don't wait for SQ completions on close

2016-08-28 Thread Saeed Mahameed
Instead of asking the firmware to flush the SQ (Send Queue) via
asynchronous completions when moved to error, we handle SQ flush
manually (mlx5e_free_tx_descs) same as we did when SQ flush got
timed out or on tx_timeout.

This will reduce SQs flush time and speedup interface down procedure.

Moved mlx5e_free_tx_descs to the end of en_tx.c for tx
critical code locality.

Fixes: 29429f3300a3 ('net/mlx5e: Timeout if SQ doesn't flush during close')
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 38 ++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   | 67 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c |  6 +-
 4 files changed, 44 insertions(+), 70 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 26a7ec7..bf722aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -369,9 +369,8 @@ struct mlx5e_sq_dma {
 };
 
 enum {
-   MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
+   MLX5E_SQ_STATE_FLUSH,
MLX5E_SQ_STATE_BF_ENABLE,
-   MLX5E_SQ_STATE_TX_TIMEOUT,
 };
 
 struct mlx5e_ico_wqe_info {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e259eaa..297781a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,13 +39,6 @@
 #include "eswitch.h"
 #include "vxlan.h"
 
-enum {
-   MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000,
-   MLX5_EN_QP_FLUSH_MSLEEP_QUANT   = 20,
-   MLX5_EN_QP_FLUSH_MAX_ITER   = MLX5_EN_QP_FLUSH_TIMEOUT_MS /
- MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
-};
-
 struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
struct mlx5_wq_paramwq;
@@ -827,7 +820,6 @@ static int mlx5e_open_sq(struct mlx5e_channel *c,
goto err_disable_sq;
 
if (sq->txq) {
-   set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, >state);
netdev_tx_reset_queue(sq->txq);
netif_tx_start_queue(sq->txq);
}
@@ -851,38 +843,20 @@ static inline void netif_tx_disable_queue(struct 
netdev_queue *txq)
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
-   int tout = 0;
-   int err;
+   set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
+   /* prevent netif_tx_wake_queue */
+   napi_synchronize(&sq->channel->napi);
 
if (sq->txq) {
-   clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
-   /* prevent netif_tx_wake_queue */
-   napi_synchronize(&sq->channel->napi);
netif_tx_disable_queue(sq->txq);
 
-   /* ensure hw is notified of all pending wqes */
+   /* last doorbell out, godspeed .. */
if (mlx5e_sq_has_room_for(sq, 1))
mlx5e_send_nop(sq, true);
-
-   err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
- MLX5_SQC_STATE_ERR, false, 0);
-   if (err)
-   set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
}
 
-   /* wait till sq is empty, unless a TX timeout occurred on this SQ */
-   while (sq->cc != sq->pc &&
-  !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
-   msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
-   if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
-   set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
-   }
-
-   /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
-   napi_synchronize(&sq->channel->napi);
-
-   mlx5e_free_tx_descs(sq);
mlx5e_disable_sq(sq);
+   mlx5e_free_tx_descs(sq);
mlx5e_destroy_sq(sq);
 }
 
@@ -2802,7 +2776,7 @@ static void mlx5e_tx_timeout(struct net_device *dev)
if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
continue;
sched_work = true;
-   set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+   set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, 
SQ Cons: 0x%x SQ Prod: 0x%x\n",
   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index e073bf59..5f209ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -394,35 +394,6 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct 
net_device *dev)
return mlx5e_sq_xmit(sq, skb);
 }
 
-void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
-{

[PATCH net 1/9] net/mlx5e: Limit UMR length to the device's limitation

2016-08-28 Thread Saeed Mahameed
ConnectX-4 UMR (User Memory Region) MTT translation table offset in WQE
is limited to U16_MAX, before this patch we ignored that limitation and
requested the maximum possible UMR translation length that the netdev
might need (MAX channels * MAX pages per channel).
In case of a system with #cores > 32 and when linear WQE allocation fails,
falling back to using UMR WQEs will cause the RQ (Receive Queue) to get
stuck.

Here we limit the UMR length to min(U16_MAX, max required pages) (while
considering the required alignments) on driver load. By default U16_MAX is
sufficient, since the default RX ring size guarantees that we are in
range; dynamically (on set_ringparam/set_channels) we check whether the
new required UMR length (num mtts) is still in range, and fail the
request if not.
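
To see where the limit bites, here is the arithmetic from the macros
below as a stand-alone user-space check (a sketch; MLX5_MPWRQ_PAGES_PER_WQE
is taken as 64 purely for illustration, the real value depends on
PAGE_SIZE and the MPWQE size):

#include <stdio.h>
#include <stdint.h>

#define ALIGN(x, a)      (((x) + (a) - 1) & ~((a) - 1))
#define MTT_OCTW(npages) (ALIGN((npages), 8) / 2) /* UMR offset unit */
#define PAGES_PER_WQE    64                       /* illustrative only */

int main(void)
{
	uint32_t rqs = 64, wqes = 1024; /* 64 channels, 1K MPWQEs per RQ */
	uint32_t num_mtts = rqs * wqes * ALIGN(PAGES_PER_WQE, 8);
	uint32_t octw = MTT_OCTW(num_mtts);

	/* 64 * 1024 * 64 = 4194304 MTTs -> 2097152 octwords > 65535,
	 * so such a configuration must be rejected at set_channels time.
	 */
	printf("num_mtts=%u octw=%u valid=%d\n",
	       num_mtts, octw, octw <= UINT16_MAX);
	return 0;
}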

Fixes: bc77b240b3c5 ('net/mlx5e: Add fragmented memory support for RX multi 
packet WQE')
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h | 14 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 19 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c| 11 ---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c  | 12 ++--
 4 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1b495ef..d63a1b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -73,8 +73,12 @@
 #define MLX5_MPWRQ_PAGES_PER_WQE   BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
 #define MLX5_MPWRQ_STRIDES_PER_PAGE(MLX5_MPWRQ_NUM_STRIDES >> \
 MLX5_MPWRQ_WQE_PAGE_ORDER)
-#define MLX5_CHANNEL_MAX_NUM_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8) * \
-  BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW))
+
+#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
+#define MLX5E_REQUIRED_MTTS(rqs, wqes)\
+   (rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX)
+
 #define MLX5_UMR_ALIGN (2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD  (128)
 
@@ -304,6 +308,7 @@ struct mlx5e_rq {
 
unsigned long  state;
intix;
+   u32mpwqe_mtt_offset;
 
struct mlx5e_rx_am am; /* Adaptive Moderation */
 
@@ -814,11 +819,6 @@ static inline int mlx5e_get_max_num_channels(struct 
mlx5_core_dev *mdev)
 MLX5E_MAX_NUM_CHANNELS);
 }
 
-static inline int mlx5e_get_mtt_octw(int npages)
-{
-   return ALIGN(npages, 8) / 2;
-}
-
 extern const struct ethtool_ops mlx5e_ethtool_ops;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 4a3757e..9cfe408 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -373,6 +373,7 @@ static int mlx5e_set_ringparam(struct net_device *dev,
u16 min_rx_wqes;
u8 log_rq_size;
u8 log_sq_size;
+   u32 num_mtts;
int err = 0;
 
if (param->rx_jumbo_pending) {
@@ -397,6 +398,15 @@ static int mlx5e_set_ringparam(struct net_device *dev,
1 << mlx5_max_log_rq_size(rq_wq_type));
return -EINVAL;
}
+
+   num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels, 
param->rx_pending);
+   if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+   !MLX5E_VALID_NUM_MTTS(num_mtts)) {
+   netdev_info(dev, "%s: rx_pending (%d) request can't be 
satisfied, try to reduce.\n",
+   __func__, param->rx_pending);
+   return -EINVAL;
+   }
+
if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n",
__func__, param->tx_pending,
@@ -454,6 +464,7 @@ static int mlx5e_set_channels(struct net_device *dev,
unsigned int count = ch->combined_count;
bool arfs_enabled;
bool was_opened;
+   u32 num_mtts;
int err = 0;
 
if (!count) {
@@ -472,6 +483,14 @@ static int mlx5e_set_channels(struct net_device *dev,
return -EINVAL;
}
 
+   num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size));
+   if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+   !MLX5E_VALID_NUM_MTTS(num_mtts)) {
+   netdev_info(dev, "%s: rx count (%d) request can't be satisfied, 
try to reduce.\n",
+   

[PATCH net 9/9] net/mlx5: Increase number of ethtool steering priorities

2016-08-28 Thread Saeed Mahameed
From: Maor Gottlieb <ma...@mellanox.com>

Ethtool has 11 flow tables, and each flow table has its own priority.
Increase the number of priorities to be aligned with the number of
flow tables.

Fixes: 1174fce8d141 ('net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering')
Signed-off-by: Maor Gottlieb <ma...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 75bb8c8..3d6c1f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -80,7 +80,7 @@
   LEFTOVERS_NUM_PRIOS)
 
 #define ETHTOOL_PRIO_NUM_LEVELS 1
-#define ETHTOOL_NUM_PRIOS 10
+#define ETHTOOL_NUM_PRIOS 11
 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
 /* Vlan, mac, ttc, aRFS */
 #define KERNEL_NIC_PRIO_NUM_LEVELS 4
-- 
2.7.4



[PATCH net 5/9] net/mlx5e: Fix ethtool -g/G rx ring parameter report with striding RQ

2016-08-28 Thread Saeed Mahameed
From: Eran Ben Elisha <era...@mellanox.com>

The driver RQ has two possible configurations: striding RQ and
non-striding RQ.  Until this patch, the driver always reported the
number of hardware WQEs (ring descriptors). For the non-striding RQ
configuration this was fine, since there is one WQE per pending packet.
For striding RQ, multiple packets can fit into a single WQE. For a
better user experience we normalize the rx_pending parameter to the
average ring size in packets (wqe_size / mtu) in case of striding RQ.

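For intuition, here is a worked trace of the new wqes-to-packets
conversion helper added below, using hypothetical parameters (a 64B
stride, 1024 strides per WQE, 16 WQEs, 1500B ETH_DATA_LEN):

	wqe_size        = 64 * 1024;               /* 65536 bytes */
	packets_per_wqe = 65536 / ALIGN(1500, 64); /* 65536 / 1536 = 42 */
	/* reported rx_pending = 1 << (order_base_2(16 * 42) - 1)
	 *                     = 1 << (order_base_2(672) - 1)
	 *                     = 512 packets
	 */
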
Fixes: 461017cb006a ('net/mlx5e: Support RX multi-packet WQE ...')
Signed-off-by: Eran Ben Elisha <era...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 76 +++---
 1 file changed, 67 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 9cfe408..d0cf8fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -352,15 +352,61 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
				data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[j].stats,
								   sq_stats_desc, j);
 }
 
+static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type,
+   int num_wqe)
+{
+   int packets_per_wqe;
+   int stride_size;
+   int num_strides;
+   int wqe_size;
+
+   if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+   return num_wqe;
+
+   stride_size = 1 << priv->params.mpwqe_log_stride_sz;
+   num_strides = 1 << priv->params.mpwqe_log_num_strides;
+   wqe_size = stride_size * num_strides;
+
+   packets_per_wqe = wqe_size /
+ ALIGN(ETH_DATA_LEN, stride_size);
+   return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1));
+}
+
+static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type,
+   int num_packets)
+{
+   int packets_per_wqe;
+   int stride_size;
+   int num_strides;
+   int wqe_size;
+   int num_wqes;
+
+   if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+   return num_packets;
+
+   stride_size = 1 << priv->params.mpwqe_log_stride_sz;
+   num_strides = 1 << priv->params.mpwqe_log_num_strides;
+   wqe_size = stride_size * num_strides;
+
+   num_packets = (1 << order_base_2(num_packets));
+
+   packets_per_wqe = wqe_size /
+ ALIGN(ETH_DATA_LEN, stride_size);
+   num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe);
+   return 1 << (order_base_2(num_wqes));
+}
+
 static void mlx5e_get_ringparam(struct net_device *dev,
struct ethtool_ringparam *param)
 {
struct mlx5e_priv *priv = netdev_priv(dev);
int rq_wq_type = priv->params.rq_wq_type;
 
-   param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type);
+   param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+1 << mlx5_max_log_rq_size(rq_wq_type));
param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
-   param->rx_pending = 1 << priv->params.log_rq_size;
+   param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+1 << priv->params.log_rq_size);
param->tx_pending = 1 << priv->params.log_sq_size;
 }
 
@@ -370,6 +416,9 @@ static int mlx5e_set_ringparam(struct net_device *dev,
struct mlx5e_priv *priv = netdev_priv(dev);
bool was_opened;
int rq_wq_type = priv->params.rq_wq_type;
+   u32 rx_pending_wqes;
+   u32 min_rq_size;
+   u32 max_rq_size;
u16 min_rx_wqes;
u8 log_rq_size;
u8 log_sq_size;
@@ -386,20 +435,29 @@ static int mlx5e_set_ringparam(struct net_device *dev,
__func__);
return -EINVAL;
}
-   if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) {
+
+   min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+  1 << mlx5_min_log_rq_size(rq_wq_type));
+   max_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+  1 << mlx5_max_log_rq_size(rq_wq_type));
+   rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type,
+  param->rx_pending);
+
+   if (param->rx_pending < min_rq_size) {
netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n",
__func__, param->rx_pending,
-   1 << mlx5_min_log_rq_size(rq_wq_type));

[PATCH net 8/9] net/mlx5: Add error prints when validate ETS failed

2016-08-28 Thread Saeed Mahameed
From: Eran Ben Elisha <era...@mellanox.com>

When setting ETS fails due to invalid user input, print an error
message specifying the exact failure to the user.

Fixes: cdcf11212b22 ('net/mlx5e: Validate BW weight values of ETS')
Signed-off-by: Eran Ben Elisha <era...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index caa9a3c..762af16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -127,29 +127,40 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
 }
 
-static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
+static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
+   struct ieee_ets *ets)
 {
int bw_sum = 0;
int i;
 
/* Validate Priority */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-   if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY)
+   if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) {
+   netdev_err(netdev,
+  "Failed to validate ETS: priority value 
greater than max(%d)\n",
+   MLX5E_MAX_PRIORITY);
return -EINVAL;
+   }
}
 
/* Validate Bandwidth Sum */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
-   if (!ets->tc_tx_bw[i])
+   if (!ets->tc_tx_bw[i]) {
+   netdev_err(netdev,
+  "Failed to validate ETS: BW 0 is 
illegal\n");
return -EINVAL;
+   }
 
bw_sum += ets->tc_tx_bw[i];
}
}
 
-   if (bw_sum != 0 && bw_sum != 100)
+   if (bw_sum != 0 && bw_sum != 100) {
+   netdev_err(netdev,
+  "Failed to validate ETS: BW sum is illegal\n");
return -EINVAL;
+   }
return 0;
 }
 
@@ -159,7 +170,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
 
-   err = mlx5e_dbcnl_validate_ets(ets);
+   err = mlx5e_dbcnl_validate_ets(netdev, ets);
if (err)
return err;
 
-- 
2.7.4



[PATCH net 0/9] Mellanox 100G mlx5 fixes 2016-08-29

2016-08-28 Thread Saeed Mahameed
Hi Dave,

This series contains some bug fixes for the mlx5 core and mlx5
ethernet driver.

From Saeed, fix UMR to consider the hardware translation table field
size limitation when calculating the maximum number of MTTs required
by the driver.  Three patches to speed up netdevice close time by
serializing channel (SQs & RQs) destruction rather than issuing and
waiting for hardware interrupts to free them.

From Eran, Fix ethtool ring parameter reporting for striding RQ layout.
Add error prints on ETS validation failure.

From Kamal, Fix memory leak on error flow.

From Maor, Fix ethtool steering priorities number.

For -stable of 4.7.y:
  net/mlx5e: Limit UMR length to the device's limitation
  net/mlx5e: Don't wait for RQ completions on close 
  net/mlx5e: Don't post fragmented MPWQE when RQ is disabled
  net/mlx5e: Don't wait for SQ completions on close
  net/mlx5e: Add ethtool counter for TX xmit_more

Thanks,
Saeed.

Eran Ben Elisha (2):
  net/mlx5e: Fix ethtool -g/G rx ring parameter report with striding RQ
  net/mlx5: Add error prints when validate ETS failed

Kamal Heib (1):
  net/mlx5e: Fix memory leak if refreshing TIRs fails

Maor Gottlieb (1):
  net/mlx5: Increase number of ethtool steering priorities

Saeed Mahameed (4):
  net/mlx5e: Limit UMR length to the device's limitation
  net/mlx5e: Don't wait for RQ completions on close
  net/mlx5e: Don't post fragmented MPWQE when RQ is disabled
  net/mlx5e: Don't wait for SQ completions on close

Tariq Toukan (1):
  net/mlx5e: Add ethtool counter for TX xmit_more

 drivers/net/ethernet/mellanox/mlx5/core/en.h   | 21 +++--
 .../net/ethernet/mellanox/mlx5/core/en_common.c|  7 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 21 +++--
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 93 --
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 91 +
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c| 41 --
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |  4 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c| 68 
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c  |  6 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  2 +-
 10 files changed, 209 insertions(+), 145 deletions(-)

-- 
2.7.4



[PATCH for-next V3 13/15] net/mlx5: Enable setting minimum inline header mode for VFs

2016-08-23 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Implement the low-level part of the PF side in setting minimum
inline header mode for VFs.
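
A minimal usage sketch of the new helper, assuming a hypothetical PF
caller (the eswitch wiring that ultimately drives this is not part of
this patch):

	#include <linux/mlx5/vport.h>

	/* Hypothetical example: set a VF's minimum inline mode from the PF.
	 * VF vport numbers start at 1; vport 0 is the PF itself.
	 */
	static int example_set_vf_min_inline(struct mlx5_core_dev *mdev,
					     int vf, u8 min_inline)
	{
		return mlx5_modify_nic_vport_min_inline(mdev, vf + 1, min_inline);
	}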

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/vport.c | 20 
 include/linux/mlx5/mlx5_ifc.h   |  2 +-
 include/linux/mlx5/vport.h  |  2 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 3593bf7..525f17a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -125,6 +125,26 @@ void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
 
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+u16 vport, u8 min_inline)
+{
+   u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+   int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+   void *nic_vport_ctx;
+
+   MLX5_SET(modify_nic_vport_context_in, in,
+field_select.min_inline, 1);
+   MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+   MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+   nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+in, nic_vport_context);
+   MLX5_SET(nic_vport_context, nic_vport_ctx,
+min_wqe_inline_mode, min_inline);
+
+   return mlx5_modify_nic_vport_context(mdev, in, inlen);
+}
+
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 u16 vport, u8 *addr)
 {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index cb94ac5..7a8ef0a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4724,7 +4724,7 @@ struct mlx5_ifc_modify_nic_vport_field_select_bits {
u8 reserved_at_0[0x16];
u8 node_guid[0x1];
u8 port_guid[0x1];
-   u8 reserved_at_18[0x1];
+   u8 min_inline[0x1];
u8 mtu[0x1];
u8 change_event[0x1];
u8 promisc[0x1];
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index e087b7d..451b0bd 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 u16 vport, u8 *addr);
 void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 u8 *min_inline);
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+u16 vport, u8 min_inline);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
  u16 vport, u8 *addr);
 int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu);
-- 
2.7.4



[PATCH for-next V3 10/15] {net,IB}/mlx5: Modify QP commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Prior to this patch we assumed that all modify QP commands share the
same layout.

In ConnectX-4 there is a specific command for each QP transition, and
the command layouts can vary.

e.g: the 2err/2rst commands have no QP context in their layout, yet
before this patch we posted a QP context in those commands.

Fortunately the FW only checks the suffix of the commands and executes
them, while ignoring all invalid data sent after the valid command
layout.

This patch removes mlx5_modify_qp_mbox_in and changes
mlx5_core_qp_modify to receive the required transition and QP context
with opt_param_mask if needed.  This way the caller is not required to
provide the command inbox layout and it will be generated automatically.

mlx5_core_qp_modify will generate the command inbox/outbox layouts
according to the requested transition and will fill the requested
parameters.
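
Sketch of the caller-visible change, drawn from the hunks below (error
handling elided):

	/* Before: the caller built the whole mailbox and filled optparam. */
	in->optparam = cpu_to_be32(optpar);
	err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event, &base->mqp);

	/* After: the caller passes the transition opcode, the optional
	 * parameter mask and a bare QP context; the inbox layout for that
	 * transition is generated internally.
	 */
	err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, &base->mqp);

	/* Transitions with no QP context, e.g. 2rst, simply pass NULL: */
	err = mlx5_core_qp_modify(dev->mdev, MLX5_CMD_OP_2RST_QP, 0, NULL,
				  &base->mqp);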

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/qp.c  |  22 ++---
 drivers/net/ethernet/mellanox/mlx5/core/qp.c | 124 +--
 include/linux/mlx5/qp.h  |  20 +
 3 files changed, 124 insertions(+), 42 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index d22492f..6261737 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1871,7 +1871,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, 
struct mlx5_ib_qp *qp)
 {
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
-   struct mlx5_modify_qp_mbox_in *in;
unsigned long flags;
int err;
 
@@ -1884,16 +1883,12 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
   &qp->raw_packet_qp.rq.base :
   &qp->trans_qp.base;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
-   if (!in)
-   return;
-
if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
mlx5_ib_qp_disable_pagefaults(qp);
err = mlx5_core_qp_modify(dev->mdev,
- MLX5_CMD_OP_2RST_QP, in, 0,
- &base->mqp);
+ MLX5_CMD_OP_2RST_QP, 0,
+ NULL, &base->mqp);
} else {
err = modify_raw_packet_qp(dev, qp,
   MLX5_CMD_OP_2RST_QP);
@@ -1935,8 +1930,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 base->mqp.qpn);
}
 
-   kfree(in);
-
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
@@ -2522,7 +2515,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
-   struct mlx5_modify_qp_mbox_in *in;
struct mlx5_ib_pd *pd;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
@@ -2531,11 +2523,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
int err;
u16 op;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
-   if (!in)
+   context = kzalloc(sizeof(*context), GFP_KERNEL);
+   if (!context)
return -ENOMEM;
 
-   context = &in->ctx;
err = to_mlx5_st(ibqp->qp_type);
if (err < 0) {
mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
@@ -2700,12 +2691,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
-   in->optparam = cpu_to_be32(optpar);
 
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
err = modify_raw_packet_qp(dev, qp, op);
else
-   err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+   err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
   &base->mqp);
if (err)
goto out;
@@ -2746,7 +2736,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
 
 out:
-   kfree(in);
+   kfree(context);
return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 36d240c..50875a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -335,21 +335,127 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 }
EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);

[PATCH for-next V3 14/15] net/mlx5: Update mlx5_ifc.h for vxlan encap/decap

2016-08-23 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Add the required definitions related to vxlan encap/decap.

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Ilya Lesokhin <il...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 105 --
 1 file changed, 101 insertions(+), 4 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -212,6 +212,8 @@ enum {
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER  = 0x93a,
MLX5_CMD_OP_QUERY_FLOW_COUNTER= 0x93b,
MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c,
+   MLX5_CMD_OP_ALLOC_ENCAP_HEADER= 0x93d,
+   MLX5_CMD_OP_DEALLOC_ENCAP_HEADER  = 0x93e,
MLX5_CMD_OP_MAX
 };
 
@@ -281,7 +283,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 modify_root[0x1];
u8 identified_miss_table_mode[0x1];
u8 flow_table_modify[0x1];
-   u8 reserved_at_7[0x19];
+   u8 encap[0x1];
+   u8 decap[0x1];
+   u8 reserved_at_9[0x17];
 
u8 reserved_at_20[0x2];
u8 log_max_ft_size[0x6];
@@ -512,7 +516,15 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 nic_vport_node_guid_modify[0x1];
u8 nic_vport_port_guid_modify[0x1];
 
-   u8 reserved_at_20[0x7e0];
+   u8 vxlan_encap_decap[0x1];
+   u8 nvgre_encap_decap[0x1];
+   u8 reserved_at_22[0x9];
+   u8 log_max_encap_headers[0x5];
+   u8 reserved_2b[0x6];
+   u8 max_encap_header_size[0xa];
+
+   u8 reserved_40[0x7c0];
+
 };
 
 struct mlx5_ifc_qos_cap_bits {
@@ -2067,6 +2079,8 @@ enum {
MLX5_FLOW_CONTEXT_ACTION_DROP  = 0x2,
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST  = 0x4,
MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8,
+   MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10,
+   MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20,
 };
 
 struct mlx5_ifc_flow_context_bits {
@@ -2086,7 +2100,9 @@ struct mlx5_ifc_flow_context_bits {
u8 reserved_at_a0[0x8];
u8 flow_counter_list_size[0x18];
 
-   u8 reserved_at_c0[0x140];
+   u8 encap_id[0x20];
+
+   u8 reserved_at_e0[0x120];
 
struct mlx5_ifc_fte_match_param_bits match_value;
 
@@ -4216,6 +4232,85 @@ struct mlx5_ifc_query_eq_in_bits {
u8 reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_encap_header_in_bits {
+   u8 reserved_at_0[0x5];
+   u8 header_type[0x3];
+   u8 reserved_at_8[0xe];
+   u8 encap_header_size[0xa];
+
+   u8 reserved_at_20[0x10];
+   u8 encap_header[2][0x8];
+
+   u8 more_encap_header[0][0x8];
+};
+
+struct mlx5_ifc_query_encap_header_out_bits {
+   u8 status[0x8];
+   u8 reserved_at_8[0x18];
+
+   u8 syndrome[0x20];
+
+   u8 reserved_at_40[0xa0];
+
+   struct mlx5_ifc_encap_header_in_bits encap_header[0];
+};
+
+struct mlx5_ifc_query_encap_header_in_bits {
+   u8 opcode[0x10];
+   u8 reserved_at_10[0x10];
+
+   u8 reserved_at_20[0x10];
+   u8 op_mod[0x10];
+
+   u8 encap_id[0x20];
+
+   u8 reserved_at_60[0xa0];
+};
+
+struct mlx5_ifc_alloc_encap_header_out_bits {
+   u8 status[0x8];
+   u8 reserved_at_8[0x18];
+
+   u8 syndrome[0x20];
+
+   u8 encap_id[0x20];
+
+   u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_alloc_encap_header_in_bits {
+   u8 opcode[0x10];
+   u8 reserved_at_10[0x10];
+
+   u8 reserved_at_20[0x10];
+   u8 op_mod[0x10];
+
+   u8 reserved_at_40[0xa0];
+
+   struct mlx5_ifc_encap_header_in_bits encap_header;
+};
+
+struct mlx5_ifc_dealloc_encap_header_out_bits {
+   u8 status[0x8];
+   u8 reserved_at_8[0x18];
+
+   u8 syndrome[0x20];
+
+   u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_encap_header_in_bits {
+   u8 opcode[0x10];
+   u8 reserved_at_10[0x10];
+
+   u8 reserved_20[0x10];
+   u8 op_mod[0x10];
+
+   u8 encap_id[0x20];
+
+   u8 reserved_60[0x20];
+};
+
 struct mlx5_ifc_query_dct_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -6102,7 +6197,9 @@ struct mlx5_ifc_create_flow_table_in_bits {
 
u8 reserved_at_a0[0x20];
 
-   u8 reserved_at_c0[0x4];
+   u8 encap_en[0x1];
+   u8 decap_en[0x1];
+   u8 reserved_at_c2[0x2];

[PATCH for-next V3 15/15] net/mlx5: Introduce alloc_encap and dealloc_encap commands

2016-08-23 Thread Saeed Mahameed
From: Ilya Lesokhin <il...@mellanox.com>

Implement low-level commands to support vxlan encapsulation.
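
A usage sketch under assumptions: the header type constant and the
prebuilt outer header buffer below are illustrative, not part of this
patch.

	u32 encap_id;
	int err;

	/* outer_hdr would hold a prebuilt outer Eth/IP/UDP/VXLAN header. */
	err = mlx5_cmd_alloc_encap(dev, MLX5_HEADER_TYPE_VXLAN,
				   sizeof(outer_hdr), outer_hdr, &encap_id);
	if (err)
		return err;

	/* ... reference encap_id from a flow context that has the
	 * MLX5_FLOW_CONTEXT_ACTION_ENCAP action set ...
	 */

	mlx5_cmd_dealloc_encap(dev, encap_id);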

Signed-off-by: Ilya Lesokhin <il...@mellanox.com>
Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c|  4 ++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 48 
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h |  7 
 3 files changed, 59 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 23b95da..00bec60 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -301,6 +301,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
+   case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
return MLX5_CMD_STAT_OK;
 
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -402,6 +403,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+   case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
@@ -550,6 +552,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
+   MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
+   MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
default: return "unknown command opcode";
}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index e64499e..7aaefa9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -424,3 +424,51 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
*packets = MLX5_GET64(traffic_counter, stats, packets);
*bytes = MLX5_GET64(traffic_counter, stats, octets);
 }
+
+#define MAX_ENCAP_SIZE (128)
+
+int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+int header_type,
+size_t size,
+void *encap_header,
+u32 *encap_id)
+{
+   u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
+   u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) +
+ (MAX_ENCAP_SIZE / sizeof(u32))];
+   void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in,
+encap_header);
+   void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in,
+   encap_header);
+   int inlen = header - (void *)in  + size;
+   int err;
+
+   if (size > MAX_ENCAP_SIZE)
+   return -EINVAL;
+
+   memset(in, 0, inlen);
+   MLX5_SET(alloc_encap_header_in, in, opcode,
+MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
+   MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
+   MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
+   memcpy(header, encap_header, size);
+
+   memset(out, 0, sizeof(out));
+   err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+   *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+   return err;
+}
+
+void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id)
+{
+   u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
+   u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+
+   memset(in, 0, sizeof(in));
+   MLX5_SET(dealloc_encap_header_in, in, opcode,
+MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+   MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+
+   mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 158844c..ac52fdf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -88,4 +88,11 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
  struct mlx5_cmd_fc_bulk *b, u16 id,
  u64 *packets, u64 *bytes);
 
+int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+int header_type,
+size_t size,
+void *encap_header,
+u32 *encap_id);
+void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id);
+
 #endif
-- 
2.7.4



[PATCH for-next V3 12/15] net/mlx5: Improve driver log messages

2016-08-23 Thread Saeed Mahameed
Remove duplicate pci dev name printing in mlx5_core_err.
Use mlx5_core_{warn,info,err} where possible to have the pci info in the
driver log messages.
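
The practical difference, as a hedged example (the exact prefix format
depends on the mlx5_core_* macro definitions in mlx5_core.h):

	pr_warn("cmd failed\n");
	/* -> "cmd failed" - no clue which device complained */

	mlx5_core_warn(dev, "cmd failed\n");
	/* -> prefixed with the pci device name, e.g.
	 *    "mlx5_core 0000:81:00.0: ... cmd failed"
	 */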

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Parvi Kaustubhi <par...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 26 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 18 ---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 -
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c|  7 +++---
 6 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 1c7d8b8..681c12c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -416,8 +416,8 @@ int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
 {
rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
if (!rep->priv_data) {
-   pr_warn("Failed to create representor for vport %d\n",
-   rep->vport);
+   mlx5_core_warn(esw->dev, "Failed to create representor for vport %d\n",
+  rep->vport);
return -EINVAL;
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 0a364bf..7c493599 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -306,7 +306,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 
spec = mlx5_vzalloc(sizeof(*spec));
if (!spec) {
-   pr_warn("FDB: Failed to alloc match parameters\n");
+   esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
return NULL;
}
dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
@@ -340,8 +340,8 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
   0, &dest);
if (IS_ERR(flow_rule)) {
-   pr_warn(
-   "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) 
-> vport(%d), err(%ld)\n",
+   esw_warn(esw->dev,
+"FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) 
-> vport(%d), err(%ld)\n",
 dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
flow_rule = NULL;
}
@@ -1318,8 +1318,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
   0, NULL);
if (IS_ERR(vport->ingress.allow_rule)) {
err = PTR_ERR(vport->ingress.allow_rule);
-   pr_warn("vport[%d] configure ingress allow rule, err(%d)\n",
-   vport->vport, err);
+   esw_warn(esw->dev,
+"vport[%d] configure ingress allow rule, err(%d)\n",
+vport->vport, err);
vport->ingress.allow_rule = NULL;
goto out;
}
@@ -1331,8 +1332,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
   0, NULL);
if (IS_ERR(vport->ingress.drop_rule)) {
err = PTR_ERR(vport->ingress.drop_rule);
-   pr_warn("vport[%d] configure ingress drop rule, err(%d)\n",
-   vport->vport, err);
+   esw_warn(esw->dev,
+"vport[%d] configure ingress drop rule, err(%d)\n",
+vport->vport, err);
vport->ingress.drop_rule = NULL;
goto out;
}
@@ -1384,8 +1386,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
   0, NULL);
if (IS_ERR(vport->egress.allowed_vlan)) {
err = PTR_ERR(vport->egress.allowed_vlan);
-   pr_warn("vport[%d] configure egress allowed vlan rule failed, 
err(%d)\n",
-   vport->vport, err);
+   esw_warn(esw->dev,
+"vport[%d] configure egress allowed vlan rule failed, 
err(%d)\n",
+vport->vport, err);
vport->egress.allowed_vlan = NULL;
goto out;
}
@@ -1398,8 +1401,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
   0, NULL);
  

[PATCH for-next V2 08/17] {net,IB}/mlx5: MKey/PSV commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Remove old representation of manually created MKey/PSV commands layout,
and use mlx5_ifc canonical structures and defines.
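
For readers new to the mlx5_ifc style this series converts to: commands
become raw u32 arrays sized from the ifc structures, and every field is
accessed by name. A minimal sketch, using names from the hunks below:

	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	mkey_index = MLX5_GET(create_mkey_out, out, mkey_index);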

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h   |   2 +-
 drivers/infiniband/hw/mlx5/mr.c| 184 -
 drivers/infiniband/hw/mlx5/qp.c|   8 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  |   4 +-
 .../net/ethernet/mellanox/mlx5/core/en_common.c|  23 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  37 ++---
 drivers/net/ethernet/mellanox/mlx5/core/mr.c   | 183 +---
 include/linux/mlx5/device.h| 113 +
 include/linux/mlx5/driver.h|  11 +-
 include/linux/mlx5/mlx5_ifc.h  |   2 +-
 10 files changed, 235 insertions(+), 332 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 372385d..a59034a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -504,7 +504,7 @@ struct mlx5_ib_mr {
int umred;
int npages;
struct mlx5_ib_dev *dev;
-   struct mlx5_create_mkey_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
struct mlx5_core_sig_ctx*sig;
int live;
void*descs_alloc;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4b02130..6f7e347 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -135,20 +135,10 @@ static void reg_mr_callback(int status, void *context)
return;
}
 
-   if (mr->out.hdr.status) {
-   mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
-mr->out.hdr.status,
-be32_to_cpu(mr->out.hdr.syndrome));
-   kfree(mr);
-   dev->fill_delay = 1;
-   mod_timer(&dev->delay_timer, jiffies + HZ);
-   return;
-   }
-
spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
key = dev->mdev->priv.mkey_key++;
spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
-   mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+   mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
 
cache->last_add = jiffies;
 
@@ -170,16 +160,19 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 {
struct mlx5_mr_cache *cache = >cache;
struct mlx5_cache_ent *ent = >ent[c];
-   struct mlx5_create_mkey_mbox_in *in;
+   int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
int npages = 1 << ent->order;
+   void *mkc;
+   u32 *in;
int err = 0;
int i;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
+   in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
 
+   mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
for (i = 0; i < num; i++) {
if (ent->pending >= MAX_PENDING_REG_MR) {
err = -EAGAIN;
@@ -194,18 +187,22 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
mr->order = ent->order;
mr->umred = 1;
mr->dev = dev;
-   in->seg.status = MLX5_MKEY_STATUS_FREE;
-   in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
-   in->seg.qpn_mkey7_0 = cpu_to_be32(0xff << 8);
-   in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
-   in->seg.log2_page_size = 12;
+
+   MLX5_SET(mkc, mkc, free, 1);
+   MLX5_SET(mkc, mkc, umr_en, 1);
+   MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+
+   MLX5_SET(mkc, mkc, qpn, 0xff);
+   MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
+   MLX5_SET(mkc, mkc, log_page_size, 12);
 
spin_lock_irq(&ent->lock);
ent->pending++;
spin_unlock_irq(&ent->lock);
-   err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in,
-   sizeof(*in), reg_mr_callback,
-   mr, &mr->out);
+   err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
+  in, inlen,
+  mr->out, sizeof(mr->out),
+  reg_mr_callback, mr);
if (err) {
   

[PATCH for-next V2 11/17] net/mlx5: Unify and improve command interface

2016-08-22 Thread Saeed Mahameed
Now that all commands use the mlx5 ifc interface, instead of doing two
calls to execute a command we embed the command status checking into
mlx5_cmd_exec to simplify the interface.

We also clean up redundant software structures (inbox/outbox) and
functions, and improve the command failure output.

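The caller-side effect, sketched (the "before" shape matches the
two-step pattern removed throughout this series):

	/* Before: execute, then translate the outbox status separately. */
	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
	if (err)
		return err;
	if (out.hdr.status)
		return mlx5_cmd_status_to_err(&out.hdr);
	return 0;

	/* After: one call; a non-OK FW status is already translated into
	 * a negative errno inside mlx5_cmd_exec.
	 */
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
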
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/main.c  |  10 +-
 drivers/infiniband/hw/mlx5/qp.c|   5 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  | 251 +++--
 drivers/net/ethernet/mellanox/mlx5/core/cq.c   |  16 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  10 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c   |  12 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  72 ++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   | 130 +++
 drivers/net/ethernet/mellanox/mlx5/core/fw.c   |  15 +-
 drivers/net/ethernet/mellanox/mlx5/core/mad.c  |   1 -
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  80 ++-
 drivers/net/ethernet/mellanox/mlx5/core/mcg.c  |  10 +-
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  13 --
 drivers/net/ethernet/mellanox/mlx5/core/mr.c   |  28 +--
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c|  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/pd.c   |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/port.c |  99 ++--
 drivers/net/ethernet/mellanox/mlx5/core/qp.c   |  26 +--
 drivers/net/ethernet/mellanox/mlx5/core/rl.c   |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/srq.c  |  49 ++--
 drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 183 +--
 drivers/net/ethernet/mellanox/mlx5/core/uar.c  |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/vport.c|  74 ++
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c|  29 +--
 include/linux/mlx5/device.h| 115 --
 include/linux/mlx5/driver.h|   7 +-
 27 files changed, 385 insertions(+), 897 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index a84bb76..6fb77d7 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -233,23 +233,19 @@ static int set_roce_addr(struct ib_device *device, u8 port_num,
 const union ib_gid *gid,
 const struct ib_gid_attr *attr)
 {
-   struct mlx5_ib_dev *dev = to_mdev(device);
-   u32  in[MLX5_ST_SZ_DW(set_roce_address_in)];
-   u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+   struct mlx5_ib_dev *dev = to_mdev(device);
+   u32  in[MLX5_ST_SZ_DW(set_roce_address_in)]  = {0};
+   u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
 
if (ll != IB_LINK_LAYER_ETHERNET)
return -EINVAL;
 
-   memset(in, 0, sizeof(in));
-
ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
 
MLX5_SET(set_roce_address_in, in, roce_address_index, index);
MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
-
-   memset(out, 0, sizeof(out));
return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6261737..f3c943f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1007,13 +1007,10 @@ static int is_connected(enum ib_qp_type qp_type)
 static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, u32 tdn)
 {
-   u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+   u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
-   memset(in, 0, sizeof(in));
-
MLX5_SET(tisc, tisc, transport_domain, tdn);
-
return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 88b0540..23b95da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -554,11 +554,124 @@ const char *mlx5_command_str(int command)
}
 }
 
+static const char *cmd_status_str(u8 status)
+{
+   switch (status) {
+   case MLX5_CMD_STAT_OK:
+   return "OK";
+   case MLX5_CMD_STAT_INT_ERR:
+   return "internal error";
+   case MLX5_CMD_STAT_BAD_OP_ERR:
+   return "bad operation";
+   case MLX5_CMD_STAT_BAD_PARAM_ERR:
+   return "bad parameter";
+   case MLX5_C

[PATCH for-next V2 17/17] net/mlx5: Update struct mlx5_ifc_xrqc_bits

2016-08-22 Thread Saeed Mahameed
From: Artemy Kovalyov <artem...@mellanox.com>

Update struct mlx5_ifc_xrqc_bits according to last specification

Signed-off-by: Artemy Kovalyov <artem...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index e1f8e34..5f150c8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2829,7 +2829,7 @@ struct mlx5_ifc_xrqc_bits {
 
struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context;
 
-   u8 reserved_at_180[0x180];
+   u8 reserved_at_180[0x200];
 
struct mlx5_ifc_wq_bits wq;
 };
-- 
2.7.4



[PATCH for-next V2 15/17] net/mlx5: Introduce alloc_encap and dealloc_encap commands

2016-08-22 Thread Saeed Mahameed
From: Ilya Lesokhin <il...@mellanox.com>

Implement low-level commands to support vxlan encapsulation.

Signed-off-by: Ilya Lesokhin <il...@mellanox.com>
Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c|  4 ++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 48 
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h |  7 
 3 files changed, 59 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 23b95da..00bec60 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -301,6 +301,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
+   case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
return MLX5_CMD_STAT_OK;
 
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -402,6 +403,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+   case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
@@ -550,6 +552,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
+   MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
+   MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
default: return "unknown command opcode";
}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index e64499e..7aaefa9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -424,3 +424,51 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
*packets = MLX5_GET64(traffic_counter, stats, packets);
*bytes = MLX5_GET64(traffic_counter, stats, octets);
 }
+
+#define MAX_ENCAP_SIZE (128)
+
+int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+int header_type,
+size_t size,
+void *encap_header,
+u32 *encap_id)
+{
+   u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
+   u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) +
+ (MAX_ENCAP_SIZE / sizeof(u32))];
+   void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in,
+encap_header);
+   void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in,
+   encap_header);
+   int inlen = header - (void *)in  + size;
+   int err;
+
+   if (size > MAX_ENCAP_SIZE)
+   return -EINVAL;
+
+   memset(in, 0, inlen);
+   MLX5_SET(alloc_encap_header_in, in, opcode,
+MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
+   MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
+   MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
+   memcpy(header, encap_header, size);
+
+   memset(out, 0, sizeof(out));
+   err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+   *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+   return err;
+}
+
+void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id)
+{
+   u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
+   u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+
+   memset(in, 0, sizeof(in));
+   MLX5_SET(dealloc_encap_header_in, in, opcode,
+MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+   MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+
+   mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 158844c..ac52fdf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -88,4 +88,11 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
  struct mlx5_cmd_fc_bulk *b, u16 id,
  u64 *packets, u64 *bytes);
 
+int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+int header_type,
+size_t size,
+void *encap_header,
+u32 *encap_id);
+void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id);
+
 #endif
-- 
2.7.4



[PATCH for-next V2 16/17] net/mlx5: Modify RQ bitmask from mlx5 ifc

2016-08-22 Thread Saeed Mahameed
From: Alex Vesker <va...@mellanox.com>

Use mlx5 ifc MODIFY_BITMASK_VSD in mlx5e_modify_rq_vsd and expose counter
set capability bit in hca caps structure.

Signed-off-by: Alex Vesker <va...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++-
 include/linux/mlx5/driver.h   | 4 
 include/linux/mlx5/mlx5_ifc.h | 9 -
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 10fa12a..9e36c15 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -489,7 +489,8 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
 
MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
-   MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_RQ_BITMASK_VSD);
+   MLX5_SET64(modify_rq_in, in, modify_bitmask,
+  MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
MLX5_SET(rqc, rqc, vsd, vsd);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ebe57ab..0ea78b5 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -49,10 +49,6 @@
 #include 
 
 enum {
-   MLX5_RQ_BITMASK_VSD = 1 << 1,
-};
-
-enum {
MLX5_BOARD_ID_LEN = 64,
MLX5_MAX_NAME_LEN = 16,
 };
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -779,7 +779,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 out_of_seq_cnt[0x1];
u8 vport_counters[0x1];
u8 retransmission_q_counters[0x1];
-   u8 reserved_at_183[0x3];
+   u8 reserved_at_183[0x1];
+   u8 modify_rq_counter_set_id[0x1];
+   u8 reserved_at_185[0x1];
u8 max_qp_cnt[0xa];
u8 pkey_table_size[0x10];
 
@@ -4750,6 +4752,11 @@ struct mlx5_ifc_modify_rq_out_bits {
u8 reserved_at_40[0x40];
 };
 
+enum {
+   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
+   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3,
+};
+
 struct mlx5_ifc_modify_rq_in_bits {
u8 opcode[0x10];
u8 reserved_at_10[0x10];
-- 
2.7.4



[PATCH for-next V2 01/17] net/mlx5: Init/Teardown hca commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Remove old representation of manually created Init/Teardown hca
commands layout and use mlx5_ifc canonical structures and defines.

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c | 36 
 include/linux/mlx5/device.h  | 24 ---
 2 files changed, 10 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 77fc1aa..56bf520 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -162,38 +162,22 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
 {
-   struct mlx5_cmd_init_hca_mbox_in in;
-   struct mlx5_cmd_init_hca_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(init_hca_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
-
-   return err;
+   MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
 
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
 {
-   struct mlx5_cmd_teardown_hca_mbox_in in;
-   struct mlx5_cmd_teardown_hca_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(teardown_hca_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
-
-   return err;
+   MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -455,30 +455,6 @@ struct mlx5_odp_caps {
char reserved2[0xe4];
 };
 
-struct mlx5_cmd_init_hca_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd0[2];
-   __be16  profile;
-   u8  rsvd1[4];
-};
-
-struct mlx5_cmd_init_hca_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvd[8];
-};
-
-struct mlx5_cmd_teardown_hca_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd0[2];
-   __be16  profile;
-   u8  rsvd1[4];
-};
-
-struct mlx5_cmd_teardown_hca_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvd[8];
-};
-
 struct mlx5_cmd_layout {
u8  type;
u8  rsvd0[3];
-- 
2.7.4



[PATCH for-next V2 14/17] net/mlx5: Update mlx5_ifc.h for vxlan encap/decap

2016-08-22 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Add the required definitions related to vxlan encap/decap.

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Ilya Lesokhin <il...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 105 --
 1 file changed, 101 insertions(+), 4 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -212,6 +212,8 @@ enum {
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER  = 0x93a,
MLX5_CMD_OP_QUERY_FLOW_COUNTER= 0x93b,
MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c,
+   MLX5_CMD_OP_ALLOC_ENCAP_HEADER= 0x93d,
+   MLX5_CMD_OP_DEALLOC_ENCAP_HEADER  = 0x93e,
MLX5_CMD_OP_MAX
 };
 
@@ -281,7 +283,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 modify_root[0x1];
u8 identified_miss_table_mode[0x1];
u8 flow_table_modify[0x1];
-   u8 reserved_at_7[0x19];
+   u8 encap[0x1];
+   u8 decap[0x1];
+   u8 reserved_at_9[0x17];
 
u8 reserved_at_20[0x2];
u8 log_max_ft_size[0x6];
@@ -512,7 +516,15 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 nic_vport_node_guid_modify[0x1];
u8 nic_vport_port_guid_modify[0x1];
 
-   u8 reserved_at_20[0x7e0];
+   u8 vxlan_encap_decap[0x1];
+   u8 nvgre_encap_decap[0x1];
+   u8 reserved_at_22[0x9];
+   u8 log_max_encap_headers[0x5];
+   u8 reserved_2b[0x6];
+   u8 max_encap_header_size[0xa];
+
+   u8 reserved_40[0x7c0];
+
 };
 
 struct mlx5_ifc_qos_cap_bits {
@@ -2067,6 +2079,8 @@ enum {
MLX5_FLOW_CONTEXT_ACTION_DROP  = 0x2,
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST  = 0x4,
MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8,
+   MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10,
+   MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20,
 };
 
 struct mlx5_ifc_flow_context_bits {
@@ -2086,7 +2100,9 @@ struct mlx5_ifc_flow_context_bits {
u8 reserved_at_a0[0x8];
u8 flow_counter_list_size[0x18];
 
-   u8 reserved_at_c0[0x140];
+   u8 encap_id[0x20];
+
+   u8 reserved_at_e0[0x120];
 
struct mlx5_ifc_fte_match_param_bits match_value;
 
@@ -4216,6 +4232,85 @@ struct mlx5_ifc_query_eq_in_bits {
u8 reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_encap_header_in_bits {
+   u8 reserved_at_0[0x5];
+   u8 header_type[0x3];
+   u8 reserved_at_8[0xe];
+   u8 encap_header_size[0xa];
+
+   u8 reserved_at_20[0x10];
+   u8 encap_header[2][0x8];
+
+   u8 more_encap_header[0][0x8];
+};
+
+struct mlx5_ifc_query_encap_header_out_bits {
+   u8 status[0x8];
+   u8 reserved_at_8[0x18];
+
+   u8 syndrome[0x20];
+
+   u8 reserved_at_40[0xa0];
+
+   struct mlx5_ifc_encap_header_in_bits encap_header[0];
+};
+
+struct mlx5_ifc_query_encap_header_in_bits {
+   u8 opcode[0x10];
+   u8 reserved_at_10[0x10];
+
+   u8 reserved_at_20[0x10];
+   u8 op_mod[0x10];
+
+   u8 encap_id[0x20];
+
+   u8 reserved_at_60[0xa0];
+};
+
+struct mlx5_ifc_alloc_encap_header_out_bits {
+   u8 status[0x8];
+   u8 reserved_at_8[0x18];
+
+   u8 syndrome[0x20];
+
+   u8 encap_id[0x20];
+
+   u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_alloc_encap_header_in_bits {
+   u8 opcode[0x10];
+   u8 reserved_at_10[0x10];
+
+   u8 reserved_at_20[0x10];
+   u8 op_mod[0x10];
+
+   u8 reserved_at_40[0xa0];
+
+   struct mlx5_ifc_encap_header_in_bits encap_header;
+};
+
+struct mlx5_ifc_dealloc_encap_header_out_bits {
+   u8 status[0x8];
+   u8 reserved_at_8[0x18];
+
+   u8 syndrome[0x20];
+
+   u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_encap_header_in_bits {
+   u8 opcode[0x10];
+   u8 reserved_at_10[0x10];
+
+   u8 reserved_20[0x10];
+   u8 op_mod[0x10];
+
+   u8 encap_id[0x20];
+
+   u8 reserved_60[0x20];
+};
+
 struct mlx5_ifc_query_dct_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -6102,7 +6197,9 @@ struct mlx5_ifc_create_flow_table_in_bits {
 
u8 reserved_at_a0[0x20];
 
-   u8 reserved_at_c0[0x4];
+   u8 encap_en[0x1];
+   u8 decap_en[0x1];
+   u8 reserved_at_c2[0x2];

[PATCH for-next V2 05/17] net/mlx5: Pages management commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Remove old representation of manually created Pages management
commands layout, and use mlx5_ifc canonical structures and defines.

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c| 165 -
 1 file changed, 58 insertions(+), 107 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 32dea35..7bfac21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -44,12 +44,6 @@ enum {
MLX5_PAGES_TAKE = 2
 };
 
-enum {
-   MLX5_BOOT_PAGES = 1,
-   MLX5_INIT_PAGES = 2,
-   MLX5_POST_INIT_PAGES= 3
-};
-
 struct mlx5_pages_req {
struct mlx5_core_dev *dev;
u16 func_id;
@@ -67,33 +61,6 @@ struct fw_page {
unsignedfree_count;
 };
 
-struct mlx5_query_pages_inbox {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd[8];
-};
-
-struct mlx5_query_pages_outbox {
-   struct mlx5_outbox_hdr  hdr;
-   __be16  rsvd;
-   __be16  func_id;
-   __be32  num_pages;
-};
-
-struct mlx5_manage_pages_inbox {
-   struct mlx5_inbox_hdr   hdr;
-   __be16  rsvd;
-   __be16  func_id;
-   __be32  num_entries;
-   __be64  pas[0];
-};
-
-struct mlx5_manage_pages_outbox {
-   struct mlx5_outbox_hdr  hdr;
-   __be32  num_entries;
-   u8  rsvd[4];
-   __be64  pas[0];
-};
-
 enum {
MAX_RECLAIM_TIME_MSECS  = 5000,
MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
@@ -167,24 +134,22 @@ static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
s32 *npages, int boot)
 {
-   struct mlx5_query_pages_inbox   in;
-   struct mlx5_query_pages_outbox  out;
+   u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(query_pages_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
-   in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES);
+   MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
+   MLX5_SET(query_pages_in, in, op_mod, boot ?
+MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
+MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
 
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
return err;
 
-   if (out.hdr.status)
-   return mlx5_cmd_status_to_err(&out.hdr);
-
-   *npages = be32_to_cpu(out.num_pages);
-   *func_id = be16_to_cpu(out.func_id);
+   *npages = MLX5_GET(query_pages_out, out, num_pages);
+   *func_id = MLX5_GET(query_pages_out, out, function_id);
 
return err;
 }
@@ -280,46 +245,37 @@ out_alloc:
 
 static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
 {
-   struct mlx5_manage_pages_inbox *in;
-   struct mlx5_manage_pages_outbox out;
+   u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(manage_pages_in)]   = {0};
int err;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
-   if (!in)
-   return;
-
-   memset(&out, 0, sizeof(out));
-   in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
-   in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
-   in->func_id = cpu_to_be16(func_id);
-   err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
-   if (!err)
-   err = mlx5_cmd_status_to_err(&out.hdr);
-
+   MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+   MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
+   MLX5_SET(manage_pages_in, in, function_id, func_id);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
-   mlx5_core_warn(dev, "page notify failed\n");
-
-   kfree(in);
+   mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
+  func_id, err);
 }
 
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
  int notify_fail)
 {
-   struct mlx5_manage_pages_inbox *in;
-   struct mlx5_manage_pages_outbox out;
-   int inlen;
+   u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+   int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);

[PATCH for-next V2 12/17] net/mlx5: Improve driver log messages

2016-08-22 Thread Saeed Mahameed
Remove duplicate pci dev name printing in mlx5_core_err.
Use mlx5_core_{warn,info,err} where possible to have the pci info in the
driver log messages.

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Parvi Kaustubhi <par...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 26 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 18 ---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 -
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c|  7 +++---
 6 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 1c7d8b8..681c12c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -416,8 +416,8 @@ int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
 {
rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
if (!rep->priv_data) {
-   pr_warn("Failed to create representor for vport %d\n",
-   rep->vport);
+   mlx5_core_warn(esw->dev, "Failed to create representor for 
vport %d\n",
+  rep->vport);
return -EINVAL;
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 0a364bf..7c493599 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -306,7 +306,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 
vport, bool rx_rule,
 
spec = mlx5_vzalloc(sizeof(*spec));
if (!spec) {
-   pr_warn("FDB: Failed to alloc match parameters\n");
+   esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
return NULL;
}
dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
@@ -340,8 +340,8 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 
vport, bool rx_rule,
   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
   0, &dest);
if (IS_ERR(flow_rule)) {
-   pr_warn(
-   "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) 
-> vport(%d), err(%ld)\n",
+   esw_warn(esw->dev,
+"FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) 
-> vport(%d), err(%ld)\n",
 dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
flow_rule = NULL;
}
@@ -1318,8 +1318,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch 
*esw,
   0, NULL);
if (IS_ERR(vport->ingress.allow_rule)) {
err = PTR_ERR(vport->ingress.allow_rule);
-   pr_warn("vport[%d] configure ingress allow rule, err(%d)\n",
-   vport->vport, err);
+   esw_warn(esw->dev,
+"vport[%d] configure ingress allow rule, err(%d)\n",
+vport->vport, err);
vport->ingress.allow_rule = NULL;
goto out;
}
@@ -1331,8 +1332,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch 
*esw,
   0, NULL);
if (IS_ERR(vport->ingress.drop_rule)) {
err = PTR_ERR(vport->ingress.drop_rule);
-   pr_warn("vport[%d] configure ingress drop rule, err(%d)\n",
-   vport->vport, err);
+   esw_warn(esw->dev,
+"vport[%d] configure ingress drop rule, err(%d)\n",
+vport->vport, err);
vport->ingress.drop_rule = NULL;
goto out;
}
@@ -1384,8 +1386,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch 
*esw,
   0, NULL);
if (IS_ERR(vport->egress.allowed_vlan)) {
err = PTR_ERR(vport->egress.allowed_vlan);
-   pr_warn("vport[%d] configure egress allowed vlan rule failed, 
err(%d)\n",
-   vport->vport, err);
+   esw_warn(esw->dev,
+"vport[%d] configure egress allowed vlan rule failed, 
err(%d)\n",
+vport->vport, err);
vport->egress.allowed_vlan = NULL;
goto out;
}
@@ -1398,8 +1401,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch 
*esw,
   0, NULL);
  

[PATCH for-next V2 04/17] net/mlx5: MCG commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Remove old representation of manually created MCG commands layout
and use mlx5_ifc canonical structures and defines.
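
For context, a hedged caller-side sketch of the converted helpers (not
part of the patch; the wrapper function and its name are illustrative,
assuming a valid device, GID and QP number):

    #include <linux/mlx5/driver.h>

    static int example_mcg_join(struct mlx5_core_dev *dev,
                                union ib_gid *mgid, u32 qpn)
    {
            int err;

            /* inbox/outbox layouts now come from mlx5_ifc internally */
            err = mlx5_core_attach_mcg(dev, mgid, qpn);
            if (err)
                    return err;

            /* ... receive multicast traffic on the QP ... */

            return mlx5_core_detach_mcg(dev, mgid, qpn);
    }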

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c |  4 +-
 drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 70 +++
 include/linux/mlx5/mlx5_ifc.h |  2 +-
 3 files changed, 21 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d6e2a1c..0d55e0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -280,7 +280,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev 
*dev, u16 op,
case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
case MLX5_CMD_OP_DEALLOC_PD:
case MLX5_CMD_OP_DEALLOC_UAR:
-   case MLX5_CMD_OP_DETTACH_FROM_MCG:
+   case MLX5_CMD_OP_DETACH_FROM_MCG:
case MLX5_CMD_OP_DEALLOC_XRCD:
case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN:
case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
@@ -490,7 +490,7 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION);
MLX5_COMMAND_STR_CASE(ACCESS_REG);
MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG);
-   MLX5_COMMAND_STR_CASE(DETTACH_FROM_MCG);
+   MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG);
MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG);
MLX5_COMMAND_STR_CASE(MAD_IFC);
MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c 
b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
index d5a0c2d..01a1abd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -37,70 +37,36 @@
 #include 
 #include "mlx5_core.h"
 
-struct mlx5_attach_mcg_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   __be32  qpn;
-   __be32  rsvd;
-   u8  gid[16];
-};
-
-struct mlx5_attach_mcg_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvf[8];
-};
-
-struct mlx5_detach_mcg_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   __be32  qpn;
-   __be32  rsvd;
-   u8  gid[16];
-};
-
-struct mlx5_detach_mcg_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvf[8];
-};
-
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 
qpn)
 {
-   struct mlx5_attach_mcg_mbox_in in;
-   struct mlx5_attach_mcg_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)]   = {0};
+   void *gid;
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG);
-   memcpy(in.gid, mgid, sizeof(*mgid));
-   in.qpn = cpu_to_be32(qpn);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
+   MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+   MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+   gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+   memcpy(gid, mgid, sizeof(*mgid));
 
-   return err;
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
 EXPORT_SYMBOL(mlx5_core_attach_mcg);
 
 int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 
qpn)
 {
-   struct mlx5_detach_mcg_mbox_in in;
-   struct mlx5_detach_mcg_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)]   = {0};
+   void *gid;
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETTACH_FROM_MCG);
-   memcpy(in.gid, mgid, sizeof(*mgid));
-   in.qpn = cpu_to_be32(qpn);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
+   MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+   MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+   gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+   memcpy(gid, mgid, sizeof(*mgid));
 
-   return err;
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
 EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 21bc455..3f70fc9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include

[PATCH for-next V2 06/17] net/mlx5: EQ commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Remove old representation of manually created EQ commands layout,
and use mlx5_ifc canonical structures and defines.
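
Distilled from the hunks below, the idiom every converted command now
follows (an illustrative restatement of the patch, not new code):

    u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; /* dword-sized from mlx5_ifc */
    u32 in[MLX5_ST_SZ_DW(destroy_eq_in)]   = {0};
    int err;

    MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
    MLX5_SET(destroy_eq_in, in, eq_number, eqn);
    err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
    return err ? : mlx5_cmd_status_to_err_v2(out);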

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 18 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 78 ++-
 include/linux/mlx5/device.h   | 74 -
 include/linux/mlx5/driver.h   |  2 +-
 4 files changed, 44 insertions(+), 128 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 5210d92..58e5518 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -358,32 +358,32 @@ out:
 static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 int index)
 {
-   struct mlx5_query_eq_mbox_out *out;
-   struct mlx5_eq_context *ctx;
+   int outlen = MLX5_ST_SZ_BYTES(query_eq_out);
u64 param = 0;
+   void *ctx;
+   u32 *out;
int err;
 
-   out = kzalloc(sizeof(*out), GFP_KERNEL);
+   out = kzalloc(outlen, GFP_KERNEL);
if (!out)
return param;
 
-   ctx = &out->ctx;
-
-   err = mlx5_core_eq_query(dev, eq, out, sizeof(*out));
+   err = mlx5_core_eq_query(dev, eq, out, outlen);
if (err) {
mlx5_core_warn(dev, "failed to query eq\n");
goto out;
}
+   ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry);
 
switch (index) {
case EQ_NUM_EQES:
-   param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+   param = 1 << MLX5_GET(eqc, ctx, log_eq_size);
break;
case EQ_INTR:
-   param = ctx->intr;
+   param = MLX5_GET(eqc, ctx, intr);
break;
case EQ_LOG_PG_SZ:
-   param = (ctx->log_page_size & 0x1f) + 12;
+   param = MLX5_GET(eqc, ctx, log_page_size) + 12;
break;
}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 0e30602..7141197 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -86,23 +86,16 @@ struct cre_des_eq {
 
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
-   struct mlx5_destroy_eq_mbox_in in;
-   struct mlx5_destroy_eq_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(destroy_eq_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ);
-   in.eqn = eqn;
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (!err)
-   goto ex;
+   MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
+   MLX5_SET(destroy_eq_in, in, eq_number, eqn);
 
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 
-ex:
-   return err;
 }
 
 static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
@@ -351,11 +344,13 @@ static void init_eq_buf(struct mlx5_eq *eq)
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 
vecidx,
   int nent, u64 mask, const char *name, struct mlx5_uar 
*uar)
 {
+   u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
-   struct mlx5_create_eq_mbox_in *in;
-   struct mlx5_create_eq_mbox_out out;
-   int err;
+   __be64 *pas;
+   void *eqc;
int inlen;
+   u32 *in;
+   int err;
 
eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
@@ -365,35 +360,37 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct 
mlx5_eq *eq, u8 vecidx,
 
init_eq_buf(eq);
 
-   inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages;
+   inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
+   MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
+
in = mlx5_vzalloc(inlen);
if (!in) {
err = -ENOMEM;
goto err_buf;
}
-   memset(&out, 0, sizeof(out));
 
-   mlx5_fill_page_array(&eq->buf, in->pas);
+   pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
+   mlx5_fill_page_array(&eq->buf, pas);
 
-   in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
-   in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | 
uar->index);
-   in->ctx.intr = vecidx;
-   in->ctx.log_page_size = eq->bu

[PATCH for-next V2 02/17] net/mlx5: Access register and MAD IFC commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Remove old representation of manually created ACCESS_REG/MAD_IFC
commands layout and use mlx5_ifc canonical structures and defines.
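
A hedged usage sketch of the converted register-access helper (the
PMTU register and its field names are for illustration only, assuming
the mlx5_ifc pmtu_reg layout of this era):

    u32 in[MLX5_ST_SZ_DW(pmtu_reg)]  = {0};
    u32 out[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
    int max_mtu;
    int err;

    MLX5_SET(pmtu_reg, in, local_port, 1);
    err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
                               MLX5_REG_PMTU, 0, 0 /* read */);
    if (!err)
            max_mtu = MLX5_GET(pmtu_reg, out, max_mtu);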

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/mad.c  | 42 ++---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 52 +-
 include/linux/mlx5/device.h| 29 --
 3 files changed, 45 insertions(+), 78 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c 
b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
index 1368dac..13e6afd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
@@ -39,36 +39,34 @@
 int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
  u16 opmod, u8 port)
 {
-   struct mlx5_mad_ifc_mbox_in *in = NULL;
-   struct mlx5_mad_ifc_mbox_out *out = NULL;
-   int err;
+   int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+   int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+   int err = -ENOMEM;
+   void *data;
+   void *resp;
+   u32 *out;
+   u32 *in;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
-   if (!in)
-   return -ENOMEM;
-
-   out = kzalloc(sizeof(*out), GFP_KERNEL);
-   if (!out) {
-   err = -ENOMEM;
+   in = kzalloc(inlen, GFP_KERNEL);
+   out = kzalloc(outlen, GFP_KERNEL);
+   if (!in || !out)
goto out;
-   }
 
-   in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC);
-   in->hdr.opmod = cpu_to_be16(opmod);
-   in->port = port;
+   MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+   MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+   MLX5_SET(mad_ifc_in, in, port, port);
 
-   memcpy(in->data, inb, sizeof(in->data));
+   data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+   memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
 
-   err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out));
+   err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
goto out;
 
-   if (out->hdr.status) {
-   err = mlx5_cmd_status_to_err(&out->hdr);
-   goto out;
-   }
-
-   memcpy(outb, out->data, sizeof(out->data));
+   resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+   memcpy(outb, resp,
+  MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
 
 out:
kfree(out);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c 
b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 752c081..e8324c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -38,45 +38,43 @@
 
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 int size_in, void *data_out, int size_out,
-u16 reg_num, int arg, int write)
+u16 reg_id, int arg, int write)
 {
-   struct mlx5_access_reg_mbox_in *in = NULL;
-   struct mlx5_access_reg_mbox_out *out = NULL;
+   int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
+   int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
int err = -ENOMEM;
+   u32 *out = NULL;
+   u32 *in = NULL;
+   void *data;
 
-   in = mlx5_vzalloc(sizeof(*in) + size_in);
-   if (!in)
-   return -ENOMEM;
-
-   out = mlx5_vzalloc(sizeof(*out) + size_out);
-   if (!out)
-   goto ex1;
-
-   memcpy(in->data, data_in, size_in);
-   in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG);
-   in->hdr.opmod = cpu_to_be16(!write);
-   in->arg = cpu_to_be32(arg);
-   in->register_id = cpu_to_be16(reg_num);
-   err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
-   sizeof(*out) + size_out);
-   if (err)
-   goto ex2;
+   in = mlx5_vzalloc(inlen);
+   out = mlx5_vzalloc(outlen);
+   if (!in || !out)
+   goto out;
 
-   if (out->hdr.status)
-   err = mlx5_cmd_status_to_err(&out->hdr);
+   data = MLX5_ADDR_OF(access_register_in, in, register_data);
+   memcpy(data, data_in, size_in);
 
-   if (!err)
-   memcpy(data_out, out->data, size_out);
+   MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG);
+   MLX5_SET(access_register_in, in, op_mod, !write);
+   MLX5_SET(access_register_in, in, argument, arg);
+   MLX5_SET(access_register_in, in, register_id, reg_id);
+
+   err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
+   if (err)
+   goto out;
+
+   data = MLX5_ADDR_OF(access_register_out, out, registe

[PATCH for-next V2 03/17] net/mlx5: PD and UAR commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Remove old representation of manually created PD/UAR commands layouts
and use mlx5_ifc canonical structures and defines.
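
A hedged caller flow with the converted PD commands (illustrative
only; assumes a valid dev):

    u32 pdn;
    int err;

    err = mlx5_core_alloc_pd(dev, &pdn); /* pdn read via MLX5_GET(alloc_pd_out, out, pd) */
    if (err)
            return err;

    /* ... create QPs/MKeys against pdn ... */

    err = mlx5_core_dealloc_pd(dev, pdn);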

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/pd.c  | 58 +--
 drivers/net/ethernet/mellanox/mlx5/core/uar.c | 66 ++-
 2 files changed, 25 insertions(+), 99 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
index f2d3aee..efe452c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -36,66 +36,32 @@
 #include 
 #include "mlx5_core.h"
 
-struct mlx5_alloc_pd_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd[8];
-};
-
-struct mlx5_alloc_pd_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   __be32  pdn;
-   u8  rsvd[4];
-};
-
-struct mlx5_dealloc_pd_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   __be32  pdn;
-   u8  rsvd[4];
-};
-
-struct mlx5_dealloc_pd_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvd[8];
-};
-
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
 {
-   struct mlx5_alloc_pd_mbox_inin;
-   struct mlx5_alloc_pd_mbox_out   out;
+   u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(alloc_pd_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+   MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
return err;
 
-   if (out.hdr.status)
-   return mlx5_cmd_status_to_err(&out.hdr);
-
-   *pdn = be32_to_cpu(out.pdn) & 0xffffff;
+   *pdn = MLX5_GET(alloc_pd_out, out, pd);
return err;
 }
 EXPORT_SYMBOL(mlx5_core_alloc_pd);
 
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
 {
-   struct mlx5_dealloc_pd_mbox_in  in;
-   struct mlx5_dealloc_pd_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD);
-   in.pdn = cpu_to_be32(pdn);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   return mlx5_cmd_status_to_err(&out.hdr);
-
-   return err;
+   MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+   MLX5_SET(dealloc_pd_in, in, pd, pdn);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
 EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c 
b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 5ff8af4..d0a0e0b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -42,73 +42,33 @@ enum {
NUM_LOW_LAT_UUARS   = 4,
 };
 
-
-struct mlx5_alloc_uar_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd[8];
-};
-
-struct mlx5_alloc_uar_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   __be32  uarn;
-   u8  rsvd[4];
-};
-
-struct mlx5_free_uar_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   __be32  uarn;
-   u8  rsvd[4];
-};
-
-struct mlx5_free_uar_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvd[8];
-};
-
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 {
-   struct mlx5_alloc_uar_mbox_in   in;
-   struct mlx5_alloc_uar_mbox_out  out;
+   u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(alloc_uar_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+   MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
-   goto ex;
-
-   if (out.hdr.status) {
-   err = mlx5_cmd_status_to_err(&out.hdr);
-   goto ex;
-   }
-
-   *uarn = be32_to_cpu(out.uarn) & 0xffffff;
+   return err;
 
-ex:
+   *uarn = MLX5_GET(alloc_uar_out, out, uar);
   

[for-next V2 00/17][PULL request] Mellanox mlx5 core driver updates 2016-08-20

2016-08-22 Thread Saeed Mahameed
Hi Dave and Doug,

This series contains several low level and API updates for mlx5 core
commands interface and mlx5_ifc.h to be shared as base code for net-next and
rdma mlx5 4.9 submissions.

From Saeed, ten patches that refactor old layouts of firmware commands which
were manually generated before we introduced mlx5_ifc; now all of the firmware
command inbox/outbox layouts use mlx5_ifc and the old manually generated
structures are removed.  In addition to those ten patches, we add two patches
that unify the mlx5 command execution interface and improve the driver log
messages in that area.

From Hadar and Ilya, the hardware bits and infrastructure needed for
setting minimum inline headers and for the encap/decap commands and
capabilities required by E-Switch offloads.

From Alex and Artemy, updates to mlx5_ifc for the modify RQ and XRC bits.

This series applies on top of the latest net-next and rdma/master, and merges
smoothly with the latest "Mellanox 100G mlx5 fixes 2016-08-16" series already
applied to the net branch.

The following changes since commit 29b4817d4018df78086157ea3a55c1d9424a7cfc:
Linux 4.8-rc1

are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/leon/linux-rdma.git 
tags/shared-for-4.9-1

for you to fetch changes up to 2e353b3468ecb1d12a44aaf35888f7de47d5c047:
net/mlx5: Update struct mlx5_ifc_xrqc_bits

Changes since V1:
- Reduced the number of patches and changed the cover letter accordingly.

Thanks,
Saeed & Leon.

Alex Vesker (1):
  net/mlx5: Modify RQ bitmask from mlx5 ifc

Artemy Kovalyov (1):
  net/mlx5: Update struct mlx5_ifc_xrqc_bits

Hadar Hen Zion (2):
  net/mlx5: Enable setting minimum inline header mode for VFs
  net/mlx5: Update mlx5_ifc.h for vxlan encap/decap

Ilya Lesokhin (1):
  net/mlx5: Introduce alloc_encap and dealloc_encap commands

Saeed Mahameed (12):
  net/mlx5: Init/Teardown hca commands via mlx5 ifc
  net/mlx5: Access register and MAD IFC commands via mlx5 ifc
  net/mlx5: PD and UAR commands via mlx5 ifc
  net/mlx5: MCG commands via mlx5 ifc
  net/mlx5: Pages management commands via mlx5 ifc
  net/mlx5: EQ commands via mlx5 ifc
  {net,IB}/mlx5: CQ commands via mlx5 ifc
  {net,IB}/mlx5: MKey/PSV commands via mlx5 ifc
  {net,IB}/mlx5: QP/XRCD commands via mlx5 ifc
  {net,IB}/mlx5: Modify QP commands via mlx5 ifc
  net/mlx5: Unify and improve command interface
  net/mlx5: Improve driver log messages

 drivers/infiniband/hw/mlx5/cq.c| 110 +++---
 drivers/infiniband/hw/mlx5/main.c  |  10 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h   |   2 +-
 drivers/infiniband/hw/mlx5/mr.c| 184 +
 drivers/infiniband/hw/mlx5/qp.c| 189 -
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  | 261 +++--
 drivers/net/ethernet/mellanox/mlx5/core/cq.c   | 109 ++
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c  |  50 +--
 .../net/ethernet/mellanox/mlx5/core/en_common.c|  23 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  50 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c   |  80 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  98 ++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   | 178 -
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   |   7 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  18 +-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c   |  43 +--
 drivers/net/ethernet/mellanox/mlx5/core/mad.c  |  41 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  91 ++---
 drivers/net/ethernet/mellanox/mlx5/core/mcg.c  |  72 +---
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  17 +-
 drivers/net/ethernet/mellanox/mlx5/core/mr.c   | 189 -
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c| 156 +++-
 drivers/net/ethernet/mellanox/mlx5/core/pd.c   |  61 +--
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 149 +++
 drivers/net/ethernet/mellanox/mlx5/core/qp.c   | 299 --
 drivers/net/ethernet/mellanox/mlx5/core/rl.c   |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c|   7 +-
 drivers/net/ethernet/mellanox/mlx5/core/srq.c  |  49 +--
 drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 183 +++--
 drivers/net/ethernet/mellanox/mlx5/core/uar.c  |  67 +---
 drivers/net/ethernet/mellanox/mlx5/core/vport.c|  94 ++---
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c|  29 +-
 include/linux/mlx5/cq.h|   6 +-
 include/linux/mlx5/device.h| 429 +
 include/linux/mlx5/driver.h|  24 +-
 include/linux/mlx5/mlx5_ifc.h  | 127 +-
 include/linux/mlx5/qp.h| 128 +-
 include/linux/mlx5/vport.h 

[PATCH for-next V2 10/17] {net,IB}/mlx5: Modify QP commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Prior to this patch we assumed that all modify QP commands share the
same layout.

In ConnectX-4 there is a specific command for each QP transition, and
their layouts can vary.

e.g.: the 2ERR/2RST commands don't have a QP context in their layout,
yet before this patch we posted a QP context in those commands.

Fortunately the FW only checks the suffix of the commands and executes
them, while ignoring all invalid data sent after the valid command
layout.

This patch removes mlx5_modify_qp_mbox_in and changes
mlx5_core_qp_modify to receive the required transition and QP context
with opt_param_mask if needed.  This way the caller is not required to
provide the command inbox layout and it will be generated automatically.

mlx5_core_qp_modify will generate the command inbox/outbox layouts
according to the requested transition and will fill the requested
parameters.
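
A hedged sketch of the resulting calling convention (variable names
are illustrative; mdev, optpar, context and qp are assumed in scope):

    /* transition that takes a context and an optional-parameter mask */
    err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, optpar,
                              context, &qp->mqp);

    /* 2rst needs no context at all */
    err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL,
                              &qp->mqp);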

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/qp.c  |  22 ++---
 drivers/net/ethernet/mellanox/mlx5/core/qp.c | 124 +--
 include/linux/mlx5/qp.h  |  20 +
 3 files changed, 124 insertions(+), 42 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index d22492f..6261737 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1871,7 +1871,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, 
struct mlx5_ib_qp *qp)
 {
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
-   struct mlx5_modify_qp_mbox_in *in;
unsigned long flags;
int err;
 
@@ -1884,16 +1883,12 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, 
struct mlx5_ib_qp *qp)
   &qp->raw_packet_qp.rq.base :
   &qp->trans_qp.base;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
-   if (!in)
-   return;
-
if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
mlx5_ib_qp_disable_pagefaults(qp);
err = mlx5_core_qp_modify(dev->mdev,
- MLX5_CMD_OP_2RST_QP, in, 0,
- &base->mqp);
+ MLX5_CMD_OP_2RST_QP, 0,
+ NULL, &base->mqp);
} else {
err = modify_raw_packet_qp(dev, qp,
   MLX5_CMD_OP_2RST_QP);
@@ -1935,8 +1930,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, 
struct mlx5_ib_qp *qp)
 base->mqp.qpn);
}
 
-   kfree(in);
-
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
@@ -2522,7 +2515,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
-   struct mlx5_modify_qp_mbox_in *in;
struct mlx5_ib_pd *pd;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
@@ -2531,11 +2523,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
int err;
u16 op;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
-   if (!in)
+   context = kzalloc(sizeof(*context), GFP_KERNEL);
+   if (!context)
return -ENOMEM;
 
-   context = &in->ctx;
err = to_mlx5_st(ibqp->qp_type);
if (err < 0) {
mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
@@ -2700,12 +2691,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
-   in->optparam = cpu_to_be32(optpar);
 
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
err = modify_raw_packet_qp(dev, qp, op);
else
-   err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+   err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
  &base->mqp);
if (err)
goto out;
@@ -2746,7 +2736,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
 
 out:
-   kfree(in);
+   kfree(context);
return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c 
b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 36d240c..50875a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -335,21 +335,127 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(m

[PATCH for-next V2 07/17] {net,IB}/mlx5: CQ commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Remove old representation of manually created CQ commands layout,
and use mlx5_ifc canonical structures and defines.
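
The sizing idiom for variable-length create commands, distilled from
the hunks below (illustrative fragment; npages and the surrounding
declarations are assumed in scope):

    inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
            MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * npages;
    in = mlx5_vzalloc(inlen); /* kzalloc with a vzalloc fallback */
    if (!in)
            return -ENOMEM;

    pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
    cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);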

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/cq.c   | 110 -
 drivers/net/ethernet/mellanox/mlx5/core/cq.c  | 113 +-
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |  18 ++--
 include/linux/mlx5/cq.h   |   6 +-
 include/linux/mlx5/device.h   |  76 ---
 5 files changed, 122 insertions(+), 201 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 308a358..35a9f71 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -747,14 +747,16 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct 
mlx5_ib_cq_buf *buf,
 
 static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
- int entries, struct mlx5_create_cq_mbox_in **cqb,
+ int entries, u32 **cqb,
  int *cqe_size, int *index, int *inlen)
 {
struct mlx5_ib_create_cq ucmd;
size_t ucmdlen;
int page_shift;
+   __be64 *pas;
int npages;
int ncont;
+   void *cqc;
int err;
 
ucmdlen =
@@ -792,14 +794,20 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct 
ib_udata *udata,
mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont 
%d\n",
ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, 
ncont);
 
-   *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
+   *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
*cqb = mlx5_vzalloc(*inlen);
if (!*cqb) {
err = -ENOMEM;
goto err_db;
}
-   mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
-   (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+
+   pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+   mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
+
+   cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+   MLX5_SET(cqc, cqc, log_page_size,
+page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
*index = to_mucontext(context)->uuari.uars[0].index;
 
@@ -834,9 +842,10 @@ static void init_cq_buf(struct mlx5_ib_cq *cq, struct 
mlx5_ib_cq_buf *buf)
 
 static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
int entries, int cqe_size,
-   struct mlx5_create_cq_mbox_in **cqb,
-   int *index, int *inlen)
+   u32 **cqb, int *index, int *inlen)
 {
+   __be64 *pas;
+   void *cqc;
int err;
 
err = mlx5_db_alloc(dev->mdev, &cq->db);
@@ -853,15 +862,21 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, 
struct mlx5_ib_cq *cq,
 
init_cq_buf(cq, &cq->buf);
 
-   *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
+   *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
*cqb = mlx5_vzalloc(*inlen);
if (!*cqb) {
err = -ENOMEM;
goto err_buf;
}
-   mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-   (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 
MLX5_ADAPTER_PAGE_SHIFT;
+   pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+   mlx5_fill_page_array(&cq->buf.buf, pas);
+
+   cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+   MLX5_SET(cqc, cqc, log_page_size,
+cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
*index = dev->mdev->priv.uuari.uars[0].index;
 
return 0;
@@ -895,11 +910,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 {
int entries = attr->cqe;
int vector = attr->comp_vector;
-   struct mlx5_create_cq_mbox_in *cqb = NULL;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_cq *cq;
int uninitialized_var(index);
int uninitialized_var(inlen);
+   u32 *cqb = NULL;
+   void *cqc;
int cqe_size;
unsigned int irqn;
int eqn;
@@ -945,19 +961,20 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
 
-   cq->cqe_size = cqe_size;
-   cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
-
-   if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
-   cqb->ctx.cqe_sz_flags |= (1 << 

[PATCH for-next V2 09/17] {net,IB}/mlx5: QP/XRCD commands via mlx5 ifc

2016-08-22 Thread Saeed Mahameed
Remove old representation of manually created QP/XRCD commands layout
and use mlx5_ifc canonical structures and defines.
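
A hedged sketch of the converted XRCD pair (illustrative only; assumes
a valid dev):

    u32 xrcdn;
    int err;

    err = mlx5_core_xrcd_alloc(dev, &xrcdn);
    if (err)
            return err;

    /* ... hand xrcdn to SRQ/QP creation ... */

    err = mlx5_core_xrcd_dealloc(dev, xrcdn);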

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/qp.c   | 154 +++-
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |  14 +-
 drivers/net/ethernet/mellanox/mlx5/core/qp.c  | 167 --
 include/linux/mlx5/mlx5_ifc.h |   5 +-
 include/linux/mlx5/qp.h   | 108 +-
 5 files changed, 165 insertions(+), 283 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 21ab0e2..d22492f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -726,7 +726,7 @@ err_umem:
 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
  struct mlx5_ib_qp *qp, struct ib_udata *udata,
  struct ib_qp_init_attr *attr,
- struct mlx5_create_qp_mbox_in **in,
+ u32 **in,
  struct mlx5_ib_create_qp_resp *resp, int *inlen,
  struct mlx5_ib_qp_base *base)
 {
@@ -739,6 +739,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct 
ib_pd *pd,
u32 offset = 0;
int uuarn;
int ncont = 0;
+   __be64 *pas;
+   void *qpc;
int err;
 
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@@ -795,20 +797,24 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct 
ib_pd *pd,
ubuffer->umem = NULL;
}
 
-   *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+   *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
*in = mlx5_vzalloc(*inlen);
if (!*in) {
err = -ENOMEM;
goto err_umem;
}
+
+   pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
-   mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
-(*in)->pas, 0);
-   (*in)->ctx.log_pg_sz_remote_qpn =
-   cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
-   (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+   mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
+
+   qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+
+   MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+   MLX5_SET(qpc, qpc, page_offset, offset);
 
-   (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+   MLX5_SET(qpc, qpc, uar_page, uar_index);
resp->uuar_index = uuarn;
qp->uuarn = uuarn;
 
@@ -857,12 +863,13 @@ static void destroy_qp_user(struct ib_pd *pd, struct 
mlx5_ib_qp *qp,
 static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
-   struct mlx5_create_qp_mbox_in **in, int *inlen,
+   u32 **in, int *inlen,
struct mlx5_ib_qp_base *base)
 {
enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
struct mlx5_uuar_info *uuari;
int uar_index;
+   void *qpc;
int uuarn;
int err;
 
@@ -902,25 +909,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
}
 
qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
-   *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
+   *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
*in = mlx5_vzalloc(*inlen);
if (!*in) {
err = -ENOMEM;
goto err_buf;
}
-   (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
-   (*in)->ctx.log_pg_sz_remote_qpn =
-   cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 
24);
+
+   qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+   MLX5_SET(qpc, qpc, uar_page, uar_index);
+   MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - 
MLX5_ADAPTER_PAGE_SHIFT);
+
/* Set "fast registration enabled" for all kernel QPs */
-   (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
-   (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+   MLX5_SET(qpc, qpc, fre, 1);
+   MLX5_SET(qpc, qpc, rlky, 1);
 
if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
-   (*in)->ctx.deth_sqpn = cpu_to_be32(1);
+   MLX5_SET(qpc, qpc, deth_sqpn, 1);
qp->flags |= MLX5_IB_QP_SQPN_QP1;
}
 
-   mlx5_fill_page_array(&qp->buf, (*in)->pas);
+

[PATCH for-next V2 13/17] net/mlx5: Enable setting minimum inline header mode for VFs

2016-08-22 Thread Saeed Mahameed
From: Hadar Hen Zion <had...@mellanox.com>

Implement the low-level part of the PF side in setting minimum
inline header mode for VFs.
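
A hedged usage sketch (the vport number and mode value are
illustrative; MLX5_INLINE_MODE_L2 is assumed to be the matching
mlx5_ifc enum value in this era of the driver):

    /* PF enforcing at least L2 headers inlined for VF vport 3 */
    err = mlx5_modify_nic_vport_min_inline(mdev, 3, MLX5_INLINE_MODE_L2);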

Signed-off-by: Hadar Hen Zion <had...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/vport.c | 20 
 include/linux/mlx5/mlx5_ifc.h   |  2 +-
 include/linux/mlx5/vport.h  |  2 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c 
b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 3593bf7..525f17a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -125,6 +125,26 @@ void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev 
*mdev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
 
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+u16 vport, u8 min_inline)
+{
+   u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+   int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+   void *nic_vport_ctx;
+
+   MLX5_SET(modify_nic_vport_context_in, in,
+field_select.min_inline, 1);
+   MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+   MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+   nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+in, nic_vport_context);
+   MLX5_SET(nic_vport_context, nic_vport_ctx,
+min_wqe_inline_mode, min_inline);
+
+   return mlx5_modify_nic_vport_context(mdev, in, inlen);
+}
+
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 u16 vport, u8 *addr)
 {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index cb94ac5..7a8ef0a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4724,7 +4724,7 @@ struct mlx5_ifc_modify_nic_vport_field_select_bits {
u8 reserved_at_0[0x16];
u8 node_guid[0x1];
u8 port_guid[0x1];
-   u8 reserved_at_18[0x1];
+   u8 min_inline[0x1];
u8 mtu[0x1];
u8 change_event[0x1];
u8 promisc[0x1];
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index e087b7d..451b0bd 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev 
*mdev,
 u16 vport, u8 *addr);
 void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 u8 *min_inline);
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+u16 vport, u8 min_inline);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
  u16 vport, u8 *addr);
 int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu);
-- 
2.7.4



Re: [for-next V2 00/17][PULL request] Mellanox mlx5 core driver updates 2016-08-20

2016-08-22 Thread Saeed Mahameed
On Mon, Aug 22, 2016 at 8:11 PM, David Miller  wrote:
>
> Sorry, too many patches.
>
> Please keep your series to 10 to 15 patches maximum.
>
> Please do not push back on this, I made several other submitters do
> the same exact thing over the past few days.

Sure. Rules are rules. I will be more strict in the future.

Technical question though. Do I need to create a new tag for the new
pull request? I mean, should the tag always point to the "up to" patch
in the pull request? Or can I just use the same tag for the new pull
request with a different "up to" commit?

Thanks,
Saeed.


[PATCH for-next V3 02/15] net/mlx5: Access register and MAD IFC commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Remove old representation of manually created ACCESS_REG/MAD_IFC
commands layout and use mlx5_ifc canonical structures and defines.
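
A hedged usage sketch of the MAD path after the conversion (in_mad,
out_mad, op_modifier and port are assumed caller state; the helper
bounds its copies by MLX5_FLD_SZ_BYTES(mad_ifc_in, mad) internally, as
the hunks below show):

    err = mlx5_core_mad_ifc(dev, in_mad, out_mad, op_modifier, port);
    if (err)
            return err;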

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/mad.c  | 42 ++---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 52 +-
 include/linux/mlx5/device.h| 29 --
 3 files changed, 45 insertions(+), 78 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c 
b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
index 1368dac..13e6afd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
@@ -39,36 +39,34 @@
 int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
  u16 opmod, u8 port)
 {
-   struct mlx5_mad_ifc_mbox_in *in = NULL;
-   struct mlx5_mad_ifc_mbox_out *out = NULL;
-   int err;
+   int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+   int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+   int err = -ENOMEM;
+   void *data;
+   void *resp;
+   u32 *out;
+   u32 *in;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
-   if (!in)
-   return -ENOMEM;
-
-   out = kzalloc(sizeof(*out), GFP_KERNEL);
-   if (!out) {
-   err = -ENOMEM;
+   in = kzalloc(inlen, GFP_KERNEL);
+   out = kzalloc(outlen, GFP_KERNEL);
+   if (!in || !out)
goto out;
-   }
 
-   in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC);
-   in->hdr.opmod = cpu_to_be16(opmod);
-   in->port = port;
+   MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+   MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+   MLX5_SET(mad_ifc_in, in, port, port);
 
-   memcpy(in->data, inb, sizeof(in->data));
+   data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+   memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
 
-   err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out));
+   err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
goto out;
 
-   if (out->hdr.status) {
-   err = mlx5_cmd_status_to_err(&out->hdr);
-   goto out;
-   }
-
-   memcpy(outb, out->data, sizeof(out->data));
+   resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+   memcpy(outb, resp,
+  MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
 
 out:
kfree(out);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c 
b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 752c081..e8324c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -38,45 +38,43 @@
 
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 int size_in, void *data_out, int size_out,
-u16 reg_num, int arg, int write)
+u16 reg_id, int arg, int write)
 {
-   struct mlx5_access_reg_mbox_in *in = NULL;
-   struct mlx5_access_reg_mbox_out *out = NULL;
+   int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
+   int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
int err = -ENOMEM;
+   u32 *out = NULL;
+   u32 *in = NULL;
+   void *data;
 
-   in = mlx5_vzalloc(sizeof(*in) + size_in);
-   if (!in)
-   return -ENOMEM;
-
-   out = mlx5_vzalloc(sizeof(*out) + size_out);
-   if (!out)
-   goto ex1;
-
-   memcpy(in->data, data_in, size_in);
-   in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG);
-   in->hdr.opmod = cpu_to_be16(!write);
-   in->arg = cpu_to_be32(arg);
-   in->register_id = cpu_to_be16(reg_num);
-   err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
-   sizeof(*out) + size_out);
-   if (err)
-   goto ex2;
+   in = mlx5_vzalloc(inlen);
+   out = mlx5_vzalloc(outlen);
+   if (!in || !out)
+   goto out;
 
-   if (out->hdr.status)
-   err = mlx5_cmd_status_to_err(&out->hdr);
+   data = MLX5_ADDR_OF(access_register_in, in, register_data);
+   memcpy(data, data_in, size_in);
 
-   if (!err)
-   memcpy(data_out, out->data, size_out);
+   MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG);
+   MLX5_SET(access_register_in, in, op_mod, !write);
+   MLX5_SET(access_register_in, in, argument, arg);
+   MLX5_SET(access_register_in, in, register_id, reg_id);
+
+   err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
+   if (err)
+   goto out;
+
+   data = MLX5_ADDR_OF(access_register_out, out, registe

[for-next V3 00/15][PULL request] Mellanox mlx5 core driver updates 2016-08-20

2016-08-23 Thread Saeed Mahameed
Hi Dave and Doug,

This series contains several low level and API updates for mlx5 core
commands interface and mlx5_ifc.h to be shared as base code for net-next and
rdma mlx5 4.9 submissions.

From Saeed, ten patches that refactor old layouts of firmware commands which
were manually generated before we introduced mlx5_ifc; now all of the firmware
command inbox/outbox layouts use mlx5_ifc and the old manually generated
structures are removed.  In addition to those ten patches, we add two patches
that unify the mlx5 command execution interface and improve the driver log
messages in that area.

From Hadar and Ilya, the hardware bits and infrastructure needed for
setting minimum inline headers and for the encap/decap commands and
capabilities required by E-Switch offloads.

This series applies on top of the latest net-next and rdma/master, and merges
smoothly with the latest "Mellanox 100G mlx5 fixes 2016-08-16" series already
applied to the net branch.

The following changes since commit 29b4817d4018df78086157ea3a55c1d9424a7cfc:
Linux 4.8-rc1

are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/leon/linux-rdma.git 
tags/shared-for-4.9-1

for you to fetch changes up to 575ddf5888eaf8f271cb3df7b0806cb2db2c333a:
net/mlx5: Introduce alloc_encap and dealloc_encap commands

Changes since V2 and V1:
- Reduced the number of patches and changed the cover letter accordingly.

Thanks,
Saeed & Leon.

Hadar Hen Zion (2):
  net/mlx5: Enable setting minimum inline header mode for VFs
  net/mlx5: Update mlx5_ifc.h for vxlan encap/decap

Ilya Lesokhin (1):
  net/mlx5: Introduce alloc_encap and dealloc_encap commands

Saeed Mahameed (12):
  net/mlx5: Init/Teardown hca commands via mlx5 ifc
  net/mlx5: Access register and MAD IFC commands via mlx5 ifc
  net/mlx5: PD and UAR commands via mlx5 ifc
  net/mlx5: MCG commands via mlx5 ifc
  net/mlx5: Pages management commands via mlx5 ifc
  net/mlx5: EQ commands via mlx5 ifc
  {net,IB}/mlx5: CQ commands via mlx5 ifc
  {net,IB}/mlx5: MKey/PSV commands via mlx5 ifc
  {net,IB}/mlx5: QP/XRCD commands via mlx5 ifc
  {net,IB}/mlx5: Modify QP commands via mlx5 ifc
  net/mlx5: Unify and improve command interface
  net/mlx5: Improve driver log messages

 drivers/infiniband/hw/mlx5/cq.c| 110 +++---
 drivers/infiniband/hw/mlx5/main.c  |  10 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h   |   2 +-
 drivers/infiniband/hw/mlx5/mr.c| 184 +
 drivers/infiniband/hw/mlx5/qp.c| 189 -
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  | 261 +++--
 drivers/net/ethernet/mellanox/mlx5/core/cq.c   | 109 ++
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c  |  50 +--
 .../net/ethernet/mellanox/mlx5/core/en_common.c|  23 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  47 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c   |  80 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  98 ++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   | 178 -
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   |   7 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  18 +-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c   |  43 +--
 drivers/net/ethernet/mellanox/mlx5/core/mad.c  |  41 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  91 ++---
 drivers/net/ethernet/mellanox/mlx5/core/mcg.c  |  72 +---
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  17 +-
 drivers/net/ethernet/mellanox/mlx5/core/mr.c   | 189 -
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c| 156 +++-
 drivers/net/ethernet/mellanox/mlx5/core/pd.c   |  61 +--
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 149 +++
 drivers/net/ethernet/mellanox/mlx5/core/qp.c   | 299 --
 drivers/net/ethernet/mellanox/mlx5/core/rl.c   |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c|   7 +-
 drivers/net/ethernet/mellanox/mlx5/core/srq.c  |  49 +--
 drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 183 +++--
 drivers/net/ethernet/mellanox/mlx5/core/uar.c  |  67 +---
 drivers/net/ethernet/mellanox/mlx5/core/vport.c|  94 ++---
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c|  29 +-
 include/linux/mlx5/cq.h|   6 +-
 include/linux/mlx5/device.h| 429 +
 include/linux/mlx5/driver.h|  20 +-
 include/linux/mlx5/mlx5_ifc.h  | 116 +-
 include/linux/mlx5/qp.h| 128 +-
 include/linux/mlx5/vport.h |   2 +
 40 files changed, 1390 insertions(+), 2242 deletions(-)

-- 
2.7.4



[PATCH for-next V3 03/15] net/mlx5: PD and UAR commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Remove old representation of manually created PD/UAR commands layouts
and use mlx5_ifc canonical structures and defines.
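
A hedged caller flow with the converted UAR commands (illustrative
only; assumes a valid dev):

    u32 uarn;
    int err;

    err = mlx5_cmd_alloc_uar(dev, &uarn);
    if (err)
            return err;

    /* ... map doorbell pages for uarn ... */

    err = mlx5_cmd_free_uar(dev, uarn);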

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/pd.c  | 58 +--
 drivers/net/ethernet/mellanox/mlx5/core/uar.c | 66 ++-
 2 files changed, 25 insertions(+), 99 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
index f2d3aee..efe452c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -36,66 +36,32 @@
 #include 
 #include "mlx5_core.h"
 
-struct mlx5_alloc_pd_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd[8];
-};
-
-struct mlx5_alloc_pd_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   __be32  pdn;
-   u8  rsvd[4];
-};
-
-struct mlx5_dealloc_pd_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   __be32  pdn;
-   u8  rsvd[4];
-};
-
-struct mlx5_dealloc_pd_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvd[8];
-};
-
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
 {
-   struct mlx5_alloc_pd_mbox_inin;
-   struct mlx5_alloc_pd_mbox_out   out;
+   u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(alloc_pd_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+   MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
return err;
 
-   if (out.hdr.status)
-   return mlx5_cmd_status_to_err(&out.hdr);
-
-   *pdn = be32_to_cpu(out.pdn) & 0xffffff;
+   *pdn = MLX5_GET(alloc_pd_out, out, pd);
return err;
 }
 EXPORT_SYMBOL(mlx5_core_alloc_pd);
 
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
 {
-   struct mlx5_dealloc_pd_mbox_in  in;
-   struct mlx5_dealloc_pd_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD);
-   in.pdn = cpu_to_be32(pdn);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   return mlx5_cmd_status_to_err(&out.hdr);
-
-   return err;
+   MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+   MLX5_SET(dealloc_pd_in, in, pd, pdn);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
 EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c 
b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 5ff8af4..d0a0e0b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -42,73 +42,33 @@ enum {
NUM_LOW_LAT_UUARS   = 4,
 };
 
-
-struct mlx5_alloc_uar_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd[8];
-};
-
-struct mlx5_alloc_uar_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   __be32  uarn;
-   u8  rsvd[4];
-};
-
-struct mlx5_free_uar_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   __be32  uarn;
-   u8  rsvd[4];
-};
-
-struct mlx5_free_uar_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvd[8];
-};
-
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 {
-   struct mlx5_alloc_uar_mbox_in   in;
-   struct mlx5_alloc_uar_mbox_out  out;
+   u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(alloc_uar_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+   MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
-   goto ex;
-
-   if (out.hdr.status) {
-   err = mlx5_cmd_status_to_err(&out.hdr);
-   goto ex;
-   }
-
-   *uarn = be32_to_cpu(out.uarn) & 0xffffff;
+   return err;
 
-ex:
+   *uarn = MLX5_GET(alloc_uar_out, out, uar);
   

[PATCH for-next V3 05/15] net/mlx5: Pages management commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Remove old representation of manually created Pages management
commands layout, and use mlx5_ifc canonical structures and defines.
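
One detail worth calling out in the hunks below: the converted
functions fold the transport error and the firmware status into a
single return with the GCC "elvis" operator:

    err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
    err = err ? : mlx5_cmd_status_to_err_v2(out);
    /* transport failure wins; otherwise FW status maps to an -errno */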

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c| 165 -
 1 file changed, 58 insertions(+), 107 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 32dea35..7bfac21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -44,12 +44,6 @@ enum {
MLX5_PAGES_TAKE = 2
 };
 
-enum {
-   MLX5_BOOT_PAGES = 1,
-   MLX5_INIT_PAGES = 2,
-   MLX5_POST_INIT_PAGES= 3
-};
-
 struct mlx5_pages_req {
struct mlx5_core_dev *dev;
u16 func_id;
@@ -67,33 +61,6 @@ struct fw_page {
unsignedfree_count;
 };
 
-struct mlx5_query_pages_inbox {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd[8];
-};
-
-struct mlx5_query_pages_outbox {
-   struct mlx5_outbox_hdr  hdr;
-   __be16  rsvd;
-   __be16  func_id;
-   __be32  num_pages;
-};
-
-struct mlx5_manage_pages_inbox {
-   struct mlx5_inbox_hdr   hdr;
-   __be16  rsvd;
-   __be16  func_id;
-   __be32  num_entries;
-   __be64  pas[0];
-};
-
-struct mlx5_manage_pages_outbox {
-   struct mlx5_outbox_hdr  hdr;
-   __be32  num_entries;
-   u8  rsvd[4];
-   __be64  pas[0];
-};
-
 enum {
MAX_RECLAIM_TIME_MSECS  = 5000,
MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
@@ -167,24 +134,22 @@ static struct fw_page *find_fw_page(struct mlx5_core_dev 
*dev, u64 addr)
 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
s32 *npages, int boot)
 {
-   struct mlx5_query_pages_inbox   in;
-   struct mlx5_query_pages_outbox  out;
+   u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(query_pages_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
-   in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES);
+   MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
+   MLX5_SET(query_pages_in, in, op_mod, boot ?
+MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
+MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
 
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
return err;
 
-   if (out.hdr.status)
-   return mlx5_cmd_status_to_err(&out.hdr);
-
-   *npages = be32_to_cpu(out.num_pages);
-   *func_id = be16_to_cpu(out.func_id);
+   *npages = MLX5_GET(query_pages_out, out, num_pages);
+   *func_id = MLX5_GET(query_pages_out, out, function_id);
 
return err;
 }
@@ -280,46 +245,37 @@ out_alloc:
 
 static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
 {
-   struct mlx5_manage_pages_inbox *in;
-   struct mlx5_manage_pages_outbox out;
+   u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(manage_pages_in)]   = {0};
int err;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
-   if (!in)
-   return;
-
-   memset(, 0, sizeof(out));
-   in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
-   in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
-   in->func_id = cpu_to_be16(func_id);
-   err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
-   if (!err)
-   err = mlx5_cmd_status_to_err(&out.hdr);
-
+   MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+   MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
+   MLX5_SET(manage_pages_in, in, function_id, func_id);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   err = err ? : mlx5_cmd_status_to_err_v2(out);
if (err)
-   mlx5_core_warn(dev, "page notify failed\n");
-
-   kfree(in);
+   mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
+  func_id, err);
 }
 
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
  int notify_fail)
 {
-   struct mlx5_manage_pages_inbox *in;
-   struct mlx5_manage_pages_outbox out;
-   int inlen;
+   u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+   int inlen = MLX5_ST_SZ_BYTES(manage

[PATCH for-next V3 06/15] net/mlx5: EQ commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Remove old representation of manually created EQ commands layout,
and use mlx5_ifc canonical structures and defines.

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 18 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 78 ++-
 include/linux/mlx5/device.h   | 74 -
 include/linux/mlx5/driver.h   |  2 +-
 4 files changed, 44 insertions(+), 128 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 5210d92..58e5518 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -358,32 +358,32 @@ out:
 static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 int index)
 {
-   struct mlx5_query_eq_mbox_out *out;
-   struct mlx5_eq_context *ctx;
+   int outlen = MLX5_ST_SZ_BYTES(query_eq_out);
u64 param = 0;
+   void *ctx;
+   u32 *out;
int err;
 
-   out = kzalloc(sizeof(*out), GFP_KERNEL);
+   out = kzalloc(outlen, GFP_KERNEL);
if (!out)
return param;
 
-   ctx = &out->ctx;
-
-   err = mlx5_core_eq_query(dev, eq, out, sizeof(*out));
+   err = mlx5_core_eq_query(dev, eq, out, outlen);
if (err) {
mlx5_core_warn(dev, "failed to query eq\n");
goto out;
}
+   ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry);
 
switch (index) {
case EQ_NUM_EQES:
-   param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+   param = 1 << MLX5_GET(eqc, ctx, log_eq_size);
break;
case EQ_INTR:
-   param = ctx->intr;
+   param = MLX5_GET(eqc, ctx, intr);
break;
case EQ_LOG_PG_SZ:
-   param = (ctx->log_page_size & 0x1f) + 12;
+   param = MLX5_GET(eqc, ctx, log_page_size) + 12;
break;
}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 0e30602..7141197 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -86,23 +86,16 @@ struct cre_des_eq {
 
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
-   struct mlx5_destroy_eq_mbox_in in;
-   struct mlx5_destroy_eq_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(destroy_eq_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ);
-   in.eqn = eqn;
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (!err)
-   goto ex;
+   MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
+   MLX5_SET(destroy_eq_in, in, eq_number, eqn);
 
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 
-ex:
-   return err;
 }
 
 static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
@@ -351,11 +344,13 @@ static void init_eq_buf(struct mlx5_eq *eq)
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 
vecidx,
   int nent, u64 mask, const char *name, struct mlx5_uar 
*uar)
 {
+   u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
-   struct mlx5_create_eq_mbox_in *in;
-   struct mlx5_create_eq_mbox_out out;
-   int err;
+   __be64 *pas;
+   void *eqc;
int inlen;
+   u32 *in;
+   int err;
 
eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
@@ -365,35 +360,37 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct 
mlx5_eq *eq, u8 vecidx,
 
init_eq_buf(eq);
 
-   inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages;
+   inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
+   MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
+
in = mlx5_vzalloc(inlen);
if (!in) {
err = -ENOMEM;
goto err_buf;
}
-   memset(, 0, sizeof(out));
 
-   mlx5_fill_page_array(&eq->buf, in->pas);
+   pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
+   mlx5_fill_page_array(&eq->buf, pas);
 
-   in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
-   in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | 
uar->index);
-   in->ctx.intr = vecidx;
-   in->ctx.log_page_size = eq->bu

[PATCH for-next V3 07/15] {net,IB}/mlx5: CQ commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Remove old representation of manually created CQ commands layout,
and use mlx5_ifc canonical structures and defines.
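
For commands that carry a trailing page array (create_eq in the
previous patch, create_cq here), the conversion uses a heap-allocated
inbox sized from the ifc layout plus the PAS entries; a minimal sketch
of that variant, built from names in the CQ diff below:

	int inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		    MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
	u32 *in = mlx5_vzalloc(inlen); /* kmalloc with vmalloc fallback */
	__be64 *pas;
	void *cqc;

	if (!in)
		return -ENOMEM;

	/* sub-structures are reached by ifc offset, not by struct member */
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);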

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/cq.c   | 110 -
 drivers/net/ethernet/mellanox/mlx5/core/cq.c  | 113 +-
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |  18 ++--
 include/linux/mlx5/cq.h   |   6 +-
 include/linux/mlx5/device.h   |  76 ---
 5 files changed, 122 insertions(+), 201 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 308a358..35a9f71 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -747,14 +747,16 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct 
mlx5_ib_cq_buf *buf,
 
 static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
- int entries, struct mlx5_create_cq_mbox_in **cqb,
+ int entries, u32 **cqb,
  int *cqe_size, int *index, int *inlen)
 {
struct mlx5_ib_create_cq ucmd;
size_t ucmdlen;
int page_shift;
+   __be64 *pas;
int npages;
int ncont;
+   void *cqc;
int err;
 
ucmdlen =
@@ -792,14 +794,20 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct 
ib_udata *udata,
mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont 
%d\n",
ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, 
ncont);
 
-   *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
+   *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
*cqb = mlx5_vzalloc(*inlen);
if (!*cqb) {
err = -ENOMEM;
goto err_db;
}
-   mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
-   (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+
+   pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+   mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
+
+   cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+   MLX5_SET(cqc, cqc, log_page_size,
+page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
*index = to_mucontext(context)->uuari.uars[0].index;
 
@@ -834,9 +842,10 @@ static void init_cq_buf(struct mlx5_ib_cq *cq, struct 
mlx5_ib_cq_buf *buf)
 
 static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
int entries, int cqe_size,
-   struct mlx5_create_cq_mbox_in **cqb,
-   int *index, int *inlen)
+   u32 **cqb, int *index, int *inlen)
 {
+   __be64 *pas;
+   void *cqc;
int err;
 
err = mlx5_db_alloc(dev->mdev, &cq->db);
@@ -853,15 +862,21 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, 
struct mlx5_ib_cq *cq,
 
init_cq_buf(cq, &cq->buf);
 
-   *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
+   *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
*cqb = mlx5_vzalloc(*inlen);
if (!*cqb) {
err = -ENOMEM;
goto err_buf;
}
-   mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-   (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 
MLX5_ADAPTER_PAGE_SHIFT;
+   pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+   mlx5_fill_page_array(&cq->buf.buf, pas);
+
+   cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+   MLX5_SET(cqc, cqc, log_page_size,
+cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
*index = dev->mdev->priv.uuari.uars[0].index;
 
return 0;
@@ -895,11 +910,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 {
int entries = attr->cqe;
int vector = attr->comp_vector;
-   struct mlx5_create_cq_mbox_in *cqb = NULL;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_cq *cq;
int uninitialized_var(index);
int uninitialized_var(inlen);
+   u32 *cqb = NULL;
+   void *cqc;
int cqe_size;
unsigned int irqn;
int eqn;
@@ -945,19 +961,20 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
 
-   cq->cqe_size = cqe_size;
-   cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
-
-   if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
-   cqb->ctx.cqe_sz_flags |= (1 << 

[PATCH for-next V3 04/15] net/mlx5: MCG commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Remove old representation of manually created MCG commands layout
and use mlx5_ifc canonical structures and defines.

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c |  4 +-
 drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 70 +++
 include/linux/mlx5/mlx5_ifc.h |  2 +-
 3 files changed, 21 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d6e2a1c..0d55e0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -280,7 +280,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev 
*dev, u16 op,
case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
case MLX5_CMD_OP_DEALLOC_PD:
case MLX5_CMD_OP_DEALLOC_UAR:
-   case MLX5_CMD_OP_DETTACH_FROM_MCG:
+   case MLX5_CMD_OP_DETACH_FROM_MCG:
case MLX5_CMD_OP_DEALLOC_XRCD:
case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN:
case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
@@ -490,7 +490,7 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION);
MLX5_COMMAND_STR_CASE(ACCESS_REG);
MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG);
-   MLX5_COMMAND_STR_CASE(DETTACH_FROM_MCG);
+   MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG);
MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG);
MLX5_COMMAND_STR_CASE(MAD_IFC);
MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c 
b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
index d5a0c2d..01a1abd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -37,70 +37,36 @@
 #include 
 #include "mlx5_core.h"
 
-struct mlx5_attach_mcg_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   __be32  qpn;
-   __be32  rsvd;
-   u8  gid[16];
-};
-
-struct mlx5_attach_mcg_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvf[8];
-};
-
-struct mlx5_detach_mcg_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   __be32  qpn;
-   __be32  rsvd;
-   u8  gid[16];
-};
-
-struct mlx5_detach_mcg_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvf[8];
-};
-
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 
qpn)
 {
-   struct mlx5_attach_mcg_mbox_in in;
-   struct mlx5_attach_mcg_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)]   = {0};
+   void *gid;
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG);
-   memcpy(in.gid, mgid, sizeof(*mgid));
-   in.qpn = cpu_to_be32(qpn);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
+   MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+   MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+   gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+   memcpy(gid, mgid, sizeof(*mgid));
 
-   return err;
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
 EXPORT_SYMBOL(mlx5_core_attach_mcg);
 
 int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 
qpn)
 {
-   struct mlx5_detach_mcg_mbox_in in;
-   struct mlx5_detach_mcg_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)]   = {0};
+   void *gid;
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETTACH_FROM_MCG);
-   memcpy(in.gid, mgid, sizeof(*mgid));
-   in.qpn = cpu_to_be32(qpn);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
+   MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+   MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+   gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+   memcpy(gid, mgid, sizeof(*mgid));
 
-   return err;
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
 EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 21bc455..3f70fc9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include

[PATCH for-next V3 01/15] net/mlx5: Init/Teardown hca commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Remove old representation of manually created Init/Teardown hca
commands layout and use mlx5_ifc canonical structures and defines.

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c | 36 
 include/linux/mlx5/device.h  | 24 ---
 2 files changed, 10 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 77fc1aa..56bf520 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -162,38 +162,22 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
 {
-   struct mlx5_cmd_init_hca_mbox_in in;
-   struct mlx5_cmd_init_hca_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(init_hca_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
-
-   return err;
+   MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
 
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
 {
-   struct mlx5_cmd_teardown_hca_mbox_in in;
-   struct mlx5_cmd_teardown_hca_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+   u32 in[MLX5_ST_SZ_DW(teardown_hca_in)]   = {0};
int err;
 
-   memset(&in, 0, sizeof(in));
-   memset(&out, 0, sizeof(out));
-   in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA);
-   err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-   if (err)
-   return err;
-
-   if (out.hdr.status)
-   err = mlx5_cmd_status_to_err(&out.hdr);
-
-   return err;
+   MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+   err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   return err ? : mlx5_cmd_status_to_err_v2(out);
 }
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0b6d15c..6c343c0 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -455,30 +455,6 @@ struct mlx5_odp_caps {
char reserved2[0xe4];
 };
 
-struct mlx5_cmd_init_hca_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd0[2];
-   __be16  profile;
-   u8  rsvd1[4];
-};
-
-struct mlx5_cmd_init_hca_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvd[8];
-};
-
-struct mlx5_cmd_teardown_hca_mbox_in {
-   struct mlx5_inbox_hdr   hdr;
-   u8  rsvd0[2];
-   __be16  profile;
-   u8  rsvd1[4];
-};
-
-struct mlx5_cmd_teardown_hca_mbox_out {
-   struct mlx5_outbox_hdr  hdr;
-   u8  rsvd[8];
-};
-
 struct mlx5_cmd_layout {
u8  type;
u8  rsvd0[3];
-- 
2.7.4



[PATCH for-next V3 11/15] net/mlx5: Unify and improve command interface

2016-08-23 Thread Saeed Mahameed
Now as all commands use mlx5 ifc interface, instead of doing two calls
for executing a command we embed command status checking into
mlx5_cmd_exec to simplify the interface.

Also we do some cleanup here of redundant software structures
(inbox/outbox) and functions, and improve command failure output.
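
Before this patch, every caller did the two-step dance seen in the
earlier patches of this series:

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	err = err ? : mlx5_cmd_status_to_err_v2(out);

With status checking embedded in mlx5_cmd_exec() itself, the same call
site collapses to a single line (a sketch of the resulting convention):

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));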

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/main.c  |  10 +-
 drivers/infiniband/hw/mlx5/qp.c|   5 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  | 251 +++--
 drivers/net/ethernet/mellanox/mlx5/core/cq.c   |  16 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  10 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c   |  12 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  72 ++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   | 130 +++
 drivers/net/ethernet/mellanox/mlx5/core/fw.c   |  15 +-
 drivers/net/ethernet/mellanox/mlx5/core/mad.c  |   1 -
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  80 ++-
 drivers/net/ethernet/mellanox/mlx5/core/mcg.c  |  10 +-
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|  13 --
 drivers/net/ethernet/mellanox/mlx5/core/mr.c   |  28 +--
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c|  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/pd.c   |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/port.c |  99 ++--
 drivers/net/ethernet/mellanox/mlx5/core/qp.c   |  26 +--
 drivers/net/ethernet/mellanox/mlx5/core/rl.c   |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/srq.c  |  49 ++--
 drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 183 +--
 drivers/net/ethernet/mellanox/mlx5/core/uar.c  |  11 +-
 drivers/net/ethernet/mellanox/mlx5/core/vport.c|  74 ++
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c|  29 +--
 include/linux/mlx5/device.h| 115 --
 include/linux/mlx5/driver.h|   7 +-
 27 files changed, 385 insertions(+), 897 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index a84bb76..6fb77d7 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -233,23 +233,19 @@ static int set_roce_addr(struct ib_device *device, u8 
port_num,
 const union ib_gid *gid,
 const struct ib_gid_attr *attr)
 {
-   struct mlx5_ib_dev *dev = to_mdev(device);
-   u32  in[MLX5_ST_SZ_DW(set_roce_address_in)];
-   u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+   struct mlx5_ib_dev *dev = to_mdev(device);
+   u32  in[MLX5_ST_SZ_DW(set_roce_address_in)]  = {0};
+   u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
 
if (ll != IB_LINK_LAYER_ETHERNET)
return -EINVAL;
 
-   memset(in, 0, sizeof(in));
-
ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
 
MLX5_SET(set_roce_address_in, in, roce_address_index, index);
MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
-
-   memset(out, 0, sizeof(out));
return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6261737..f3c943f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1007,13 +1007,10 @@ static int is_connected(enum ib_qp_type qp_type)
 static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, u32 tdn)
 {
-   u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+   u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
-   memset(in, 0, sizeof(in));
-
MLX5_SET(tisc, tisc, transport_domain, tdn);
-
return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 88b0540..23b95da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -554,11 +554,124 @@ const char *mlx5_command_str(int command)
}
 }
 
+static const char *cmd_status_str(u8 status)
+{
+   switch (status) {
+   case MLX5_CMD_STAT_OK:
+   return "OK";
+   case MLX5_CMD_STAT_INT_ERR:
+   return "internal error";
+   case MLX5_CMD_STAT_BAD_OP_ERR:
+   return "bad operation";
+   case MLX5_CMD_STAT_BAD_PARAM_ERR:
+   return "bad parameter";
+   case MLX5_C

[PATCH for-next V3 09/15] {net,IB}/mlx5: QP/XRCD commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Remove old representation of manually created QP/XRCD commands layout
and use mlx5_ifc canonical structures and defines.
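
One readability win of the conversion is visible in the kernel-QP hunk
below: magic bit shifts into packed __be* context words become named
ifc fields (both fragments are taken from this patch's diff):

	/* old: "fast registration enabled" and reserved-lkey via magic bits */
	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);

	/* new: the same two bits set by name */
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);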

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/qp.c   | 154 +++-
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |  14 +-
 drivers/net/ethernet/mellanox/mlx5/core/qp.c  | 167 --
 include/linux/mlx5/mlx5_ifc.h |   5 +-
 include/linux/mlx5/qp.h   | 108 +-
 5 files changed, 165 insertions(+), 283 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 21ab0e2..d22492f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -726,7 +726,7 @@ err_umem:
 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
  struct mlx5_ib_qp *qp, struct ib_udata *udata,
  struct ib_qp_init_attr *attr,
- struct mlx5_create_qp_mbox_in **in,
+ u32 **in,
  struct mlx5_ib_create_qp_resp *resp, int *inlen,
  struct mlx5_ib_qp_base *base)
 {
@@ -739,6 +739,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct 
ib_pd *pd,
u32 offset = 0;
int uuarn;
int ncont = 0;
+   __be64 *pas;
+   void *qpc;
int err;
 
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@@ -795,20 +797,24 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct 
ib_pd *pd,
ubuffer->umem = NULL;
}
 
-   *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+   *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
*in = mlx5_vzalloc(*inlen);
if (!*in) {
err = -ENOMEM;
goto err_umem;
}
+
+   pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
-   mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
-(*in)->pas, 0);
-   (*in)->ctx.log_pg_sz_remote_qpn =
-   cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
-   (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+   mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
+
+   qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+
+   MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+   MLX5_SET(qpc, qpc, page_offset, offset);
 
-   (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+   MLX5_SET(qpc, qpc, uar_page, uar_index);
resp->uuar_index = uuarn;
qp->uuarn = uuarn;
 
@@ -857,12 +863,13 @@ static void destroy_qp_user(struct ib_pd *pd, struct 
mlx5_ib_qp *qp,
 static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
-   struct mlx5_create_qp_mbox_in **in, int *inlen,
+   u32 **in, int *inlen,
struct mlx5_ib_qp_base *base)
 {
enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
struct mlx5_uuar_info *uuari;
int uar_index;
+   void *qpc;
int uuarn;
int err;
 
@@ -902,25 +909,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
}
 
qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
-   *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
+   *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
*in = mlx5_vzalloc(*inlen);
if (!*in) {
err = -ENOMEM;
goto err_buf;
}
-   (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
-   (*in)->ctx.log_pg_sz_remote_qpn =
-   cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 
24);
+
+   qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+   MLX5_SET(qpc, qpc, uar_page, uar_index);
+   MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - 
MLX5_ADAPTER_PAGE_SHIFT);
+
/* Set "fast registration enabled" for all kernel QPs */
-   (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
-   (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+   MLX5_SET(qpc, qpc, fre, 1);
+   MLX5_SET(qpc, qpc, rlky, 1);
 
if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
-   (*in)->ctx.deth_sqpn = cpu_to_be32(1);
+   MLX5_SET(qpc, qpc, deth_sqpn, 1);
qp->flags |= MLX5_IB_QP_SQPN_QP1;
}
 
-   mlx5_fill_page_array(&qp->buf, (*in)->pas);
+

[PATCH for-next V3 08/15] {net,IB}/mlx5: MKey/PSV commands via mlx5 ifc

2016-08-23 Thread Saeed Mahameed
Remove old representation of manually created MKey/PSV commands layout,
and use mlx5_ifc canonical structures and defines.

Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h   |   2 +-
 drivers/infiniband/hw/mlx5/mr.c| 184 -
 drivers/infiniband/hw/mlx5/qp.c|   8 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  |   4 +-
 .../net/ethernet/mellanox/mlx5/core/en_common.c|  23 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  37 ++---
 drivers/net/ethernet/mellanox/mlx5/core/mr.c   | 183 +---
 include/linux/mlx5/device.h| 113 +
 include/linux/mlx5/driver.h|  11 +-
 include/linux/mlx5/mlx5_ifc.h  |   2 +-
 10 files changed, 235 insertions(+), 332 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 372385d..a59034a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -504,7 +504,7 @@ struct mlx5_ib_mr {
int umred;
int npages;
struct mlx5_ib_dev *dev;
-   struct mlx5_create_mkey_mbox_out out;
+   u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
struct mlx5_core_sig_ctx*sig;
int live;
void*descs_alloc;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4b02130..6f7e347 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -135,20 +135,10 @@ static void reg_mr_callback(int status, void *context)
return;
}
 
-   if (mr->out.hdr.status) {
-   mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
-mr->out.hdr.status,
-be32_to_cpu(mr->out.hdr.syndrome));
-   kfree(mr);
-   dev->fill_delay = 1;
-   mod_timer(&dev->delay_timer, jiffies + HZ);
-   return;
-   }
-
spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
key = dev->mdev->priv.mkey_key++;
spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
-   mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+   mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, 
mkey_index)) | key;
 
cache->last_add = jiffies;
 
@@ -170,16 +160,19 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int 
num)
 {
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
-   struct mlx5_create_mkey_mbox_in *in;
+   int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
int npages = 1 << ent->order;
+   void *mkc;
+   u32 *in;
int err = 0;
int i;
 
-   in = kzalloc(sizeof(*in), GFP_KERNEL);
+   in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
 
+   mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
for (i = 0; i < num; i++) {
if (ent->pending >= MAX_PENDING_REG_MR) {
err = -EAGAIN;
@@ -194,18 +187,22 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int 
num)
mr->order = ent->order;
mr->umred = 1;
mr->dev = dev;
-   in->seg.status = MLX5_MKEY_STATUS_FREE;
-   in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
-   in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-   in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
-   in->seg.log2_page_size = 12;
+
+   MLX5_SET(mkc, mkc, free, 1);
+   MLX5_SET(mkc, mkc, umr_en, 1);
+   MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+
+   MLX5_SET(mkc, mkc, qpn, 0xff);
+   MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
+   MLX5_SET(mkc, mkc, log_page_size, 12);
 
spin_lock_irq(&ent->lock);
ent->pending++;
spin_unlock_irq(&ent->lock);
-   err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in,
-   sizeof(*in), reg_mr_callback,
-   mr, &mr->out);
+   err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
+  in, inlen,
+  mr->out, sizeof(mr->out),
+  reg_mr_callback, mr);
if (err) {
   

Re: [PATCH v2] mlx5: Add ndo_poll_controller() implementation

2016-09-28 Thread Saeed Mahameed
On Wed, Sep 28, 2016 at 1:23 AM, Calvin Owens <calvinow...@fb.com> wrote:
> This implements ndo_poll_controller in net_device_ops callback for mlx5,
> which is necessary to use netconsole with this driver.
>
> Cc: Saeed Mahameed <sae...@dev.mellanox.co.il>
> Signed-off-by: Calvin Owens <calvinow...@fb.com>

Acked-By: Saeed Mahameed <sae...@mellanox.com>


Re: [PATCH for-next 00/14][PULL request] Mellanox mlx5 core driver updates 2016-10-25

2016-10-30 Thread Saeed Mahameed
On Fri, Oct 28, 2016 at 7:53 PM, David Miller  wrote:
>
> I really disalike pull requests of this form.
>
> You add lots of datastructures and helper functions but no actual
> users of these facilities to the driver.
>
> Do this instead:
>
> 1) Add TSAR infrastructure
> 2) Add use of TSAR facilities to the driver
>
> That's one pull request.
>
> I don't care if this is hard, or if there are entanglements with
> Infiniband or whatever, you must submit changes in this manner.
>

It is not hard, it is just not right.  We have lots of IB and ETH
features that we would like to submit in the same kernel cycle.
With your suggestion I will have to submit almost every feature (core
infrastructure and netdev/RDMA usage) to you and Doug.  Same for rdma
features, you will receive PULL requests for them as well.
I am sure you and the netdev list don't need such noise.  Do not
forget that this will slow down mlx5 progress, since
netdev will block rdma and vice-versa.

> I will not accept additions to a driver that don't even get really
> used.

For patches containing logic/helper functions such as "Add TSAR
infrastructure" I agree, and I can find a way to move some code around
to avoid future conflicts and remove them from such pull requests.

But you need to at least accept hardware-related structure and
infrastructure patches for shared code such as
include/linux/mlx5/mlx5_ifc.h, where we have only hardware definitions;
those patches are really minimal.

So bottom line, I will do my best to ensure future PULL requests will
contain only include/linux/mlx5/*.h hardware related definitions
or fully implemented features.

Can we agree on that ?

Thanks,
Saeed.


Re: [PATCH RFC 0/2] ethtool: Add actual port speed reporting

2016-11-09 Thread Saeed Mahameed
On Wed, Nov 2, 2016 at 5:50 PM, Mintz, Yuval  wrote:
>> Sending RFC to get feedback for the following ethtool proposal:
>>
>> In some cases such as virtual machines and multi functions (SR-IOV), the 
>> actual
>> bandwidth exposed for each machine is not accurately shown in ethtool.
>> Currently ethtool shows only physical port link speed.
>> In our case we would like to show the virtual port operational link speed 
>> which
>> in some cases is less than the physical port speed.
>>
>> This will give users better visibility for the actual speed running on their 
>> device.
>>
>> $ ethtool ens6
>> ...
>> Speed: 50000Mb/s
>> Actual speed: 25000Mb/s
>
> Not saying this is a bad thing, but where exactly is it listed that ethtool 
> has
> to show the physical port speed?

Well, looking at the ethtool fields below you can clearly see that they
refer only to physical properties of the port connector module.
From this you can conclude that the speed field refers to the physical
port speed.

Settings for ens1f0:
Supported ports: [ FIBRE Backplane ]
Supported link modes:   1000baseKX/Full
   10000baseKR/Full
   40000baseKR4/Full
   40000baseCR4/Full
   40000baseSR4/Full
   40000baseLR4/Full
   25000baseCR/Full
   25000baseKR/Full
   25000baseSR/Full
   50000baseCR2/Full
   50000baseKR2/Full
   100000baseKR4/Full
   100000baseSR4/Full
   100000baseCR4/Full
   100000baseLR4_ER4/Full
Supported pause frame use: Symmetric Receive-only
Supports auto-negotiation: Yes
Advertised link modes:  1000baseKX/Full
   10000baseKR/Full
   40000baseKR4/Full
   40000baseCR4/Full
   40000baseSR4/Full
   40000baseLR4/Full
   25000baseCR/Full
   25000baseKR/Full
   25000baseSR/Full
   50000baseCR2/Full
   50000baseKR2/Full
   100000baseKR4/Full
   100000baseSR4/Full
   100000baseCR4/Full
   100000baseLR4_ER4/Full
Advertised pause frame use: No
Advertised auto-negotiation: Yes
Speed: 100000Mb/s
Duplex: Full
Port: Direct Attach Copper
PHYAD: 0
Transceiver: internal
Auto-negotiation: on
Supports Wake-on: d
Wake-on: d
Link detected: yes

> E.g., bnx2x shows the logical speed instead, and has been doing that for 
> years.
> [Perhaps that's a past wrongness, but that's how it goes].
>
> And besides, one can argue that in the SR-IOV scenario the VF has no business
> knowing the physical port speed.

Yes, for SR-IOV VFs one field (logical) is sufficient.
But in some cases on a native system (no SR-IOV nor virtualization)
there will be a need for both physical and logical speed reporting.
Logical speed can be limited for several reasons (NIC low power mode,
PCI (width, gen), internal HCA rate limiters, etc.).

Such information will be more than useful for system administrators
and will not be available if we decide to show only one field.

-Saeed.


[PATCH net-next 00/13] Mellanox 100G SRIOV offloads tunnel_key set/release

2016-11-07 Thread Saeed Mahameed
Hi Dave,
  
From Hadar Hen Zion:

This series further enhances the SRIOV TC offloads of mlx5 to handle the
TC tunnel_key release and set actions.

This serves a common use-case in virtualization systems where the virtual
switch encapsulate packets (tunnel_key set action) sent from VMs with
outer headers corresponding to the local/remote host IPs and de-capsulate
(tunnel_key release) outer headers before the packets are received by the
VM. 

We use the new E-Switch switchdev mode and TC tunnel_key set/release
action to achieve that also in SW defined SRIOV environments by
offloading TC rules that contain these actions along with forwarding
(TC mirred/redirect action) the packets.

The first six patches are adding the needed support in flow dissector,
flower and tc for offloading tunnel_key actions:
- The first three patches are adding the needed help functions
  and enums
- The next three patches in the series are adding UDP port attribute
  to tunnel_key release and set actions. 

The addition of UDP ports would allow the HW driver to make sure they are
given (say) a VXLAN tunnel to offload (mlx5e uses that).
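
For illustration, with the helpers added in the first patches the HW
driver can verify it was handed a VXLAN tunnel before offloading; a
minimal sketch, assuming the tc_tunnel_key accessors from patch 1 and
the conventional VXLAN UDP port:

	#include <net/tc_act/tc_tunnel_key.h>

	static bool offloadable_vxlan_encap(const struct tc_action *a)
	{
		if (!is_tcf_tunnel_set(a))
			return false;

		/* 4789 is the IANA-assigned VXLAN UDP port */
		return tcf_tunnel_info(a)->key.tp_dst == htons(4789);
	}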

Patches 7-10 are mlx5 preparations for tunnel_key actions offloads support.

Patch #11 adds mlx5e support to offload tunnel_key release action, and the
last two patches (#12-13) add mlx5e support to tc tunnel_key set action.

Currently in order to offload tc tunnel_key release action, the tc rule
should be placed on top of the mlx5e offloading (uplink) interface instead
of the shared tunnel interface. The resolution between the tunnel interface
to the HW netdevice will be implemented in a follow up series.

Hadar.

This series was generated against commit
94edc86bf13f ("Merge branch 'dwmac-sti-refactor-cleanup'")

Thanks,
Saeed.

Hadar Hen Zion (13):
  net/sched: act_tunnel_key: add helper inlines to access tcf_tunnel_key
  flow_dissector: Add enums for encapsulation keys
  net/sched: cls_flower: Allow setting encapsulation fields as used key
  net/sched: cls_flower: Add UDP port to tunnel parameters
  net/dst: Add dst port to dst_metadata utility functions
  net/sched: act_tunnel_key: Add UDP dst port option
  net/mlx5: Move alloc/dealloc encap commands declarations to common
header file
  net/mlx5: Check max encap header size capability
  net/mlx5: Add creation flags when adding new flow table
  net/mlx5: Support encap id when setting new steering entry
  net/mlx5e: Add TC tunnel release action for SRIOV offloads
  net/mlx5e: Add ndo_udp_tunnel_add to VF representors
  net/mlx5e: Add basic TC tunnel set action for SRIOV offloads

 drivers/infiniband/hw/mlx5/main.c  |  12 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   4 +
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c  |  19 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c|  35 +-
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c|  12 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   8 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 462 -
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  26 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  20 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  42 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   |  46 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   |   9 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  76 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |   2 +
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h|   6 +
 include/linux/mlx5/fs.h|  19 +-
 include/net/dst_metadata.h |  10 +-
 include/net/flow_dissector.h   |   5 +
 include/net/tc_act/tc_tunnel_key.h |  37 ++
 include/uapi/linux/pkt_cls.h   |   5 +
 include/uapi/linux/tc_act/tc_tunnel_key.h  |   1 +
 net/sched/act_tunnel_key.c |  15 +-
 net/sched/cls_flower.c |  39 +-
 24 files changed, 764 insertions(+), 148 deletions(-)

-- 
2.7.4


