Re: [PATCH for-next 5/7] IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers

2015-12-29 Thread Or Gerlitz
On Tue, Dec 29, 2015 at 3:24 PM, Matan Barak  wrote:
> @@ -2413,34 +2442,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, 
> struct ib_ud_wr *wr,
>
> if (is_eth) {
> struct in6_addr in6;
> -
> +   u16 ether_type;
> u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) 
> << 13;
>
> +   ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
> +   (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
> +
> mlx->sched_prio = cpu_to_be16(pcp);
>
> +   ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
> memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
> -   /* FIXME: cache smac value? */
> memcpy(>srcrb_flags16[0], ah->av.eth.mac, 2);
> memcpy(>imm, ah->av.eth.mac + 2, 4);
> memcpy(, sgid.raw, sizeof(in6));
>
> -   if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
> -   u64 mac = 
> atomic64_read(_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
> -   u8 smac[ETH_ALEN];
> -
> -   mlx4_u64_to_smac(smac, mac);
> -   memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
> -   } else {
> -   /* use the src mac of the tunnel */
> -   memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, 
> ETH_ALEN);
> -   }
>

The last hunk that you removed had a role and was by no means
dead code, right? So... (1) why is it correct to remove it? (2) if you
want to introduce a different way to implement what was done here, why
in this patch? Maybe add a pre-patch for that.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V2] IB/mlx5: Unify CQ create flags check

2015-12-29 Thread Or Gerlitz

On 12/29/2015 4:41 PM, Leon Romanovsky wrote:

From: Leon Romanovsky 

The create_cq() can receive creation flags which were used
differently by two commits which added create_cq extended
command and cross-channel. The merged code caused to not
accept any flags at all.

This patch unifies the check into one function and one return
error code.

Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command")
Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support")

---
Changes from v1:
   * Remove links to linux-rdma from commit message
   * Placed change log under git comment section (---)
Changes from v0:
   * Add Fixes tag

Signed-off-by: Leon Romanovsky 
wrong placing. Needs to be before the 1st --- and w.o blank lines after 
the Fixes: lines please


Please use dry runs to get this to run (...) correctly



---
  drivers/infiniband/hw/mlx5/cq.c  | 9 +
  drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++-
  2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index b14316603e44..7ddc790b1819 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, 
struct mlx5_ib_cq *cq)
mlx5_db_free(dev->mdev, >db);
  }
  
-enum {

-   CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION
-};
-
  struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int eqn;
int err;
  
-	if (check_cq_create_flags(attr->flags))

-   return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
  
-	if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)

+   if (check_cq_create_flags(attr->flags))
return ERR_PTR(-EOPNOTSUPP);
  
  	entries = roundup_pow_of_two(entries + 1);

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d4b227126265..fbf14a768105 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -692,6 +692,7 @@ static inline u32 check_cq_create_flags(u32 flags)
 * It returns non-zero value for unsupported CQ
 * create flags, otherwise it returns zero.
 */
-   return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN);
+   return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
  }
  #endif /* MLX5_IB_H */


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next 7/7] IB/mlx4: Advertise RoCE support

2015-12-29 Thread Or Gerlitz

On 12/29/2015 3:24 PM, Matan Barak wrote:

Advertise RoCE support in port_immutable according to the hardware
capabilities. This enables the verbs stack to use RoCE v2 mode.


Advertise RoCE V2 support



Signed-off-by: Matan Barak 


I guess you wanted  "IB/mlx4: Advertise RoCE V2 support" for the patch 
title? since we did

advertise RDMA_CORE_PORT_IBA_ROCE prior to this patch.

Or.

---
  drivers/infiniband/hw/mlx4/main.c | 12 +---
  1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index 44e5699..8cf2575 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2183,6 +2183,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, 
u8 port_num,
   struct ib_port_immutable *immutable)
  {
struct ib_port_attr attr;
+   struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
  
  	err = mlx4_ib_query_port(ibdev, port_num, );

@@ -2192,10 +2193,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, 
u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
  
-	if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)

+   if (mlx4_ib_port_link_layer(ibdev, port_num) == 
IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-   else
-   immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+   } else {
+   if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+   immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+   if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+   immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+   RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+   }
  
  	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
  


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mad: Ensure fairness in ib_mad_completion_handler

2015-12-29 Thread ira.weiny
On Tue, Dec 29, 2015 at 11:51:19AM +0200, Sagi Grimberg wrote:
> 
> >Please just convert the mad handler to the new CQ API in
> >drivers/infiniband/core/cq.c.  If you have any question about it I'd be
> >glad to help you.
> 
> +1 on this suggestion.
> 
> We had these sorts of questions in our ULPs as well. The CQ API should
> take care of all that for you and leaves you to just handle the
> completions...

I saw your work and agree it would be nice but it will take some time to
convert and debug the MAD stack.  I'll try and find some time but it is
unlikely I will anytime soon.

We can hit this bug regularly with hfi1 but have not hit with qib or mlx4.  I
leave it up to Doug if he wants to take this fix before someone finds time to
convert the MAD stack.

Ira

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next 2/7] IB/mlx4: Add RoCE per GID support for add_gid and del_gid

2015-12-29 Thread Or Gerlitz

On 12/29/2015 3:24 PM, Matan Barak wrote:

[...] We use a new firmware command in order to populate the GID table and 
store the type along with the GID value.


It's a new value to an existing command, so better to say that we use a new
value for the SET_PORT firmware command to do X


Also here, break out the new mlx4_core functionality, e.g. the changes to
include/linux/mlx4/cmd.h, into an mlx4_core-only patch. You don't need any
change to mlx4_core to have its own patch; I guess one to three mlx4_core
patches would be OK.


Did you make sure (at the resource tracker) that VFs can't do this new 
set port command flavor?


Also find some spot to put blank line in the change-log, it's hard to 
read this way.


Or.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next 1/7] IB/mlx4: Query RoCE support

2015-12-29 Thread Or Gerlitz

On 12/29/2015 3:24 PM, Matan Barak wrote:

@@ -905,6 +906,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
MLX4_GET(dev_cap->bmme_flags, outbox,
 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+   if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
+   dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;


Did you make sure that the query dev cap wrapper unsets this bit when 
proxing VF queries?



if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling

2015-12-29 Thread Christoph Lameter

Reviewed-by: Christoph Lameter 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Generic InfiniBand transport done in software

2015-12-29 Thread Doug Ledford
On 12/27/2015 12:54 PM, Moni Shoua wrote:

>> Yes it is specific to Intel *now*, that doesn't mean it should stay that
>> way. Rdmavt could, and in my opinion should, be extended to support
>> soft-roce. I don't think replicating the same thing is a great idea.
>>
> But you post *now* a so called generic driver so it must now fit any
> possible driver (including Soft RoCE)

This is incorrect.  This isn't some public API that we are exporting to
user space.  Nor is it an API that out of tree drivers are using.  This
is a purely kernel internal API for use by a limited number of drivers.
 As such, it need not be finalized before it is submitted or used.  It
can be taken one piece at a time, and if, at some point, it is
determined that there are shortcomings to the API, it can be updated in
place with all of the drivers that use it in a single patch or patch
series.  So a finalized design prior to putting code in place is
specifically *not* needed.

>> As to the location, where do you think it should go. drivers/infiniband/sw
>> makes the most sense to me, but open to suggestions.
>>
>> And for the question of why publish when it's not ready, the better question
>> is why not?  Is it not good to see the work in progress as it evolves so the
>> community can provide feedback?
>>
> What kind of a feedback you expect when I don't have an idea about
> your plans for rdmavt
> Interfaces, flows, data structures... all is missing from the
> documentation to rdmavt.

They released it so that you can start hooking SoftRoCE into it.  As you
hook it in, if it needs changes to work with SoftRoCE, simply make the
changes needed and move on.

I think Dennis' point, and I agree with him, is that you are over
complicating the issue here.  This need not be a highly designed item,
it needs to be a functional item, and we can build it as we go.  If you
have to make changes to rdmavt in order to hook up SoftRoCE, that's
fine, post them to the list, they will get reviewed.  As long as the
change doesn't break or otherwise negatively impact qib and/or hfi1,
then it should be fine.  If it does, then I'm sure Intel will work with
you to find a solution that doesn't negatively impact them.


-- 
Doug Ledford 
  GPG KeyID: 0E572FDD




signature.asc
Description: OpenPGP digital signature


SoftRoCE V1

2015-12-29 Thread Wenda Ni
Hi experts,

We have several Mellanox RoCE V1 NIC cards, and would like to try
communicating with SoftRoCE V1.

We are using branch rxe-3.0 from https://github.com/SoftRoCE/rxe-dev
according to the Soft-RoCE README Rev 1.0 issued from Mellanox last
year.


Testing using pingpong examples from libibverbs (hardware RoCE <->
SoftRoCE) already shows a bug in Ethernet CRC generation. We expect
further bugs along the way.

So we would like to know for SoftRoCE V1, are we using the latest
branch? Also, is it still tied to SLES11 SP3 OS?

There is very little documentation that we can find, so we hope this is the
right place to ask such questions.

Cheers,

Wenda Ni, Ph.D.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Generic InfiniBand transport done in software

2015-12-29 Thread Moni Shoua
I think that my point is missed. See my answers inline


> This is incorrect.  This isn't some public API that we are exporting to
> user space.  Nor is it an API that out of tree drivers are using.  This
> is a purely kernel internal API for use by a limited number of drivers.
>  As such, it need not be finalized before it is submitted or used.  It
> can be taken one piece at a time, and if, at some point, it is
> determined that there are shortcomings to the API, it can be updated in
> place with all of the drivers that use it in a single patch or patch
> series.  So a finalized design prior to putting code in place is
> specifically *not* needed.
>
This is not a question of future backward compatibility where
interfaces must be kept forever. I agree that kernel interfaces may be
changed with kernel moving forward. However, this is not what I'm
arguing against.

When one submits an RFC for a generic infrastructure, he must state what
the interfaces between the blocks of the design are.
The Soft RoCE block can't start until I know what the final interfaces look
like. This is an unacceptable method of work.

>
> They released it so that you can start hooking SoftRoCE into it.  As you
> hook it in, if it needs changes to work with SoftRoCE, simply make the
> changes needed and move on.

This is not a question of whether I can hook the Soft RoCE driver into this framework.
In fact, I can't think of an IB driver that can't use this framework. What this
framework offers is just another hop from ib_core to the real driver.
Where is the removal of duplicated code? This is a list of functions
that for now
must be implemented in the low level driver.

create_cq
destroy_cq
poll_cq
req_notify_cq
resize_cq
create_srq
modify_srq
destroy_srq
query_srq
create_qp
query_device
query_gid
alloc_ucontext
modify_device
modify_qp
dealloc_ucontext
query_port
destroy_qp
get_port_immutable
modify_port
query_qp
post_send
post_recv
post_srq_recv

Most if not all of them have common part in all drivers.
What are the plans to get rid of them? When?
Don't you think that this should be known in advance?

I already asked, and was never answered seriously: what was
the purpose of submitting the code in this premature state?
It can't be for feedback, because what kind of feedback can you provide
for just a skeleton? Moreover, today they submitted V2 with a changelog
that is almost 100% cosmetic changes. I really don't understand this kind
of work.




>
> I think Dennis' point, and I agree with him, is that you are over
> complicating the issue here.  This need not be a highly designed item,
> it needs to be a functional item, and we can build it as we go.  If you
> have to make changes to rdmavt in order to hook up SoftRoCE, that's
> fine, post them to the list, they will get reviewed.  As long as the
> change doesn't break or otherwise negatively impact qib and/or hfi1,
> then it should be fine.  If it does, then I'm sure Intel will work with
> you to find a solution that doesn't negatively impact them.

A reminder of what the initial goal was - remove code duplicates between
all IB transport drivers. This goal is complicated and in my RFC I explained
why. So, for start, I am not complicating anything that was simple before.

Second, what you are saying here is actually: "this is a project to serve
Intel's needs". So why treat it as a generic infrastructure? I'm not aiming to
hurt performance but Intel should aim for achieving the goals we agreed on
in the beginning.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Generic InfiniBand transport done in software

2015-12-29 Thread Dennis Dalessandro

On Tue, Dec 29, 2015 at 07:38:30PM +0200, Moni Shoua wrote:

This is not a question if I can hook Soft RoCE driver into this framework.
In fact, I can't think of an IB driver that can't use this framework. What 
this

framework offers is just another hop from ib_core the real driver.
Where is the removal of duplicated code? This is a list of functions
that for now
must be implemented in the low level driver.

create_cq
destroy_cq
poll_cq
req_notify_cq
resize_cq
create_srq
modify_srq
destroy_srq
query_srq
create_qp
query_device
query_gid
alloc_ucontext
modify_device
modify_qp
dealloc_ucontext
query_port
destroy_qp
get_port_immutable
modify_port
query_qp
post_send
post_recv
post_srq_recv

Most if not all of them have common part in all drivers.
What are the plans to get rid of them? When?
Don't you think that this should be known in advance?


We have patch sets that implement all of these which will be posted soon.  
With the holidays things have just been a bit slow to come out.


-Denny
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next 2/3] IB/core: Change per-entry lock in RoCE GID table to one lock

2015-12-29 Thread Bart Van Assche

On 12/30/2015 07:01 AM, Or Gerlitz wrote:

On 10/28/2015 4:52 PM, Matan Barak wrote:

@@ -134,16 +138,14 @@ static int write_gid(struct ib_device *ib_dev,
u8 port,
  {
  int ret = 0;
  struct net_device *old_net_dev;
-unsigned long flags;
  /* in rdma_cap_roce_gid_table, this funciton should be protected
by a
   * sleep-able lock.
   */
-write_lock_irqsave(>data_vec[ix].lock, flags);
  if (rdma_cap_roce_gid_table(ib_dev, port)) {
  table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
-write_unlock_irqrestore(>data_vec[ix].lock, flags);
+write_unlock_irq(>rwlock);
  /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
   * RoCE providers and thus only updates the cache.
   */
@@ -153,7 +155,7 @@ static int write_gid(struct ib_device *ib_dev, u8
port,
  else if (action == GID_TABLE_WRITE_ACTION_DEL)
  ret = ib_dev->del_gid(ib_dev, port, ix,
>data_vec[ix].context);
-write_lock_irqsave(>data_vec[ix].lock, flags);
+write_lock_irq(>rwlock);
  }


sparse complains on

drivers/infiniband/core/cache.c:186:17: warning: context imbalance in
'write_gid' - unexpected unlock

is this false positive?


Hello Or,

sparse expects __release() and __acquire() annotations for functions 
that unlock a lock object that has been locked by its caller. See e.g. 
http://lists.kernelnewbies.org/pipermail/kernelnewbies/2011-October/003541.html.


Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next V3 10/11] IB/core: Initialize UD header structure with IP and UDP headers

2015-12-29 Thread Or Gerlitz

On 12/23/2015 2:56 PM, Matan Barak wrote:
  
+__be16 ib_ud_ip4_csum(struct ib_ud_header *header)

+{
+   struct iphdr iph;
+
+   iph.ihl = 5;
+   iph.version = 4;
+   iph.tos = header->ip4.tos;
+   iph.tot_len = header->ip4.tot_len;
+   iph.id  = header->ip4.id;
+   iph.frag_off= header->ip4.frag_off;
+   iph.ttl = header->ip4.ttl;
+   iph.protocol= header->ip4.protocol;
+   iph.check   = 0;
+   iph.saddr   = header->ip4.saddr;
+   iph.daddr   = header->ip4.daddr;
+
+   return ip_fast_csum((u8 *), iph.ihl);
+}
+EXPORT_SYMBOL(ib_ud_ip4_csum);


You have introduced here this sparse warning, please fix

drivers/infiniband/core/ud_header.c:299:28: warning: incorrect type in 
return expression (different base types)

drivers/infiniband/core/ud_header.c:299:28:expected restricted __be16
drivers/infiniband/core/ud_header.c:299:28:got restricted __sum16

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next V3 00/11] Add RoCE v2 support

2015-12-29 Thread Or Gerlitz

Hi Matan,

I see these two smatch complaints on code added with this series, can 
you please take a look?


drivers/infiniband/core/addr.c:503 rdma_resolve_ip_route() warn: 
variable dereferenced before check 'src_addr' (see line 500)
drivers/infiniband/core/cma_configfs.c:172 make_cma_ports() warn: double 
check that we're allocating correct size: 8 vs 128



Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next 2/3] IB/core: Change per-entry lock in RoCE GID table to one lock

2015-12-29 Thread Or Gerlitz

On 10/28/2015 4:52 PM, Matan Barak wrote:

@@ -134,16 +138,14 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
  {
int ret = 0;
struct net_device *old_net_dev;
-   unsigned long flags;
  
  	/* in rdma_cap_roce_gid_table, this funciton should be protected by a

 * sleep-able lock.
 */
-   write_lock_irqsave(>data_vec[ix].lock, flags);
  
  	if (rdma_cap_roce_gid_table(ib_dev, port)) {

table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
-   write_unlock_irqrestore(>data_vec[ix].lock, flags);
+   write_unlock_irq(>rwlock);
/* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
 * RoCE providers and thus only updates the cache.
 */
@@ -153,7 +155,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
else if (action == GID_TABLE_WRITE_ACTION_DEL)
ret = ib_dev->del_gid(ib_dev, port, ix,
  >data_vec[ix].context);
-   write_lock_irqsave(>data_vec[ix].lock, flags);
+   write_lock_irq(>rwlock);
}


sparse complains on

drivers/infiniband/core/cache.c:186:17: warning: context imbalance in 
'write_gid' - unexpected unlock


is this false positive?

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling

2015-12-29 Thread Matan Barak
On Mon, Dec 28, 2015 at 11:53 PM, Hal Rosenstock  
wrote:
>
> Port number is not part of ClassPortInfo attribute but is
> still needed as a parameter when invoking process_mad.
>
> To properly handle this attribute, port_num is added as a
> parameter to get_counter_table and get_perf_mad was changed
> not to store port_num in the attribute itself when it's
> querying the ClassPortInfo attribute.
>
> This handles issue pointed out by Matan Barak 
>
> Signed-off-by: Hal Rosenstock 
> Acked-by: Matan Barak 
> ---
> diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
> index 539040f..2daf832 100644
> --- a/drivers/infiniband/core/sysfs.c
> +++ b/drivers/infiniband/core/sysfs.c
> @@ -438,7 +438,8 @@ static int get_perf_mad(struct ib_device *dev, int 
> port_num, int attr,
> in_mad->mad_hdr.method= IB_MGMT_METHOD_GET;
> in_mad->mad_hdr.attr_id   = attr;
>
> -   in_mad->data[41] = port_num;/* PortSelect field */
> +   if (attr != IB_PMA_CLASS_PORT_INFO)
> +   in_mad->data[41] = port_num;/* PortSelect field */
>
> if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
>  port_num, NULL, NULL,
> @@ -714,11 +715,12 @@ err:
>   * Figure out which counter table to use depending on
>   * the device capabilities.
>   */
> -static struct attribute_group *get_counter_table(struct ib_device *dev)
> +static struct attribute_group *get_counter_table(struct ib_device *dev,
> +int port_num)
>  {
> struct ib_class_port_info cpi;
>
> -   if (get_perf_mad(dev, 0, IB_PMA_CLASS_PORT_INFO,
> +   if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
> , 40, sizeof(cpi)) >= 0) {
>
> if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH)
> @@ -776,7 +778,7 @@ static int add_port(struct ib_device *device, int 
> port_num,
> goto err_put;
> }
>
> -   p->pma_table = get_counter_table(device);
> +   p->pma_table = get_counter_table(device, port_num);
> ret = sysfs_create_group(>kobj, p->pma_table);
> if (ret)
> goto err_put_gid_attrs;
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Please just add:
Fixes: 145d9c541032 ('IB/core: Display extended counter set if available')
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mad: Ensure fairness in ib_mad_completion_handler

2015-12-29 Thread Christoph Hellwig
Please just convert the mad handler to the new CQ API in
drivers/infiniband/core/cq.c.  If you have any question about it I'd be
glad to help you.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Generic InfiniBand transport done in software

2015-12-29 Thread Christoph Hellwig
Hi Moni,

On Sun, Dec 27, 2015 at 07:54:46PM +0200, Moni Shoua wrote:
> But you post *now* a so called generic driver so it must now fit any
> possible driver (including Soft RoCE)

it's never going to fit any possible future driver.  Dennis and folks
have done great work to move code outside the drivers into a shared
library.  So far it's been driven just by the Intel drivers as that's
the only thing they were interested in.

If you are interested in supporting SoftROCE please work with them
by adjusting the code towards your requirements.  In Linux we have
great results with iterative approaches and I'd suggest you try it
as well.

> What kind of a feedback you expect when I don't have an idea about
> your plans for rdmavt
> Interfaces, flows, data structures... all is missing from the
> documentation to rdmavt.

You've got the code, so let's work based on that.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: completion queue abstraction V2

2015-12-29 Thread Bart Van Assche

On 12/07/2015 09:51 PM, Christoph Hellwig wrote:

This series adds a new RDMA core abstraction that insulated the
ULPs from the nitty gritty details of CQ polling.  See the individual
patches for more details.


Hello Christoph,

After having tested the SRP initiator and target drivers with this patch 
series applied I have further feedback about this patch series. I will 
provide that feedback as replies to the individual patches.


Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mlx5: Unify CQ create flags check

2015-12-29 Thread Sagi Grimberg

Does this deserve a Fixes tag?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/core: Remove a set-but-not-used variable from ib_sg_to_pages()

2015-12-29 Thread Sagi Grimberg

Thanks Bart,

Acked-by: Sagi Grimberg 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling

2015-12-29 Thread Hal Rosenstock
On 12/29/2015 7:21 AM, Or Gerlitz wrote:
> On 12/29/2015 12:43 PM, Hal Rosenstock wrote:
>> This handles issue pointed out by Matan Barak 
>>
>> Fixes: 145d9c541032 ('IB/core: Display extended counter set if
>> available')
>>
>> Signed-off-by: Hal Rosenstock 
> again, remove the blank line after the fixes tag.
> 
> Also,  I am not that the way Doug is setting the branch for pull would
> preserve commit IDs when
> the offending patch landed in Linus tree. If this is the case, we should
> put your patch in 2nd pull
> request and have the right commit ID there. Please check with Doug.

Doug ?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next 6/7] IB/mlx4: Create and use another QP1 for RoCEv2

2015-12-29 Thread Or Gerlitz

On 12/29/2015 3:24 PM, Matan Barak wrote:

The mlx4 driver uses a special QP to implement the GSI QP. This kind
of QP allows to build the InfiniBand headers in SW to be put before
the payload that comes in with the WR. The mlx4 HW builds the packet,
calculates the ICRC and puts it at the end of the payload. This ICRC
calculation however depends on the QP configuration which is
determined when QP is modified (roce_mode during INIT->RTR).
On the other hand, ICRC verification when packet is received does to
depend on this configuration.


I don't understand the part of the sentence saying "when packet is 
received does to depend on this configuration"

maybe some typo/s there?


Therefore, using 2 GSI QPs for send (one for each RoCE version) and 1
GSI QP for receive are required.


s/2/two/ and s/1/one/ please

Or.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/core: Remove a set-but-not-used variable from ib_sg_to_pages()

2015-12-29 Thread Leon Romanovsky
On Tue, Dec 29, 2015 at 10:45:03AM +0100, Bart Van Assche wrote:
> Detected this by building the IB core with W=1. See also patch
> "IB core: Fix ib_sg_to_pages()" (commit 8f5ba10ed40a).

Reviewed-by: Leon Romanovsky 

> 
> Signed-off-by: Bart Van Assche 
> Cc: Sagi Grimberg 
> Cc: Christoph Hellwig 
> ---
>  drivers/infiniband/core/verbs.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
> index 545906d..c90ed29 100644
> --- a/drivers/infiniband/core/verbs.c
> +++ b/drivers/infiniband/core/verbs.c
> @@ -1530,7 +1530,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
>  int (*set_page)(struct ib_mr *, u64))
>  {
>   struct scatterlist *sg;
> - u64 last_end_dma_addr = 0, last_page_addr = 0;
> + u64 last_end_dma_addr = 0;
>   unsigned int last_page_off = 0;
>   u64 page_mask = ~((u64)mr->page_size - 1);
>   int i, ret;
> @@ -1572,7 +1572,6 @@ next_page:
>  
>   mr->length += dma_len;
>   last_end_dma_addr = end_dma_addr;
> - last_page_addr = end_dma_addr & page_mask;
>   last_page_off = end_dma_addr & ~page_mask;
>   }
>  
> -- 
> 2.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Generic InfiniBand transport done in software

2015-12-29 Thread Moni Shoua
> it's never going to fit any possible future driver.  Dennis and folks
> have done great work to move code outside the drivers into a shared
> library.  So far it's been driven just by the Intel drivers as that's
> the only thing they were interested in.
>

If it's not going to be a solution for anything else but Intel then
why declare it as such?
Where is that shared library? The amount of code in rdmavt
that can be considered shared is very little.

> If you are interested in supporting SoftROCE please work with them
> by adjusting the code towards your requirements.  In Linux we have
> great results with iterative appoaches and I'd suggest you try it
> as well.
>
Exactly. All you asked for  is in the RFC I posted.

>
> You've got the code, so let's work based on that.
> --
I say let's agree on the interfaces and start writing code.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 00/36] Add rdma verbs transport library

2015-12-29 Thread Moni Shoua
> Changes since v1:
> Removed driver specific version
> Fixed license text to remove copyright and put on top
> Return 0 in rvt_map_sg instead of BAD_DMA_AGGRESS
> Remove #include of dma.h from dma.c
> Update comment about protection domain limit
> Remove comment on alternative design for private data
> Rename CDR macro to CHECK_DRIVER_OVERRIDE
> Change all the stubs to return EOPNOTSUPP
> Fix comment style for rvt_query_port
> Fix typo in subject
> Rename rdi.lk_table to rdi.lkey_table
> Rename rvt_sge.m => rvt_sge.cur_map (Sean)
> Rename rvt_sge.n => rvt_sge.cur_seg (Sean)
> Remove rvt_reg_phys_mr
> Drop support for commit 38071a461f0a ("IB/qib: Support the new memory
>registration API")
>
I don't understand what in this change log justifies a V2 for this patch set
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 03/13] irq_poll: fold irq_poll_sched_prep into irq_poll_sched

2015-12-29 Thread Bart Van Assche

On 12/07/2015 09:51 PM, Christoph Hellwig wrote:

diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 88af879..13cb149 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -21,13 +21,17 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
   *
   * Description:
   * Add this irq_poll structure to the pending poll list and trigger the
- * raise of the blk iopoll softirq. The driver must already have gotten a
- * successful return from irq_poll_sched_prep() before calling this.
+ * raise of the blk iopoll softirq.
   **/
  void irq_poll_sched(struct irq_poll *iop)
  {
unsigned long flags;

+   if (test_bit(IRQ_POLL_F_DISABLE, >state))
+   return;
+   if (!test_and_set_bit(IRQ_POLL_F_SCHED, >state))
+   return;
+
local_irq_save(flags);
list_add_tail(>list, this_cpu_ptr(_cpu_iopoll));
__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);


After having applied these changes the SRP initiator didn't receive any 
RDMA completions anymore. I could remedy that by changing 
"!test_and_set_bit()" into "test_and_set_bit()":


diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 43a3370..3a67019 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -29,7 +29,7 @@ void irq_poll_sched(struct irq_poll *iop)

if (test_bit(IRQ_POLL_F_DISABLE, >state))
return;
-   if (!test_and_set_bit(IRQ_POLL_F_SCHED, >state))
+   if (test_and_set_bit(IRQ_POLL_F_SCHED, >state))
return;

local_irq_save(flags);
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-next 1/7] IB/mlx4: Query RoCE support

2015-12-29 Thread Matan Barak
From: Moni Shoua 

Query the RoCE support from firmware using the appropriate firmware
commands. Downstream patches will read these capabilities and act
accordingly.

Signed-off-by: Moni Shoua 
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   |  3 +++
 drivers/net/ethernet/mellanox/mlx4/main.c |  6 +-
 include/linux/mlx4/device.h   | 11 +--
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c 
b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 90db94e..bdd6822 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -157,6 +157,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 
flags)
[29] = "802.1ad offload support",
[31] = "Modifying loopback source checks using UPDATE_QP 
support",
[32] = "Loopback source checks support",
+   [33] = "RoCEv2 support"
};
int i;
 
@@ -905,6 +906,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
MLX4_GET(dev_cap->bmme_flags, outbox,
 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+   if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
+   dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c 
b/drivers/net/ethernet/mellanox/mlx4/main.c
index 31c491e..fb4968f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -424,8 +424,12 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap)
if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
/* Don't do sense port on multifunction devices (for now at least) */
-   if (mlx4_is_mfunc(dev))
+   /* Don't do enable RoCE V2 on multifunction devices */
+   if (mlx4_is_mfunc(dev)) {
dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
+   dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
+   mlx4_dbg(dev, "RoCE V2 is not supported when SR-IOV is 
enabled\n");
+   }
 
if (mlx4_low_memory_profile()) {
dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index d3133be..dbf39ab 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -216,6 +216,7 @@ enum {
MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN  = 1LL <<  30,
MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
MLX4_DEV_CAP_FLAG2_LB_SRC_CHK   = 1ULL << 32,
+   MLX4_DEV_CAP_FLAG2_ROCE_V1_V2   = 1LL <<  33,
 };
 
 enum {
@@ -267,6 +268,7 @@ enum {
MLX4_BMME_FLAG_TYPE_2_WIN   = 1 <<  9,
MLX4_BMME_FLAG_RESERVED_LKEY= 1 << 10,
MLX4_BMME_FLAG_FAST_REG_WR  = 1 << 11,
+   MLX4_BMME_FLAG_ROCE_V1_V2   = 1 << 19,
MLX4_BMME_FLAG_PORT_REMAP   = 1 << 24,
MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28,
 };
@@ -275,6 +277,10 @@ enum {
MLX4_FLAG_PORT_REMAP= MLX4_BMME_FLAG_PORT_REMAP
 };
 
+enum {
+   MLX4_FLAG_ROCE_V1_V2= MLX4_BMME_FLAG_ROCE_V1_V2
+};
+
 enum mlx4_event {
MLX4_EVENT_TYPE_COMP   = 0x00,
MLX4_EVENT_TYPE_PATH_MIG   = 0x01,
@@ -984,9 +990,10 @@ struct mlx4_mad_ifc {
if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
 
 #define mlx4_foreach_ib_transport_port(port, dev) \
-   for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)   \
+   for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)   \
if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
-   ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+   ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \
+   ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
 
 #define MLX4_INVALID_SLAVE_ID  0xFF
 #define MLX4_SINK_COUNTER_INDEX(dev)   (dev->caps.max_counters - 1)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-next 2/7] IB/mlx4: Add RoCE per GID support for add_gid and del_gid

2015-12-29 Thread Matan Barak
In RoCE, GID table is managed in the IB core driver. The role of the
mlx4 driver is to synchronize the HW with the entries in the GID table.
Since it is possible that the same GID value will appear more than once
in the GID table (though with different attributes) it is required from
the mlx4 driver to maintain a reference counting mechanism and populate
the HW with a single value. We use a new firmware command in order to
populate the GID table and store the type along with the GID value.

Signed-off-by: Moni Shoua 
---
 drivers/infiniband/hw/mlx4/main.c| 69 +---
 drivers/infiniband/hw/mlx4/mlx4_ib.h |  1 +
 include/linux/mlx4/cmd.h |  3 +-
 3 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index 627267f..988fa33 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct 
ib_device *device, u8 port_n
return dev;
 }
 
-static int mlx4_ib_update_gids(struct gid_entry *gids,
-  struct mlx4_ib_dev *ibdev,
-  u8 port_num)
+static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
 {
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -187,6 +187,61 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return err;
 }
 
+static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
+struct mlx4_ib_dev *ibdev,
+u8 port_num)
+{
+   struct mlx4_cmd_mailbox *mailbox;
+   int err;
+   struct mlx4_dev *dev = ibdev->dev;
+   int i;
+   struct {
+   union ib_gidgid;
+   __be32  rsrvd1[2];
+   __be16  rsrvd2;
+   u8  type;
+   u8  version;
+   __be32  rsrvd3;
+   } *gid_tbl;
+
+   mailbox = mlx4_alloc_cmd_mailbox(dev);
+   if (IS_ERR(mailbox))
+   return -ENOMEM;
+
+   gid_tbl = mailbox->buf;
+   for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+   memcpy(_tbl[i].gid, [i].gid, sizeof(union ib_gid));
+   if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+   gid_tbl[i].version = 2;
+   if (!ipv6_addr_v4mapped((struct in6_addr 
*)[i].gid))
+   gid_tbl[i].type = 1;
+   }
+   }
+
+   err = mlx4_cmd(dev, mailbox->dma,
+  MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
+  1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+  MLX4_CMD_WRAPPED);
+   if (mlx4_is_bonded(dev))
+   err += mlx4_cmd(dev, mailbox->dma,
+   MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
+   1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+   MLX4_CMD_WRAPPED);
+
+   mlx4_free_cmd_mailbox(dev, mailbox);
+   return err;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+  struct mlx4_ib_dev *ibdev,
+  u8 port_num)
+{
+   if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+   return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
+
+   return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
+}
+
 static int mlx4_ib_add_gid(struct ib_device *device,
   u8 port_num,
   unsigned int index,
@@ -215,7 +270,8 @@ static int mlx4_ib_add_gid(struct ib_device *device,
port_gid_table = >gids[port_num - 1];
spin_lock_bh(>lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
-   if (!memcmp(_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+   if (!memcmp(_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
+   (port_gid_table->gids[i].gid_type == attr->gid_type))  {
found = i;
break;
}
@@ -233,6 +289,7 @@ static int mlx4_ib_add_gid(struct ib_device *device,
} else {
*context = port_gid_table->gids[free].ctx;
memcpy(_gid_table->gids[free].gid, gid, 
sizeof(*gid));
+   port_gid_table->gids[free].gid_type = 
attr->gid_type;
port_gid_table->gids[free].ctx->real_index = 
free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
@@ -248,8 +305,10 @@ static int mlx4_ib_add_gid(struct ib_device *device,
if (!gids) {
ret = -ENOMEM;
  

[PATCH for-next 7/7] IB/mlx4: Advertise RoCE support

2015-12-29 Thread Matan Barak
Advertise RoCE support in port_immutable according to the hardware
capabilities. This enables the verbs stack to use RoCE v2 mode.

Signed-off-by: Matan Barak 
---
 drivers/infiniband/hw/mlx4/main.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index 44e5699..8cf2575 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2183,6 +2183,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, 
u8 port_num,
   struct ib_port_immutable *immutable)
 {
struct ib_port_attr attr;
+   struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
 
err = mlx4_ib_query_port(ibdev, port_num, );
@@ -2192,10 +2193,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, 
u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
 
-   if (mlx4_ib_port_link_layer(ibdev, port_num) == 
IB_LINK_LAYER_INFINIBAND)
+   if (mlx4_ib_port_link_layer(ibdev, port_num) == 
IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-   else
-   immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+   } else {
+   if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+   immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+   if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+   immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+   RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+   }
 
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-next 0/7] Add RoCE v2 support for mlx4 driver

2015-12-29 Thread Matan Barak
Hi Doug,

This series adds RoCE v2 support for mlx4 driver.
It implements the required bits in the new RoCE v2 API while adding
the necessary firmware commands and handling.

Patch 0001 queries the firmware if RoCE is supported.
Patch 0002 introduces a new firmware command that sets the GID table,
such that we store the GID type along the GID itself in the table.
Patch 0003 configures the device to work in RoCE v1 and RoCE v2 mixed
mode.
Patch 0004 adds the support to create steering rules for IPv4 based
packets. This is necessary in order to support RoCE multicast.
Patch 0005 introduces the support for sending RoCE v2 packets from
QP1.
Patch 0006 creates another QP in order to receive QP1 RoCE v2 traffic.
Patch 0007 advertises RoCE v2 support for upper layer. From this point
and on, the GID table will be populated with RoCE v2 based GIDs (if
the hardware supports it).

Regards,
Moni and Matan

Maor Gottlieb (1):
  net/mlx4_core: Add handlning of RoCE v2 over IPV4 in attach_flow

Matan Barak (2):
  IB/mlx4: Add RoCE per GID support for add_gid and del_gid
  IB/mlx4: Advertise RoCE support

Moni Shoua (4):
  IB/mlx4: Query RoCE support
  IB/mlx4: Configure device to work in RoCEv2
  IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers
  IB/mlx4: Create and use another QP1 for RoCEv2

 drivers/infiniband/hw/mlx4/main.c | 100 +--
 drivers/infiniband/hw/mlx4/mlx4_ib.h  |   8 +
 drivers/infiniband/hw/mlx4/qp.c   | 283 --
 drivers/net/ethernet/mellanox/mlx4/fw.c   |  19 +-
 drivers/net/ethernet/mellanox/mlx4/main.c |   6 +-
 drivers/net/ethernet/mellanox/mlx4/mcg.c  |  14 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |   7 +-
 drivers/net/ethernet/mellanox/mlx4/port.c |   8 +
 drivers/net/ethernet/mellanox/mlx4/qp.c   |  28 +++
 include/linux/mlx4/cmd.h  |   3 +-
 include/linux/mlx4/device.h   |  18 +-
 include/linux/mlx4/qp.h   |  15 +-
 include/rdma/ib_verbs.h   |   2 +
 13 files changed, 434 insertions(+), 77 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 08/13] IB/srpt: chain RDMA READ/WRITE requests

2015-12-29 Thread Bart Van Assche
On 12/07/2015 09:51 PM, Christoph Hellwig wrote:
> Remove struct rdma_iu and instead allocate the struct ib_rdma_wr array
> early and fill out directly.  This allows us to chain the WRs, and thus
> achieve both less lock contention on the HCA workqueue as well as much
> simpler error handling.

Please consider folding the patch below into this patch.

Thanks,

Bart.

[PATCH] IB/srpt: Fix a recently introduced kernel crash

BUG: unable to handle kernel paging request at 00010198
IP: [] __lock_acquire+0xa2/0x560
Call Trace:
 [] lock_acquire+0x62/0x80
 [] _raw_spin_lock_irqsave+0x43/0x60
 [] srpt_rdma_read_done+0x57/0x120 [ib_srpt]
 [] __ib_process_cq+0x43/0xc0 [ib_core]
 [] ib_cq_poll_work+0x25/0x70 [ib_core]
 [] process_one_work+0x1bd/0x460
 [] worker_thread+0x118/0x420
 [] kthread+0xe4/0x100
 [] ret_from_fork+0x3f/0x70

---
 drivers/infiniband/ulp/srpt/ib_srpt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c 
b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 8068aff..3daab39 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1395,7 +1395,7 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct 
ib_wc *wc)
 {
struct srpt_rdma_ch *ch = cq->cq_context;
struct srpt_send_ioctx *ioctx =
-   container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+   container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
 
WARN_ON(ioctx->n_rdma <= 0);
atomic_add(ioctx->n_rdma, >sq_wr_avail);
@@ -1418,7 +1418,7 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct 
ib_wc *wc)
 static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 {
struct srpt_send_ioctx *ioctx =
-   container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+   container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
 
if (unlikely(wc->status != IB_WC_SUCCESS)) {
pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling

2015-12-29 Thread Or Gerlitz

On 12/29/2015 12:43 PM, Hal Rosenstock wrote:

Port number is not part of ClassPortInfo attribute but is
still needed as a parameter when invoking process_mad.


Please remove the blank line above your 1st sentence.



To properly handle this attribute, port_num is added as a
parameter to get_counter_table and get_perf_mad was changed
not to store port_num in the attribute itself when it's
querying the ClassPortInfo attribute.

This handles issue pointed out by Matan Barak 

Fixes: 145d9c541032 ('IB/core: Display extended counter set if available')

Signed-off-by: Hal Rosenstock 

again, remove the blank line after the fixes tag.

Also, I am not sure that the way Doug is setting the branch for pull would
preserve commit IDs when the offending patch landed in Linus' tree. If this
is the case, we should put your patch in a 2nd pull request and have the
right commit ID there. Please check with Doug.

Acked-by: Matan Barak 
Acked-by: Ira Weiny 
---
Change from v1:
Added fixes line to description


So this patch makes mlx4 IB driver on Eth ports workable with the 
4.5-rc1 proposed bits?


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2] IB/mlx5: Unify CQ create flags check

2015-12-29 Thread Leon Romanovsky
From: Leon Romanovsky 

The create_cq() can receive creation flags which were used
differently by two commits which added create_cq extended
command and cross-channel. The merged code ended up not
accepting any flags at all.

This patch unifies the check into one function and one return
error code.

Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command")
Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support")

---
Changes from v1:
  * Remove links to linux-rdma from commit message
  * Placed change log under git comment section (---)
Changes from v0:
  * Add Fixes tag

Signed-off-by: Leon Romanovsky 
---
 drivers/infiniband/hw/mlx5/cq.c  | 9 +
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++-
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index b14316603e44..7ddc790b1819 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, 
struct mlx5_ib_cq *cq)
mlx5_db_free(dev->mdev, >db);
 }
 
-enum {
-   CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION
-};
-
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int eqn;
int err;
 
-   if (check_cq_create_flags(attr->flags))
-   return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
 
-   if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
+   if (check_cq_create_flags(attr->flags))
return ERR_PTR(-EOPNOTSUPP);
 
entries = roundup_pow_of_two(entries + 1);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d4b227126265..fbf14a768105 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -692,6 +692,7 @@ static inline u32 check_cq_create_flags(u32 flags)
 * It returns non-zero value for unsupported CQ
 * create flags, otherwise it returns zero.
 */
-   return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN);
+   return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
 }
 #endif /* MLX5_IB_H */
-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V2] IB/mlx5: Unify CQ create flags check

2015-12-29 Thread Leon Romanovsky
On Tue, Dec 29, 2015 at 04:03:41PM +0200, Leon Romanovsky wrote:
> On Tue, Dec 29, 2015 at 03:51:47PM +0200, Sagi Grimberg wrote:
> > >From: Leon Romanovsky 
> > >
> > >The create_cq() can receive creation flags which were used
> > >differently by two following commits [1] and [2]. The current
> > >code caused to not accept any flags at all.
> > 
> > We can skip referencing the linux-rdma mailing list.
> > 
> > >
> > >This patch unifies the check into one function and one return
> > >error code.
> > >
> > >Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command")
> > >Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support")
> > >
> > >[1] http://www.spinics.net/lists/linux-rdma/msg31430.html
> > >[2] http://www.spinics.net/lists/linux-rdma/msg31658.html
> > >
> > >Changes from v0:
> > >   * Add Fixes tag
> > 
> > Umm, the above ([1], [2], Changes) usually go under the "---"
> > separator so we won't see them in git log forever.
> 
> Doug,
> Do you want me resend the patch?

I sent new version.
Thanks.

> 
> > 
> > >
> > >Signed-off-by: Leon Romanovsky 
> > >---
> > >  drivers/infiniband/hw/mlx5/cq.c  | 9 +
> > >  drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++-
> > >  2 files changed, 3 insertions(+), 9 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] IB/mlx5: Unify CQ create flags check

2015-12-29 Thread Leon Romanovsky
From: Leon Romanovsky 

The create_cq() can receive creation flags which were used
differently by two following commits [1] and [2].

This patch unifies the check into one function and one return
error code.

[1] commit 972ecb821379 ("IB/mlx5: Add create_cq extended command")
[2] commit 051f263098a9 ("IB/mlx5: Add driver cross-channel support")

Signed-off-by: Leon Romanovsky 
---
 drivers/infiniband/hw/mlx5/cq.c  | 9 +
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++-
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index b14316603e44..7ddc790b1819 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, 
struct mlx5_ib_cq *cq)
mlx5_db_free(dev->mdev, >db);
 }
 
-enum {
-   CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION
-};
-
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int eqn;
int err;
 
-   if (check_cq_create_flags(attr->flags))
-   return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
 
-   if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
+   if (check_cq_create_flags(attr->flags))
return ERR_PTR(-EOPNOTSUPP);
 
entries = roundup_pow_of_two(entries + 1);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d4b227126265..fbf14a768105 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -692,6 +692,7 @@ static inline u32 check_cq_create_flags(u32 flags)
 * It returns non-zero value for unsupported CQ
 * create flags, otherwise it returns zero.
 */
-   return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN);
+   return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
 }
 #endif /* MLX5_IB_H */
-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2] IB/mlx5: Unify CQ create flags check

2015-12-29 Thread Leon Romanovsky
From: Leon Romanovsky 

The create_cq() can receive creation flags which were used
differently by two following commits [1] and [2]. The current
code ended up not accepting any flags at all.

This patch unifies the check into one function and one return
error code.

Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command")
Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support")

[1] http://www.spinics.net/lists/linux-rdma/msg31430.html
[2] http://www.spinics.net/lists/linux-rdma/msg31658.html

Changes from v0:
  * Add Fixes tag

Signed-off-by: Leon Romanovsky 
---
 drivers/infiniband/hw/mlx5/cq.c  | 9 +
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++-
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index b14316603e44..7ddc790b1819 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, 
struct mlx5_ib_cq *cq)
mlx5_db_free(dev->mdev, >db);
 }
 
-enum {
-   CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION
-};
-
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int eqn;
int err;
 
-   if (check_cq_create_flags(attr->flags))
-   return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
 
-   if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
+   if (check_cq_create_flags(attr->flags))
return ERR_PTR(-EOPNOTSUPP);
 
entries = roundup_pow_of_two(entries + 1);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d4b227126265..fbf14a768105 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -692,6 +692,7 @@ static inline u32 check_cq_create_flags(u32 flags)
 * It returns non-zero value for unsupported CQ
 * create flags, otherwise it returns zero.
 */
-   return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN);
+   return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
 }
 #endif /* MLX5_IB_H */
-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next 4/7] net/mlx4_core: Add handlning of RoCE v2 over IPV4 in attach_flow

2015-12-29 Thread Or Gerlitz

On 12/29/2015 3:24 PM, Matan Barak wrote:

From: Maor Gottlieb 

s/handlning/handling/



When attaching multicast for RoCE v2, we need to be able to steer
packets to the QPs. Hence, we add support for IPV4 over IB steering.


I'm not sure I follow the change-log; can you clarify it a little further?



Signed-off-by: Maor Gottlieb 
---
  drivers/net/ethernet/mellanox/mlx4/mcg.c | 14 --
  include/linux/mlx4/device.h  |  6 ++
  2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c 
b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 1d4e2e0..834e60e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -858,7 +858,9 @@ static int parse_trans_rule(struct mlx4_dev *dev, struct 
mlx4_spec_list *spec,
break;
  
  	case MLX4_NET_TRANS_RULE_ID_IB:

-   rule_hw->ib.l3_qpn = spec->ib.l3_qpn;
+   rule_hw->ib.l3_qpn = spec->ib.l3_qpn |
+   (spec->ib.roce_type == MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4 
?
+(__force __be32)0x80 : (__force __be32)0);


Maybe avoid hard-coded constants and give them meaningful names?


rule_hw->ib.qpn_mask = spec->ib.qpn_msk;
memcpy(_hw->ib.dst_gid, >ib.dst_gid, 16);
memcpy(_hw->ib.dst_gid_msk, >ib.dst_gid_msk, 16);
@@ -1384,10 +1386,18 @@ int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, 
struct mlx4_qp *qp,
memcpy(spec.eth.dst_mac_msk, _mask, ETH_ALEN);
break;
  
+		case MLX4_PROT_IB_IPV4:

+   spec.id = MLX4_NET_TRANS_RULE_ID_IB;
+   memcpy(spec.ib.dst_gid + 12, gid + 12, 4);
+   memset(spec.ib.dst_gid_msk + 12, 0xff, 4);
+   spec.ib.roce_type = MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4;
+   break;
+
case MLX4_PROT_IB_IPV6:
spec.id = MLX4_NET_TRANS_RULE_ID_IB;
memcpy(spec.ib.dst_gid, gid, 16);
-   memset(_gid_msk, 0xff, 16);
+   memset(spec.ib.dst_gid_msk, 0xff, 16);
+   spec.ib.roce_type = MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV6;
break;
default:
return -EINVAL;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 0d873f1ae..cdc75b2 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -391,6 +391,11 @@ enum mlx4_protocol {
MLX4_PROT_FCOE
  };
  
+enum mlx4_flow_roce_type {

+   MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV6 = 0,
+   MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4
+};
+
  enum {
MLX4_MTT_FLAG_PRESENT   = 1
  };
@@ -1197,6 +1202,7 @@ struct mlx4_spec_ipv4 {
  struct mlx4_spec_ib {
__be32  l3_qpn;
__be32  qpn_msk;
+   enummlx4_flow_roce_type roce_type;
u8  dst_gid[16];
u8  dst_gid_msk[16];
  };


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V3] IB/mlx5: Unify CQ create flags check

2015-12-29 Thread Leon Romanovsky
From: Leon Romanovsky 

The create_cq() can receive creation flags which were used
differently by two commits which added create_cq extended
command and cross-channel. The merged code ended up not
accepting any flags at all.

This patch unifies the check into one function and one return
error code.

Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command")
Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support")
Signed-off-by: Leon Romanovsky 

---
Changes from v2:
  * Remove blank line after Fixes
  * Place changes below Signed-off-by
Changes from v1:
  * Remove links to linux-rdma from commit message
  * Placed change log under git comment section (---)
Changes from v0:
  * Add Fixes tag

 drivers/infiniband/hw/mlx5/cq.c  | 9 +
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++-
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index b14316603e44..7ddc790b1819 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, 
struct mlx5_ib_cq *cq)
mlx5_db_free(dev->mdev, >db);
 }
 
-enum {
-   CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION
-};
-
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int eqn;
int err;
 
-   if (check_cq_create_flags(attr->flags))
-   return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
 
-   if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
+   if (check_cq_create_flags(attr->flags))
return ERR_PTR(-EOPNOTSUPP);
 
entries = roundup_pow_of_two(entries + 1);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d4b227126265..fbf14a768105 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -692,6 +692,7 @@ static inline u32 check_cq_create_flags(u32 flags)
 * It returns non-zero value for unsupported CQ
 * create flags, otherwise it returns zero.
 */
-   return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN);
+   return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
 }
 #endif /* MLX5_IB_H */
-- 
1.7.12.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] IB/mad: Ensure fairness in ib_mad_completion_handler

2015-12-29 Thread Sagi Grimberg



Please just convert the mad handler to the new CQ API in
drivers/infiniband/core/cq.c.  If you have any question about it I'd be
glad to help you.


+1 on this suggestion.

We had these sorts of questions in our ULPs as well. The CQ API should
take care of all that for you and leaves you to just handle the
completions...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] IB/core: Remove a set-but-not-used variable from ib_sg_to_pages()

2015-12-29 Thread Bart Van Assche
Detected this by building the IB core with W=1. See also patch
"IB core: Fix ib_sg_to_pages()" (commit 8f5ba10ed40a).

Signed-off-by: Bart Van Assche 
Cc: Sagi Grimberg 
Cc: Christoph Hellwig 
---
 drivers/infiniband/core/verbs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 545906d..c90ed29 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1530,7 +1530,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
   int (*set_page)(struct ib_mr *, u64))
 {
struct scatterlist *sg;
-   u64 last_end_dma_addr = 0, last_page_addr = 0;
+   u64 last_end_dma_addr = 0;
unsigned int last_page_off = 0;
u64 page_mask = ~((u64)mr->page_size - 1);
int i, ret;
@@ -1572,7 +1572,6 @@ next_page:
 
mr->length += dma_len;
last_end_dma_addr = end_dma_addr;
-   last_page_addr = end_dma_addr & page_mask;
last_page_off = end_dma_addr & ~page_mask;
}
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling

2015-12-29 Thread Hal Rosenstock

Port number is not part of ClassPortInfo attribute but is
still needed as a parameter when invoking process_mad.

To properly handle this attribute, port_num is added as a
parameter to get_counter_table and get_perf_mad was changed
not to store port_num in the attribute itself when it's
querying the ClassPortInfo attribute.

This handles the issue pointed out by Matan Barak.

Fixes: 145d9c541032 ('IB/core: Display extended counter set if available')

Signed-off-by: Hal Rosenstock 
Acked-by: Matan Barak 
Acked-by: Ira Weiny 
---
Change from v1:
Added fixes line to description

diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 539040f..2daf832 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -438,7 +438,8 @@ static int get_perf_mad(struct ib_device *dev, int 
port_num, int attr,
in_mad->mad_hdr.method= IB_MGMT_METHOD_GET;
in_mad->mad_hdr.attr_id   = attr;
 
-   in_mad->data[41] = port_num;/* PortSelect field */
+   if (attr != IB_PMA_CLASS_PORT_INFO)
+   in_mad->data[41] = port_num;/* PortSelect field */
 
if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
 port_num, NULL, NULL,
@@ -714,11 +715,12 @@ err:
  * Figure out which counter table to use depending on
  * the device capabilities.
  */
-static struct attribute_group *get_counter_table(struct ib_device *dev)
+static struct attribute_group *get_counter_table(struct ib_device *dev,
+int port_num)
 {
struct ib_class_port_info cpi;
 
-   if (get_perf_mad(dev, 0, IB_PMA_CLASS_PORT_INFO,
+   if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
, 40, sizeof(cpi)) >= 0) {
 
if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH)
@@ -776,7 +778,7 @@ static int add_port(struct ib_device *device, int port_num,
goto err_put;
}
 
-   p->pma_table = get_counter_table(device);
+   p->pma_table = get_counter_table(device, port_num);
ret = sysfs_create_group(>kobj, p->pma_table);
if (ret)
goto err_put_gid_attrs;
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/6] IB/uapi: expose uverbs WC opcodes

2015-12-29 Thread Sagi Grimberg



+   IB_WC_SEND  = IB_UVERBS_WC_SEND,
+   IB_WC_RDMA_WRITE= IB_UVERBS_WC_RDMA_WRITE,
+   IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+   IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+   IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+   IB_WC_LSO   = IB_UVERBS_WC_SEND_END,
+   IB_WC_LOCAL_INV = IB_UVERBS_WC_SEND_END + 1,
+   IB_WC_REG_MR= IB_UVERBS_WC_SEND_END + 2,
+   IB_WC_MASKED_COMP_SWAP  = IB_UVERBS_WC_SEND_END + 3,
+   IB_WC_MASKED_FETCH_ADD  = IB_UVERBS_WC_SEND_END + 4,

As you did it in the first patch, just don't assign after IB_WC_LOCAL_INV.
The compiler will handle the IB_UVERBS_WC_SEND_END + X calculations by itself.


I disagree, I'd say it's better to keep the code verbosity level here...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] IB/uapi: expose uverbs WC flags

2015-12-29 Thread Sagi Grimberg



+enum ib_uverbs_wc_flags {
+   IB_UVERBS_WC_GRH= (1 << 0),
+   IB_UVERBS_WC_WITH_IMM   = (1 << 1),
+   IB_UVERBS_WC_WITH_INVALIDATE= (1 << 2),
+   IB_UVERBS_WC_IP_CSUM_OK = (1 << 3),
+   IB_UVERBS_WC_WITH_SMAC  = (1 << 4),
+   IB_UVERBS_WC_WITH_VLAN  = (1 << 5),
+   IB_UVERBS_WC_WITH_NETWORK_HDR_TYPE  = (1 << 6),
+};

It will be great to add _FLAGS_ mark in the name and add _LAST too.


Don't you prefer to stay consistent with the kernel enumeration?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-next 3/7] IB/mlx4: Configure device to work in RoCEv2

2015-12-29 Thread Matan Barak
From: Moni Shoua 

Some mlx4 adapters are RoCEv2 capable. To enable this feature, some
hardware configuration is required:

1. Set port general parameters
2. Configure the outgoing UDP destination port
3. Configure the QPs that work with RoCEv2

Signed-off-by: Moni Shoua 
---
 drivers/infiniband/hw/mlx4/main.c | 19 ++---
 drivers/infiniband/hw/mlx4/qp.c   | 35 ---
 drivers/net/ethernet/mellanox/mlx4/fw.c   | 16 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  7 +--
 drivers/net/ethernet/mellanox/mlx4/port.c |  8 +++
 drivers/net/ethernet/mellanox/mlx4/qp.c   | 28 +
 include/linux/mlx4/device.h   |  1 +
 include/linux/mlx4/qp.h   | 15 +++--
 include/rdma/ib_verbs.h   |  2 ++
 9 files changed, 120 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index 988fa33..44e5699 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -384,6 +384,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev 
*ibdev,
int i;
int ret;
unsigned long flags;
+   struct ib_gid_attr attr;
 
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
@@ -394,10 +395,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev 
*ibdev,
if (!rdma_cap_roce_gid_table(>ib_dev, port_num))
return index;
 
-   ret = ib_get_cached_gid(>ib_dev, port_num, index, , NULL);
+   ret = ib_get_cached_gid(>ib_dev, port_num, index, , );
if (ret)
return ret;
 
+   if (attr.ndev)
+   dev_put(attr.ndev);
+
if (!memcmp(, , sizeof(gid)))
return -EINVAL;
 
@@ -405,7 +409,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev 
*ibdev,
port_gid_table = >gids[port_num - 1];
 
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
-   if (!memcmp(_gid_table->gids[i].gid, , sizeof(gid))) {
+   if (!memcmp(_gid_table->gids[i].gid, , sizeof(gid)) &&
+   attr.gid_type == port_gid_table->gids[i].gid_type) {
ctx = port_gid_table->gids[i].ctx;
break;
}
@@ -2481,7 +2486,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
 
-   if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+   if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+   dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(>nb);
@@ -2490,6 +2496,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
}
+   if (!mlx4_is_slave(dev) &&
+   dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+   err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+   if (err) {
+   goto err_notif;
+   }
+   }
}
 
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8d28059..c0dee79 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1508,6 +1508,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, 
struct mlx4_ib_qp *qp)
return 0;
 }
 
+enum {
+   MLX4_QPC_ROCE_MODE_1 = 0,
+   MLX4_QPC_ROCE_MODE_2 = 2,
+   MLX4_QPC_ROCE_MODE_MAX = 0xff
+};
+
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+   switch (gid_type) {
+   case IB_GID_TYPE_ROCE:
+   return MLX4_QPC_ROCE_MODE_1;
+   case IB_GID_TYPE_ROCE_UDP_ENCAP:
+   return MLX4_QPC_ROCE_MODE_2;
+   default:
+   return MLX4_QPC_ROCE_MODE_MAX;
+   }
+}
+
 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
   const struct ib_qp_attr *attr, int attr_mask,
   enum ib_qp_state cur_state, enum ib_qp_state 
new_state)
@@ -1651,9 +1669,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
u16 vlan = 0x;
u8 smac[ETH_ALEN];
int status = 0;
+   int is_eth = rdma_cap_eth_ah(>ib_dev, port_num) &&
+   attr->ah_attr.ah_flags & IB_AH_GRH;
 
-   if (rdma_cap_eth_ah(>ib_dev, port_num) &&
-   attr->ah_attr.ah_flags & IB_AH_GRH) {
+   if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) {
int index = attr->ah_attr.grh.sgid_index;
 
status = 

[PATCH for-next 4/7] net/mlx4_core: Add handling of RoCE v2 over IPV4 in attach_flow

2015-12-29 Thread Matan Barak
From: Maor Gottlieb 

When attaching multicast for RoCE v2, we need to be able to steer
packets to the QPs. Hence, we add support for IPV4 over IB steering.

Signed-off-by: Maor Gottlieb 
---
 drivers/net/ethernet/mellanox/mlx4/mcg.c | 14 --
 include/linux/mlx4/device.h  |  6 ++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c 
b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 1d4e2e0..834e60e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -858,7 +858,9 @@ static int parse_trans_rule(struct mlx4_dev *dev, struct 
mlx4_spec_list *spec,
break;
 
case MLX4_NET_TRANS_RULE_ID_IB:
-   rule_hw->ib.l3_qpn = spec->ib.l3_qpn;
+   rule_hw->ib.l3_qpn = spec->ib.l3_qpn |
+   (spec->ib.roce_type == MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4 
?
+(__force __be32)0x80 : (__force __be32)0);
rule_hw->ib.qpn_mask = spec->ib.qpn_msk;
memcpy(_hw->ib.dst_gid, >ib.dst_gid, 16);
memcpy(_hw->ib.dst_gid_msk, >ib.dst_gid_msk, 16);
@@ -1384,10 +1386,18 @@ int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, 
struct mlx4_qp *qp,
memcpy(spec.eth.dst_mac_msk, _mask, ETH_ALEN);
break;
 
+   case MLX4_PROT_IB_IPV4:
+   spec.id = MLX4_NET_TRANS_RULE_ID_IB;
+   memcpy(spec.ib.dst_gid + 12, gid + 12, 4);
+   memset(spec.ib.dst_gid_msk + 12, 0xff, 4);
+   spec.ib.roce_type = MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4;
+   break;
+
case MLX4_PROT_IB_IPV6:
spec.id = MLX4_NET_TRANS_RULE_ID_IB;
memcpy(spec.ib.dst_gid, gid, 16);
-   memset(_gid_msk, 0xff, 16);
+   memset(spec.ib.dst_gid_msk, 0xff, 16);
+   spec.ib.roce_type = MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV6;
break;
default:
return -EINVAL;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 0d873f1ae..cdc75b2 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -391,6 +391,11 @@ enum mlx4_protocol {
MLX4_PROT_FCOE
 };
 
+enum mlx4_flow_roce_type {
+   MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV6 = 0,
+   MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4
+};
+
 enum {
MLX4_MTT_FLAG_PRESENT   = 1
 };
@@ -1197,6 +1202,7 @@ struct mlx4_spec_ipv4 {
 struct mlx4_spec_ib {
__be32  l3_qpn;
__be32  qpn_msk;
+   enummlx4_flow_roce_type roce_type;
u8  dst_gid[16];
u8  dst_gid_msk[16];
 };
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-next 5/7] IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers

2015-12-29 Thread Matan Barak
From: Moni Shoua 

RoCEv2 packets are sent over IP/UDP protocols.
The mlx4 driver uses a type of RAW QP to send packets for QP1 and
therefore needs to build the network headers below BTH in software.

This patch adds an option to build QP1 packets with IP and UDP headers if
RoCEv2 is requested.

Signed-off-by: Moni Shoua 
---
 drivers/infiniband/hw/mlx4/qp.c | 86 ++---
 1 file changed, 54 insertions(+), 32 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c0dee79..8485602 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -32,6 +32,8 @@
  */
 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -2282,16 +2284,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp 
*sqp,
return 0;
 }
 
-static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
-{
-   int i;
-
-   for (i = ETH_ALEN; i; i--) {
-   dst_mac[i - 1] = src_mac & 0xff;
-   src_mac >>= 8;
-   }
-}
-
+#define MLX4_ROCEV2_QP1_SPORT 0xC000
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
 {
@@ -2311,6 +2304,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, 
struct ib_ud_wr *wr,
bool is_eth;
bool is_vlan = false;
bool is_grh;
+   bool is_udp = false;
+   int ip_version = 0;
 
send_size = 0;
for (i = 0; i < wr->wr.num_sge; ++i)
@@ -2319,6 +2314,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, 
struct ib_ud_wr *wr,
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == 
IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
+   struct ib_gid_attr gid_attr;
+
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
 * indexes don't necessarily match the hw ones, so
@@ -2329,23 +2326,36 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, 
struct ib_ud_wr *wr,
if (err)
return err;
} else  {
-   err = ib_get_cached_gid(ib_dev,
+   err = ib_get_cached_gid(sqp->qp.ibqp.device,
be32_to_cpu(ah->av.ib.port_pd) 
>> 24,
ah->av.ib.gid_index, ,
-   NULL);
-   if (!err && !memcmp(, , sizeof(sgid)))
-   err = -ENOENT;
-   if (err)
+   _attr);
+   if (!err) {
+   if (gid_attr.ndev)
+   dev_put(gid_attr.ndev);
+   if (!memcmp(, , sizeof(sgid)))
+   err = -ENOENT;
+   }
+   if (!err) {
+   is_udp = gid_attr.gid_type == 
IB_GID_TYPE_ROCE_UDP_ENCAP;
+   if (is_udp) {
+   if (ipv6_addr_v4mapped((struct in6_addr 
*)))
+   ip_version = 4;
+   else
+   ip_version = 6;
+   is_grh = false;
+   }
+   } else {
return err;
+   }
}
-
if (ah->av.eth.vlan != cpu_to_be16(0x)) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
is_vlan = 1;
}
}
err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
-   0, 0, 0, >ud_header);
+ ip_version, is_udp, 0, >ud_header);
if (err)
return err;
 
@@ -2356,7 +2366,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, 
struct ib_ud_wr *wr,
sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 
0x7f);
}
 
-   if (is_grh) {
+   if (is_grh || (ip_version == 6)) {
sqp->ud_header.grh.traffic_class =
(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 
0xff;
sqp->ud_header.grh.flow_label=
@@ -2385,6 +2395,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, 
struct ib_ud_wr *wr,
   ah->av.ib.dgid, 16);
}
 
+   if (ip_version == 4) {
+   sqp->ud_header.ip4.tos =
+   (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 
0xff;
+   sqp->ud_header.ip4.id 

[PATCH for-next 6/7] IB/mlx4: Create and use another QP1 for RoCEv2

2015-12-29 Thread Matan Barak
From: Moni Shoua 

The mlx4 driver uses a special QP to implement the GSI QP. This kind
of QP allows building the InfiniBand headers in SW to be put before
the payload that comes in with the WR. The mlx4 HW builds the packet,
calculates the ICRC and puts it at the end of the payload. This ICRC
calculation, however, depends on the QP configuration, which is
determined when the QP is modified (roce_mode during INIT->RTR).
On the other hand, ICRC verification when a packet is received does not
depend on this configuration.
Therefore, 2 GSI QPs for send (one for each RoCE version) and 1
GSI QP for receive are required.

Signed-off-by: Moni Shoua 
---
 drivers/infiniband/hw/mlx4/mlx4_ib.h |   7 ++
 drivers/infiniband/hw/mlx4/qp.c  | 162 ++-
 2 files changed, 149 insertions(+), 20 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h 
b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7179fb1..52ce7b0 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -177,11 +177,18 @@ struct mlx4_ib_wq {
unsignedtail;
 };
 
+enum {
+   MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
 enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+   /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+   MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
 };
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8485602..a154d51 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -87,6 +87,7 @@ struct mlx4_ib_sqp {
u32 send_psn;
struct ib_ud_header ud_header;
u8  header_buf[MLX4_IB_UD_HEADER_SIZE];
+   struct ib_qp*roce_v2_gsi;
 };
 
 enum {
@@ -155,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct 
mlx4_ib_qp *qp)
}
}
}
-   return proxy_sqp;
+   if (proxy_sqp)
+   return 1;
+
+   return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
 }
 
 /* used for INIT/CLOSE port logic */
@@ -695,6 +699,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct 
ib_pd *pd,
qp = >qp;
qp->pri.vid = 0x;
qp->alt.vid = 0x;
+   sqp->roce_v2_gsi = NULL;
} else {
qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
if (!qp)
@@ -1085,9 +1090,17 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, 
struct mlx4_ib_qp *qp,
del_gid_entries(qp);
 }
 
-static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
+static int get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
 {
/* Native or PPF */
+   if ((!mlx4_is_mfunc(dev->dev) || mlx4_is_master(dev->dev)) &&
+   attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
+   int sqpn;
+   int res = mlx4_qp_reserve_range(dev->dev, 1, 1, , 0);
+
+   return res ? -abs(res) : sqpn;
+   }
+
if (!mlx4_is_mfunc(dev->dev) ||
(mlx4_is_master(dev->dev) &&
 attr->create_flags & MLX4_IB_SRIOV_SQP)) {
@@ -1102,9 +1115,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct 
ib_qp_init_attr *attr)
return dev->dev->caps.qp1_proxy[attr->port_num - 1];
 }
 
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
-   struct ib_qp_init_attr *init_attr,
-   struct ib_udata *udata)
+static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
+   struct ib_qp_init_attr *init_attr,
+   struct ib_udata *udata)
 {
struct mlx4_ib_qp *qp = NULL;
int err;
@@ -1123,6 +1136,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
+   MLX4_IB_QP_CREATE_ROCE_V2_GSI |
MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
 
@@ -1131,15 +1145,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
 
-   if (init_attr->create_flags &&
-   ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
-((init_attr->create_flags & 

Re: [PATCH V2] IB/mlx5: Unify CQ create flags check

2015-12-29 Thread Leon Romanovsky
On Tue, Dec 29, 2015 at 03:51:47PM +0200, Sagi Grimberg wrote:
> >From: Leon Romanovsky 
> >
> >The create_cq() can receive creation flags which were used
> >differently by two following commits [1] and [2]. The current
> >code caused to not accept any flags at all.
> 
> We can skip referencing the linux-rdma mailing list.
> 
> >
> >This patch unifies the check into one function and one return
> >error code.
> >
> >Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command")
> >Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support")
> >
> >[1] http://www.spinics.net/lists/linux-rdma/msg31430.html
> >[2] http://www.spinics.net/lists/linux-rdma/msg31658.html
> >
> >Changes from v0:
> >   * Add Fixes tag
> 
> Umm, the above ([1], [2], Changes) usually go under the "---"
> separator so we won't see them in git log forever.

Doug,
Do you want me to resend the patch?

> 
> >
> >Signed-off-by: Leon Romanovsky 
> >---
> >  drivers/infiniband/hw/mlx5/cq.c  | 9 +
> >  drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++-
> >  2 files changed, 3 insertions(+), 9 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Generic InfiniBand transport done in software

2015-12-29 Thread Moni Shoua
> No. PIO and SDMA is driver specific and lives in the driver. Rdmavt has no
> concept of this. I'm agreeing that the send will be generic and have no hw
> specific stuff.
>
I understand that PIO/SDMA are not a concept of RVT. However, making
the send from RVT to driver exactly as the interface from ib_core to
RVT raises the question: What exactly do we achieve by this?
>
>
> As I've stated a number of times across multiple threads: It must not do
> anything that would prevent another driver from using it.
>
The question is not how Soft RoCE fits into this framework but how
does this framework achieve its goals.

>
>
> I expect feedback based on the code submissions. More will be coming
> shortly. I have taken all the feedback from the first post and will be
> sending a v2 shortly.
>
Again, I have no idea about the complete interfaces between both
pieces of the suggested solution.
- If you have them, then please publish them.
- If you don't but plan to have them, then why did you submit a half-baked idea?
- If you say that the final interface is what we see now, then I say that
the problem of code duplication isn't going to be resolved.
So, which of the 3 is it?

>
> -Denny
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-next 3/7] IB/mlx4: Configure device to work in RoCEv2

2015-12-29 Thread Or Gerlitz

On 12/29/2015 3:24 PM, Matan Barak wrote:

From: Moni Shoua 

Some mlx4 adapters are RoCEv2 capable. To enable this feature some
hardware configuration is required. This is

1. Set port general parameters
2. Configure the outgoing UDP destination port
3. Configure the QP that work with RoCEv2

Signed-off-by: Moni Shoua 
---
  drivers/infiniband/hw/mlx4/main.c | 19 ++---
  drivers/infiniband/hw/mlx4/qp.c   | 35 ---
  drivers/net/ethernet/mellanox/mlx4/fw.c   | 16 +-
  drivers/net/ethernet/mellanox/mlx4/mlx4.h |  7 +--
  drivers/net/ethernet/mellanox/mlx4/port.c |  8 +++
  drivers/net/ethernet/mellanox/mlx4/qp.c   | 28 +
  include/linux/mlx4/device.h   |  1 +
  include/linux/mlx4/qp.h   | 15 +++--
  include/rdma/ib_verbs.h   |  2 ++
  9 files changed, 120 insertions(+), 11 deletions(-)


Better to put (please do...) functionality which is plain mlx4 core-ish (such
as new/modified FW commands, new SW/FW fields of structs and such) into an
mlx4_core patch.




diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index 988fa33..44e5699 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -384,6 +384,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev 
*ibdev,
int i;
int ret;
unsigned long flags;
+   struct ib_gid_attr attr;
  
  	if (port_num > MLX4_MAX_PORTS)

return -EINVAL;
@@ -394,10 +395,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev 
*ibdev,
if (!rdma_cap_roce_gid_table(>ib_dev, port_num))
return index;
  
-	ret = ib_get_cached_gid(>ib_dev, port_num, index, , NULL);

+   ret = ib_get_cached_gid(>ib_dev, port_num, index, , );
if (ret)
return ret;
  
+	if (attr.ndev)

+   dev_put(attr.ndev);
+
if (!memcmp(, , sizeof(gid)))
return -EINVAL;
  
@@ -405,7 +409,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,

port_gid_table = >gids[port_num - 1];
  
  	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)

-   if (!memcmp(_gid_table->gids[i].gid, , sizeof(gid))) {
+   if (!memcmp(_gid_table->gids[i].gid, , sizeof(gid)) &&
+   attr.gid_type == port_gid_table->gids[i].gid_type) {
ctx = port_gid_table->gids[i].ctx;
break;
}
@@ -2481,7 +2486,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
  
-	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {

+   if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+   dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(>nb);
@@ -2490,6 +2496,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
}
+   if (!mlx4_is_slave(dev) &&
+   dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+   err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+   if (err) {
+   goto err_notif;
+   }
+   }
}
  
  	for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8d28059..c0dee79 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1508,6 +1508,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, 
struct mlx4_ib_qp *qp)
return 0;
  }
  
+enum {

+   MLX4_QPC_ROCE_MODE_1 = 0,
+   MLX4_QPC_ROCE_MODE_2 = 2,
+   MLX4_QPC_ROCE_MODE_MAX = 0xff
+};
+
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+   switch (gid_type) {
+   case IB_GID_TYPE_ROCE:
+   return MLX4_QPC_ROCE_MODE_1;
+   case IB_GID_TYPE_ROCE_UDP_ENCAP:
+   return MLX4_QPC_ROCE_MODE_2;
+   default:
+   return MLX4_QPC_ROCE_MODE_MAX;
+   }
+}
+
  static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
   const struct ib_qp_attr *attr, int attr_mask,
   enum ib_qp_state cur_state, enum ib_qp_state 
new_state)
@@ -1651,9 +1669,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
u16 vlan = 0x;
u8 smac[ETH_ALEN];
int status = 0;
+   int is_eth = rdma_cap_eth_ah(>ib_dev, port_num) &&
+   attr->ah_attr.ah_flags & IB_AH_GRH;
  
-		if (rdma_cap_eth_ah(>ib_dev, port_num) &&

-