Re: [PATCH for-next 5/7] IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers
On Tue, Dec 29, 2015 at 3:24 PM, Matan Barakwrote: > @@ -2413,34 +2442,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, > struct ib_ud_wr *wr, > > if (is_eth) { > struct in6_addr in6; > - > + u16 ether_type; > u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) > << 13; > > + ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE : > + (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6); > + > mlx->sched_prio = cpu_to_be16(pcp); > > + ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac); > memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); > - /* FIXME: cache smac value? */ > memcpy(>srcrb_flags16[0], ah->av.eth.mac, 2); > memcpy(>imm, ah->av.eth.mac + 2, 4); > memcpy(, sgid.raw, sizeof(in6)); > > - if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { > - u64 mac = > atomic64_read(_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]); > - u8 smac[ETH_ALEN]; > - > - mlx4_u64_to_smac(smac, mac); > - memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN); > - } else { > - /* use the src mac of the tunnel */ > - memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, > ETH_ALEN); > - } > The last hunk that you removed had a role and was by no means dead-code, right? so... (1) why it's correct to remove it? (2) if you want to introduce different way to implement what was done here, why in this patch? maybe add pre-patch for that -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V2] IB/mlx5: Unify CQ create flags check
On 12/29/2015 4:41 PM, Leon Romanovsky wrote: From: Leon RomanovskyThe create_cq() can receive creation flags which were used differently by two commits which added create_cq extended command and cross-channel. The merged code caused to not accept any flags at all. This patch unifies the check into one function and one return error code. Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command") Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support") --- Changes from v1: * Remove links to linux-rdma from commit message * Placed change log under git comment section (---) Changes from v0: * Add Fixes tag Signed-off-by: Leon Romanovsky wrong placing. Needs to be before the 1st --- and w.o blank lines after the Fixes: lines please Please use dry runs to get this to run (...) correctly --- drivers/infiniband/hw/mlx5/cq.c | 9 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b14316603e44..7ddc790b1819 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, >db); } -enum { - CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION -}; - struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int eqn; int err; - if (check_cq_create_flags(attr->flags)) - return ERR_PTR(-EINVAL); - if (entries < 0) return ERR_PTR(-EINVAL); - if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) + if (check_cq_create_flags(attr->flags)) return ERR_PTR(-EOPNOTSUPP); entries = roundup_pow_of_two(entries + 1); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d4b227126265..fbf14a768105 100644 --- 
a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -692,6 +692,7 @@ static inline u32 check_cq_create_flags(u32 flags) * It returns non-zero value for unsupported CQ * create flags, otherwise it returns zero. */ - return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN); + return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN | + IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); } #endif /* MLX5_IB_H */ -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next 7/7] IB/mlx4: Advertise RoCE support
On 12/29/2015 3:24 PM, Matan Barak wrote: Advertise RoCE support in port_immutable according to the hardware capabilities. This enables the verbs stack to use RoCE v2 mode. Advertise RoCE V2 support Signed-off-by: Matan BarakI guess you wanted "IB/mlx4: Advertise RoCE V2 support" for the patch title? since we did advertise RDMA_CORE_PORT_IBA_ROCE prior to this patch. Or. --- drivers/infiniband/hw/mlx4/main.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 44e5699..8cf2575 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2183,6 +2183,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; + struct mlx4_ib_dev *mdev = to_mdev(ibdev); int err; err = mlx4_ib_query_port(ibdev, port_num, ); @@ -2192,10 +2193,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) + if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) { immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; - else - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + } else { + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + } immutable->max_mad_size = IB_MGMT_MAD_SIZE; -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IB/mad: Ensure fairness in ib_mad_completion_handler
On Tue, Dec 29, 2015 at 11:51:19AM +0200, Sagi Grimberg wrote: > > >Please just convert the mad handler to the new CQ API in > >drivers/infiniband/core/cq.c. If you have any question about it I'd be > >glad to help you. > > +1 on this suggestion. > > We had these sorts of questions in our ULPs as well. The CQ API should > take care of all that for you and leaves you to just handle the > completions... I saw your work and agree it would be nice but it will take some time to convert and debug the MAD stack. I'll try and find some time but it is unlikely I will anytime soon. We can hit this bug regularly with hfi1 but have not hit with qib or mlx4. I leave it up to Doug if he wants to take this fix before someone finds time to convert the MAD stack. Ira -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next 2/7] IB/mlx4: Add RoCE per GID support for add_gid and del_gid
On 12/29/2015 3:24 PM, Matan Barak wrote: [...] We use a new firmware command in order to populate the GID table and store the type along with the GID value. Its a new value to existing command.. so better say we use a new value to the SET_PORT firmware command to do X Also here, break out mlx4_core new functionality e.g the changes to include/linux/mlx4/cmd.h into mlx4_core only patch. You don't need any change to mlx4_core to have it's own patch, I guess one up to three mlx4 core patches would be OK. Did you make sure (at the resource tracker) that VFs can't do this new set port command flavor? Also find some spot to put blank line in the change-log, it's hard to read this way. Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next 1/7] IB/mlx4: Query RoCE support
On 12/29/2015 3:24 PM, Matan Barak wrote: @@ -905,6 +906,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2; Did you make sure that the query dev cap wrapper unsets this bit when proxing VF queries? if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling
Reviewed-by: Christoph Lameter -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Generic InfiniBand transport done in software
On 12/27/2015 12:54 PM, Moni Shoua wrote: >> Yes it is specific to Intel *now*, that doesn't mean it should stay that >> way. Rdmavt could, and in my opinion should, be extended to support >> soft-roce. I don't think replicating the same thing is a great idea. >> > But you post *now* a so called generic driver so it must now fit any > possible driver (including Soft RoCE) This is incorrect. This isn't some public API that we are exporting to user space. Nor is it an API that out of tree drivers are using. This is a purely kernel internal API for use by a limited number of drivers. As such, it need not be finalized before it is submitted or used. It can be taken one piece at a time, and if, at some point, it is determined that there are shortcomings to the API, it can be updated in place with all of the drivers that use it in a single patch or patch series. So a finalized design prior to putting code in place is specifically *not* needed. >> As to the location, where do you think it should go. drivers/infiniband/sw >> makes the most sense to me, but open to suggestions. >> >> And for the question of why publish when it's not ready, the better question >> is why not? Is it not good to see the work in progress as it evolves so the >> community can provide feedback? >> > What kind of a feedback you expect when I don't have an idea about > your plans for rdmavt > Interfaces, flows, data structures... all is missing from the > documentation to rdmavt. They released it so that you can start hooking SoftRoCE into it. As you hook it in, if it needs changes to work with SoftRoCE, simply make the changes needed and move on. I think Dennis' point, and I agree with him, is that you are over complicating the issue here. This need not be a highly designed item, it needs to be a functional item, and we can build it as we go. If you have to make changes to rdmavt in order to hook up SoftRoCE, that's fine, post them to the list, they will get reviewed. 
As long as the change doesn't break or otherwise negatively impact qib and/or hfi1, then it should be fine. If it does, then I'm sure Intel will work with you to find a solution that doesn't negatively impact them. -- Doug LedfordGPG KeyID: 0E572FDD signature.asc Description: OpenPGP digital signature
SoftRoCE V1
Hi experts, We have several Mellanox RoCE V1 NIC cards, and would like to try communicating with SoftRoCE V1. We are using branch rxe-3.0 from https://github.com/SoftRoCE/rxe-dev according to the Soft-RoCE README Rev 1.0 issued from Mellanox last year. Testing using pingpong examples from libibverbs (hardware RoCE <-> SoftRoCE) already shows a bug in Ethernet CRC generation. We expect further bugs along the way. So we would like to know for SoftRoCE V1, are we using the latest branch? Also, is it still tied to SLES11 SP3 OS? There is very little documentation we can find, so we hope this is the right place to ask such questions. Cheers, Wenda Ni, Ph.D. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Generic InfiniBand transport done in software
I think that my point is missed. See my answers inline > This is incorrect. This isn't some public API that we are exporting to > user space. Nor is it an API that out of tree drivers are using. This > is a purely kernel internal API for use by a limited number of drivers. > As such, it need not be finalized before it is submitted or used. It > can be taken one piece at a time, and if, at some point, it is > determined that there are shortcomings to the API, it can be updated in > place with all of the drivers that use it in a single patch or patch > series. So a finalized design prior to putting code in place is > specifically *not* needed. > This is not a question of future backward compatibility where interfaces must be kept forever. I agree that kernel interfaces may be changed with kernel moving forward. However, this is not what I'm arguing against. When one submits a RFC for a generic Infrastructure he must state what are the interfaces between blocks of the design. Soft RoCE block can't start until I know how the final interfaces look like. This is an unacceptable method of work. > > They released it so that you can start hooking SoftRoCE into it. As you > hook it in, if it needs changes to work with SoftRoCE, simply make the > changes needed and move on. This is not a question if I can hook Soft RoCE driver into this framework. In fact, I can't think of an IB driver that can't use this framework. What this framework offers is just another hop from ib_core to the real driver. Where is the removal of duplicated code? This is a list of functions that for now must be implemented in the low level driver. create_cq destroy_cq poll_cq req_notify_cq resize_cq create_srq modify_srq destroy_srq query_srq create_qp query_device query_gid alloc_ucontext modify_device modify_qp dealloc_ucontext query_port destroy_qp get_port_immutable modify_port query_qp post_send post_recv post_srq_recv Most if not all of them have common part in all drivers. 
What are the plans to get rid of them? When? Don't you think that this should be known in advance? I already asked and never been answered seriously: what was the purpose of the submission in this premature state of the code? It can't be for feedback because what kind of feedback can you provide for just a skeleton? Moreover, today they submitted V2 with a changelog that is almost 100% cosmetic changes. I really don't understand this kind of work. > > I think Dennis' point, and I agree with him, is that you are over > complicating the issue here. This need not be a highly designed item, > it needs to be a functional item, and we can build it as we go. If you > have to make changes to rdmavt in order to hook up SoftRoCE, that's > fine, post them to the list, they will get reviewed. As long as the > change doesn't break or otherwise negatively impact qib and/or hfi1, > then it should be fine. If it does, then I'm sure Intel will work with > you to find a solution that doesn't negatively impact them. A reminder of what the initial goal was - remove code duplicates between all IB transport drivers. This goal is complicated and in my RFC I explained why. So, for start, I am not complicating anything that was simple before. Second, what you are saying here is actually: "this is a project to serve Intel's needs". So why treat it as a generic infrastructure? I'm not aiming to hurt performance but Intel should aim for achieving the goals we agreed on in the beginning. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Generic InfiniBand transport done in software
On Tue, Dec 29, 2015 at 07:38:30PM +0200, Moni Shoua wrote: This is not a question if I can hook Soft RoCE driver into this framework. In fact, I can't think of an IB driver that can't use this framework. What this framework offers is just another hop from ib_core the real driver. Where is the removal of duplicated code? This is a list of functions that for now must be implemented in the low level driver. create_cq destroy_cq poll_cq req_notify_cq resize_cq create_srq modify_srq destroy_srq query_srq create_qp query_device query_gid alloc_ucontext modify_device modify_qp dealloc_ucontext query_port destroy_qp get_port_immutable modify_port query_qp post_send post_recv post_srq_recv Most if not all of them have common part in all drivers. What are the plans to get rid of them? When? Don't you think that this should be known in advance? We have patch sets that implement all of these which will be posted soon. With the holidays things have just been a bit slow to come out. -Denny -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next 2/3] IB/core: Change per-entry lock in RoCE GID table to one lock
On 12/30/2015 07:01 AM, Or Gerlitz wrote: On 10/28/2015 4:52 PM, Matan Barak wrote: @@ -134,16 +138,14 @@ static int write_gid(struct ib_device *ib_dev, u8 port, { int ret = 0; struct net_device *old_net_dev; -unsigned long flags; /* in rdma_cap_roce_gid_table, this funciton should be protected by a * sleep-able lock. */ -write_lock_irqsave(>data_vec[ix].lock, flags); if (rdma_cap_roce_gid_table(ib_dev, port)) { table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; -write_unlock_irqrestore(>data_vec[ix].lock, flags); +write_unlock_irq(>rwlock); /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by * RoCE providers and thus only updates the cache. */ @@ -153,7 +155,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port, else if (action == GID_TABLE_WRITE_ACTION_DEL) ret = ib_dev->del_gid(ib_dev, port, ix, >data_vec[ix].context); -write_lock_irqsave(>data_vec[ix].lock, flags); +write_lock_irq(>rwlock); } sparse complains on drivers/infiniband/core/cache.c:186:17: warning: context imbalance in 'write_gid' - unexpected unlock is this false positive? Hello Or, sparse expects __release() and __acquire() annotations for functions that unlock a lock object that has been locked by its caller. See e.g. http://lists.kernelnewbies.org/pipermail/kernelnewbies/2011-October/003541.html. Bart. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next V3 10/11] IB/core: Initialize UD header structure with IP and UDP headers
On 12/23/2015 2:56 PM, Matan Barak wrote: +__be16 ib_ud_ip4_csum(struct ib_ud_header *header) +{ + struct iphdr iph; + + iph.ihl = 5; + iph.version = 4; + iph.tos = header->ip4.tos; + iph.tot_len = header->ip4.tot_len; + iph.id = header->ip4.id; + iph.frag_off= header->ip4.frag_off; + iph.ttl = header->ip4.ttl; + iph.protocol= header->ip4.protocol; + iph.check = 0; + iph.saddr = header->ip4.saddr; + iph.daddr = header->ip4.daddr; + + return ip_fast_csum((u8 *), iph.ihl); +} +EXPORT_SYMBOL(ib_ud_ip4_csum); You have introduced here this sparse warning, please fix drivers/infiniband/core/ud_header.c:299:28: warning: incorrect type in return expression (different base types) drivers/infiniband/core/ud_header.c:299:28:expected restricted __be16 drivers/infiniband/core/ud_header.c:299:28:got restricted __sum16 Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next V3 00/11] Add RoCE v2 support
Hi Matan, I see these two smatch complaints on code added with this series, can you please take a look? drivers/infiniband/core/addr.c:503 rdma_resolve_ip_route() warn: variable dereferenced before check 'src_addr' (see line 500) drivers/infiniband/core/cma_configfs.c:172 make_cma_ports() warn: double check that we're allocating correct size: 8 vs 128 Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next 2/3] IB/core: Change per-entry lock in RoCE GID table to one lock
On 10/28/2015 4:52 PM, Matan Barak wrote: @@ -134,16 +138,14 @@ static int write_gid(struct ib_device *ib_dev, u8 port, { int ret = 0; struct net_device *old_net_dev; - unsigned long flags; /* in rdma_cap_roce_gid_table, this funciton should be protected by a * sleep-able lock. */ - write_lock_irqsave(>data_vec[ix].lock, flags); if (rdma_cap_roce_gid_table(ib_dev, port)) { table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; - write_unlock_irqrestore(>data_vec[ix].lock, flags); + write_unlock_irq(>rwlock); /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by * RoCE providers and thus only updates the cache. */ @@ -153,7 +155,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port, else if (action == GID_TABLE_WRITE_ACTION_DEL) ret = ib_dev->del_gid(ib_dev, port, ix, >data_vec[ix].context); - write_lock_irqsave(>data_vec[ix].lock, flags); + write_lock_irq(>rwlock); } sparse complains on drivers/infiniband/core/cache.c:186:17: warning: context imbalance in 'write_gid' - unexpected unlock is this false positive? Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling
On Mon, Dec 28, 2015 at 11:53 PM, Hal Rosenstockwrote: > > Port number is not part of ClassPortInfo attribute but is > still needed as a parameter when invoking process_mad. > > To properly handle this attribute, port_num is added as a > parameter to get_counter_table and get_perf_mad was changed > not to store port_num in the attribute itself when it's > querying the ClassPortInfo attribute. > > This handles issue pointed out by Matan Barak > > Signed-off-by: Hal Rosenstock > Acked-by: Matan Barak > --- > diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c > index 539040f..2daf832 100644 > --- a/drivers/infiniband/core/sysfs.c > +++ b/drivers/infiniband/core/sysfs.c > @@ -438,7 +438,8 @@ static int get_perf_mad(struct ib_device *dev, int > port_num, int attr, > in_mad->mad_hdr.method= IB_MGMT_METHOD_GET; > in_mad->mad_hdr.attr_id = attr; > > - in_mad->data[41] = port_num;/* PortSelect field */ > + if (attr != IB_PMA_CLASS_PORT_INFO) > + in_mad->data[41] = port_num;/* PortSelect field */ > > if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY, > port_num, NULL, NULL, > @@ -714,11 +715,12 @@ err: > * Figure out which counter table to use depending on > * the device capabilities. 
> */ > -static struct attribute_group *get_counter_table(struct ib_device *dev) > +static struct attribute_group *get_counter_table(struct ib_device *dev, > +int port_num) > { > struct ib_class_port_info cpi; > > - if (get_perf_mad(dev, 0, IB_PMA_CLASS_PORT_INFO, > + if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO, > , 40, sizeof(cpi)) >= 0) { > > if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH) > @@ -776,7 +778,7 @@ static int add_port(struct ib_device *device, int > port_num, > goto err_put; > } > > - p->pma_table = get_counter_table(device); > + p->pma_table = get_counter_table(device, port_num); > ret = sysfs_create_group(>kobj, p->pma_table); > if (ret) > goto err_put_gid_attrs; > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html Please just add: Fixes: 145d9c541032 ('IB/core: Display extended counter set if available') -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IB/mad: Ensure fairness in ib_mad_completion_handler
Please just convert the mad handler to the new CQ API in drivers/infiniband/core/cq.c. If you have any question about it I'd be glad to help you. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Generic InfiniBand transport done in software
Hi Moni, On Sun, Dec 27, 2015 at 07:54:46PM +0200, Moni Shoua wrote: > But you post *now* a so called generic driver so it must now fit any > possible driver (including Soft RoCE) it's never going to fit any possible future driver. Dennis and folks have done great work to move code outside the drivers into a shared library. So far it's been driven just by the Intel drivers as that's the only thing they were interested in. If you are interested in supporting SoftROCE please work with them by adjusting the code towards your requirements. In Linux we have great results with iterative approaches and I'd suggest you try it as well. > What kind of a feedback you expect when I don't have an idea about > your plans for rdmavt > Interfaces, flows, data structures... all is missing from the > documentation to rdmavt. You've got the code, so let's work based on that. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: completion queue abstraction V2
On 12/07/2015 09:51 PM, Christoph Hellwig wrote: This series adds a new RDMA core abstraction that insulated the ULPs from the nitty gritty details of CQ polling. See the individual patches for more details. Hello Christoph, After having tested the SRP initiator and target drivers with this patch series applied I have further feedback about this patch series. I will provide that feedback as replies to the individual patches. Bart. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IB/mlx5: Unify CQ create flags check
Does this deserve a Fixes tag? -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IB/core: Remove a set-but-not-used variable from ib_sg_to_pages()
Thanks Bart, Acked-by: Sagi Grimberg -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling
On 12/29/2015 7:21 AM, Or Gerlitz wrote: > On 12/29/2015 12:43 PM, Hal Rosenstock wrote: >> This handles issue pointed out by Matan Barak>> >> Fixes: 145d9c541032 ('IB/core: Display extended counter set if >> available') >> >> Signed-off-by: Hal Rosenstock > again, remove the blank line after the fixes tag. > > Also, I am not sure that the way Doug is setting the branch for pull would > preserve commit IDs when > the offending patch landed in Linus tree. If this is the case, we should > put your patch in 2nd pull > request and have the right commit ID there. Please check with Doug. Doug ? -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next 6/7] IB/mlx4: Create and use another QP1 for RoCEv2
On 12/29/2015 3:24 PM, Matan Barak wrote: The mlx4 driver uses a special QP to implement the GSI QP. This kind of QP allows to build the InfiniBand headers in SW to be put before the payload that comes in with the WR. The mlx4 HW builds the packet, calculates the ICRC and puts it at the end of the payload. This ICRC calculation however depends on the QP configuration which is determined when QP is modified (roce_mode during INIT->RTR). On the other hand, ICRC verification when packet is received does to depend on this configuration. I don't understand the part of the sentence saying "when packet is received does to depend on this configuration" maybe some typo/s there? Therefore, using 2 GSI QPs for send (one for each RoCE version) and 1 GSI QP for receive are required. s/2/two/ and s/1/one/ please Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IB/core: Remove a set-but-not-used variable from ib_sg_to_pages()
On Tue, Dec 29, 2015 at 10:45:03AM +0100, Bart Van Assche wrote: > Detected this by building the IB core with W=1. See also patch > "IB core: Fix ib_sg_to_pages()" (commit 8f5ba10ed40a). Reviewed-by: Leon Romanovsky> > Signed-off-by: Bart Van Assche > Cc: Sagi Grimberg > Cc: Christoph Hellwig > --- > drivers/infiniband/core/verbs.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c > index 545906d..c90ed29 100644 > --- a/drivers/infiniband/core/verbs.c > +++ b/drivers/infiniband/core/verbs.c > @@ -1530,7 +1530,7 @@ int ib_sg_to_pages(struct ib_mr *mr, > int (*set_page)(struct ib_mr *, u64)) > { > struct scatterlist *sg; > - u64 last_end_dma_addr = 0, last_page_addr = 0; > + u64 last_end_dma_addr = 0; > unsigned int last_page_off = 0; > u64 page_mask = ~((u64)mr->page_size - 1); > int i, ret; > @@ -1572,7 +1572,6 @@ next_page: > > mr->length += dma_len; > last_end_dma_addr = end_dma_addr; > - last_page_addr = end_dma_addr & page_mask; > last_page_off = end_dma_addr & ~page_mask; > } > > -- > 2.1.4 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Generic InfiniBand transport done in software
> it's never going to fit any possible future driver. Dennis and folks > have done great work to move code outside the drivers into a shared > library. So far it's been driven just by the Intel drivers as that's > the only thing they were interested in. > If it's not going to be a solution for anything else but Intel then why declare it as such? Where is that shared library? The amount of code in rdmavt that can be considered shared is very little. > If you are interested in supporting SoftROCE please work with them > by adjusting the code towards your requirements. In Linux we have > great results with iterative approaches and I'd suggest you try it > as well. > Exactly. All you asked for is in the RFC I posted. > > You've got the code, so let's work based on that. > -- I say let's agree on the interfaces and start writing code. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 00/36] Add rdma verbs transport library
> Changes since v1: > Removed driver specific version > Fixed license text to remove copyright and put on top > Return 0 in rvt_map_sg instead of BAD_DMA_AGGRESS > Remove #include of dma.h from dma.c > Update comment about protection domain limit > Remove comment on alternative design for private data > Rename CDR macro to CHECK_DRIVER_OVERRIDE > Change all the stubs to return EOPNOTSUPP > Fix comment style for rvt_query_port > Fix typo in subject > Rename rdi.lk_table to rdi.lkey_table > Rename rvt_sge.m => rvt_sge.cur_map (Sean) > Rename rvt_sge.n => rvt_sge.cur_seg (Sean) > Remove rvt_reg_phys_mr > Drop support for commit 38071a461f0a ("IB/qib: Support the new memory >registration API") > I don't understand what in this change log justifies a V2 for this patch set -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 03/13] irq_poll: fold irq_poll_sched_prep into irq_poll_sched
On 12/07/2015 09:51 PM, Christoph Hellwig wrote: diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 88af879..13cb149 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -21,13 +21,17 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); * * Description: * Add this irq_poll structure to the pending poll list and trigger the - * raise of the blk iopoll softirq. The driver must already have gotten a - * successful return from irq_poll_sched_prep() before calling this. + * raise of the blk iopoll softirq. **/ void irq_poll_sched(struct irq_poll *iop) { unsigned long flags; + if (test_bit(IRQ_POLL_F_DISABLE, >state)) + return; + if (!test_and_set_bit(IRQ_POLL_F_SCHED, >state)) + return; + local_irq_save(flags); list_add_tail(>list, this_cpu_ptr(_cpu_iopoll)); __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); After having applied these changes the SRP initiator didn't receive any RDMA completions anymore. I could remedy that by changing "!test_and_set_bit()" into "test_and_set_bit()": diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 43a3370..3a67019 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -29,7 +29,7 @@ void irq_poll_sched(struct irq_poll *iop) if (test_bit(IRQ_POLL_F_DISABLE, >state)) return; - if (!test_and_set_bit(IRQ_POLL_F_SCHED, >state)) + if (test_and_set_bit(IRQ_POLL_F_SCHED, >state)) return; local_irq_save(flags); -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH for-next 1/7] IB/mlx4: Query RoCE support
From: Moni ShouaQuery the RoCE support from firmware using the appropriate firmware commands. Downstream patches will read these capabilities and act accordingly. Signed-off-by: Moni Shoua --- drivers/net/ethernet/mellanox/mlx4/fw.c | 3 +++ drivers/net/ethernet/mellanox/mlx4/main.c | 6 +- include/linux/mlx4/device.h | 11 +-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 90db94e..bdd6822 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -157,6 +157,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [29] = "802.1ad offload support", [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", + [33] = "RoCEv2 support" }; int i; @@ -905,6 +906,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2; if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 31c491e..fb4968f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -424,8 +424,12 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; /* Don't do sense port on multifunction devices (for now at least) */ - if (mlx4_is_mfunc(dev)) + /* Don't do enable RoCE V2 on multifunction devices */ + if (mlx4_is_mfunc(dev)) { dev->caps.flags &= 
~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; + dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_ROCE_V1_V2; + mlx4_dbg(dev, "RoCE V2 is not supported when SR-IOV is enabled\n"); + } if (mlx4_low_memory_profile()) { dev->caps.log_num_macs = MLX4_MIN_LOG_NUM_MAC; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d3133be..dbf39ab 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -216,6 +216,7 @@ enum { MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, + MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1LL << 33, }; enum { @@ -267,6 +268,7 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY= 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_ROCE_V1_V2 = 1 << 19, MLX4_BMME_FLAG_PORT_REMAP = 1 << 24, MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; @@ -275,6 +277,10 @@ enum { MLX4_FLAG_PORT_REMAP= MLX4_BMME_FLAG_PORT_REMAP }; +enum { + MLX4_FLAG_ROCE_V1_V2= MLX4_BMME_FLAG_ROCE_V1_V2 +}; + enum mlx4_event { MLX4_EVENT_TYPE_COMP = 0x00, MLX4_EVENT_TYPE_PATH_MIG = 0x01, @@ -984,9 +990,10 @@ struct mlx4_mad_ifc { if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) #define mlx4_foreach_ib_transport_port(port, dev) \ - for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ - ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) + ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \ + ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)) #define MLX4_INVALID_SLAVE_ID 0xFF #define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH for-next 2/7] IB/mlx4: Add RoCE per GID support for add_gid and del_gid
In RoCE, GID table is managed in the IB core driver. The role of the mlx4 driver is to synchronize the HW with the entries in the GID table. Since it is possible that the same GID value will appear more than once in the GID table (though with different attributes) it is required from the mlx4 driver to maintain a reference counting mechanism and populate the HW with a single value. We use a new firmware command in order to populate the GID table and store the type along with the GID value. Signed-off-by: Moni Shoua--- drivers/infiniband/hw/mlx4/main.c| 69 +--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + include/linux/mlx4/cmd.h | 3 +- 3 files changed, 67 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 627267f..988fa33 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n return dev; } -static int mlx4_ib_update_gids(struct gid_entry *gids, - struct mlx4_ib_dev *ibdev, - u8 port_num) +static int mlx4_ib_update_gids_v1(struct gid_entry *gids, + struct mlx4_ib_dev *ibdev, + u8 port_num) { struct mlx4_cmd_mailbox *mailbox; int err; @@ -187,6 +187,61 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return err; } +static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids, +struct mlx4_ib_dev *ibdev, +u8 port_num) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + struct mlx4_dev *dev = ibdev->dev; + int i; + struct { + union ib_gidgid; + __be32 rsrvd1[2]; + __be16 rsrvd2; + u8 type; + u8 version; + __be32 rsrvd3; + } *gid_tbl; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + + gid_tbl = mailbox->buf; + for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { + memcpy(_tbl[i].gid, [i].gid, sizeof(union ib_gid)); + if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + gid_tbl[i].version = 2; + if (!ipv6_addr_v4mapped((struct in6_addr 
*)[i].gid)) + gid_tbl[i].type = 1; + } + } + + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_ROCE_ADDR << 8 | port_num, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (mlx4_is_bonded(dev)) + err += mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_ROCE_ADDR << 8 | 2, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +static int mlx4_ib_update_gids(struct gid_entry *gids, + struct mlx4_ib_dev *ibdev, + u8 port_num) +{ + if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) + return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num); + + return mlx4_ib_update_gids_v1(gids, ibdev, port_num); +} + static int mlx4_ib_add_gid(struct ib_device *device, u8 port_num, unsigned int index, @@ -215,7 +270,8 @@ static int mlx4_ib_add_gid(struct ib_device *device, port_gid_table = >gids[port_num - 1]; spin_lock_bh(>lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { - if (!memcmp(_gid_table->gids[i].gid, gid, sizeof(*gid))) { + if (!memcmp(_gid_table->gids[i].gid, gid, sizeof(*gid)) && + (port_gid_table->gids[i].gid_type == attr->gid_type)) { found = i; break; } @@ -233,6 +289,7 @@ static int mlx4_ib_add_gid(struct ib_device *device, } else { *context = port_gid_table->gids[free].ctx; memcpy(_gid_table->gids[free].gid, gid, sizeof(*gid)); + port_gid_table->gids[free].gid_type = attr->gid_type; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; hw_update = 1; @@ -248,8 +305,10 @@ static int mlx4_ib_add_gid(struct ib_device *device, if (!gids) { ret = -ENOMEM;
[PATCH for-next 7/7] IB/mlx4: Advertise RoCE support
Advertise RoCE support in port_immutable according to the hardware capabilities. This enables the verbs stack to use RoCE v2 mode. Signed-off-by: Matan Barak--- drivers/infiniband/hw/mlx4/main.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 44e5699..8cf2575 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2183,6 +2183,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; + struct mlx4_ib_dev *mdev = to_mdev(ibdev); int err; err = mlx4_ib_query_port(ibdev, port_num, ); @@ -2192,10 +2193,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) + if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) { immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; - else - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + } else { + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + } immutable->max_mad_size = IB_MGMT_MAD_SIZE; -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH for-next 0/7] Add RoCE v2 support for mlx4 driver
Hi Doug, This series adds RoCE v2 support for mlx4 driver. It implements the required bits in the new RoCE v2 API while adding the necessary firmware commands and handling. Patch 0001 queries the firmware if RoCE is supported. Patch 0002 introduces a new firmware command that sets the GID table, such that we store the GID type along the GID itself in the table. Patch 0003 configures the device to work in RoCE v1 and RoCE v2 mixed mode. Patch 0004 adds the support to create steering rules for IPv4 based packets. This is necessary in order to support RoCE multicast. Patch 0005 introduces the support for sending RoCE v2 packets from QP1. Patch 0006 creates another QP in order to receive QP1 RoCE v2 traffic. Patch 0007 advertises RoCE v2 support for upper layer. From this point and on, the GID table will be populated with RoCE v2 based GIDs (if the hardware supports so). Regards, Moni and Matan Maor Gottlieb (1): net/mlx4_core: Add handlning of RoCE v2 over IPV4 in attach_flow Matan Barak (2): IB/mlx4: Add RoCE per GID support for add_gid and del_gid IB/mlx4: Advertise RoCE support Moni Shoua (4): IB/mlx4: Query RoCE support IB/mlx4: Configure device to work in RoCEv2 IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers IB/mlx4: Create and use another QP1 for RoCEv2 drivers/infiniband/hw/mlx4/main.c | 100 +-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 8 + drivers/infiniband/hw/mlx4/qp.c | 283 -- drivers/net/ethernet/mellanox/mlx4/fw.c | 19 +- drivers/net/ethernet/mellanox/mlx4/main.c | 6 +- drivers/net/ethernet/mellanox/mlx4/mcg.c | 14 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 7 +- drivers/net/ethernet/mellanox/mlx4/port.c | 8 + drivers/net/ethernet/mellanox/mlx4/qp.c | 28 +++ include/linux/mlx4/cmd.h | 3 +- include/linux/mlx4/device.h | 18 +- include/linux/mlx4/qp.h | 15 +- include/rdma/ib_verbs.h | 2 + 13 files changed, 434 insertions(+), 77 deletions(-) -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a 
message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 08/13] IB/srpt: chain RDMA READ/WRITE requests
On 12/07/2015 09:51 PM, Christoph Hellwig wrote: > Remove struct rdma_iu and instead allocate the struct ib_rdma_wr array > early and fill out directly. This allows us to chain the WRs, and thus > archive both less lock contention on the HCA workqueue as well as much > simpler error handling. Please consider folding the patch below into this patch. Thanks, Bart. [PATCH] IB/srpt: Fix a recently introduced kernel crash BUG: unable to handle kernel paging request at 00010198 IP: [] __lock_acquire+0xa2/0x560 Call Trace: [] lock_acquire+0x62/0x80 [] _raw_spin_lock_irqsave+0x43/0x60 [] srpt_rdma_read_done+0x57/0x120 [ib_srpt] [] __ib_process_cq+0x43/0xc0 [ib_core] [] ib_cq_poll_work+0x25/0x70 [ib_core] [] process_one_work+0x1bd/0x460 [] worker_thread+0x118/0x420 [] kthread+0xe4/0x100 [] ret_from_fork+0x3f/0x70 --- drivers/infiniband/ulp/srpt/ib_srpt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 8068aff..3daab39 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1395,7 +1395,7 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) { struct srpt_rdma_ch *ch = cq->cq_context; struct srpt_send_ioctx *ioctx = - container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); + container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); WARN_ON(ioctx->n_rdma <= 0); atomic_add(ioctx->n_rdma, >sq_wr_avail); @@ -1418,7 +1418,7 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) { struct srpt_send_ioctx *ioctx = - container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); + container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); if (unlikely(wc->status != IB_WC_SUCCESS)) { pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n", -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the 
body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling
On 12/29/2015 12:43 PM, Hal Rosenstock wrote: Port number is not part of ClassPortInfo attribute but is still needed as a parameter when invoking process_mad. Please remove the blank line above your 1st sentence. To properly handle this attribute, port_num is added as a parameter to get_counter_table and get_perf_mad was changed not to store port_num in the attribute itself when it's querying the ClassPortInfo attribute. This handles issue pointed out by Matan Barak Fixes: 145d9c541032 ('IB/core: Display extended counter set if available') Signed-off-by: Hal Rosenstock again, remove the blank line after the fixes tag. Also, I am not sure that the way Doug is setting the branch for pull would preserve commit IDs when the offending patch landed in Linus tree. If this is the case, we should put your patch in 2nd pull request and have the right commit ID there. Please check with Doug. Acked-by: Matan Barak Acked-by: Ira Weiny --- Change from v1: Added fixes line to description So this patch makes mlx4 IB driver on Eth ports workable with the 4.5-rc1 proposed bits? -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V2] IB/mlx5: Unify CQ create flags check
From: Leon RomanovskyThe create_cq() can receive creation flags which were used differently by two commits which added create_cq extended command and cross-channel. The merged code caused to not accept any flags at all. This patch unifies the check into one function and one return error code. Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command") Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support") --- Changes from v1: * Remove links to linux-rdma from commit message * Placed change log under git comment section (---) Changes from v0: * Add Fixes tag Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cq.c | 9 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b14316603e44..7ddc790b1819 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, >db); } -enum { - CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION -}; - struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int eqn; int err; - if (check_cq_create_flags(attr->flags)) - return ERR_PTR(-EINVAL); - if (entries < 0) return ERR_PTR(-EINVAL); - if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) + if (check_cq_create_flags(attr->flags)) return ERR_PTR(-EOPNOTSUPP); entries = roundup_pow_of_two(entries + 1); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d4b227126265..fbf14a768105 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -692,6 +692,7 @@ static inline u32 check_cq_create_flags(u32 flags) * It returns non-zero value for unsupported CQ * create flags, 
otherwise it returns zero. */ - return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN); + return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN | + IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); } #endif /* MLX5_IB_H */ -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V2] IB/mlx5: Unify CQ create flags check
On Tue, Dec 29, 2015 at 04:03:41PM +0200, Leon Romanovsky wrote: > On Tue, Dec 29, 2015 at 03:51:47PM +0200, Sagi Grimberg wrote: > > >From: Leon Romanovsky> > > > > >The create_cq() can receive creation flags which were used > > >differently by two following commits [1] and [2]. The current > > >code caused to not accept any flags at all. > > > > We can skip referencing the linux-rdma mailing list. > > > > > > > >This patch unifies the check into one function and one return > > >error code. > > > > > >Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command") > > >Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support") > > > > > >[1] http://www.spinics.net/lists/linux-rdma/msg31430.html > > >[2] http://www.spinics.net/lists/linux-rdma/msg31658.html > > > > > >Changes from v0: > > > * Add Fixes tag > > > > Umm, the above ([1], [2], Changes) usually go under the "---" > > separator so we won't see them in git log forever. > > Doug, > Do you want me resend the patch? I sent new version. Thanks. > > > > > > > > >Signed-off-by: Leon Romanovsky > > >--- > > > drivers/infiniband/hw/mlx5/cq.c | 9 + > > > drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- > > > 2 files changed, 3 insertions(+), 9 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] IB/mlx5: Unify CQ create flags check
From: Leon RomanovskyThe create_cq() can receive creation flags which were used differently by two following commits [1] and [2]. This patch unifies the check into one function and one return error code. [1] commit 972ecb821379 ("IB/mlx5: Add create_cq extended command") [2] commit 051f263098a9 ("IB/mlx5: Add driver cross-channel support") Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cq.c | 9 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b14316603e44..7ddc790b1819 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, >db); } -enum { - CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION -}; - struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int eqn; int err; - if (check_cq_create_flags(attr->flags)) - return ERR_PTR(-EINVAL); - if (entries < 0) return ERR_PTR(-EINVAL); - if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) + if (check_cq_create_flags(attr->flags)) return ERR_PTR(-EOPNOTSUPP); entries = roundup_pow_of_two(entries + 1); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d4b227126265..fbf14a768105 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -692,6 +692,7 @@ static inline u32 check_cq_create_flags(u32 flags) * It returns non-zero value for unsupported CQ * create flags, otherwise it returns zero. 
*/ - return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN); + return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN | + IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); } #endif /* MLX5_IB_H */ -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V2] IB/mlx5: Unify CQ create flags check
From: Leon RomanovskyThe create_cq() can receive creation flags which were used differently by two following commits [1] and [2]. The current code caused to not accept any flags at all. This patch unifies the check into one function and one return error code. Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command") Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support") [1] http://www.spinics.net/lists/linux-rdma/msg31430.html [2] http://www.spinics.net/lists/linux-rdma/msg31658.html Changes from v0: * Add Fixes tag Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cq.c | 9 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b14316603e44..7ddc790b1819 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, >db); } -enum { - CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION -}; - struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int eqn; int err; - if (check_cq_create_flags(attr->flags)) - return ERR_PTR(-EINVAL); - if (entries < 0) return ERR_PTR(-EINVAL); - if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) + if (check_cq_create_flags(attr->flags)) return ERR_PTR(-EOPNOTSUPP); entries = roundup_pow_of_two(entries + 1); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d4b227126265..fbf14a768105 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -692,6 +692,7 @@ static inline u32 check_cq_create_flags(u32 flags) * It returns non-zero value for unsupported CQ * create flags, otherwise it returns zero. 
*/ - return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN); + return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN | + IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); } #endif /* MLX5_IB_H */ -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next 4/7] net/mlx4_core: Add handlning of RoCE v2 over IPV4 in attach_flow
On 12/29/2015 3:24 PM, Matan Barak wrote: From: Maor Gottliebs/handlning/handling/ When attaching multicast for RoCE v2, we need to be able to steer packets to the QPs. Hence, we add support for IPV4 over IB steering. not sure to follow on the change-log, can you clarify it little further... Signed-off-by: Maor Gottlieb --- drivers/net/ethernet/mellanox/mlx4/mcg.c | 14 -- include/linux/mlx4/device.h | 6 ++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 1d4e2e0..834e60e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -858,7 +858,9 @@ static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, break; case MLX4_NET_TRANS_RULE_ID_IB: - rule_hw->ib.l3_qpn = spec->ib.l3_qpn; + rule_hw->ib.l3_qpn = spec->ib.l3_qpn | + (spec->ib.roce_type == MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4 ? +(__force __be32)0x80 : (__force __be32)0); maybe avoid using hard coded constants and get meaningful name for them? 
rule_hw->ib.qpn_mask = spec->ib.qpn_msk; memcpy(_hw->ib.dst_gid, >ib.dst_gid, 16); memcpy(_hw->ib.dst_gid_msk, >ib.dst_gid_msk, 16); @@ -1384,10 +1386,18 @@ int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, memcpy(spec.eth.dst_mac_msk, _mask, ETH_ALEN); break; + case MLX4_PROT_IB_IPV4: + spec.id = MLX4_NET_TRANS_RULE_ID_IB; + memcpy(spec.ib.dst_gid + 12, gid + 12, 4); + memset(spec.ib.dst_gid_msk + 12, 0xff, 4); + spec.ib.roce_type = MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4; + break; + case MLX4_PROT_IB_IPV6: spec.id = MLX4_NET_TRANS_RULE_ID_IB; memcpy(spec.ib.dst_gid, gid, 16); - memset(_gid_msk, 0xff, 16); + memset(spec.ib.dst_gid_msk, 0xff, 16); + spec.ib.roce_type = MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV6; break; default: return -EINVAL; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 0d873f1ae..cdc75b2 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -391,6 +391,11 @@ enum mlx4_protocol { MLX4_PROT_FCOE }; +enum mlx4_flow_roce_type { + MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV6 = 0, + MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4 +}; + enum { MLX4_MTT_FLAG_PRESENT = 1 }; @@ -1197,6 +1202,7 @@ struct mlx4_spec_ipv4 { struct mlx4_spec_ib { __be32 l3_qpn; __be32 qpn_msk; + enummlx4_flow_roce_type roce_type; u8 dst_gid[16]; u8 dst_gid_msk[16]; }; -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V3] IB/mlx5: Unify CQ create flags check
From: Leon RomanovskyThe create_cq() can receive creation flags which were used differently by two commits which added create_cq extended command and cross-channel. The merged code caused to not accept any flags at all. This patch unifies the check into one function and one return error code. Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command") Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support") Signed-off-by: Leon Romanovsky --- Changes from v2: * Remove blank line after Fixes * Place changes below Signed-off-by Changes from v1: * Remove links to linux-rdma from commit message * Placed change log under git comment section (---) Changes from v0: * Add Fixes tag drivers/infiniband/hw/mlx5/cq.c | 9 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b14316603e44..7ddc790b1819 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -757,10 +757,6 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, >db); } -enum { - CQ_CREATE_FLAGS_SUPPORTED = IB_CQ_FLAGS_TIMESTAMP_COMPLETION -}; - struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -778,13 +774,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int eqn; int err; - if (check_cq_create_flags(attr->flags)) - return ERR_PTR(-EINVAL); - if (entries < 0) return ERR_PTR(-EINVAL); - if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) + if (check_cq_create_flags(attr->flags)) return ERR_PTR(-EOPNOTSUPP); entries = roundup_pow_of_two(entries + 1); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d4b227126265..fbf14a768105 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -692,6 +692,7 @@ static inline u32 
check_cq_create_flags(u32 flags) * It returns non-zero value for unsupported CQ * create flags, otherwise it returns zero. */ - return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN); + return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN | + IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); } #endif /* MLX5_IB_H */ -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IB/mad: Ensure fairness in ib_mad_completion_handler
Please just convert the mad handler to the new CQ API in drivers/infiniband/core/cq.c. If you have any question about it I'd be glad to help you. +1 on this suggestion. We had these sorts of questions in our ULPs as well. The CQ API should take care of all that for you and leaves you to just handle the completions... -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] IB/core: Remove a set-but-not-used variable from ib_sg_to_pages()
Detected this by building the IB core with W=1. See also patch "IB core: Fix ib_sg_to_pages()" (commit 8f5ba10ed40a). Signed-off-by: Bart Van AsscheCc: Sagi Grimberg Cc: Christoph Hellwig --- drivers/infiniband/core/verbs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 545906d..c90ed29 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1530,7 +1530,7 @@ int ib_sg_to_pages(struct ib_mr *mr, int (*set_page)(struct ib_mr *, u64)) { struct scatterlist *sg; - u64 last_end_dma_addr = 0, last_page_addr = 0; + u64 last_end_dma_addr = 0; unsigned int last_page_off = 0; u64 page_mask = ~((u64)mr->page_size - 1); int i, ret; @@ -1572,7 +1572,6 @@ next_page: mr->length += dma_len; last_end_dma_addr = end_dma_addr; - last_page_addr = end_dma_addr & page_mask; last_page_off = end_dma_addr & ~page_mask; } -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] IB/core: sysfs.c: Fix PerfMgt ClassPortInfo handling
Port number is not part of ClassPortInfo attribute but is still needed as a parameter when invoking process_mad. To properly handle this attribute, port_num is added as a parameter to get_counter_table and get_perf_mad was changed not to store port_num in the attribute itself when it's querying the ClassPortInfo attribute. This handles issue pointed out by Matan BarakFixes: 145d9c541032 ('IB/core: Display extended counter set if available') Signed-off-by: Hal Rosenstock Acked-by: Matan Barak Acked-by: Ira Weiny --- Change from v1: Added fixes line to description diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 539040f..2daf832 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -438,7 +438,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, int attr, in_mad->mad_hdr.method= IB_MGMT_METHOD_GET; in_mad->mad_hdr.attr_id = attr; - in_mad->data[41] = port_num;/* PortSelect field */ + if (attr != IB_PMA_CLASS_PORT_INFO) + in_mad->data[41] = port_num;/* PortSelect field */ if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL, @@ -714,11 +715,12 @@ err: * Figure out which counter table to use depending on * the device capabilities. 
*/ -static struct attribute_group *get_counter_table(struct ib_device *dev) +static struct attribute_group *get_counter_table(struct ib_device *dev, +int port_num) { struct ib_class_port_info cpi; - if (get_perf_mad(dev, 0, IB_PMA_CLASS_PORT_INFO, + if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO, , 40, sizeof(cpi)) >= 0) { if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH) @@ -776,7 +778,7 @@ static int add_port(struct ib_device *device, int port_num, goto err_put; } - p->pma_table = get_counter_table(device); + p->pma_table = get_counter_table(device, port_num); ret = sysfs_create_group(>kobj, p->pma_table); if (ret) goto err_put_gid_attrs; -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/6] IB/uapi: expose uverbs WC opcodes
+ IB_WC_SEND = IB_UVERBS_WC_SEND, + IB_WC_RDMA_WRITE= IB_UVERBS_WC_RDMA_WRITE, + IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ, + IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP, + IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD, + IB_WC_LSO = IB_UVERBS_WC_SEND_END, + IB_WC_LOCAL_INV = IB_UVERBS_WC_SEND_END + 1, + IB_WC_REG_MR= IB_UVERBS_WC_SEND_END + 2, + IB_WC_MASKED_COMP_SWAP = IB_UVERBS_WC_SEND_END + 3, + IB_WC_MASKED_FETCH_ADD = IB_UVERBS_WC_SEND_END + 4, As you did it in the first patch, just don't assign after IB_WC_LOCAL_INV. Compiler will handle IB_UVERBS_WC_SEND_END + X calculations by itself. I disagree, I'd say it's better to keep the code verbosity level here... -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] IB/uapi: expose uverbs WC flags
+enum ib_uverbs_wc_flags { + IB_UVERBS_WC_GRH= (1 << 0), + IB_UVERBS_WC_WITH_IMM = (1 << 1), + IB_UVERBS_WC_WITH_INVALIDATE= (1 << 2), + IB_UVERBS_WC_IP_CSUM_OK = (1 << 3), + IB_UVERBS_WC_WITH_SMAC = (1 << 4), + IB_UVERBS_WC_WITH_VLAN = (1 << 5), + IB_UVERBS_WC_WITH_NETWORK_HDR_TYPE = (1 << 6), +}; It will be great to add _FLAGS_ mark in the name and add _LAST too. Don't you prefer to stay consistent with the kernel enumeration? -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH for-next 3/7] IB/mlx4: Configure device to work in RoCEv2
From: Moni ShouaSome mlx4 adapters are RoCEv2 capable. To enable this feature some hardware configuration is required. This is 1. Set port general parameters 2. Configure the outgoing UDP destination port 3. Configure the QP that work with RoCEv2 Signed-off-by: Moni Shoua --- drivers/infiniband/hw/mlx4/main.c | 19 ++--- drivers/infiniband/hw/mlx4/qp.c | 35 --- drivers/net/ethernet/mellanox/mlx4/fw.c | 16 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 7 +-- drivers/net/ethernet/mellanox/mlx4/port.c | 8 +++ drivers/net/ethernet/mellanox/mlx4/qp.c | 28 + include/linux/mlx4/device.h | 1 + include/linux/mlx4/qp.h | 15 +++-- include/rdma/ib_verbs.h | 2 ++ 9 files changed, 120 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 988fa33..44e5699 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -384,6 +384,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, int i; int ret; unsigned long flags; + struct ib_gid_attr attr; if (port_num > MLX4_MAX_PORTS) return -EINVAL; @@ -394,10 +395,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, if (!rdma_cap_roce_gid_table(>ib_dev, port_num)) return index; - ret = ib_get_cached_gid(>ib_dev, port_num, index, , NULL); + ret = ib_get_cached_gid(>ib_dev, port_num, index, , ); if (ret) return ret; + if (attr.ndev) + dev_put(attr.ndev); + if (!memcmp(, , sizeof(gid))) return -EINVAL; @@ -405,7 +409,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, port_gid_table = >gids[port_num - 1]; for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) - if (!memcmp(_gid_table->gids[i].gid, , sizeof(gid))) { + if (!memcmp(_gid_table->gids[i].gid, , sizeof(gid)) && + attr.gid_type == port_gid_table->gids[i].gid_type) { ctx = port_gid_table->gids[i].ctx; break; } @@ -2481,7 +2486,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_init_sriov(ibdev)) goto err_mad; - if (dev->caps.flags & 
MLX4_DEV_CAP_FLAG_IBOE) { + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE || + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { if (!iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(>nb); @@ -2490,6 +2496,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_notif; } } + if (!mlx4_is_slave(dev) && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { + err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT); + if (err) { + goto err_notif; + } + } } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8d28059..c0dee79 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1508,6 +1508,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) return 0; } +enum { + MLX4_QPC_ROCE_MODE_1 = 0, + MLX4_QPC_ROCE_MODE_2 = 2, + MLX4_QPC_ROCE_MODE_MAX = 0xff +}; + +static u8 gid_type_to_qpc(enum ib_gid_type gid_type) +{ + switch (gid_type) { + case IB_GID_TYPE_ROCE: + return MLX4_QPC_ROCE_MODE_1; + case IB_GID_TYPE_ROCE_UDP_ENCAP: + return MLX4_QPC_ROCE_MODE_2; + default: + return MLX4_QPC_ROCE_MODE_MAX; + } +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -1651,9 +1669,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, u16 vlan = 0x; u8 smac[ETH_ALEN]; int status = 0; + int is_eth = rdma_cap_eth_ah(>ib_dev, port_num) && + attr->ah_attr.ah_flags & IB_AH_GRH; - if (rdma_cap_eth_ah(>ib_dev, port_num) && - attr->ah_attr.ah_flags & IB_AH_GRH) { + if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) { int index = attr->ah_attr.grh.sgid_index; status =
[PATCH for-next 4/7] net/mlx4_core: Add handling of RoCE v2 over IPV4 in attach_flow
From: Maor GottliebWhen attaching multicast for RoCE v2, we need to be able to steer packets to the QPs. Hence, we add support for IPV4 over IB steering. Signed-off-by: Maor Gottlieb --- drivers/net/ethernet/mellanox/mlx4/mcg.c | 14 -- include/linux/mlx4/device.h | 6 ++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 1d4e2e0..834e60e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -858,7 +858,9 @@ static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, break; case MLX4_NET_TRANS_RULE_ID_IB: - rule_hw->ib.l3_qpn = spec->ib.l3_qpn; + rule_hw->ib.l3_qpn = spec->ib.l3_qpn | + (spec->ib.roce_type == MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4 ? +(__force __be32)0x80 : (__force __be32)0); rule_hw->ib.qpn_mask = spec->ib.qpn_msk; memcpy(_hw->ib.dst_gid, >ib.dst_gid, 16); memcpy(_hw->ib.dst_gid_msk, >ib.dst_gid_msk, 16); @@ -1384,10 +1386,18 @@ int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, memcpy(spec.eth.dst_mac_msk, _mask, ETH_ALEN); break; + case MLX4_PROT_IB_IPV4: + spec.id = MLX4_NET_TRANS_RULE_ID_IB; + memcpy(spec.ib.dst_gid + 12, gid + 12, 4); + memset(spec.ib.dst_gid_msk + 12, 0xff, 4); + spec.ib.roce_type = MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4; + break; + case MLX4_PROT_IB_IPV6: spec.id = MLX4_NET_TRANS_RULE_ID_IB; memcpy(spec.ib.dst_gid, gid, 16); - memset(_gid_msk, 0xff, 16); + memset(spec.ib.dst_gid_msk, 0xff, 16); + spec.ib.roce_type = MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV6; break; default: return -EINVAL; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 0d873f1ae..cdc75b2 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -391,6 +391,11 @@ enum mlx4_protocol { MLX4_PROT_FCOE }; +enum mlx4_flow_roce_type { + MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV6 = 0, + MLX4_FLOW_SPEC_IB_ROCE_TYPE_IPV4 +}; + enum { MLX4_MTT_FLAG_PRESENT = 1 }; 
@@ -1197,6 +1202,7 @@ struct mlx4_spec_ipv4 { struct mlx4_spec_ib { __be32 l3_qpn; __be32 qpn_msk; + enummlx4_flow_roce_type roce_type; u8 dst_gid[16]; u8 dst_gid_msk[16]; }; -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH for-next 5/7] IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers
From: Moni Shoua RoCEv2 packets are sent over IP/UDP protocols. The mlx4 driver uses a type of RAW QP to send packets for QP1 and therefore needs to build the network headers below BTH in software. This patch adds an option to build QP1 packets with IP and UDP headers if RoCEv2 is requested. Signed-off-by: Moni Shoua --- drivers/infiniband/hw/mlx4/qp.c | 86 ++--- 1 file changed, 54 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c0dee79..8485602 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -32,6 +32,8 @@ */ #include +#include +#include #include #include #include @@ -2282,16 +2284,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, return 0; } -static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac) -{ - int i; - - for (i = ETH_ALEN; i; i--) { - dst_mac[i - 1] = src_mac & 0xff; - src_mac >>= 8; - } -} - +#define MLX4_ROCEV2_QP1_SPORT 0xC000 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { @@ -2311,6 +2304,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, bool is_eth; bool is_vlan = false; bool is_grh; + bool is_udp = false; + int ip_version = 0; send_size = 0; for (i = 0; i < wr->wr.num_sge; ++i) @@ -2319,6 +2314,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { + struct ib_gid_attr gid_attr; + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so @@ -2329,23 +2326,36 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, if (err) return err; } else { - err = ib_get_cached_gid(ib_dev, + err = ib_get_cached_gid(sqp->qp.ibqp.device, be32_to_cpu(ah->av.ib.port_pd) >> 24, 
ah->av.ib.gid_index, , - NULL); - if (!err && !memcmp(, , sizeof(sgid))) - err = -ENOENT; - if (err) + _attr); + if (!err) { + if (gid_attr.ndev) + dev_put(gid_attr.ndev); + if (!memcmp(, , sizeof(sgid))) + err = -ENOENT; + } + if (!err) { + is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; + if (is_udp) { + if (ipv6_addr_v4mapped((struct in6_addr *))) + ip_version = 4; + else + ip_version = 6; + is_grh = false; + } + } else { return err; + } } - if (ah->av.eth.vlan != cpu_to_be16(0x)) { vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; is_vlan = 1; } } err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, - 0, 0, 0, >ud_header); + ip_version, is_udp, 0, >ud_header); if (err) return err; @@ -2356,7 +2366,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); } - if (is_grh) { + if (is_grh || (ip_version == 6)) { sqp->ud_header.grh.traffic_class = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label= @@ -2385,6 +2395,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, ah->av.ib.dgid, 16); } + if (ip_version == 4) { + sqp->ud_header.ip4.tos = + (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; + sqp->ud_header.ip4.id
[PATCH for-next 6/7] IB/mlx4: Create and use another QP1 for RoCEv2
From: Moni Shoua The mlx4 driver uses a special QP to implement the GSI QP. This kind of QP allows to build the InfiniBand headers in SW to be put before the payload that comes in with the WR. The mlx4 HW builds the packet, calculates the ICRC and puts it at the end of the payload. This ICRC calculation however depends on the QP configuration which is determined when QP is modified (roce_mode during INIT->RTR). On the other hand, ICRC verification when packet is received does not depend on this configuration. Therefore, using 2 GSI QPs for send (one for each RoCE version) and 1 GSI QP for receive are required. Signed-off-by: Moni Shoua --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 7 ++ drivers/infiniband/hw/mlx4/qp.c | 162 ++- 2 files changed, 149 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7179fb1..52ce7b0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -177,11 +177,18 @@ struct mlx4_ib_wq { unsignedtail; }; +enum { + MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START +}; + enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, + + /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */ + MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, MLX4_IB_SRIOV_SQP = 1 << 31, }; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8485602..a154d51 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -87,6 +87,7 @@ struct mlx4_ib_sqp { u32 send_psn; struct ib_ud_header ud_header; u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; + struct ib_qp*roce_v2_gsi; }; enum { @@ -155,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp 
*qp) } } } - return proxy_sqp; + if (proxy_sqp) + return 1; + + return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP); } /* used for INIT/CLOSE port logic */ @@ -695,6 +699,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp = >qp; qp->pri.vid = 0x; qp->alt.vid = 0x; + sqp->roce_v2_gsi = NULL; } else { qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp); if (!qp) @@ -1085,9 +1090,17 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, del_gid_entries(qp); } -static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) +static int get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) { /* Native or PPF */ + if ((!mlx4_is_mfunc(dev->dev) || mlx4_is_master(dev->dev)) && + attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) { + int sqpn; + int res = mlx4_qp_reserve_range(dev->dev, 1, 1, , 0); + + return res ? -abs(res) : sqpn; + } + if (!mlx4_is_mfunc(dev->dev) || (mlx4_is_master(dev->dev) && attr->create_flags & MLX4_IB_SRIOV_SQP)) { @@ -1102,9 +1115,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) return dev->dev->caps.qp1_proxy[attr->port_num - 1]; } -struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { struct mlx4_ib_qp *qp = NULL; int err; @@ -1123,6 +1136,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | + MLX4_IB_QP_CREATE_ROCE_V2_GSI | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); @@ -1131,15 +1145,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - if (init_attr->create_flags && - ((udata && init_attr->create_flags & ~(sup_u_create_flags)) || -((init_attr->create_flags &
Re: [PATCH V2] IB/mlx5: Unify CQ create flags check
On Tue, Dec 29, 2015 at 03:51:47PM +0200, Sagi Grimberg wrote: > >From: Leon Romanovsky> > > >The create_cq() can receive creation flags which were used > >differently by two following commits [1] and [2]. The current > >code caused to not accept any flags at all. > > We can skip referencing the linux-rdma mailing list. > > > > >This patch unifies the check into one function and one return > >error code. > > > >Fixes: 972ecb821379 ("IB/mlx5: Add create_cq extended command") > >Fixes: 051f263098a9 ("IB/mlx5: Add driver cross-channel support") > > > >[1] http://www.spinics.net/lists/linux-rdma/msg31430.html > >[2] http://www.spinics.net/lists/linux-rdma/msg31658.html > > > >Changes from v0: > > * Add Fixes tag > > Umm, the above ([1], [2], Changes) usually go under the "---" > separator so we won't see them in git log forever. Doug, Do you want me resend the patch? > > > > >Signed-off-by: Leon Romanovsky > >--- > > drivers/infiniband/hw/mlx5/cq.c | 9 + > > drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- > > 2 files changed, 3 insertions(+), 9 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Generic InfiniBand transport done in software
> No. PIO and SDMA is driver specific and lives in the driver. Rdmavt has no > concept of this. I'm agreeing that the send will be generic and have no hw > specific stuff. > I understand that PIO/SDMA are not a concept of RVT. However, making the send from RVT to driver exactly as the interface from ib_core to RVT raises the question: What exactly do we achieve by this? > > > As I've stated a number of times across multiple threads: It must not do > anything that would prevent another driver from using it. > The question is not how Soft RoCE fits into this framework but how does this framework achieve its goals. > > > I expect feedback based on the code submissions. More will be coming > shortly. I have taken all the feedback from the first post and will be > sending a v2 shortly. > Again, I have no idea about the complete interfaces between both pieces of the suggested solution. - If you have them then please publish - if you don't but plan to have them then why did you submit a half baked idea - If you say that final interface is what we see now then I say that the problem of code duplication isn't going to be resolved So, what it is from the 3? > > -Denny > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-next 3/7] IB/mlx4: Configure device to work in RoCEv2
On 12/29/2015 3:24 PM, Matan Barak wrote: From: Moni ShouaSome mlx4 adapters are RoCEv2 capable. To enable this feature some hardware configuration is required. This is 1. Set port general parameters 2. Configure the outgoing UDP destination port 3. Configure the QP that work with RoCEv2 Signed-off-by: Moni Shoua --- drivers/infiniband/hw/mlx4/main.c | 19 ++--- drivers/infiniband/hw/mlx4/qp.c | 35 --- drivers/net/ethernet/mellanox/mlx4/fw.c | 16 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 7 +-- drivers/net/ethernet/mellanox/mlx4/port.c | 8 +++ drivers/net/ethernet/mellanox/mlx4/qp.c | 28 + include/linux/mlx4/device.h | 1 + include/linux/mlx4/qp.h | 15 +++-- include/rdma/ib_verbs.h | 2 ++ 9 files changed, 120 insertions(+), 11 deletions(-) Better put (please do...) functionality which is plain mlx4 corish (such as new/modified FW commands, new SW/FW fields of structs and such) into mlx4_core patch. diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 988fa33..44e5699 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -384,6 +384,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, int i; int ret; unsigned long flags; + struct ib_gid_attr attr; if (port_num > MLX4_MAX_PORTS) return -EINVAL; @@ -394,10 +395,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, if (!rdma_cap_roce_gid_table(>ib_dev, port_num)) return index; - ret = ib_get_cached_gid(>ib_dev, port_num, index, , NULL); + ret = ib_get_cached_gid(>ib_dev, port_num, index, , ); if (ret) return ret; + if (attr.ndev) + dev_put(attr.ndev); + if (!memcmp(, , sizeof(gid))) return -EINVAL; @@ -405,7 +409,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, port_gid_table = >gids[port_num - 1]; for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) - if (!memcmp(_gid_table->gids[i].gid, , sizeof(gid))) { + if (!memcmp(_gid_table->gids[i].gid, , sizeof(gid)) && + attr.gid_type == 
port_gid_table->gids[i].gid_type) { ctx = port_gid_table->gids[i].ctx; break; } @@ -2481,7 +2486,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_init_sriov(ibdev)) goto err_mad; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) { + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE || + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { if (!iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(>nb); @@ -2490,6 +2496,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_notif; } } + if (!mlx4_is_slave(dev) && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { + err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT); + if (err) { + goto err_notif; + } + } } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8d28059..c0dee79 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1508,6 +1508,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) return 0; } +enum { + MLX4_QPC_ROCE_MODE_1 = 0, + MLX4_QPC_ROCE_MODE_2 = 2, + MLX4_QPC_ROCE_MODE_MAX = 0xff +}; + +static u8 gid_type_to_qpc(enum ib_gid_type gid_type) +{ + switch (gid_type) { + case IB_GID_TYPE_ROCE: + return MLX4_QPC_ROCE_MODE_1; + case IB_GID_TYPE_ROCE_UDP_ENCAP: + return MLX4_QPC_ROCE_MODE_2; + default: + return MLX4_QPC_ROCE_MODE_MAX; + } +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -1651,9 +1669,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, u16 vlan = 0x; u8 smac[ETH_ALEN]; int status = 0; + int is_eth = rdma_cap_eth_ah(>ib_dev, port_num) && + attr->ah_attr.ah_flags & IB_AH_GRH; - if (rdma_cap_eth_ah(>ib_dev, port_num) && -