Add support for RDMAoE device binding and IP --> GID resolution. Modify calls to the SA so that the link type is verified first and the appropriate call is made to either the IB SA or the RDMAoE SA.
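For context, IP --> GID resolution on an Ethernet link reduces to deriving an IPv6 link-local GID from the port MAC address, EUI-64 style, which is what the new rdma_mac_to_ll_addr() helper below implements. A minimal user-space sketch of that mapping, for illustration only (mac_to_ll_gid() and the sample MAC are made up for this example and are not part of the patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative user-space copy of the MAC -> link-local GID mapping
 * performed by rdma_mac_to_ll_addr() in this patch. */
static void mac_to_ll_gid(const uint8_t mac[6], uint8_t gid[16])
{
        memset(gid, 0, 16);
        gid[0] = 0xfe;                  /* fe80::/64 link-local prefix */
        gid[1] = 0x80;
        memcpy(gid + 8, mac, 3);        /* OUI half of the MAC */
        gid[8] ^= 2;                    /* flip the universal/local bit */
        gid[11] = 0xff;                 /* EUI-64 filler bytes */
        gid[12] = 0xfe;
        memcpy(gid + 13, mac + 3, 3);   /* NIC-specific half of the MAC */
}

int main(void)
{
        uint8_t mac[6] = { 0x00, 0x02, 0xc9, 0x01, 0x02, 0x03 };
        uint8_t gid[16];
        int i;

        mac_to_ll_gid(mac, gid);
        printf("gid:");
        for (i = 0; i < 16; i += 2)
                printf(" %02x%02x", gid[i], gid[i + 1]);
        printf("\n");   /* gid: fe80 0000 0000 0000 0202 c9ff fe01 0203 */
        return 0;
}

The reverse direction is handled by rdma_get_ll_mac(), which pulls the two 3-byte halves of the MAC back out of the interface identifier and flips the universal/local bit again.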
Signed-off-by: Eli Cohen <e...@mellanox.co.il>
---
 drivers/infiniband/core/addr.c |   20 ++++---
 drivers/infiniband/core/cma.c  |  124 +++++++++++++++++++++++++++++++++-------
 include/rdma/ib_addr.h         |   53 +++++++++++++++++
 3 files changed, 166 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index ce511d8..440e613 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -64,7 +64,7 @@ struct addr_req {
 
 static void process_req(struct work_struct *work);
 
-static DEFINE_MUTEX(lock);
+static DEFINE_SPINLOCK(lock);
 static LIST_HEAD(req_list);
 static DECLARE_DELAYED_WORK(work, process_req);
 static struct workqueue_struct *addr_wq;
@@ -163,7 +163,7 @@ static void queue_req(struct addr_req *req)
 {
        struct addr_req *temp_req;
 
-       mutex_lock(&lock);
+       spin_lock(&lock);
        list_for_each_entry_reverse(temp_req, &req_list, list) {
                if (time_after_eq(req->timeout, temp_req->timeout))
                        break;
@@ -173,7 +173,7 @@ static void queue_req(struct addr_req *req)
 
        if (req_list.next == &req->list)
                set_timeout(req->timeout);
-       mutex_unlock(&lock);
+       spin_unlock(&lock);
 }
 
 static void addr_send_arp(struct sockaddr *dst_in)
@@ -207,7 +207,9 @@ static void addr_send_arp(struct sockaddr *dst_in)
                if (!dst)
                        return;
 
-               neigh_event_send(dst->neighbour, NULL);
+               if (dst->neighbour)
+                       neigh_event_send(dst->neighbour, NULL);
+
                dst_release(dst);
                break;
        }
@@ -322,7 +324,7 @@ static void process_req(struct work_struct *work)
 
        INIT_LIST_HEAD(&done_list);
 
-       mutex_lock(&lock);
+       spin_lock(&lock);
        list_for_each_entry_safe(req, temp_req, &req_list, list) {
                if (req->status == -ENODATA) {
                        src_in = (struct sockaddr *) &req->src_addr;
@@ -341,7 +343,7 @@ static void process_req(struct work_struct *work)
                req = list_entry(req_list.next, struct addr_req, list);
                set_timeout(req->timeout);
        }
-       mutex_unlock(&lock);
+       spin_unlock(&lock);
 
        list_for_each_entry_safe(req, temp_req, &done_list, list) {
                list_del(&req->list);
@@ -439,7 +441,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        struct addr_req *req;
        int ret = 0;
 
-       req = kzalloc(sizeof *req, GFP_KERNEL);
+       req = kzalloc(sizeof *req, GFP_ATOMIC);
        if (!req)
                return -ENOMEM;
 
@@ -483,7 +485,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 {
        struct addr_req *req, *temp_req;
 
-       mutex_lock(&lock);
+       spin_lock(&lock);
        list_for_each_entry_safe(req, temp_req, &req_list, list) {
                if (req->addr == addr) {
                        req->status = -ECANCELED;
@@ -493,7 +495,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
                        break;
                }
        }
-       mutex_unlock(&lock);
+       spin_unlock(&lock);
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 851de83..6cf0f1b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -49,6 +49,7 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
+#include <rdma/rdmaoe_sa.h>
 #include <rdma/iw_cm.h>
 
 MODULE_AUTHOR("Sean Hefty");
@@ -69,6 +70,8 @@ static struct ib_client cma_client = {
 };
 
 static struct ib_sa_client sa_client;
+static struct ib_sa_client rdmaoe_sa_client;
+static struct ib_sa_client rdmaoe_mcast_client;
 static struct rdma_addr_client addr_client;
 static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
@@ -327,22 +330,30 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
        struct cma_device *cma_dev;
-       union ib_gid gid;
+       union ib_gid gid, llgid, *tmp;
        int ret = -ENODEV;
 
        switch (rdma_node_get_transport(dev_addr->dev_type)) {
        case RDMA_TRANSPORT_IB:
                ib_addr_get_sgid(dev_addr, &gid);
+               tmp = &gid;
                break;
        case RDMA_TRANSPORT_IWARP:
                iw_addr_get_sgid(dev_addr, &gid);
+               rdma_mac_to_ll_addr(dev_addr->src_dev_addr, &llgid);
                break;
        default:
                return -ENODEV;
        }
 
        list_for_each_entry(cma_dev, &dev_list, list) {
-               ret = ib_find_cached_gid(cma_dev->device, &gid,
+               if (ib_get_port_link_type(cma_dev->device, id_priv->id.port_num)
+                   == PORT_LINK_ETH &&
+                   rdma_node_get_transport(dev_addr->dev_type) ==
+                   RDMA_TRANSPORT_IWARP)
+                       tmp = &llgid;
+
+               ret = ib_find_cached_gid(cma_dev->device, tmp,
                                         &id_priv->id.port_num, NULL);
                if (!ret) {
                        cma_attach_to_dev(id_priv, cma_dev);
@@ -821,12 +832,18 @@ static void cma_release_port(struct rdma_id_private *id_priv)
 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
 {
        struct cma_multicast *mc;
+       enum ib_port_link_type lt;
 
+       lt = ib_get_port_link_type(id_priv->id.device, id_priv->id.port_num);
        while (!list_empty(&id_priv->mc_list)) {
                mc = container_of(id_priv->mc_list.next,
                                  struct cma_multicast, list);
                list_del(&mc->list);
-               ib_sa_free_multicast(mc->multicast.ib);
+               if (lt == PORT_LINK_IB)
+                       ib_sa_free_multicast(mc->multicast.ib);
+               else
+                       rdmaoe_sa_free_multicast(mc->multicast.ib);
+
                kfree(mc);
        }
 }
@@ -1032,7 +1049,19 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
        if (rt->num_paths == 2)
                rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
 
-       ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
+
+       switch (ib_get_port_link_type(listen_id->device,
+                                     ib_event->param.req_rcvd.port)) {
+       case PORT_LINK_IB:
+               ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
+               break;
+       case PORT_LINK_ETH:
+               iw_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
+               break;
+       default:
+               printk(KERN_ERR "RDMA CMA: unknown rdma port link type\n");
+               goto destroy_id;
+       }
        ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
                                &id->route.addr.dev_addr);
        if (ret)
@@ -1563,10 +1592,19 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
        struct ib_sa_path_rec path_rec;
        ib_sa_comp_mask comp_mask;
        struct sockaddr_in6 *sin6;
+       enum ib_port_link_type lt;
 
+       lt = ib_get_port_link_type(id_priv->id.device, id_priv->id.port_num);
        memset(&path_rec, 0, sizeof path_rec);
-       ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
-       ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+       if (lt == PORT_LINK_IB ||
+           (ib_addr_hw_addr_is_gid(addr->dev_addr.src_dev_addr) &&
+            ib_addr_hw_addr_is_gid(addr->dev_addr.dst_dev_addr))) {
+               ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
+               ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+       } else {
+               rdma_mac_to_ll_addr(addr->dev_addr.src_dev_addr, &path_rec.sgid);
+               rdma_mac_to_ll_addr(addr->dev_addr.dst_dev_addr, &path_rec.dgid);
+       }
        path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
        path_rec.numb_path = 1;
        path_rec.reversible = 1;
@@ -1586,11 +1624,18 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
                comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
        }
 
-       id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
-                                              id_priv->id.port_num, &path_rec,
-                                              comp_mask, timeout_ms,
-                                              GFP_KERNEL, cma_query_handler,
-                                              work, &id_priv->query);
+       if (lt == PORT_LINK_IB)
+               id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
+                                                      id_priv->id.port_num, &path_rec,
+                                                      comp_mask, timeout_ms,
+                                                      GFP_KERNEL, cma_query_handler,
+                                                      work, &id_priv->query);
+       else
+               id_priv->query_id = rdmaoe_sa_path_rec_get(&rdmaoe_sa_client, id_priv->id.device,
+                                                          id_priv->id.port_num, &path_rec,
+                                                          comp_mask, timeout_ms,
+                                                          GFP_KERNEL, cma_query_handler,
+                                                          work, &id_priv->query);
 
        return (id_priv->query_id < 0) ? id_priv->query_id : 0;
 }
@@ -2699,6 +2744,14 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
        }
 }
 
+static union ib_gid rdmaoe_mgid6 = {
+       .raw = {0xff, 0x12, 0x60, 0x1b}
+};
+
+static union ib_gid rdmaoe_mgid4 = {
+       .raw = {0xff, 0x12, 0x40, 0x1b}
+};
+
 static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
                                 struct cma_multicast *mc)
 {
@@ -2706,17 +2759,27 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
        ib_sa_comp_mask comp_mask;
        int ret;
+       enum ib_port_link_type lt;
 
-       ib_addr_get_mgid(dev_addr, &rec.mgid);
-       ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
-                                    &rec.mgid, &rec);
-       if (ret)
-               return ret;
+       lt = ib_get_port_link_type(id_priv->id.device, id_priv->id.port_num);
+       if (lt == PORT_LINK_IB) {
+               ib_addr_get_mgid(dev_addr, &rec.mgid);
+               ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+                                            &rec.mgid, &rec);
+               if (ret)
+                       return ret;
+
+       } else
+               rec.mgid = ((struct sockaddr *)&mc->addr)->sa_family == AF_INET6 ?
+                       rdmaoe_mgid6 : rdmaoe_mgid4;
 
        cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
        if (id_priv->id.ps == RDMA_PS_UDP)
                rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
-       ib_addr_get_sgid(dev_addr, &rec.port_gid);
+       if (lt == PORT_LINK_ETH)
+               rdma_mac_to_ll_addr(dev_addr->src_dev_addr, &rec.port_gid);
+       else
+               ib_addr_get_sgid(dev_addr, &rec.port_gid);
        rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
        rec.join_state = 1;
@@ -2730,10 +2793,16 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
                comp_mask |= IB_SA_MCMEMBER_REC_RATE |
                             IB_SA_MCMEMBER_REC_RATE_SELECTOR;
 
-       mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
-                                               id_priv->id.port_num, &rec,
-                                               comp_mask, GFP_KERNEL,
-                                               cma_ib_mc_handler, mc);
+       mc->multicast.ib = lt == PORT_LINK_IB ?
+               ib_sa_join_multicast(&sa_client, id_priv->id.device,
+                                    id_priv->id.port_num, &rec,
+                                    comp_mask, GFP_KERNEL,
+                                    cma_ib_mc_handler, mc) :
+               rdmaoe_sa_join_multicast(&rdmaoe_mcast_client, id_priv->id.device,
+                                        id_priv->id.port_num, &rec,
+                                        comp_mask, GFP_KERNEL,
+                                        cma_ib_mc_handler, mc);
+
        if (IS_ERR(mc->multicast.ib))
                return PTR_ERR(mc->multicast.ib);
@@ -2787,8 +2856,10 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 {
        struct rdma_id_private *id_priv;
        struct cma_multicast *mc;
+       enum ib_port_link_type lt;
 
        id_priv = container_of(id, struct rdma_id_private, id);
+       lt = ib_get_port_link_type(id_priv->id.device, id_priv->id.port_num);
        spin_lock_irq(&id_priv->lock);
        list_for_each_entry(mc, &id_priv->mc_list, list) {
                if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
@@ -2799,7 +2870,10 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
                                ib_detach_mcast(id->qp,
                                                &mc->multicast.ib->rec.mgid,
                                                mc->multicast.ib->rec.mlid);
-                       ib_sa_free_multicast(mc->multicast.ib);
+                       if (lt == PORT_LINK_IB)
+                               ib_sa_free_multicast(mc->multicast.ib);
+                       else
+                               rdmaoe_sa_free_multicast(mc->multicast.ib);
                        kfree(mc);
                        return;
                }
@@ -2974,6 +3048,8 @@ static int cma_init(void)
                return -ENOMEM;
 
        ib_sa_register_client(&sa_client);
+       ib_sa_register_client(&rdmaoe_sa_client);
+       ib_sa_register_client(&rdmaoe_mcast_client);
        rdma_addr_register_client(&addr_client);
        register_netdevice_notifier(&cma_nb);
 
@@ -2985,6 +3061,8 @@ static int cma_init(void)
 err:
        unregister_netdevice_notifier(&cma_nb);
        rdma_addr_unregister_client(&addr_client);
+       ib_sa_unregister_client(&rdmaoe_mcast_client);
+       ib_sa_unregister_client(&rdmaoe_sa_client);
        ib_sa_unregister_client(&sa_client);
        destroy_workqueue(cma_wq);
        return ret;
@@ -2995,6 +3073,8 @@ static void cma_cleanup(void)
        ib_unregister_client(&cma_client);
        unregister_netdevice_notifier(&cma_nb);
        rdma_addr_unregister_client(&addr_client);
+       ib_sa_unregister_client(&rdmaoe_mcast_client);
+       ib_sa_unregister_client(&rdmaoe_sa_client);
        ib_sa_unregister_client(&sa_client);
        destroy_workqueue(cma_wq);
        idr_destroy(&sdp_ps);
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 483057b..7438b89 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -151,10 +151,63 @@ static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
        memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
 }
 
+static inline void iw_addr_set_dgid(struct rdma_dev_addr *dev_addr,
+                                   union ib_gid *gid)
+{
+       memcpy(dev_addr->dst_dev_addr, gid, sizeof *gid);
+}
+
 static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
                                    union ib_gid *gid)
 {
        memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
 }
 
+static inline int rdma_link_local_addr(struct in6_addr *addr)
+{
+       if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) &&
+           addr->s6_addr32[1] == 0)
+               return 1;
+       else
+               return 0;
+}
+
+static inline void rdma_mac_to_ll_addr(u8 *mac, union ib_gid *gid)
+{
+       memset(gid->raw, 0, 16);
+       *((u32 *)gid->raw) = cpu_to_be32(0xfe800000);
+       gid->raw[12] = 0xfe;
+       gid->raw[11] = 0xff;
+       memcpy(gid->raw + 13, mac + 3, 3);
+       memcpy(gid->raw + 8, mac, 3);
+       gid->raw[8] ^= 2;
+}
+
+static inline int rdma_is_multicast_addr(struct in6_addr *addr)
+{
+       return addr->s6_addr[0] == 0xff ? 1 : 0;
+}
+
+static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
+{
+       memcpy(mac, &addr->s6_addr[8], 3);
+       memcpy(mac + 3, &addr->s6_addr[13], 3);
+       mac[0] ^= 2;
+}
+
+static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
+{
+       int i;
+
+       mac[0] = 0x33;
+       mac[1] = 0x33;
+       for (i = 2; i < 6; ++i)
+               mac[i] = addr->s6_addr[i + 10];
+}
+
+static inline int ib_addr_hw_addr_is_gid(u8 *addr)
+{
+       return be16_to_cpu(*(u16 *)(addr + 4)) == 0xfe80;
+}
+
 #endif /* IB_ADDR_H */
-- 
1.6.3.3