Changes include:
 > Introduce a new 'enum rdma_network_type' (IB, IPv4, IPv6) in the IB core
 > Add a new 'network_hdr_type' field to the global route, work completion
   and path record structs (ib_global_route, ib_wc, ib_sa_path_rec)
 > Determine whether the IPvX address is routable (i.e. reached through a
   gateway) and set hop_limit in the IP header accordingly
 > Extend rdma_addr_find_dmac_by_grh() to also return the network header type

Signed-off-by: Somnath Kotur <[email protected]>
Signed-off-by: Padmanabh Ratnakar <[email protected]>
Signed-off-by: Devesh Sharma <[email protected]>
---
PS: UDP header insertion is not part of this patch yet. This patch only lays
the foundation for IP routability. Filling in the UDP source port/header
should be relatively trivial and will follow in a later patch, depending on
how this design shapes up after review.

 drivers/infiniband/core/addr.c     |   13 +++++++++-
 drivers/infiniband/core/cm.c       |   15 +++++++----
 drivers/infiniband/core/cma.c      |    4 ++-
 drivers/infiniband/core/sa_query.c |    1 +
 drivers/infiniband/core/verbs.c    |   47 ++++++++++++++++++++++++++++++-----
 include/rdma/ib_addr.h             |    3 +-
 include/rdma/ib_sa.h               |    1 +
 include/rdma/ib_verbs.h            |   15 +++++++++++
 8 files changed, 83 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 8172d37..77262e6 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -257,6 +257,9 @@ static int addr4_resolve(struct sockaddr_in *src_in,
                goto put;
        }
 
+       if (rt->rt_uses_gateway)
+               addr->network = RDMA_NETWORK_IPv4;
+
        ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
 put:
        ip_rt_put(rt);
@@ -271,6 +274,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 {
        struct flowi6 fl6;
        struct dst_entry *dst;
+       struct rt6_info *rt;
        int ret;
 
        memset(&fl6, 0, sizeof fl6);
@@ -282,6 +286,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
        if ((ret = dst->error))
                goto put;
 
+       rt = (struct rt6_info *)dst;
        if (ipv6_addr_any(&fl6.saddr)) {
                ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
                                         &fl6.daddr, 0, &fl6.saddr);
@@ -305,6 +310,9 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
                goto put;
        }
 
+       if (rt->rt6i_flags & RTF_GATEWAY)
+               addr->network = RDMA_NETWORK_IPv6;
+
        ret = dst_fetch_ha(dst, addr, &fl6.daddr);
 put:
        dst_release(dst);
@@ -458,7 +466,7 @@ static void resolve_cb(int status, struct sockaddr 
*src_addr,
 }
 
 int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 
*dmac,
-                              u16 *vlan_id)
+                              u16 *vlan_id, u8 *network_hdr_type)
 {
        int ret = 0;
        struct rdma_dev_addr dev_addr;
@@ -497,6 +505,9 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union 
ib_gid *dgid, u8 *dmac,
                return -ENODEV;
        if (vlan_id)
                *vlan_id = rdma_vlan_dev_vlan_id(dev);
+       if (network_hdr_type)
+               *network_hdr_type = dev_addr.network;
+
        dev_put(dev);
        return ret;
 }
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index e28a494..01986e8 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1233,8 +1233,8 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, 
__be64 remote_ca_guid,
 }
 
 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
-                                           struct ib_sa_path_rec *primary_path,
-                                           struct ib_sa_path_rec *alt_path)
+                                    struct ib_sa_path_rec *primary_path,
+                                    struct ib_sa_path_rec *alt_path)
 {
        memset(primary_path, 0, sizeof *primary_path);
        primary_path->dgid = req_msg->primary_local_gid;
@@ -1520,9 +1520,10 @@ static void cm_process_routed_req(struct cm_req_msg 
*req_msg, struct ib_wc *wc)
 
 static int cm_req_handler(struct cm_work *work)
 {
-       struct ib_cm_id *cm_id;
        struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
        struct cm_req_msg *req_msg;
+       struct ib_wc *wc;
+       struct ib_cm_id *cm_id;
        int ret;
 
        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1531,10 +1532,10 @@ static int cm_req_handler(struct cm_work *work)
        if (IS_ERR(cm_id))
                return PTR_ERR(cm_id);
 
+       wc = work->mad_recv_wc->wc;
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        cm_id_priv->id.remote_id = req_msg->local_comm_id;
-       cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
-                               work->mad_recv_wc->recv_buf.grh,
+       cm_init_av_for_response(work->port, wc, work->mad_recv_wc->recv_buf.grh,
                                &cm_id_priv->av);
        cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
                                                            id.local_id);
@@ -1558,11 +1559,13 @@ static int cm_req_handler(struct cm_work *work)
        cm_id_priv->id.service_id = req_msg->service_id;
        cm_id_priv->id.service_mask = ~cpu_to_be64(0);
 
-       cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
+       cm_process_routed_req(req_msg, wc);
        cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 
        memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
        work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
+       work->path[0].network_hdr_type = wc->network_hdr_type;
+
        ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
        if (ret) {
                ib_get_cached_gid(work->port->cm_dev->ib_device,
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d570030..285794a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1924,7 +1924,9 @@ static int cma_resolve_iboe_route(struct rdma_id_private 
*id_priv)
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
                    &route->path_rec->dgid);
 
-       route->path_rec->hop_limit = 1;
+       route->path_rec->network_hdr_type = addr->dev_addr.network;
+       if (route->path_rec->network_hdr_type != RDMA_NETWORK_IB)
+               route->path_rec->hop_limit = IPV6_DEFAULT_HOPLIMIT;
        route->path_rec->reversible = 1;
        route->path_rec->pkey = cpu_to_be16(0xffff);
        route->path_rec->mtu_selector = IB_SA_EQ;
diff --git a/drivers/infiniband/core/sa_query.c 
b/drivers/infiniband/core/sa_query.c
index c38f030..2377577 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -555,6 +555,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 
port_num,
                ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
                ah_attr->grh.hop_limit     = rec->hop_limit;
                ah_attr->grh.traffic_class = rec->traffic_class;
+               ah_attr->grh.network_hdr_type = rec->network_hdr_type;
        }
        if (force_grh) {
                memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c2b89cc..68d7d23 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -195,9 +195,14 @@ EXPORT_SYMBOL(ib_create_ah);
 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
                       struct ib_grh *grh, struct ib_ah_attr *ah_attr)
 {
-       u32 flow_class;
-       u16 gid_index;
        int ret;
+       u16 gid_index;
+       u32 flow_class;
+       u8 *network_hdr_type;
+       struct sockaddr_in  src_in;
+       struct sockaddr_in  dst_in;
+       union rdma_network_hdr *l3grh;
+       union ib_gid *sgid, *dgid, ipv4_sgid, ipv4_dgid;
        int is_eth = (rdma_port_get_link_layer(device, port_num) ==
                        IB_LINK_LAYER_ETHERNET);
 
@@ -206,18 +211,42 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 
port_num, struct ib_wc *wc,
                if (!(wc->wc_flags & IB_WC_GRH))
                        return -EPROTOTYPE;
 
+               sgid = &grh->sgid;
+               dgid = &grh->dgid;
+
                if (wc->wc_flags & IB_WC_WITH_SMAC &&
                    wc->wc_flags & IB_WC_WITH_VLAN) {
                        memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
                        ah_attr->vlan_id = wc->vlan_id;
                } else {
-                       ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
-                                       ah_attr->dmac, &ah_attr->vlan_id);
+                       ah_attr->grh.hop_limit = grh->hop_limit;
+                       if (wc->wc_flags & IB_WC_WITH_L3_TYPE &&
+                           wc->network_hdr_type == RDMA_NETWORK_IPv4) {
+                               l3grh = (union rdma_network_hdr *)grh;
+                               memcpy(&src_in.sin_addr.s_addr,
+                                      &l3grh->roce4grh.saddr, 4);
+                               memcpy(&dst_in.sin_addr.s_addr,
+                                      &l3grh->roce4grh.daddr, 4);
+                               ipv6_addr_set_v4mapped(src_in.sin_addr.s_addr,
+                                                      (struct in6_addr *)
+                                                      &ipv4_sgid);
+                               ipv6_addr_set_v4mapped(dst_in.sin_addr.s_addr,
+                                                      (struct in6_addr *)
+                                                      &ipv4_dgid);
+                               dgid = &ipv4_dgid;
+                               sgid = &ipv4_sgid;
+                               ah_attr->grh.hop_limit = l3grh->roce4grh.ttl;
+                       }
+                       network_hdr_type = &ah_attr->grh.network_hdr_type;
+                       ret = rdma_addr_find_dmac_by_grh(dgid, sgid,
+                                                        ah_attr->dmac,
+                                                        &ah_attr->vlan_id,
+                                                        network_hdr_type);
                        if (ret)
                                return ret;
                }
        } else {
-               ah_attr->vlan_id = 0xffff;
+                       ah_attr->vlan_id = 0xffff;
        }
 
        ah_attr->dlid = wc->slid;
@@ -237,7 +266,6 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 
port_num, struct ib_wc *wc,
                ah_attr->grh.sgid_index = (u8) gid_index;
                flow_class = be32_to_cpu(grh->version_tclass_flow);
                ah_attr->grh.flow_label = flow_class & 0xFFFFF;
-               ah_attr->grh.hop_limit = 0xFF;
                ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
        }
        return 0;
@@ -869,6 +897,7 @@ int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
 {
        int           ret = 0;
        union ib_gid  sgid;
+       u8 *network_hdr_type;
 
        if ((*qp_attr_mask & IB_QP_AV)  &&
            (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == 
IB_LINK_LAYER_ETHERNET)) {
@@ -881,8 +910,12 @@ int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
                        rdma_get_ll_mac((struct in6_addr *)sgid.raw, 
qp_attr->smac);
                        qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
                } else {
+                       network_hdr_type =
+                                       &qp_attr->ah_attr.grh.network_hdr_type;
                        ret = rdma_addr_find_dmac_by_grh(&sgid, 
&qp_attr->ah_attr.grh.dgid,
-                                       qp_attr->ah_attr.dmac, 
&qp_attr->vlan_id);
+                                                        qp_attr->ah_attr.dmac,
+                                                        &qp_attr->vlan_id,
+                                                        network_hdr_type);
                        if (ret)
                                goto out;
                        ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, 
NULL);
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index ce55906..242c0a9 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -71,6 +71,7 @@ struct rdma_dev_addr {
        unsigned short dev_type;
        int bound_dev_if;
        enum rdma_transport_type transport;
+       enum rdma_network_type network;
 };
 
 /**
@@ -112,7 +113,7 @@ int rdma_addr_size(struct sockaddr *addr);
 
 int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
 int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 
*smac,
-                              u16 *vlan_id);
+                              u16 *vlan_id, u8 *network_hdr_type);
 
 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
 {
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 7e071a6..3946a67 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -157,6 +157,7 @@ struct ib_sa_path_rec {
        u8           smac[ETH_ALEN];
        u8           dmac[ETH_ALEN];
        u16          vlan_id;
+       u8           network_hdr_type;
 };
 
 #define IB_SA_MCMEMBER_REC_MGID                                
IB_SA_COMP_MASK( 0)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 470a011..3f5dc91 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -49,6 +49,7 @@
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
 
 #include <linux/atomic.h>
 #include <asm/uaccess.h>
@@ -83,6 +84,12 @@ enum rdma_transport_type {
 __attribute_const__ enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type);
 
+enum rdma_network_type {
+       RDMA_NETWORK_IB,
+       RDMA_NETWORK_IPv4,
+       RDMA_NETWORK_IPv6
+};
+
 enum rdma_link_layer {
        IB_LINK_LAYER_UNSPECIFIED,
        IB_LINK_LAYER_INFINIBAND,
@@ -418,6 +425,7 @@ struct ib_global_route {
        u8              sgid_index;
        u8              hop_limit;
        u8              traffic_class;
+       u8              network_hdr_type;
 };
 
 struct ib_grh {
@@ -429,6 +437,11 @@ struct ib_grh {
        union ib_gid    dgid;
 };
 
+union rdma_network_hdr {
+       struct ib_grh ibgrh;
+       struct iphdr roce4grh;
+};
+
 enum {
        IB_MULTICAST_QPN = 0xffffff
 };
@@ -666,6 +679,7 @@ enum ib_wc_flags {
        IB_WC_IP_CSUM_OK        = (1<<3),
        IB_WC_WITH_SMAC         = (1<<4),
        IB_WC_WITH_VLAN         = (1<<5),
+       IB_WC_WITH_L3_TYPE      = (1<<6)
 };
 
 struct ib_wc {
@@ -688,6 +702,7 @@ struct ib_wc {
        u8                      port_num;       /* valid only for DR SMPs on 
switches */
        u8                      smac[ETH_ALEN];
        u16                     vlan_id;
+       u8                      network_hdr_type;
 };
 
 enum ib_cq_notify_flags {
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to