Hi, folks

I've put together a draft (a very rough one...) based on my understanding
of Sean's proposal.

The implementation allows a device to set up the management flags during
ib_query_port() (amso1100 as an example), so that we can later use those
flags to check management capabilities.

For a new capability/protocol, such as OPA, the device could set a new flag
IB_MGMT_PROTO_OPA in its query_port() callback, and a helper like
rdma_mgmt_cap_opa() could then be used to branch in the management code.

What do you think about this?

Regards,
Michael Wang



diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d570030..ad1685e 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -375,8 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                      listen_id_priv->id.port_num) == dev_ll) {
         cma_dev = listen_id_priv->cma_dev;
         port = listen_id_priv->id.port_num;
-        if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
-            rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+        if (rdma_mgmt_cap_iboe(cma_dev->device, port))
             ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
                          &found_port, NULL);
         else
@@ -395,8 +394,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                 listen_id_priv->id.port_num == port)
                 continue;
             if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
-                if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
-                    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+                if (rdma_mgmt_cap_iboe(cma_dev->device, port))
ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
                 else
ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 74c30f4..0ae6b04 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2938,7 +2938,7 @@ static int ib_mad_port_open(struct ib_device *device,
     init_mad_qp(port_priv, &port_priv->qp_info[1]);

     cq_size = mad_sendq_size + mad_recvq_size;
- has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND;
+    has_smi = rdma_mgmt_cap_smi(device, port_num);
     if (has_smi)
         cq_size *= 2;

@@ -3057,7 +3057,7 @@ static void ib_mad_init_device(struct ib_device *device)
 {
     int start, end, i;

-    if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+    if (!rdma_mgmt_cap_ib(device))
         return;

     if (device->node_type == RDMA_NODE_IB_SWITCH) {
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index f93eb8d..5ecf9c8 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -146,6 +146,26 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
 }
 EXPORT_SYMBOL(rdma_port_get_link_layer);

+/*
+ * rdma_port_default_mgmt_flags - derive a port's management flags from
+ * its transport and link layer, for devices that do not fill in
+ * ib_port_attr.mgmt_flags on their own.
+ *
+ * IBoE (RoCE) is IB transport over an *Ethernet* link layer, so
+ * IB_MGMT_PROTO_IBOE belongs on the Ethernet branch — this mirrors the
+ * old "transport == IB && link_layer == ETHERNET" checks being removed
+ * from cma.c.  SMI is only present on a native IB link layer.
+ */
+int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num)
+{
+    int mgmt_flags = 0;
+    enum rdma_transport_type tp =
+            rdma_node_get_transport(device->node_type);
+    enum rdma_link_layer ll =
+            rdma_port_get_link_layer(device, port_num);
+
+    if (tp == RDMA_TRANSPORT_IB) {
+        mgmt_flags |= IB_MGMT_PROTO_IB;
+        if (ll == IB_LINK_LAYER_INFINIBAND)
+            mgmt_flags |= IB_MGMT_PROTO_SMI;
+        if (ll == IB_LINK_LAYER_ETHERNET)
+            mgmt_flags |= IB_MGMT_PROTO_IBOE;
+    }
+
+    return mgmt_flags;
+}
+EXPORT_SYMBOL(rdma_port_default_mgmt_flags);
+
 /* Protection domains */

 struct ib_pd *ib_alloc_pd(struct ib_device *device)
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index bdf3507..04d005e 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -96,6 +96,9 @@ static int c2_query_port(struct ib_device *ibdev,
     props->active_width = 1;
     props->active_speed = IB_SPEED_SDR;

+    /* Makeup flags here, by default or on your own */
+    props->mgmt_flags = rdma_port_default_mgmt_flags(ibdev, port);
+
     return 0;
 }

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 65994a1..d19c7c9 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -90,6 +90,13 @@ enum rdma_link_layer {
     IB_LINK_LAYER_ETHERNET,
 };

+/*
+ * Per-port management capability flags, reported in
+ * ib_port_attr.mgmt_flags.  These are OR-ed together into a bitmask and
+ * tested with '&', so each value must be a distinct power of two — with
+ * plain sequential enumerators IB_MGMT_PROTO_IB would be 0 (every
+ * "& IB_MGMT_PROTO_IB" test false) and SMI|IBOE would alias value 3.
+ */
+enum rdma_mgmt_flag {
+    IB_MGMT_PROTO_IB   = 1 << 0,    /* IB MAD support */
+    IB_MGMT_PROTO_SMI  = 1 << 1,    /* SMI/SMP support (IB link layer) */
+    IB_MGMT_PROTO_IBOE = 1 << 2,    /* RoCE/IBoE (Ethernet link layer) */
+    /* More here, e.g. IB_MGMT_PROTO_OPA = 1 << 3 */
+};
+
 enum ib_device_cap_flags {
     IB_DEVICE_RESIZE_MAX_WR        = 1,
     IB_DEVICE_BAD_PKEY_CNTR        = (1<<1),
@@ -352,6 +359,7 @@ struct ib_port_attr {
     enum ib_mtu        active_mtu;
     int            gid_tbl_len;
     u32            port_cap_flags;
+    u32            mgmt_flags;
     u32            max_msg_sz;
     u32            bad_pkey_cntr;
     u32            qkey_viol_cntr;
@@ -1743,6 +1751,32 @@ int ib_query_port(struct ib_device *device,
 enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
                            u8 port_num);

+int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num);
+
+/*
+ * rdma_mgmt_cap - return the mgmt_flags bitmask for one port, or 0 if
+ * the port attributes cannot be queried (a failed query must not leak
+ * stack garbage as capabilities).
+ *
+ * NOTE(review): this calls ib_query_port() — which may reach into the
+ * driver — on every capability test; caching the flags at device
+ * registration may be preferable for hot paths.
+ */
+static inline int rdma_mgmt_cap(struct ib_device *device, u8 port_num)
+{
+    struct ib_port_attr port_attr;
+
+    memset(&port_attr, 0, sizeof port_attr);
+    if (ib_query_port(device, port_num, &port_attr))
+        return 0;    /* query failed: report no mgmt capabilities */
+    return port_attr.mgmt_flags;
+}
+
+/* Device-wide IB MAD support; switches use port 0, HCA ports start at 1. */
+static inline int rdma_mgmt_cap_ib(struct ib_device *device)
+{
+    u8 port_num = device->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1;
+
+    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IB;
+}
+
+/* Does this port support the Subnet Management Interface (SMPs)? */
+static inline int rdma_mgmt_cap_smi(struct ib_device *device, u8 port_num)
+{
+    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_SMI;
+}
+
+/* Is this port IBoE (RoCE): IB transport over an Ethernet link layer? */
+static inline int rdma_mgmt_cap_iboe(struct ib_device *device, u8 port_num)
+{
+    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IBOE;
+}
+
 int ib_query_gid(struct ib_device *device,
          u8 port_num, int index, union ib_gid *gid);



On 03/18/2015 12:36 AM, Hefty, Sean wrote:
But it makes sense to me to use management specific
fields/attributes/flags for the *management* pieces, rather than using the
link and/or transport layer protocols as a proxy.  Management related code
should really branch based on that.
As a proposal, we could add a new field to the kernel port attribute structure. 
 The field would be a bitmask of management capabilities/protocols:

IB_MGMT_PROTO_SM - supports IB SMPs
IB_MGMT_PROTO_SA - supports IB SA MADs
IB_MGMT_PROTO_GS - supports IB GSI MADs (e.g. CM, PM, ...)
IB_MGMT_PROTO_OPA_SM - supports OPA SMPs (or whatever they are called)
IB_MGMT_PROTO_OPA_GS - supports OPA GS MADs (or whatever is supported)

If the *GS flags are not sufficient to distinguish between MADs supported over 
IB and RoCE, it can be further divided (i.e. CM, PM, BM, DM, etc.).

This would provide a direct mapping of which management protocols are supported 
for a given port, rather than it being inferred by the link/transport fields, 
which should really be independent.  It would also allow for simple checks by 
the core layer.

If we want the code to be more generic, additional field(s) could be added, 
such as mad_size, so that any size of management datagram is supported.  This 
would be used instead of inferring the size based on the supported protocol.

- Sean
N�����r��y���b�X��ǧv�^�)޺{.n�+����{��ٚ�{ay�ʇڙ�,j��f���h���z��w���

���j:+v���w�j�m��������zZ+�����ݢj"��!tml=

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to