Roland, please pull from:

        git://git.openfabrics.org/~shefty/rdma-dev.git for-roland

This will pick up QoS and CM scalability changes that I would like to get
into 2.6.24 (and OFED 1.3).  All have been posted to the list before, though
the QoS patches have received more review than the CM scalability patches.

Sean Hefty (7):
      ib/ipoib: specify Traffic Class with PR queries for QoS support
      ib/sa: add new QoS fields to path record
      rdma/cm: add ability to specify type of service
      rdma/ucm: export setting service type to user space
      ib/srp: add QoS support through service ID
      ib/cm: modify interface to send MRAs in response to duplicate messages
      rdma/cm: queue IB CM MRAs to avoid unnecessary remote retries

 drivers/infiniband/core/cm.c                   |   51 +++++++----------
 drivers/infiniband/core/cma.c                  |   46 ++++++++++++---
 drivers/infiniband/core/sa_query.c             |   10 +--
 drivers/infiniband/core/ucma.c                 |   74 ++++++++++++++++++++++++-
 drivers/infiniband/ulp/ipoib/ipoib.h           |   22 +++++++
 drivers/infiniband/ulp/ipoib/ipoib_main.c      |    8 +-
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   22 -------
 drivers/infiniband/ulp/srp/ib_srp.c            |    2 
 include/rdma/ib_cm.h                           |    7 +-
 include/rdma/ib_sa.h                           |   11 +--
 include/rdma/rdma_cm.h                         |   14 ++++
 include/rdma/rdma_user_cm.h                    |   18 ++++++
 12 files changed, 205 insertions(+), 80 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4df269f..2e39236 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -2219,6 +2219,9 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
 {
        struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
+       enum ib_cm_state cm_state;
+       enum ib_cm_lap_state lap_state;
+       enum cm_msg_response msg_response;
        void *data;
        unsigned long flags;
        int ret;
@@ -2235,48 +2238,40 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch(cm_id_priv->id.state) {
        case IB_CM_REQ_RCVD:
-               ret = cm_alloc_msg(cm_id_priv, &msg);
-               if (ret)
-                       goto error1;
-
-               cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
-                             CM_MSG_RESPONSE_REQ, service_timeout,
-                             private_data, private_data_len);
-               ret = ib_post_send_mad(msg, NULL);
-               if (ret)
-                       goto error2;
-               cm_id->state = IB_CM_MRA_REQ_SENT;
+               cm_state = IB_CM_MRA_REQ_SENT;
+               lap_state = cm_id->lap_state;
+               msg_response = CM_MSG_RESPONSE_REQ;
                break;
        case IB_CM_REP_RCVD:
-               ret = cm_alloc_msg(cm_id_priv, &msg);
-               if (ret)
-                       goto error1;
-
-               cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
-                             CM_MSG_RESPONSE_REP, service_timeout,
-                             private_data, private_data_len);
-               ret = ib_post_send_mad(msg, NULL);
-               if (ret)
-                       goto error2;
-               cm_id->state = IB_CM_MRA_REP_SENT;
+               cm_state = IB_CM_MRA_REP_SENT;
+               lap_state = cm_id->lap_state;
+               msg_response = CM_MSG_RESPONSE_REP;
                break;
        case IB_CM_ESTABLISHED:
+               cm_state = cm_id->state;
+               lap_state = IB_CM_MRA_LAP_SENT;
+               msg_response = CM_MSG_RESPONSE_OTHER;
+               break;
+       default:
+               ret = -EINVAL;
+               goto error1;
+       }
+
+       if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
                ret = cm_alloc_msg(cm_id_priv, &msg);
                if (ret)
                        goto error1;
 
                cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
-                             CM_MSG_RESPONSE_OTHER, service_timeout,
+                             msg_response, service_timeout,
                              private_data, private_data_len);
                ret = ib_post_send_mad(msg, NULL);
                if (ret)
                        goto error2;
-               cm_id->lap_state = IB_CM_MRA_LAP_SENT;
-               break;
-       default:
-               ret = -EINVAL;
-               goto error1;
        }
+
+       cm_id->state = cm_state;
+       cm_id->lap_state = lap_state;
        cm_id_priv->service_timeout = service_timeout;
        cm_set_private_data(cm_id_priv, data, private_data_len);
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9ffb998..7253952 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -52,6 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 
 #define CMA_CM_RESPONSE_TIMEOUT 20
 #define CMA_MAX_CM_RETRIES 15
+#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
 
 static void cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device);
@@ -138,6 +139,7 @@ struct rdma_id_private {
        u32                     qkey;
        u32                     qp_num;
        u8                      srq;
+       u8                      tos;
 };
 
 struct cma_multicast {
@@ -1089,6 +1091,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
                event.param.ud.private_data_len =
                                IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
        } else {
+               ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
                conn_id = cma_new_conn_id(&listen_id->id, ib_event);
                cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
                                       ib_event->private_data, offset);
@@ -1474,6 +1477,15 @@ err:
 }
 EXPORT_SYMBOL(rdma_listen);
 
+void rdma_set_service_type(struct rdma_cm_id *id, int tos)
+{
+       struct rdma_id_private *id_priv;
+
+       id_priv = container_of(id, struct rdma_id_private, id);
+       id_priv->tos = (u8) tos;
+}
+EXPORT_SYMBOL(rdma_set_service_type);
+
 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
                              void *context)
 {
@@ -1498,23 +1510,37 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
                              struct cma_work *work)
 {
-       struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
+       struct rdma_addr *addr = &id_priv->id.route.addr;
        struct ib_sa_path_rec path_rec;
+       ib_sa_comp_mask comp_mask;
+       struct sockaddr_in6 *sin6;
 
        memset(&path_rec, 0, sizeof path_rec);
-       ib_addr_get_sgid(addr, &path_rec.sgid);
-       ib_addr_get_dgid(addr, &path_rec.dgid);
-       path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
+       ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
+       ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+       path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
        path_rec.numb_path = 1;
        path_rec.reversible = 1;
+       path_rec.service_id = cma_get_service_id(id_priv->id.ps, 
&addr->dst_addr);
+
+       comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
+                   IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
+                   IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
+
+       if (addr->src_addr.sa_family == AF_INET) {
+               path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
+               comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
+       } else {
+               sin6 = (struct sockaddr_in6 *) &addr->src_addr;
+               path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) 
>> 20);
+               comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+       }
 
        id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
-                               id_priv->id.port_num, &path_rec,
-                               IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
-                               IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
-                               IB_SA_PATH_REC_REVERSIBLE,
-                               timeout_ms, GFP_KERNEL,
-                               cma_query_handler, work, &id_priv->query);
+                                              id_priv->id.port_num, &path_rec,
+                                              comp_mask, timeout_ms,
+                                              GFP_KERNEL, cma_query_handler,
+                                              work, &id_priv->query);
 
        return (id_priv->query_id < 0) ? id_priv->query_id : 0;
 }
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index d271bd7..6f56bb5 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -123,14 +123,10 @@ static u32 tid;
        .field_name          = "sa_path_rec:" #field
 
 static const struct ib_field path_rec_table[] = {
-       { RESERVED,
+       { PATH_REC_FIELD(service_id),
          .offset_words = 0,
          .offset_bits  = 0,
-         .size_bits    = 32 },
-       { RESERVED,
-         .offset_words = 1,
-         .offset_bits  = 0,
-         .size_bits    = 32 },
+         .size_bits    = 64 },
        { PATH_REC_FIELD(dgid),
          .offset_words = 2,
          .offset_bits  = 0,
@@ -179,7 +175,7 @@ static const struct ib_field path_rec_table[] = {
          .offset_words = 12,
          .offset_bits  = 16,
          .size_bits    = 16 },
-       { RESERVED,
+       { PATH_REC_FIELD(qos_class),
          .offset_words = 13,
          .offset_bits  = 0,
          .size_bits    = 12 },
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 53b4c94..90d675a 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -792,6 +792,78 @@ out:
        return ret;
 }
 
+static int ucma_set_option_id(struct ucma_context *ctx, int optname,
+                             void *optval, size_t optlen)
+{
+       int ret = 0;
+
+       switch (optname) {
+       case RDMA_OPTION_ID_TOS:
+               if (optlen != sizeof(u8)) {
+                       ret = -EINVAL;
+                       break;
+               }
+               rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
+               break;
+       default:
+               ret = -ENOSYS;
+       }
+
+       return ret;
+}
+
+static int ucma_set_option_level(struct ucma_context *ctx, int level,
+                                int optname, void *optval, size_t optlen)
+{
+       int ret;
+
+       switch (level) {
+       case RDMA_OPTION_ID:
+               ret = ucma_set_option_id(ctx, optname, optval, optlen);
+               break;
+       default:
+               ret = -ENOSYS;
+       }
+
+       return ret;
+}
+
+static ssize_t ucma_set_option(struct ucma_file *file, const char __user 
*inbuf,
+                              int in_len, int out_len)
+{
+       struct rdma_ucm_set_option cmd;
+       struct ucma_context *ctx;
+       void *optval;
+       int ret;
+
+       if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+               return -EFAULT;
+
+       ctx = ucma_get_ctx(file, cmd.id);
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
+
+       optval = kmalloc(cmd.optlen, GFP_KERNEL);
+       if (!optval) {
+               ret = -ENOMEM;
+               goto out1;
+       }
+
+       if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
+                          cmd.optlen)) {
+               ret = -EFAULT;
+               goto out2;
+       }
+
+       ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
+                                   cmd.optlen);
+out2:
+       kfree(optval);
+out1:
+       ucma_put_ctx(ctx);
+       return ret;
+}
+
 static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
 {
@@ -936,7 +1008,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
        [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
        [RDMA_USER_CM_CMD_GET_EVENT]    = ucma_get_event,
        [RDMA_USER_CM_CMD_GET_OPTION]   = NULL,
-       [RDMA_USER_CM_CMD_SET_OPTION]   = NULL,
+       [RDMA_USER_CM_CMD_SET_OPTION]   = ucma_set_option,
        [RDMA_USER_CM_CMD_NOTIFY]       = ucma_notify,
        [RDMA_USER_CM_CMD_JOIN_MCAST]   = ucma_join_multicast,
        [RDMA_USER_CM_CMD_LEAVE_MCAST]  = ucma_leave_multicast,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 285c143..fc16bce 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -113,7 +113,27 @@ struct ipoib_pseudoheader {
        u8  hwaddr[INFINIBAND_ALEN];
 };
 
-struct ipoib_mcast;
+/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
+struct ipoib_mcast {
+       struct ib_sa_mcmember_rec mcmember;
+       struct ib_sa_multicast   *mc;
+       struct ipoib_ah          *ah;
+
+       struct rb_node    rb_node;
+       struct list_head  list;
+
+       unsigned long created;
+       unsigned long backoff;
+
+       unsigned long flags;
+       unsigned char logcount;
+
+       struct list_head  neigh_list;
+
+       struct sk_buff_head pkt_queue;
+
+       struct net_device *dev;
+};
 
 struct ipoib_rx_buf {
        struct sk_buff *skb;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 894b1dc..841e068 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -468,9 +468,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
        INIT_LIST_HEAD(&path->neigh_list);
 
        memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
-       path->pathrec.sgid      = priv->local_gid;
-       path->pathrec.pkey      = cpu_to_be16(priv->pkey);
-       path->pathrec.numb_path = 1;
+       path->pathrec.sgid          = priv->local_gid;
+       path->pathrec.pkey          = cpu_to_be16(priv->pkey);
+       path->pathrec.numb_path     = 1;
+       path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
 
        return path;
 }
@@ -491,6 +492,7 @@ static int path_rec_start(struct net_device *dev,
                                   IB_SA_PATH_REC_DGID          |
                                   IB_SA_PATH_REC_SGID          |
                                   IB_SA_PATH_REC_NUMB_PATH     |
+                                  IB_SA_PATH_REC_TRAFFIC_CLASS |
                                   IB_SA_PATH_REC_PKEY,
                                   1000, GFP_ATOMIC,
                                   path_rec_completion,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index aae3670..94a5709 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -57,28 +57,6 @@ MODULE_PARM_DESC(mcast_debug_level,
 
 static DEFINE_MUTEX(mcast_mutex);
 
-/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
-struct ipoib_mcast {
-       struct ib_sa_mcmember_rec mcmember;
-       struct ib_sa_multicast   *mc;
-       struct ipoib_ah          *ah;
-
-       struct rb_node    rb_node;
-       struct list_head  list;
-
-       unsigned long created;
-       unsigned long backoff;
-
-       unsigned long flags;
-       unsigned char logcount;
-
-       struct list_head  neigh_list;
-
-       struct sk_buff_head pkt_queue;
-
-       struct net_device *dev;
-};
-
 struct ipoib_mcast_iter {
        struct net_device *dev;
        union ib_gid       mgid;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index f6a0514..9ccc638 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -285,6 +285,7 @@ static int srp_lookup_path(struct srp_target_port *target)
                                                   target->srp_host->dev->dev,
                                                   target->srp_host->port,
                                                   &target->path,
+                                                  IB_SA_PATH_REC_SERVICE_ID    
|
                                                   IB_SA_PATH_REC_DGID          
|
                                                   IB_SA_PATH_REC_SGID          
|
                                                   IB_SA_PATH_REC_NUMB_PATH     
|
@@ -1692,6 +1693,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
                                goto out;
                        }
                        target->service_id = cpu_to_be64(simple_strtoull(p, 
NULL, 16));
+                       target->path.service_id = target->service_id;
                        kfree(p);
                        break;
 
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 12243e8..a627c86 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -477,12 +477,15 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
                   const void *private_data,
                   u8 private_data_len);
 
+#define IB_CM_MRA_FLAG_DELAY 0x80  /* Send MRA only after a duplicate msg */
+
 /**
  * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
  *   message.
  * @cm_id: Connection identifier associated with the connection message.
- * @service_timeout: The maximum time required for the sender to reply to
- *   to the connection message.
+ * @service_timeout: The lower 5 bits specify the maximum time required for
+ *   the sender to reply to the connection message.  The upper 3 bits
+ *   specify additional control flags.
  * @private_data: Optional user-defined private data sent with the
  *   message receipt acknowledgement.
  * @private_data_len: Size of the private data buffer, in bytes.
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 5e26b2f..942692b 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -109,8 +109,8 @@ enum ib_sa_selector {
  * Reserved rows are indicated with comments to help maintainability.
  */
 
-/* reserved:                                                            0 */
-/* reserved:                                                            1 */
+#define IB_SA_PATH_REC_SERVICE_ID                     (IB_SA_COMP_MASK( 0) |\
+                                                       IB_SA_COMP_MASK( 1))
 #define IB_SA_PATH_REC_DGID                            IB_SA_COMP_MASK( 2)
 #define IB_SA_PATH_REC_SGID                            IB_SA_COMP_MASK( 3)
 #define IB_SA_PATH_REC_DLID                            IB_SA_COMP_MASK( 4)
@@ -123,7 +123,7 @@ enum ib_sa_selector {
 #define IB_SA_PATH_REC_REVERSIBLE                      IB_SA_COMP_MASK(11)
 #define IB_SA_PATH_REC_NUMB_PATH                       IB_SA_COMP_MASK(12)
 #define IB_SA_PATH_REC_PKEY                            IB_SA_COMP_MASK(13)
-/* reserved:                                                           14 */
+#define IB_SA_PATH_REC_QOS_CLASS                       IB_SA_COMP_MASK(14)
 #define IB_SA_PATH_REC_SL                              IB_SA_COMP_MASK(15)
 #define IB_SA_PATH_REC_MTU_SELECTOR                    IB_SA_COMP_MASK(16)
 #define IB_SA_PATH_REC_MTU                             IB_SA_COMP_MASK(17)
@@ -134,8 +134,7 @@ enum ib_sa_selector {
 #define IB_SA_PATH_REC_PREFERENCE                      IB_SA_COMP_MASK(22)
 
 struct ib_sa_path_rec {
-       /* reserved */
-       /* reserved */
+       __be64       service_id;
        union ib_gid dgid;
        union ib_gid sgid;
        __be16       dlid;
@@ -148,7 +147,7 @@ struct ib_sa_path_rec {
        int          reversible;
        u8           numb_path;
        __be16       pkey;
-       /* reserved */
+       __be16       qos_class;
        u8           sl;
        u8           mtu_selector;
        u8           mtu;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 2d6a770..010f876 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -314,4 +314,18 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
  */
 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
 
+/**
+ * rdma_set_service_type - Set the type of service associated with a
+ *   connection identifier.
+ * @id: Communication identifier to associate with the service type.
+ * @tos: Type of service.
+ *
+ * The type of service is interpreted as a differentiated service
+ * field (RFC 2474).  The service type should be specified before
+ * performing route resolution, as existing communication on the
+ * connection identifier may be unaffected.  The type of service
+ * requested may not be supported by the network to all destinations.
+ */
+void rdma_set_service_type(struct rdma_cm_id *id, int tos);
+
 #endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index f632b0c..9749c1b 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -212,4 +212,22 @@ struct rdma_ucm_event_resp {
        } param;
 };
 
+/* Option levels */
+enum {
+       RDMA_OPTION_ID          = 0
+};
+
+/* Option details */
+enum {
+       RDMA_OPTION_ID_TOS      = 0
+};
+
+struct rdma_ucm_set_option {
+       __u64 optval;
+       __u32 id;
+       __u32 level;
+       __u32 optname;
+       __u32 optlen;
+};
+
 #endif /* RDMA_USER_CM_H */

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to