On 9/7/07, Roland Dreier <[EMAIL PROTECTED]> wrote: > Here is a long-overdue patch to enable userspace to control the P_Key > index used for userspace MADs. I used the approach we discussed when > this first came up, namely adding an ioctl to enable the new > interface so that existing binaries don't break. > > I haven't had a chance to make all the userspace library changes to > test the new interface and I likely won't until I return home (I > should be done traveling for a few months after this week). I have > tested existing code against a kernel with this patch applied and it > seems to be OK, and I wanted to at least get this out for review as > soon as I had it. > > Please review/test. I would like to get this into 2.6.24 if possible > since we've known for so long that we needed it.
Thanks for doing this :-) One nit below in the doc. I spent some time testing it today in old mode and, although my environment is limited, I did have trouble with an RMPP test, as follows. Can someone try the following with OpenSM running: first osmtest -f c and then osmtest -f a, all on the same node with the new user_mad module? That seems to hang rather than complete for me. I didn't have time to track this down any further. -- Hal > Thanks, > Roland > > > diff --git a/Documentation/infiniband/user_mad.txt > b/Documentation/infiniband/user_mad.txt > index 8ec54b9..a3450aa 100644 > --- a/Documentation/infiniband/user_mad.txt > +++ b/Documentation/infiniband/user_mad.txt > @@ -99,6 +99,20 @@ Transaction IDs > request/response pairs. The upper 32 bits are reserved for use by > the kernel and will be overwritten before a MAD is sent. > > +P_Key Index Handling > + > + The old ib_umad interface did not allow setting the P_Key index for > + MADs that are sent and did not provide a way for obtaining the P_Key > + index of received MADs. A new layout for struct ib_user_mad_hdr > + with a pkey_index member has been defined; however, to preserve > + binary compatibility with older applications, this new layout will > + not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called > + before a file description is used for anything else. Nit: Should this be "file descriptor"? > + > + In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented > + to 6, the new layout of struct ib_user_mad_hdr will be used by > + default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed. 
> + > Setting IsSM Capability Bit > > To set the IsSM capability bit for a port, simply open the > diff --git a/drivers/infiniband/core/user_mad.c > b/drivers/infiniband/core/user_mad.c > index d97ded2..3a0e579 100644 > --- a/drivers/infiniband/core/user_mad.c > +++ b/drivers/infiniband/core/user_mad.c > @@ -118,6 +118,8 @@ struct ib_umad_file { > wait_queue_head_t recv_wait; > struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; > int agents_dead; > + u8 use_pkey_index; > + u8 already_used; > }; > > struct ib_umad_packet { > @@ -147,6 +149,12 @@ static void ib_umad_release_dev(struct kref *ref) > kfree(dev); > } > > +static int hdr_size(struct ib_umad_file *file) > +{ > + return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : > + sizeof (struct ib_user_mad_hdr_old); > +} > + > /* caller must hold port->mutex at least for reading */ > static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) > { > @@ -221,13 +229,13 @@ static void recv_handler(struct ib_mad_agent *agent, > packet->length = mad_recv_wc->mad_len; > packet->recv_wc = mad_recv_wc; > > - packet->mad.hdr.status = 0; > - packet->mad.hdr.length = sizeof (struct ib_user_mad) + > - mad_recv_wc->mad_len; > - packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); > - packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); > - packet->mad.hdr.sl = mad_recv_wc->wc->sl; > - packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; > + packet->mad.hdr.status = 0; > + packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; > + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); > + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); > + packet->mad.hdr.sl = mad_recv_wc->wc->sl; > + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; > + packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; > packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & > IB_WC_GRH); > if (packet->mad.hdr.grh_present) { > struct ib_ah_attr ah_attr; > @@ -253,8 +261,8 
@@ err1: > ib_free_recv_mad(mad_recv_wc); > } > > -static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, > - size_t count) > +static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, > + struct ib_umad_packet *packet, size_t count) > { > struct ib_mad_recv_buf *recv_buf; > int left, seg_payload, offset, max_seg_payload; > @@ -262,15 +270,15 @@ static ssize_t copy_recv_mad(char __user *buf, struct > ib_umad_packet *packet, > /* We need enough room to copy the first (or only) MAD segment. */ > recv_buf = &packet->recv_wc->recv_buf; > if ((packet->length <= sizeof (*recv_buf->mad) && > - count < sizeof (packet->mad) + packet->length) || > + count < hdr_size(file) + packet->length) || > (packet->length > sizeof (*recv_buf->mad) && > - count < sizeof (packet->mad) + sizeof (*recv_buf->mad))) > + count < hdr_size(file) + sizeof (*recv_buf->mad))) > return -EINVAL; > > - if (copy_to_user(buf, &packet->mad, sizeof (packet->mad))) > + if (copy_to_user(buf, &packet->mad, hdr_size(file))) > return -EFAULT; > > - buf += sizeof (packet->mad); > + buf += hdr_size(file); > seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); > if (copy_to_user(buf, recv_buf->mad, seg_payload)) > return -EFAULT; > @@ -280,7 +288,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct > ib_umad_packet *packet, > * Multipacket RMPP MAD message. Copy remainder of message. > * Note that last segment may have a shorter payload. > */ > - if (count < sizeof (packet->mad) + packet->length) { > + if (count < hdr_size(file) + packet->length) { > /* > * The buffer is too small, return the first RMPP > segment, > * which includes the RMPP message length. 
> @@ -300,18 +308,23 @@ static ssize_t copy_recv_mad(char __user *buf, struct > ib_umad_packet *packet, > return -EFAULT; > } > } > - return sizeof (packet->mad) + packet->length; > + return hdr_size(file) + packet->length; > } > > -static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet, > - size_t count) > +static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, > + struct ib_umad_packet *packet, size_t count) > { > - ssize_t size = sizeof (packet->mad) + packet->length; > + ssize_t size = hdr_size(file) + packet->length; > > if (count < size) > return -EINVAL; > > - if (copy_to_user(buf, &packet->mad, size)) > + if (copy_to_user(buf, &packet->mad, hdr_size(file))) > + return -EFAULT; > + > + buf += hdr_size(file); > + > + if (copy_to_user(buf, packet->mad.data, packet->length)) > return -EFAULT; > > return size; > @@ -324,7 +337,7 @@ static ssize_t ib_umad_read(struct file *filp, char > __user *buf, > struct ib_umad_packet *packet; > ssize_t ret; > > - if (count < sizeof (struct ib_user_mad)) > + if (count < hdr_size(file)) > return -EINVAL; > > spin_lock_irq(&file->recv_lock); > @@ -348,9 +361,9 @@ static ssize_t ib_umad_read(struct file *filp, char > __user *buf, > spin_unlock_irq(&file->recv_lock); > > if (packet->recv_wc) > - ret = copy_recv_mad(buf, packet, count); > + ret = copy_recv_mad(file, buf, packet, count); > else > - ret = copy_send_mad(buf, packet, count); > + ret = copy_send_mad(file, buf, packet, count); > > if (ret < 0) { > /* Requeue packet */ > @@ -442,15 +455,14 @@ static ssize_t ib_umad_write(struct file *filp, const > char __user *buf, > __be64 *tid; > int ret, data_len, hdr_len, copy_offset, rmpp_active; > > - if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) > + if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) > return -EINVAL; > > packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); > if (!packet) > return -ENOMEM; > > - if (copy_from_user(&packet->mad, buf, > - sizeof (struct 
ib_user_mad) + IB_MGMT_RMPP_HDR)) { > + if (copy_from_user(&packet->mad, buf, hdr_size(file))) { > ret = -EFAULT; > goto err; > } > @@ -461,6 +473,13 @@ static ssize_t ib_umad_write(struct file *filp, const > char __user *buf, > goto err; > } > > + buf += hdr_size(file); > + > + if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) { > + ret = -EFAULT; > + goto err; > + } > + > down_read(&file->port->mutex); > > agent = __get_agent(file, packet->mad.hdr.id); > @@ -500,11 +519,11 @@ static ssize_t ib_umad_write(struct file *filp, const > char __user *buf, > IB_MGMT_RMPP_FLAG_ACTIVE; > } > > - data_len = count - sizeof (struct ib_user_mad) - hdr_len; > + data_len = count - hdr_size(file) - hdr_len; > packet->msg = ib_create_send_mad(agent, > be32_to_cpu(packet->mad.hdr.qpn), > - 0, rmpp_active, hdr_len, > - data_len, GFP_KERNEL); > + packet->mad.hdr.pkey_index, > rmpp_active, > + hdr_len, data_len, GFP_KERNEL); > if (IS_ERR(packet->msg)) { > ret = PTR_ERR(packet->msg); > goto err_ah; > @@ -517,7 +536,6 @@ static ssize_t ib_umad_write(struct file *filp, const > char __user *buf, > > /* Copy MAD header. Any RMPP header is already in place. 
*/ > memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); > - buf += sizeof (struct ib_user_mad); > > if (!rmpp_active) { > if (copy_from_user(packet->msg->mad + copy_offset, > @@ -646,6 +664,7 @@ found: > goto out; > } > > + file->already_used = 1; > file->agent[agent_id] = agent; > ret = 0; > > @@ -682,6 +701,20 @@ out: > return ret; > } > > +static long ib_umad_enable_pkey(struct ib_umad_file *file) > +{ > + int ret = 0; > + > + down_write(&file->port->mutex); > + if (file->already_used) > + ret = -EINVAL; > + else > + file->use_pkey_index = 1; > + up_write(&file->port->mutex); > + > + return ret; > +} > + > static long ib_umad_ioctl(struct file *filp, unsigned int cmd, > unsigned long arg) > { > @@ -690,6 +723,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int > cmd, > return ib_umad_reg_agent(filp->private_data, arg); > case IB_USER_MAD_UNREGISTER_AGENT: > return ib_umad_unreg_agent(filp->private_data, arg); > + case IB_USER_MAD_ENABLE_PKEY: > + return ib_umad_enable_pkey(filp->private_data); > default: > return -ENOIOCTLCMD; > } > diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h > index d66b15e..2a32043 100644 > --- a/include/rdma/ib_user_mad.h > +++ b/include/rdma/ib_user_mad.h > @@ -52,7 +52,50 @@ > */ > > /** > + * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index > + * @id - ID of agent MAD received with/to be sent with > + * @status - 0 on successful receive, ETIMEDOUT if no response > + * received (transaction ID in data[] will be set to TID of original > + * request) (ignored on send) > + * @timeout_ms - Milliseconds to wait for response (unset on receive) > + * @retries - Number of automatic retries to attempt > + * @qpn - Remote QP number received from/to be sent to > + * @qkey - Remote Q_Key to be sent with (unset on receive) > + * @lid - Remote lid received from/to be sent to > + * @sl - Service level received with/to be sent with > + * @path_bits - Local path bits received with/to be 
sent with > + * @grh_present - If set, GRH was received/should be sent > + * @gid_index - Local GID index to send with (unset on receive) > + * @hop_limit - Hop limit in GRH > + * @traffic_class - Traffic class in GRH > + * @gid - Remote GID in GRH > + * @flow_label - Flow label in GRH > + */ > +struct ib_user_mad_hdr_old { > + __u32 id; > + __u32 status; > + __u32 timeout_ms; > + __u32 retries; > + __u32 length; > + __be32 qpn; > + __be32 qkey; > + __be16 lid; > + __u8 sl; > + __u8 path_bits; > + __u8 grh_present; > + __u8 gid_index; > + __u8 hop_limit; > + __u8 traffic_class; > + __u8 gid[16]; > + __be32 flow_label; > +}; > + > +/** > * ib_user_mad_hdr - MAD packet header > + * This layout allows specifying/receiving the P_Key index. To use > + * this capability, an application must call the > + * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before > + * any other actions with the file handle. > * @id - ID of agent MAD received with/to be sent with > * @status - 0 on successful receive, ETIMEDOUT if no response > * received (transaction ID in data[] will be set to TID of original > @@ -70,6 +113,7 @@ > * @traffic_class - Traffic class in GRH > * @gid - Remote GID in GRH > * @flow_label - Flow label in GRH > + * @pkey_index - P_Key index > */ > struct ib_user_mad_hdr { > __u32 id; > @@ -88,6 +132,8 @@ struct ib_user_mad_hdr { > __u8 traffic_class; > __u8 gid[16]; > __be32 flow_label; > + __u16 pkey_index; > + __u8 reserved[6]; > }; > > /** > @@ -134,4 +180,6 @@ struct ib_user_mad_reg_req { > > #define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) > > +#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) > + > #endif /* IB_USER_MAD_H */ > _______________________________________________ > general mailing list > general@lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > _______________________________________________ 
general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general