patch 3 of 3

---

Large RMPP support, send side: split a multipacket MAD buffer into a list of
segments (multipacket_list) and send these using a gather list of size 2.

Signed-off-by: Jack Morgenstein <[EMAIL PROTECTED]>
Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

Index: last_stable/drivers/infiniband/core/mad_rmpp.c
===================================================================
--- last_stable.orig/drivers/infiniband/core/mad_rmpp.c
+++ last_stable/drivers/infiniband/core/mad_rmpp.c
@@ -570,16 +532,23 @@ start_rmpp(struct ib_mad_agent_private *
        return mad_recv_wc;
 }
 
-static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr)
+static inline void *get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr)
 {
-       return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset +
-              (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) *
-              (mad_send_wr->seg_num - 1);
+       struct ib_mad_multipacket_seg *seg;
+       int i = 2;
+
+       list_for_each_entry(seg, &mad_send_wr->multipacket_list, list) {
+               if (i == mad_send_wr->seg_num)
+                       return seg->data;
+               i++;
+       }
+       return NULL;
 }
 
-static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
+int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
 {
        struct ib_rmpp_mad *rmpp_mad;
+       void *next_data;
        int timeout;
        u32 paylen;
 
@@ -592,14 +561,14 @@ static int send_next_seg(struct ib_mad_s
                paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA -
                         mad_send_wr->pad;
                rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
-               mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad);
        } else {
-               mad_send_wr->send_wr.num_sge = 2;
-               mad_send_wr->sg_list[0].length = mad_send_wr->data_offset;
-               mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr);
-               mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) -
-                                                mad_send_wr->data_offset;
-               mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey;
+               next_data = get_seg_addr(mad_send_wr);
+               if (!next_data) {
+                       printk(KERN_ERR PFX "send_next_seg: "
+                              "could not find next segment\n");
+                       return -EINVAL;
+               }
+               mad_send_wr->send_buf.mad_payload = next_data;
                rmpp_mad->rmpp_hdr.paylen_newwin = 0;
        }
 
@@ -838,7 +807,7 @@ out:
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
        struct ib_rmpp_mad *rmpp_mad;
-       int i, total_len, ret;
+       int ret;
 
        rmpp_mad = mad_send_wr->send_buf.mad;
        if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
@@ -848,20 +817,16 @@ int ib_send_rmpp_mad(struct ib_mad_send_
        if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
                return IB_RMPP_RESULT_INTERNAL;
 
-       if (mad_send_wr->send_wr.num_sge > 1)
-               return -EINVAL;         /* TODO: support num_sge > 1 */
+       if (mad_send_wr->send_wr.num_sge != 2)
+               return -EINVAL;
 
        mad_send_wr->seg_num = 1;
        mad_send_wr->newwin = 1;
        mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class);
 
-       total_len = 0;
-       for (i = 0; i < mad_send_wr->send_wr.num_sge; i++)
-               total_len += mad_send_wr->send_wr.sg_list[i].length;
-
-        mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
+       mad_send_wr->total_seg = (mad_send_wr->total_length - mad_send_wr->data_offset) /
                        (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
-       mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
+       mad_send_wr->pad = mad_send_wr->total_length - IB_MGMT_RMPP_HDR -
                           be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
 
        /* We need to wait for the final ACK even if there isn't a response */
Index: last_stable/drivers/infiniband/core/mad.c
===================================================================
--- last_stable.orig/drivers/infiniband/core/mad.c
+++ last_stable/drivers/infiniband/core/mad.c
@@ -779,6 +779,17 @@ static int get_buf_length(int hdr_len, i
        return hdr_len + data_len + pad;
 }
 
+static void free_send_multipacket_list(struct ib_mad_send_wr_private *
+                                      mad_send_wr)
+{
+       struct ib_mad_multipacket_seg *s, *t;
+
+       list_for_each_entry_safe(s, t, &mad_send_wr->multipacket_list, list) {
+               list_del(&s->list);
+               kfree(s);
+       }
+}
+
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
                                            u32 remote_qpn, u16 pkey_index,
                                            int rmpp_active,
@@ -787,39 +798,38 @@ struct ib_mad_send_buf * ib_create_send_
 {
        struct ib_mad_agent_private *mad_agent_priv;
        struct ib_mad_send_wr_private *mad_send_wr;
-       int length, buf_size;
+       int length, message_size, seg_size;
        void *buf;
 
        mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
                                      agent);
-       buf_size = get_buf_length(hdr_len, data_len);
+       message_size = get_buf_length(hdr_len, data_len);
 
        if ((!mad_agent->rmpp_version &&
-            (rmpp_active || buf_size > sizeof(struct ib_mad))) ||
-           (!rmpp_active && buf_size > sizeof(struct ib_mad)))
+            (rmpp_active || message_size > sizeof(struct ib_mad))) ||
+           (!rmpp_active && message_size > sizeof(struct ib_mad)))
                return ERR_PTR(-EINVAL);
 
-       length = sizeof *mad_send_wr + buf_size;
-       if (length >= PAGE_SIZE)
-               buf = (void *)__get_free_pages(gfp_mask, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
-       else
-               buf = kmalloc(length, gfp_mask);
+       length = sizeof *mad_send_wr + message_size;
+       buf = kzalloc(sizeof *mad_send_wr + sizeof(struct ib_mad), gfp_mask);
 
        if (!buf)
                return ERR_PTR(-ENOMEM);
 
-       memset(buf, 0, length);
-
-       mad_send_wr = buf + buf_size;
+       mad_send_wr = buf + sizeof(struct ib_mad);
+       INIT_LIST_HEAD(&mad_send_wr->multipacket_list);
        mad_send_wr->send_buf.mad = buf;
+       mad_send_wr->send_buf.mad_payload = buf + hdr_len;
 
        mad_send_wr->mad_agent_priv = mad_agent_priv;
-       mad_send_wr->sg_list[0].length = buf_size;
+       mad_send_wr->sg_list[0].length = hdr_len;
        mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
+       mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
+       mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
 
        mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
        mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
-       mad_send_wr->send_wr.num_sge = 1;
+       mad_send_wr->send_wr.num_sge = 2;
        mad_send_wr->send_wr.opcode = IB_WR_SEND;
        mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
        mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
@@ -827,6 +837,7 @@ struct ib_mad_send_buf * ib_create_send_
        mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
 
        if (rmpp_active) {
+               struct ib_mad_multipacket_seg *seg;
                struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
                rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
                                                   IB_MGMT_RMPP_HDR + data_len);
@@ -834,6 +845,27 @@ struct ib_mad_send_buf * ib_create_send_
                rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
                ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
                                  IB_MGMT_RMPP_FLAG_ACTIVE);
+               mad_send_wr->total_length = message_size;
+               /* allocate RMPP buffers */
+               message_size -= sizeof(struct ib_mad);
+               seg_size = sizeof(struct ib_mad) - hdr_len;
+               while (message_size > 0) {
+                       seg = kmalloc(sizeof(struct ib_mad_multipacket_seg) +
+                                     seg_size, gfp_mask);
+                       if (!seg) {
+                               printk(KERN_ERR "ib_create_send_mad: RMPP mem "
+                                      "alloc failed for len %zd, gfp %#x\n",
+                                      sizeof(struct ib_mad_multipacket_seg) +
+                                      seg_size, gfp_mask);
+                               free_send_multipacket_list(mad_send_wr);
+                               kfree(buf);
+                               return ERR_PTR(-ENOMEM);
+                       }
+                       seg->size = seg_size;
+                       list_add_tail(&seg->list,
+                                     &mad_send_wr->multipacket_list);
+                       message_size -= seg_size;
+               }
        }
 
        mad_send_wr->send_buf.mad_agent = mad_agent;
@@ -842,23 +874,36 @@ struct ib_mad_send_buf * ib_create_send_
 }
 EXPORT_SYMBOL(ib_create_send_mad);
 
+struct ib_mad_multipacket_seg *ib_get_multipacket_seg(struct ib_mad_send_buf *
+                                                     send_buf, int seg_num)
+{
+       struct ib_mad_send_wr_private *mad_send_wr;
+       struct ib_mad_multipacket_seg *seg;
+       int i = 2;
+
+       mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+                                  send_buf);
+       list_for_each_entry(seg, &mad_send_wr->multipacket_list, list) {
+               if (i == seg_num)
+                       return seg;
+               i++;
+       }
+       return NULL;
+}
+EXPORT_SYMBOL(ib_get_multipacket_seg);
+
 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
 {
        struct ib_mad_agent_private *mad_agent_priv;
-       void *mad_send_wr;
-       int length;
+       struct ib_mad_send_wr_private *mad_send_wr;
 
        mad_agent_priv = container_of(send_buf->mad_agent,
                                      struct ib_mad_agent_private, agent);
        mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
                                   send_buf);
 
-       length = sizeof(struct ib_mad_send_wr_private) + (mad_send_wr - send_buf->mad);
-       if (length >= PAGE_SIZE)
-               free_pages((unsigned long)send_buf->mad, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
-       else
-               kfree(send_buf->mad);
-
+       free_send_multipacket_list(mad_send_wr);
+       kfree(send_buf->mad);
        if (atomic_dec_and_test(&mad_agent_priv->refcount))
                wake_up(&mad_agent_priv->wait);
 }
@@ -881,10 +926,17 @@ int ib_send_mad(struct ib_mad_send_wr_pr
 
        mad_agent = mad_send_wr->send_buf.mad_agent;
        sge = mad_send_wr->sg_list;
-       sge->addr = dma_map_single(mad_agent->device->dma_device,
-                                  mad_send_wr->send_buf.mad, sge->length,
-                                  DMA_TO_DEVICE);
-       pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+       sge[0].addr = dma_map_single(mad_agent->device->dma_device,
+                                    mad_send_wr->send_buf.mad,
+                                    sge[0].length,
+                                    DMA_TO_DEVICE);
+       pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
+
+       sge[1].addr = dma_map_single(mad_agent->device->dma_device,
+                                    mad_send_wr->send_buf.mad_payload,
+                                    sge[1].length,
+                                    DMA_TO_DEVICE);
+       pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);
 
        spin_lock_irqsave(&qp_info->send_queue.lock, flags);
        if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
@@ -901,11 +953,15 @@ int ib_send_mad(struct ib_mad_send_wr_pr
                list_add_tail(&mad_send_wr->mad_list.list, list);
        }
        spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
-       if (ret)
+       if (ret) {
                dma_unmap_single(mad_agent->device->dma_device,
-                                pci_unmap_addr(mad_send_wr, mapping),
-                                sge->length, DMA_TO_DEVICE);
+                                pci_unmap_addr(mad_send_wr, header_mapping),
+                                sge[0].length, DMA_TO_DEVICE);
 
+               dma_unmap_single(mad_agent->device->dma_device,
+                                pci_unmap_addr(mad_send_wr, payload_mapping),
+                                sge[1].length, DMA_TO_DEVICE);
+       }
        return ret;
 }
 
@@ -1876,8 +1932,11 @@ static void ib_mad_send_done_handler(str
 
 retry:
        dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
-                        pci_unmap_addr(mad_send_wr, mapping),
+                        pci_unmap_addr(mad_send_wr, header_mapping),
                         mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
+       dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+                        pci_unmap_addr(mad_send_wr, payload_mapping),
+                        mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
        queued_send_wr = NULL;
        spin_lock_irqsave(&send_queue->lock, flags);
        list_del(&mad_list->list);
Index: last_stable/drivers/infiniband/core/user_mad.c
===================================================================
--- last_stable.orig/drivers/infiniband/core/user_mad.c
+++ last_stable/drivers/infiniband/core/user_mad.c
@@ -187,7 +270,7 @@ static void send_handler(struct ib_mad_a
        ib_free_send_mad(packet->msg);
 
        if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
-               timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL);
+               timeout = alloc_packet();
                if (!timeout)
                        goto out;
 
@@ -198,40 +281,12 @@ static void send_handler(struct ib_mad_a
                       sizeof (struct ib_mad_hdr));
 
                if (queue_packet(file, agent, timeout))
-                       kfree(timeout);
+                       free_packet(timeout);
        }
 out:
        kfree(packet);
 }
 
-static struct ib_umad_packet *alloc_packet(int buf_size)
-{
-       struct ib_umad_packet *packet;
-       int length = sizeof *packet + buf_size;
-
-       if (length >= PAGE_SIZE)
-               packet = (void *)__get_free_pages(GFP_KERNEL, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
-       else
-               packet = kmalloc(length, GFP_KERNEL);
-
-       if (!packet)
-               return NULL;
-
-       memset(packet, 0, length);
-       return packet;
-}
-
-static void free_packet(struct ib_umad_packet *packet)
-{
-       int length = packet->length + sizeof *packet;
-       if (length >= PAGE_SIZE)
-               free_pages((unsigned long) packet, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
-       else
-               kfree(packet);
-}
-
-
-
 static void recv_handler(struct ib_mad_agent *agent,
                         struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -339,6 +422,8 @@ static ssize_t ib_umad_write(struct file
        __be64 *tid;
        int ret, length, hdr_len, copy_offset;
        int rmpp_active, has_rmpp_header;
+       int s, seg_num;
+       struct ib_mad_multipacket_seg *seg;
 
        if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
                return -EINVAL;
@@ -415,6 +500,11 @@ static ssize_t ib_umad_write(struct file
                goto err_ah;
        }
 
+       if (!rmpp_active && length > sizeof(struct ib_mad)) {
+               ret = -EINVAL;
+               goto err_ah;
+       }
+
        packet->msg = ib_create_send_mad(agent,
                                         be32_to_cpu(packet->mad.hdr.qpn),
                                         0, rmpp_active,
@@ -432,14 +522,32 @@ static ssize_t ib_umad_write(struct file
 
        /* Copy MAD headers (RMPP header in place) */
        memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
-       /* Now, copy rest of message from user into send buffer */
+       /* complete copying first 256 bytes of message into send buffer */
        if (copy_from_user(packet->msg->mad + copy_offset,
                           buf + sizeof (struct ib_user_mad) + copy_offset,
-                          length - copy_offset)) {
+                          min_t(int, length, sizeof(struct ib_mad)) - copy_offset)) {
                ret = -EFAULT;
                goto err_msg;
        }
 
+       /* if RMPP, copy rest of send message from user to multipacket list */
+       length -= sizeof(struct ib_mad);
+       if (length > 0) {
+               buf +=  sizeof (struct ib_user_mad) + sizeof(struct ib_mad);
+               for (seg_num = 2; length > 0; ++seg_num, buf += s, length -= s) {
+                       seg = ib_get_multipacket_seg(packet->msg, seg_num);
+                       BUG_ON(!seg);
+                       s = min_t(int, length, seg->size);
+                       if (copy_from_user(seg->data, buf, s)) {
+                               ret = -EFAULT;
+                               goto err_msg;
+                       }
+               }
+               /* Pad last segment with zeroes. */
+               if (seg->size - s)
+                       memset(seg->data + s, 0, seg->size - s);
+       }
+
        /*
         * If userspace is generating a request that will generate a
         * response, we need to make sure the high-order part of the
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to