Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> Subject: user_mad: large rmpp length problem
> 
> Hello!
> ib_umad_write currently accepts a count parameter from the user and
> attempts to allocate a MAD of size count - sizeof (struct ib_user_mad)
> in kernel memory.
> 
> For large counts, this allocation fails with -ENOMEM, which means that
> we can't send large transactions with RMPP.
> 
> The proper fix appears to be to transfer the data in chunks,
> waking the user process and copying a fixed number of bytes each time.
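
For illustration, here is a rough kernel-style sketch of the chunked-transfer
idea described above. Note this is NOT what the patch below implements;
write_in_chunks() and queue_chunk() are invented placeholders for whatever
would hand each chunk on to the MAD layer:

static void queue_chunk(void *data, size_t len);	/* hypothetical consumer */

static ssize_t write_in_chunks(const char __user *buf, size_t count)
{
	/* one page-sized bounce buffer; no large contiguous allocation needed */
	void *bounce = (void *)__get_free_page(GFP_KERNEL);
	size_t done = 0;
	ssize_t ret = 0;

	if (!bounce)
		return -ENOMEM;

	while (done < count) {
		size_t n = min_t(size_t, PAGE_SIZE, count - done);

		/* copy a fixed number of bytes from the user each iteration */
		if (copy_from_user(bounce, buf + done, n)) {
			ret = -EFAULT;
			break;
		}
		queue_chunk(bounce, n);
		done += n;
	}

	free_page((unsigned long)bounce);
	return ret ? ret : done;
}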

Here's a very simple patch which, while not ideal, lets us go up to 512KB.
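
For reference, the allocation order used throughout the patch rounds the
length up to the next power of two and converts it to a page order. A small
userspace check of that arithmetic, assuming 4KB pages (long_log2 and
roundup_pow_of_two are reimplemented here purely for illustration):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

static int long_log2(unsigned long x)		/* floor(log2(x)) */
{
	int r = -1;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

static unsigned long roundup_pow_of_two(unsigned long x)
{
	unsigned long p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long lengths[] = { 8192, 70000, 512 * 1024 };
	int i;

	for (i = 0; i < 3; i++) {
		int order = long_log2(roundup_pow_of_two(lengths[i])) - PAGE_SHIFT;

		printf("length %7lu -> order %d (%lu bytes)\n",
		       lengths[i], order, PAGE_SIZE << order);
	}
	return 0;
}

A 512KB buffer thus needs an order-7 allocation (128 contiguous pages),
which the page allocator can satisfy while kmalloc (capped at 128KB on
typical configurations) cannot.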

---

Allocate memory for large MAD buffers with __get_free_pages,
making it possible to get buffers up to 512KB in size.

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>
Signed-off-by: Jack Morgenstein <[EMAIL PROTECTED]>

Index: linux-kernel/drivers/infiniband/core/user_mad.c
===================================================================
--- linux-kernel.orig/drivers/infiniband/core/user_mad.c
+++ linux-kernel/drivers/infiniband/core/user_mad.c
@@ -204,6 +204,34 @@ out:
        kfree(packet);
 }
 
+static struct ib_umad_packet *alloc_packet(int buf_size)
+{
+       struct ib_umad_packet *packet;
+       int length = sizeof *packet + buf_size;
+
+       if (length >= PAGE_SIZE)
+               packet = (void *)__get_free_pages(GFP_KERNEL, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
+       else
+               packet = kmalloc(length, GFP_KERNEL);
+
+       if (!packet)
+               return NULL;
+
+       memset(packet, 0, length);
+       return packet;
+}
+
+static void free_packet(struct ib_umad_packet *packet)
+{
+       int length = packet->length + sizeof *packet;
+       if (length >= PAGE_SIZE)
+               free_pages((unsigned long) packet, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
+       else
+               kfree(packet);
+}
+
+
+
 static void recv_handler(struct ib_mad_agent *agent,
                         struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -215,7 +243,7 @@ static void recv_handler(struct ib_mad_a
                goto out;
 
        length = mad_recv_wc->mad_len;
-       packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
+       packet = alloc_packet(length);
        if (!packet)
                goto out;
 
@@ -240,7 +268,7 @@ static void recv_handler(struct ib_mad_a
        }
 
        if (queue_packet(file, agent, packet))
-               kfree(packet);
+               free_packet(packet);
 
 out:
        ib_free_recv_mad(mad_recv_wc);
@@ -294,7 +322,7 @@ static ssize_t ib_umad_read(struct file 
                list_add(&packet->list, &file->recv_list);
                spin_unlock_irq(&file->recv_lock);
        } else
-               kfree(packet);
+               free_packet(packet);
        return ret;
 }
 
Index: linux-kernel/drivers/infiniband/core/mad.c
===================================================================
--- linux-kernel.orig/drivers/infiniband/core/mad.c
+++ linux-kernel/drivers/infiniband/core/mad.c
@@ -779,7 +779,7 @@ struct ib_mad_send_buf * ib_create_send_
 {
        struct ib_mad_agent_private *mad_agent_priv;
        struct ib_mad_send_wr_private *mad_send_wr;
-       int buf_size;
+       int length, buf_size;
        void *buf;
 
        mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
@@ -791,10 +791,17 @@ struct ib_mad_send_buf * ib_create_send_
            (!rmpp_active && buf_size > sizeof(struct ib_mad)))
                return ERR_PTR(-EINVAL);
 
-       buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
+       length = sizeof *mad_send_wr + buf_size;
+       if (length >= PAGE_SIZE)
+               buf = (void *)__get_free_pages(gfp_mask, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
+       else
+               buf = kmalloc(length, gfp_mask);
+
        if (!buf)
                return ERR_PTR(-ENOMEM);
 
+       memset(buf, 0, length);
+
        mad_send_wr = buf + buf_size;
        mad_send_wr->send_buf.mad = buf;
 
@@ -830,10 +837,19 @@ EXPORT_SYMBOL(ib_create_send_mad);
 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
 {
        struct ib_mad_agent_private *mad_agent_priv;
+       void *mad_send_wr;
+       int length;
 
        mad_agent_priv = container_of(send_buf->mad_agent,
                                      struct ib_mad_agent_private, agent);
-       kfree(send_buf->mad);
+       mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+                                  send_buf);
+
+       length = sizeof(struct ib_mad_send_wr_private) + (mad_send_wr - send_buf->mad);
+       if (length >= PAGE_SIZE)
+               free_pages((unsigned long)send_buf->mad, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
+       else
+               kfree(send_buf->mad);
 
        if (atomic_dec_and_test(&mad_agent_priv->refcount))
                wake_up(&mad_agent_priv->wait);

-- 
MST