On Mon, Oct 20, 2008 at 07:43:04AM -0700, Roland Dreier wrote:
>  > Some architectures support weak ordering in which case better
>  > performance is possible. IB registered memory used for data can be
>  > weakly ordered becuase the the completion queues' buffers are
>  > registered as strongly ordered. This will result in flushing all data
>  > related outstanding DMA requests by the HCA when a completion is DMAed
>  > to a completion queue buffer.
> 
> This would break the Mellanox HW's guarantee of writing the last byte of
> an RDMA last, right?  So on platforms where this has an effect (only
> Cell at the moment) some applications could be subtly broken?
>
In theory it would break Mellanox's guarantee for strict ordering on
data, but in practice it will not since the only architecture that
supports weak ordering is CELL. As Arnd suggested in his response
email, here is the patch with a module parameter which by default will
not configure weak ordering for data. Anyone wishing to benefit from
weak ordering will have to set the module parameter accordingly.


>From 2c1e0f4d8138c1fbd675e7ada4384f59269acb1f Mon Sep 17 00:00:00 2001
From: Eli Cohen <[EMAIL PROTECTED]>
Date: Mon, 20 Oct 2008 15:52:22 +0200
Subject: [PATCH] ib_core: Use weak ordering for data registered memory

Some architectures support weak ordering in which case better
performance is possible. IB registered memory used for data can be
weakly ordered because the completion queues' buffers are
registered as strongly ordered. This will result in flushing all data
related outstanding DMA requests by the HCA when a completion is DMAed
to a completion queue buffer.
This patch will allow weak ordering for data if ib_core is loaded with
the module parameter, allow_weak_ordering, set to a non-zero value.

Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
Signed-off-by: Arnd Bergmann <[EMAIL PROTECTED]>
---
 drivers/infiniband/core/umem.c |   12 ++++++++++--
 include/rdma/ib_umem.h         |    2 ++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 6f7c096..d21853d 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -40,6 +40,10 @@
 
 #include "uverbs.h"
 
+static int allow_weak_ordering;
+module_param(allow_weak_ordering, bool, 0444);
+MODULE_PARM_DESC(allow_weak_ordering,  "Allow weak ordering for data registered memory");
+
 #define IB_UMEM_MAX_PAGE_CHUNK                                         \
        ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) /      \
         ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] -        \
@@ -51,8 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
        int i;
 
        list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
-               ib_dma_unmap_sg(dev, chunk->page_list,
-                               chunk->nents, DMA_BIDIRECTIONAL);
+               ib_dma_unmap_sg_attrs(dev, chunk->page_list,
+                                     chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
                for (i = 0; i < chunk->nents; ++i) {
                        struct page *page = sg_page(&chunk->page_list[i]);
 
@@ -91,6 +95,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
        if (dmasync)
                dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+       else if (allow_weak_ordering)
+               dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
+
 
        if (!can_do_mlock())
                return ERR_PTR(-EPERM);
@@ -155,6 +162,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
                if (ret < 0)
                        goto out;
 
+               chunk->attrs = attrs;
                cur_base += ret * PAGE_SIZE;
                npages   -= ret;
 
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 9ee0d2e..90f3712 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -36,6 +36,7 @@
 #include <linux/list.h>
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
+#include <linux/dma-attrs.h>
 
 struct ib_ucontext;
 
@@ -56,6 +57,7 @@ struct ib_umem_chunk {
        struct list_head        list;
        int                     nents;
        int                     nmap;
+       struct dma_attrs        attrs;
        struct scatterlist      page_list[0];
 };
 
-- 
1.6.0.2

_______________________________________________
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to