Support for the IB BMME and iWARP equivalent memory extensions to 
non shared memory regions.  This includes:

- allocation of an ib_mr for use in fast register work requests

- device-specific alloc/free of physical buffer lists for use in fast
register work requests.  This allows devices to allocate this memory as
needed (like via dma_alloc_coherent).

- fast register memory region work request

- invalidate local memory region work request

- read with invalidate local memory region work request (iWARP only)


Design details:

- New device capability flag added: IB_DEVICE_MEM_MGT_EXTENSIONS indicates
device support for this feature.

- New send WR opcode IB_WR_FAST_REG_MR used to issue a fast_reg request.

- New send WR opcode IB_WR_INVALIDATE_MR used to invalidate a fast_reg mr.

- New API function, ib_alloc_fast_reg_mr() used to allocate fast_reg
memory regions.

- New API function, ib_alloc_fast_reg_page_list to allocate
device-specific page lists.

- New API function, ib_free_fast_reg_page_list to free said page lists.

- New API function, ib_update_fast_reg_key to allow the key portion of
the R_Key and L_Key of a fast_reg MR to be updated.  Applications call
this if desired before posting the IB_WR_FAST_REG_MR.


Usage Model:

- MR allocated with ib_alloc_fast_reg_mr()

- Page lists allocated via ib_alloc_fast_reg_page_list().

- MR R_Key/L_Key "key" field updated with ib_update_fast_reg_key().

- MR made VALID and bound to a specific page list via
ib_post_send(IB_WR_FAST_REG_MR)

- MR made INVALID via ib_post_send(IB_WR_INVALIDATE_MR)

- MR deallocated with ib_dereg_mr()

- page lists dealloced via ib_free_fast_reg_page_list().

Applications can allocate a fast_reg mr once, and then can repeatedly
bind the mr to different physical memory SGLs via posting work
requests to the send queue (SQ).  For each outstanding mr-to-pbl binding in the SQ pipe,
a fast_reg_page_list needs to be allocated.  Thus pipelining can be
achieved while still allowing device-specific page_list processing.

The 4B fast_reg rkey or stag is composed of a 3B index, and a 1B key.
The application can change the key each time it fast-registers thus
allowing more control over the peer's use of the rkey (ie it can
effectively be changed each time the rkey is rebound to a page list).

Signed-off-by: Steve Wise <[EMAIL PROTECTED]>
---

 drivers/infiniband/core/verbs.c |   46 ++++++++++++++++++++++++
 include/rdma/ib_verbs.h         |   76 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 0504208..0a334b4 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -755,6 +755,52 @@ int ib_dereg_mr(struct ib_mr *mr)
 }
 EXPORT_SYMBOL(ib_dereg_mr);
 
+struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
+{
+       struct ib_mr *mr;
+
+       if (!pd->device->alloc_fast_reg_mr)
+               return ERR_PTR(-ENOSYS);
+
+       mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
+
+       if (!IS_ERR(mr)) {
+               mr->device  = pd->device;
+               mr->pd      = pd;
+               mr->uobject = NULL;
+               atomic_inc(&pd->usecnt);
+               atomic_set(&mr->usecnt, 0);
+       }
+
+       return mr;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
+
+struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
+                               struct ib_device *device, int max_page_list_len)
+{
+       struct ib_fast_reg_page_list *page_list;
+
+       if (!device->alloc_fast_reg_page_list)
+               return ERR_PTR(-ENOSYS);
+
+       page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
+
+       if (!IS_ERR(page_list)) {
+               page_list->device = device;
+               page_list->max_page_list_len = max_page_list_len;
+       }
+
+       return page_list;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
+
+void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+       page_list->device->free_fast_reg_page_list(page_list);
+}
+EXPORT_SYMBOL(ib_free_fast_reg_page_list);
+
 /* Memory windows */
 
 struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 911a661..4530ebd 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -106,6 +106,7 @@ enum ib_device_cap_flags {
        IB_DEVICE_UD_IP_CSUM            = (1<<18),
        IB_DEVICE_UD_TSO                = (1<<19),
        IB_DEVICE_SEND_W_INV            = (1<<21),
+       IB_DEVICE_MEM_MGT_EXTENSIONS    = (1<<22),
 };
 
 enum ib_atomic_cap {
@@ -151,6 +152,7 @@ struct ib_device_attr {
        int                     max_srq;
        int                     max_srq_wr;
        int                     max_srq_sge;
+       unsigned int            max_fast_reg_page_list_len;
        u16                     max_pkeys;
        u8                      local_ca_ack_delay;
 };
@@ -414,6 +416,8 @@ enum ib_wc_opcode {
        IB_WC_FETCH_ADD,
        IB_WC_BIND_MW,
        IB_WC_LSO,
+       IB_WC_FAST_REG_MR,
+       IB_WC_INVALIDATE_MR,
 /*
  * Set value of IB_WC_RECV so consumers can test if a completion is a
  * receive by testing (opcode & IB_WC_RECV).
@@ -628,6 +632,9 @@ enum ib_wr_opcode {
        IB_WR_ATOMIC_FETCH_AND_ADD,
        IB_WR_LSO,
        IB_WR_SEND_WITH_INV,
+       IB_WR_FAST_REG_MR,
+       IB_WR_INVALIDATE_MR,
+       IB_WR_READ_WITH_INV,
 };
 
 enum ib_send_flags {
@@ -676,6 +683,19 @@ struct ib_send_wr {
                        u16     pkey_index; /* valid for GSI only */
                        u8      port_num;   /* valid for DR SMPs on switch only */
                } ud;
+               struct {
+                       u64                             iova_start;
+                       struct ib_fast_reg_page_list    *page_list;
+                       unsigned int                    page_shift;
+                       unsigned int                    page_list_len;
+                       unsigned int                    first_byte_offset;
+                       u32                             length;
+                       int                             access_flags;
+                       u32                             rkey;
+               } fast_reg;
+               struct {
+                       u32                             rkey;
+               } local_inv;
        } wr;
 };
 
@@ -1014,6 +1034,10 @@ struct ib_device {
        int                        (*query_mr)(struct ib_mr *mr,
                                               struct ib_mr_attr *mr_attr);
        int                        (*dereg_mr)(struct ib_mr *mr);
+       struct ib_mr *             (*alloc_fast_reg_mr)(struct ib_pd *pd,
+                                              int max_page_list_len);
+       struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, int page_list_len);
+       void                       (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
        int                        (*rereg_phys_mr)(struct ib_mr *mr,
                                                    int mr_rereg_mask,
                                                    struct ib_pd *pd,
@@ -1808,6 +1832,58 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
 int ib_dereg_mr(struct ib_mr *mr);
 
 /**
+ * ib_alloc_fast_reg_mr - Allocates memory region usable with the
+ * IB_WR_FAST_REG_MR send work request.
+ * @pd: The protection domain associated with the region.
+ * @max_page_list_len: requested max physical buffer list size to be allocated.
+ */
+struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+
+struct ib_fast_reg_page_list {
+       struct ib_device        *device;
+       u64                     *page_list;
+       unsigned int            max_page_list_len;
+};
+
+/**
+ * ib_alloc_fast_reg_page_list - Allocates a page list array
+ * @device - ib device pointer.
+ * @page_list_len - size of the page list array to be allocated.
+ *
+ * This allocates and returns a struct ib_fast_reg_page_list *
+ * and a page_list array that is at least page_list_len in size.
+ * The actual size is returned in max_page_list_len.
+ * The caller is responsible for initializing the contents of the
+ * page_list array before posting a send work request with the
+ * IB_WR_FAST_REG_MR opcode. The page_list array entries must be
+ * translated using one of the ib_dma_*() functions similar to the
+ * addresses passed to ib_map_phys_fmr(). Once the ib_post_send()
+ * is issued, the struct ib_fast_reg_page_list must not be modified
+ * by the caller until a completion notice is returned by the device.
+ */
+struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
+                               struct ib_device *device, int page_list_len);
+
+/**
+ * ib_free_fast_reg_page_list - Deallocates a previously allocated
+ * page list array.
+ * @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
+ */
+void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+
+/**
+ * ib_update_fast_reg_key - updates the key portion of the fast_reg 
+ * R_Key and L_Key.
+ * @mr - struct ib_mr pointer to be updated.
+ * @newkey - new key to be used.
+ */
+static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
+{
+       mr->lkey = (mr->lkey & 0xffffff00) | newkey;
+       mr->rkey = (mr->rkey & 0xffffff00) | newkey;
+}
+
+/**
  * ib_alloc_mw - Allocates a memory window.
  * @pd: The protection domain associated with the memory window.
  */
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to