In order to support that we provide the user with an interface
to pass a scattered list of buffers to the IB core layer called
ib_indir_reg_list and provide a new send work request opcode
called IB_WR_REG_INDIR_MR. We extend wr union with a new type of
memory registration called indir_reg where the user can place the
relevant information to perform such a memory registration.
The verbs user is expected to perform these steps:
0. Make sure that the device supports Indirect memory registration via
ib_device_cap_flag IB_DEVICE_INDIR_REGISTRATION and make sure
that ib_device_attr max_indir_reg_mr_list_len suffices for the
expected scatterlist length
1. Allocate a memory region with IB_MR_INDIRECT_REG creation flag
This is done via ib_create_mr() with mr_init_attr.flags =
IB_MR_INDIRECT_REG
2. Allocate an ib_indir_reg_list structure to hold the scattered buffers
pointers. This is done via new ib_alloc_indir_reg_list() verb
3. Populate the scattered buffers in ib_indir_reg_list.sg_list
4. Post a work request with a new opcode IB_WR_REG_INDIR_MR and
provide the populated ib_indir_reg_list
5. Perform data transfer
6. Get completion of kind IB_WC_REG_INDIR_MR (if requested)
7. Free indirect MR and ib_indir_reg_list via
ib_destroy_mr() and ib_free_indir_reg_list()
Signed-off-by: Sagi Grimberg <sa...@mellanox.com>
---
drivers/infiniband/core/verbs.c | 29 ++++++++++++++++++++
include/rdma/ib_verbs.h | 55
+++++++++++++++++++++++++++++++++++++-
2 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/core/verbs.c
b/drivers/infiniband/core/verbs.c
index c2b89cc..0364551 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1445,3 +1445,32 @@ int ib_check_mr_status(struct ib_mr *mr, u32
check_mask,
mr->device->check_mr_status(mr, check_mask, mr_status) :
-ENOSYS;
}
EXPORT_SYMBOL(ib_check_mr_status);
+
+struct ib_indir_reg_list *
+ib_alloc_indir_reg_list(struct ib_device *device,
+ unsigned int max_indir_list_len)
+{
+ struct ib_indir_reg_list *indir_list;
+
+ if (!device->alloc_indir_reg_list)
+ return ERR_PTR(-ENOSYS);
+
+ indir_list = device->alloc_indir_reg_list(device,
+ max_indir_list_len);
+ if (!IS_ERR(indir_list)) {
+ indir_list->device = device;
+ indir_list->max_indir_list_len = max_indir_list_len;
+ }
+
+ return indir_list;
+}
+EXPORT_SYMBOL(ib_alloc_indir_reg_list);
+
+void
+ib_free_indir_reg_list(struct ib_device *device,
+ struct ib_indir_reg_list *indir_list)
+{
+ if (device->free_indir_reg_list)
+ device->free_indir_reg_list(device, indir_list);
+}
+EXPORT_SYMBOL(ib_free_indir_reg_list);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 470a011..f5fe53c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -123,7 +123,8 @@ enum ib_device_cap_flags {
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
- IB_DEVICE_SIGNATURE_HANDOVER = (1<<30)
+ IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
+ IB_DEVICE_INDIR_REGISTRATION = (1<<31)
};
enum ib_signature_prot_cap {
@@ -182,6 +183,7 @@ struct ib_device_attr {
int max_srq_wr;
int max_srq_sge;
unsigned int max_fast_reg_page_list_len;
+ unsigned int max_indir_reg_mr_list_len;
u16 max_pkeys;
u8 local_ca_ack_delay;
int sig_prot_cap;
@@ -476,7 +478,8 @@ __attribute_const__ int ib_rate_to_mult(enum
ib_rate rate);
__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
enum ib_mr_create_flags {
- IB_MR_SIGNATURE_EN = 1,
+ IB_MR_SIGNATURE_EN = 1 << 0,
+ IB_MR_INDIRECT_REG = 1 << 1
};
/**
@@ -651,6 +654,7 @@ enum ib_wc_opcode {
IB_WC_FAST_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
+ IB_WC_REG_INDIR_MR,
/*
* Set value of IB_WC_RECV so consumers can test if a completion is a
* receive by testing (opcode & IB_WC_RECV).
@@ -945,6 +949,7 @@ enum ib_wr_opcode {
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
IB_WR_BIND_MW,
IB_WR_REG_SIG_MR,
+ IB_WR_REG_INDIR_MR,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
*/
@@ -984,6 +989,12 @@ struct ib_fast_reg_page_list {
unsigned int max_page_list_len;
};
+struct ib_indir_reg_list {
+ struct ib_device *device;
+ struct ib_sge *sg_list;
+ unsigned int max_indir_list_len;
+};
+
/**
* struct ib_mw_bind_info - Parameters for a memory window bind
operation.
* @mr: A memory region to bind the memory window to.
@@ -1056,6 +1067,14 @@ struct ib_send_wr {
int access_flags;
struct ib_sge *prot;
} sig_handover;
+ struct {
+ u64 iova_start;
+ struct ib_indir_reg_list *indir_list;
+ unsigned int indir_list_len;
+ u64 length;
+ unsigned int access_flags;
+ u32 mkey;
+ } indir_reg;