Re: [PATCH v2 1/2] migration/rdma: Try to register On-Demand Paging memory region

2021-08-23 Thread Marcel Apfelbaum
Hi Zhijian,

On Mon, Aug 23, 2021 at 11:42 AM lizhij...@fujitsu.com
 wrote:
>
> CCing  Marcel
>
>
> On 23/08/2021 11:33, Li Zhijian wrote:
> > Previously, registering memory for an fsdax mem-backend-file failed with
> > "Operation not supported". In this case, we can try to register it with
> > On-Demand Paging[1], as rpma_mr_reg() does in rpma[2].
> >
> > [1]: 
> > https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x
> > [2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3
> >
> > CC: Marcel Apfelbaum 
> > Signed-off-by: Li Zhijian 
> >
> > ---
> > V2: add ODP sanity check and remove goto
> > ---
> >   migration/rdma.c   | 73 ++
> >   migration/trace-events |  1 +
> >   2 files changed, 54 insertions(+), 20 deletions(-)
> >
> > diff --git a/migration/rdma.c b/migration/rdma.c
> > index 5c2d113aa94..eb80431aae2 100644
> > --- a/migration/rdma.c
> > +++ b/migration/rdma.c
> > @@ -1117,19 +1117,47 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
> >   return 0;
> >   }
> >
> > +/* Check whether On-Demand Paging is supported by RDMA device */
> > +static bool rdma_support_odp(struct ibv_context *dev)
> > +{
> > +struct ibv_device_attr_ex attr = {0};
> > +int ret = ibv_query_device_ex(dev, NULL, &attr);
> > +if (ret) {
> > +return false;
> > +}
> > +
> > +if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) {
> > +return true;
> > +}
> > +
> > +return false;
> > +}
> > +
> >   static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
> >   {
> >   int i;
> >   RDMALocalBlocks *local = &rdma->local_ram_blocks;
> >
> >   for (i = 0; i < local->nb_blocks; i++) {
> > +int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
> > +
> >   local->block[i].mr =
> >   ibv_reg_mr(rdma->pd,
> >   local->block[i].local_host_addr,
> > -local->block[i].length,
> > -IBV_ACCESS_LOCAL_WRITE |
> > -IBV_ACCESS_REMOTE_WRITE
> > +local->block[i].length, access
> >   );
> > +
> > +if (!local->block[i].mr &&
> > +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
> > +access |= IBV_ACCESS_ON_DEMAND;
> > +/* register ODP mr */
> > +local->block[i].mr =
> > +ibv_reg_mr(rdma->pd,
> > +   local->block[i].local_host_addr,
> > +   local->block[i].length, access);
> > +
> > trace_qemu_rdma_register_odp_mr(local->block[i].block_name);
> > +}
> > +
> >   if (!local->block[i].mr) {
> >   perror("Failed to register local dest ram block!");
> >   break;
> > @@ -1215,28 +1243,33 @@ static int 
> > qemu_rdma_register_and_get_keys(RDMAContext *rdma,
> >*/
> >   if (!block->pmr[chunk]) {
> >   uint64_t len = chunk_end - chunk_start;
> > +int access = rkey ? IBV_ACCESS_LOCAL_WRITE | 
> > IBV_ACCESS_REMOTE_WRITE :
> > + 0;
> >
> >   trace_qemu_rdma_register_and_get_keys(len, chunk_start);
> >
> > -block->pmr[chunk] = ibv_reg_mr(rdma->pd,
> > -chunk_start, len,
> > -(rkey ? (IBV_ACCESS_LOCAL_WRITE |
> > -IBV_ACCESS_REMOTE_WRITE) : 0));
> > -
> > -if (!block->pmr[chunk]) {
> > -perror("Failed to register chunk!");
> > -fprintf(stderr, "Chunk details: block: %d chunk index %d"
> > -" start %" PRIuPTR " end %" PRIuPTR
> > -" host %" PRIuPTR
> > -" local %" PRIuPTR " registrations: %d\n",
> > -block->index, chunk, (uintptr_t)chunk_start,
> > -(uintptr_t)chunk_end, host_addr,
> > -(uintptr_t)block->local_host_addr,
> > -rdma->total_registrations);
> > -return -1;
> > +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
> > +if (!block->pmr[chunk] &&
> > +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
> > +access |= IBV_ACCESS_ON_DEMAND;
> > +/* register ODP mr */
> > +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, 
> > access);
> > +trace_qemu_rdma_register_odp_mr(block->block_name);
> >   }
> > -rdma->total_registrations++;
> >   }
> > +if (!block->pmr[chunk]) {
> > +perror("Failed to register chunk!");
> > +fprintf(stderr, "Chunk details: block: %d chunk index %d"
> > +" start %" PRIuPTR " end %" PRIuPTR
> > +" host %" PRIuPTR
> > +" local %" PRIuPTR " registrations: %d\n",
> > +

Re: [PATCH v2 1/2] migration/rdma: Try to register On-Demand Paging memory region

2021-08-23 Thread lizhij...@fujitsu.com
CCing  Marcel


On 23/08/2021 11:33, Li Zhijian wrote:
> Previously, registering memory for an fsdax mem-backend-file failed with
> "Operation not supported". In this case, we can try to register it with
> On-Demand Paging[1], as rpma_mr_reg() does in rpma[2].
>
> [1]: 
> https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x
> [2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3
>
> CC: Marcel Apfelbaum 
> Signed-off-by: Li Zhijian 
>
> ---
> V2: add ODP sanity check and remove goto
> ---
>   migration/rdma.c   | 73 ++
>   migration/trace-events |  1 +
>   2 files changed, 54 insertions(+), 20 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 5c2d113aa94..eb80431aae2 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -1117,19 +1117,47 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
>   return 0;
>   }
>   
> +/* Check whether On-Demand Paging is supported by RDMA device */
> +static bool rdma_support_odp(struct ibv_context *dev)
> +{
> +struct ibv_device_attr_ex attr = {0};
> +int ret = ibv_query_device_ex(dev, NULL, &attr);
> +if (ret) {
> +return false;
> +}
> +
> +if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) {
> +return true;
> +}
> +
> +return false;
> +}
> +
>   static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
>   {
>   int i;
>   RDMALocalBlocks *local = &rdma->local_ram_blocks;
>   
>   for (i = 0; i < local->nb_blocks; i++) {
> +int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
> +
>   local->block[i].mr =
>   ibv_reg_mr(rdma->pd,
>   local->block[i].local_host_addr,
> -local->block[i].length,
> -IBV_ACCESS_LOCAL_WRITE |
> -IBV_ACCESS_REMOTE_WRITE
> +local->block[i].length, access
>   );
> +
> +if (!local->block[i].mr &&
> +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
> +access |= IBV_ACCESS_ON_DEMAND;
> +/* register ODP mr */
> +local->block[i].mr =
> +ibv_reg_mr(rdma->pd,
> +   local->block[i].local_host_addr,
> +   local->block[i].length, access);
> +trace_qemu_rdma_register_odp_mr(local->block[i].block_name);
> +}
> +
>   if (!local->block[i].mr) {
>   perror("Failed to register local dest ram block!");
>   break;
> @@ -1215,28 +1243,33 @@ static int 
> qemu_rdma_register_and_get_keys(RDMAContext *rdma,
>*/
>   if (!block->pmr[chunk]) {
>   uint64_t len = chunk_end - chunk_start;
> +int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE 
> :
> + 0;
>   
>   trace_qemu_rdma_register_and_get_keys(len, chunk_start);
>   
> -block->pmr[chunk] = ibv_reg_mr(rdma->pd,
> -chunk_start, len,
> -(rkey ? (IBV_ACCESS_LOCAL_WRITE |
> -IBV_ACCESS_REMOTE_WRITE) : 0));
> -
> -if (!block->pmr[chunk]) {
> -perror("Failed to register chunk!");
> -fprintf(stderr, "Chunk details: block: %d chunk index %d"
> -" start %" PRIuPTR " end %" PRIuPTR
> -" host %" PRIuPTR
> -" local %" PRIuPTR " registrations: %d\n",
> -block->index, chunk, (uintptr_t)chunk_start,
> -(uintptr_t)chunk_end, host_addr,
> -(uintptr_t)block->local_host_addr,
> -rdma->total_registrations);
> -return -1;
> +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
> +if (!block->pmr[chunk] &&
> +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
> +access |= IBV_ACCESS_ON_DEMAND;
> +/* register ODP mr */
> +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, 
> access);
> +trace_qemu_rdma_register_odp_mr(block->block_name);
>   }
> -rdma->total_registrations++;
>   }
> +if (!block->pmr[chunk]) {
> +perror("Failed to register chunk!");
> +fprintf(stderr, "Chunk details: block: %d chunk index %d"
> +" start %" PRIuPTR " end %" PRIuPTR
> +" host %" PRIuPTR
> +" local %" PRIuPTR " registrations: %d\n",
> +block->index, chunk, (uintptr_t)chunk_start,
> +(uintptr_t)chunk_end, host_addr,
> +(uintptr_t)block->local_host_addr,
> +rdma->total_registrations);
> +return -1;
> +}
> +rdma->total_registrations++;
>   
>   if (lkey) {
> 

[PATCH v2 1/2] migration/rdma: Try to register On-Demand Paging memory region

2021-08-22 Thread Li Zhijian
Previously, registering memory for an fsdax mem-backend-file failed with
"Operation not supported". In this case, we can try to register it with
On-Demand Paging[1], as rpma_mr_reg() does in rpma[2].

[1]: 
https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x
[2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3

CC: Marcel Apfelbaum 
Signed-off-by: Li Zhijian 

---
V2: add ODP sanity check and remove goto
---
 migration/rdma.c   | 73 ++
 migration/trace-events |  1 +
 2 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 5c2d113aa94..eb80431aae2 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -1117,19 +1117,47 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
 return 0;
 }
 
+/* Check whether On-Demand Paging is supported by RDMA device */
+static bool rdma_support_odp(struct ibv_context *dev)
+{
+struct ibv_device_attr_ex attr = {0};
+int ret = ibv_query_device_ex(dev, NULL, &attr);
+if (ret) {
+return false;
+}
+
+if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) {
+return true;
+}
+
+return false;
+}
+
 static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
 {
 int i;
 RDMALocalBlocks *local = &rdma->local_ram_blocks;
 
 for (i = 0; i < local->nb_blocks; i++) {
+int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
+
 local->block[i].mr =
 ibv_reg_mr(rdma->pd,
 local->block[i].local_host_addr,
-local->block[i].length,
-IBV_ACCESS_LOCAL_WRITE |
-IBV_ACCESS_REMOTE_WRITE
+local->block[i].length, access
 );
+
+if (!local->block[i].mr &&
+errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
+access |= IBV_ACCESS_ON_DEMAND;
+/* register ODP mr */
+local->block[i].mr =
+ibv_reg_mr(rdma->pd,
+   local->block[i].local_host_addr,
+   local->block[i].length, access);
+trace_qemu_rdma_register_odp_mr(local->block[i].block_name);
+}
+
 if (!local->block[i].mr) {
 perror("Failed to register local dest ram block!");
 break;
@@ -1215,28 +1243,33 @@ static int qemu_rdma_register_and_get_keys(RDMAContext 
*rdma,
  */
 if (!block->pmr[chunk]) {
 uint64_t len = chunk_end - chunk_start;
+int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE :
+ 0;
 
 trace_qemu_rdma_register_and_get_keys(len, chunk_start);
 
-block->pmr[chunk] = ibv_reg_mr(rdma->pd,
-chunk_start, len,
-(rkey ? (IBV_ACCESS_LOCAL_WRITE |
-IBV_ACCESS_REMOTE_WRITE) : 0));
-
-if (!block->pmr[chunk]) {
-perror("Failed to register chunk!");
-fprintf(stderr, "Chunk details: block: %d chunk index %d"
-" start %" PRIuPTR " end %" PRIuPTR
-" host %" PRIuPTR
-" local %" PRIuPTR " registrations: %d\n",
-block->index, chunk, (uintptr_t)chunk_start,
-(uintptr_t)chunk_end, host_addr,
-(uintptr_t)block->local_host_addr,
-rdma->total_registrations);
-return -1;
+block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
+if (!block->pmr[chunk] &&
+errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
+access |= IBV_ACCESS_ON_DEMAND;
+/* register ODP mr */
+block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
+trace_qemu_rdma_register_odp_mr(block->block_name);
 }
-rdma->total_registrations++;
 }
+if (!block->pmr[chunk]) {
+perror("Failed to register chunk!");
+fprintf(stderr, "Chunk details: block: %d chunk index %d"
+" start %" PRIuPTR " end %" PRIuPTR
+" host %" PRIuPTR
+" local %" PRIuPTR " registrations: %d\n",
+block->index, chunk, (uintptr_t)chunk_start,
+(uintptr_t)chunk_end, host_addr,
+(uintptr_t)block->local_host_addr,
+rdma->total_registrations);
+return -1;
+}
+rdma->total_registrations++;
 
 if (lkey) {
 *lkey = block->pmr[chunk]->lkey;
diff --git a/migration/trace-events b/migration/trace-events
index a1c0f034ab8..5f6aa580def 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -212,6 +212,7 @@ qemu_rdma_poll_write(const char *compstr, int64_t comp, int 
left, uint64_t block
 qemu_rdma_poll_other(const char