Re: [PATCH v2 1/2] migration/rdma: Try to register On-Demand Paging memory region
Hi Zhijian, On Mon, Aug 23, 2021 at 11:42 AM lizhij...@fujitsu.com wrote: > > CCing Marcel > > > On 23/08/2021 11:33, Li Zhijian wrote: > > Previously, for the fsdax mem-backend-file, it will register failed with > > Operation not supported. In this case, we can try to register it with > > On-Demand Paging[1] like what rpma_mr_reg() does on rpma[2]. > > > > [1]: > > https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x > > [2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3 > > > > CC: Marcel Apfelbaum > > Signed-off-by: Li Zhijian > > > > --- > > V2: add ODP sanity check and remove goto > > --- > > migration/rdma.c | 73 ++ > > migration/trace-events | 1 + > > 2 files changed, 54 insertions(+), 20 deletions(-) > > > > diff --git a/migration/rdma.c b/migration/rdma.c > > index 5c2d113aa94..eb80431aae2 100644 > > --- a/migration/rdma.c > > +++ b/migration/rdma.c > > @@ -1117,19 +1117,47 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma) > > return 0; > > } > > > > +/* Check whether On-Demand Paging is supported by RDAM device */ > > +static bool rdma_support_odp(struct ibv_context *dev) > > +{ > > +struct ibv_device_attr_ex attr = {0}; > > +int ret = ibv_query_device_ex(dev, NULL, &attr); > > +if (ret) { > > +return false; > > +} > > + > > +if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) { > > +return true; > > +} > > + > > +return false; > > +} > > + > > static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) > > { > > int i; > > RDMALocalBlocks *local = &rdma->local_ram_blocks; > > > > for (i = 0; i < local->nb_blocks; i++) { > > +int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; > > + > > local->block[i].mr = > > ibv_reg_mr(rdma->pd, > > local->block[i].local_host_addr, > > -local->block[i].length, > > -IBV_ACCESS_LOCAL_WRITE | > > -IBV_ACCESS_REMOTE_WRITE > > +local->block[i].length, access > > ); > > + > > +if (!local->block[i].mr && > > +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { > > +access |= 
IBV_ACCESS_ON_DEMAND; > > +/* register ODP mr */ > > +local->block[i].mr = > > +ibv_reg_mr(rdma->pd, > > + local->block[i].local_host_addr, > > + local->block[i].length, access); > > + > > trace_qemu_rdma_register_odp_mr(local->block[i].block_name); > > +} > > + > > if (!local->block[i].mr) { > > perror("Failed to register local dest ram block!"); > > break; > > @@ -1215,28 +1243,33 @@ static int > > qemu_rdma_register_and_get_keys(RDMAContext *rdma, > >*/ > > if (!block->pmr[chunk]) { > > uint64_t len = chunk_end - chunk_start; > > +int access = rkey ? IBV_ACCESS_LOCAL_WRITE | > > IBV_ACCESS_REMOTE_WRITE : > > + 0; > > > > trace_qemu_rdma_register_and_get_keys(len, chunk_start); > > > > -block->pmr[chunk] = ibv_reg_mr(rdma->pd, > > -chunk_start, len, > > -(rkey ? (IBV_ACCESS_LOCAL_WRITE | > > -IBV_ACCESS_REMOTE_WRITE) : 0)); > > - > > -if (!block->pmr[chunk]) { > > -perror("Failed to register chunk!"); > > -fprintf(stderr, "Chunk details: block: %d chunk index %d" > > -" start %" PRIuPTR " end %" PRIuPTR > > -" host %" PRIuPTR > > -" local %" PRIuPTR " registrations: %d\n", > > -block->index, chunk, (uintptr_t)chunk_start, > > -(uintptr_t)chunk_end, host_addr, > > -(uintptr_t)block->local_host_addr, > > -rdma->total_registrations); > > -return -1; > > +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access); > > +if (!block->pmr[chunk] && > > +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { > > +access |= IBV_ACCESS_ON_DEMAND; > > +/* register ODP mr */ > > +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, > > access); > > +trace_qemu_rdma_register_odp_mr(block->block_name); > > } > > -rdma->total_registrations++; > > } > > +if (!block->pmr[chunk]) { > > +perror("Failed to register chunk!"); > > +fprintf(stderr, "Chunk details: block: %d chunk index %d" > > +" start %" PRIuPTR " end %" PRIuPTR > > +" host %" PRIuPTR > > +" local %" PRIuPTR " registrations: %d\n", > > +
Re: [PATCH v2 1/2] migration/rdma: Try to register On-Demand Paging memory region
CCing Marcel On 23/08/2021 11:33, Li Zhijian wrote: > Previously, for the fsdax mem-backend-file, it will register failed with > Operation not supported. In this case, we can try to register it with > On-Demand Paging[1] like what rpma_mr_reg() does on rpma[2]. > > [1]: > https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x > [2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3 > > CC: Marcel Apfelbaum > Signed-off-by: Li Zhijian > > --- > V2: add ODP sanity check and remove goto > --- > migration/rdma.c | 73 ++ > migration/trace-events | 1 + > 2 files changed, 54 insertions(+), 20 deletions(-) > > diff --git a/migration/rdma.c b/migration/rdma.c > index 5c2d113aa94..eb80431aae2 100644 > --- a/migration/rdma.c > +++ b/migration/rdma.c > @@ -1117,19 +1117,47 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma) > return 0; > } > > +/* Check whether On-Demand Paging is supported by RDAM device */ > +static bool rdma_support_odp(struct ibv_context *dev) > +{ > +struct ibv_device_attr_ex attr = {0}; > +int ret = ibv_query_device_ex(dev, NULL, &attr); > +if (ret) { > +return false; > +} > + > +if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) { > +return true; > +} > + > +return false; > +} > + > static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) > { > int i; > RDMALocalBlocks *local = &rdma->local_ram_blocks; > > for (i = 0; i < local->nb_blocks; i++) { > +int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; > + > local->block[i].mr = > ibv_reg_mr(rdma->pd, > local->block[i].local_host_addr, > -local->block[i].length, > -IBV_ACCESS_LOCAL_WRITE | > -IBV_ACCESS_REMOTE_WRITE > +local->block[i].length, access > ); > + > +if (!local->block[i].mr && > +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { > +access |= IBV_ACCESS_ON_DEMAND; > +/* register ODP mr */ > +local->block[i].mr = > +ibv_reg_mr(rdma->pd, > + local->block[i].local_host_addr, > + local->block[i].length, access); > 
+trace_qemu_rdma_register_odp_mr(local->block[i].block_name); > +} > + > if (!local->block[i].mr) { > perror("Failed to register local dest ram block!"); > break; > @@ -1215,28 +1243,33 @@ static int > qemu_rdma_register_and_get_keys(RDMAContext *rdma, >*/ > if (!block->pmr[chunk]) { > uint64_t len = chunk_end - chunk_start; > +int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE > : > + 0; > > trace_qemu_rdma_register_and_get_keys(len, chunk_start); > > -block->pmr[chunk] = ibv_reg_mr(rdma->pd, > -chunk_start, len, > -(rkey ? (IBV_ACCESS_LOCAL_WRITE | > -IBV_ACCESS_REMOTE_WRITE) : 0)); > - > -if (!block->pmr[chunk]) { > -perror("Failed to register chunk!"); > -fprintf(stderr, "Chunk details: block: %d chunk index %d" > -" start %" PRIuPTR " end %" PRIuPTR > -" host %" PRIuPTR > -" local %" PRIuPTR " registrations: %d\n", > -block->index, chunk, (uintptr_t)chunk_start, > -(uintptr_t)chunk_end, host_addr, > -(uintptr_t)block->local_host_addr, > -rdma->total_registrations); > -return -1; > +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access); > +if (!block->pmr[chunk] && > +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { > +access |= IBV_ACCESS_ON_DEMAND; > +/* register ODP mr */ > +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, > access); > +trace_qemu_rdma_register_odp_mr(block->block_name); > } > -rdma->total_registrations++; > } > +if (!block->pmr[chunk]) { > +perror("Failed to register chunk!"); > +fprintf(stderr, "Chunk details: block: %d chunk index %d" > +" start %" PRIuPTR " end %" PRIuPTR > +" host %" PRIuPTR > +" local %" PRIuPTR " registrations: %d\n", > +block->index, chunk, (uintptr_t)chunk_start, > +(uintptr_t)chunk_end, host_addr, > +(uintptr_t)block->local_host_addr, > +rdma->total_registrations); > +return -1; > +} > +rdma->total_registrations++; > > if (lkey) { >
[PATCH v2 1/2] migration/rdma: Try to register On-Demand Paging memory region
Previously, registering an fsdax mem-backend-file failed with "Operation not supported". In this case, we can try to register it with On-Demand Paging[1] like what rpma_mr_reg() does on rpma[2]. [1]: https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x [2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3 CC: Marcel Apfelbaum Signed-off-by: Li Zhijian --- V2: add ODP sanity check and remove goto --- migration/rdma.c | 73 ++ migration/trace-events | 1 + 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 5c2d113aa94..eb80431aae2 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -1117,19 +1117,47 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma) return 0; } +/* Check whether On-Demand Paging is supported by RDMA device */ +static bool rdma_support_odp(struct ibv_context *dev) +{ +struct ibv_device_attr_ex attr = {0}; +int ret = ibv_query_device_ex(dev, NULL, &attr); +if (ret) { +return false; +} + +if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) { +return true; +} + +return false; +} + static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) { int i; RDMALocalBlocks *local = &rdma->local_ram_blocks; for (i = 0; i < local->nb_blocks; i++) { +int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; + local->block[i].mr = ibv_reg_mr(rdma->pd, local->block[i].local_host_addr, -local->block[i].length, -IBV_ACCESS_LOCAL_WRITE | -IBV_ACCESS_REMOTE_WRITE +local->block[i].length, access ); + +if (!local->block[i].mr && +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { +access |= IBV_ACCESS_ON_DEMAND; +/* register ODP mr */ +local->block[i].mr = +ibv_reg_mr(rdma->pd, + local->block[i].local_host_addr, + local->block[i].length, access); +trace_qemu_rdma_register_odp_mr(local->block[i].block_name); +} + if (!local->block[i].mr) { perror("Failed to register local dest ram block!"); break; @@ -1215,28 +1243,33 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma, */ if 
(!block->pmr[chunk]) { uint64_t len = chunk_end - chunk_start; +int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE : + 0; trace_qemu_rdma_register_and_get_keys(len, chunk_start); -block->pmr[chunk] = ibv_reg_mr(rdma->pd, -chunk_start, len, -(rkey ? (IBV_ACCESS_LOCAL_WRITE | -IBV_ACCESS_REMOTE_WRITE) : 0)); - -if (!block->pmr[chunk]) { -perror("Failed to register chunk!"); -fprintf(stderr, "Chunk details: block: %d chunk index %d" -" start %" PRIuPTR " end %" PRIuPTR -" host %" PRIuPTR -" local %" PRIuPTR " registrations: %d\n", -block->index, chunk, (uintptr_t)chunk_start, -(uintptr_t)chunk_end, host_addr, -(uintptr_t)block->local_host_addr, -rdma->total_registrations); -return -1; +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access); +if (!block->pmr[chunk] && +errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { +access |= IBV_ACCESS_ON_DEMAND; +/* register ODP mr */ +block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access); +trace_qemu_rdma_register_odp_mr(block->block_name); } -rdma->total_registrations++; } +if (!block->pmr[chunk]) { +perror("Failed to register chunk!"); +fprintf(stderr, "Chunk details: block: %d chunk index %d" +" start %" PRIuPTR " end %" PRIuPTR +" host %" PRIuPTR +" local %" PRIuPTR " registrations: %d\n", +block->index, chunk, (uintptr_t)chunk_start, +(uintptr_t)chunk_end, host_addr, +(uintptr_t)block->local_host_addr, +rdma->total_registrations); +return -1; +} +rdma->total_registrations++; if (lkey) { *lkey = block->pmr[chunk]->lkey; diff --git a/migration/trace-events b/migration/trace-events index a1c0f034ab8..5f6aa580def 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -212,6 +212,7 @@ qemu_rdma_poll_write(const char *compstr, int64_t comp, int left, uint64_t block qemu_rdma_poll_other(const char