> -----Original Message-----
> From: Yann Droneaud [mailto:[email protected]]
> Sent: Wednesday, October 01, 2014 7:25 PM
> To: Yishai Hadas
> Cc: [email protected]; [email protected]; Shachar Raindel
> Subject: Re: [PATCH for-next 5/9] IB/core: Invalidation support for peer
> memory
>
> Le mercredi 01 octobre 2014 à 18:18 +0300, Yishai Hadas a écrit :
> > Add the required functionality to invalidate a given peer memory
> > range represented by a core context.
> >
> > Each umem that was built over peer memory and supports invalidation
> > has an invalidation context assigned to it, with the data required to
> > manage it. Once the peer calls the invalidation callback, the
> > following actions are taken:
> >
> > 1) Take the lock on the peer client to synchronize with any inflight
> > dereg_mr on that memory.
> > 2) With the lock held, look up the ticket id to find the matching
> > core context.
> > 3) If a matching context is found, call the umem invalidation
> > function; otherwise, return.
> >
> > Some notes:
> > 1) Since the peer invalidate callback is defined as blocking, it must
> > return only once the pages are guaranteed not to be accessed any
> > more. For that reason, ib_invalidate_peer_memory waits for a
> > completion event when another inflight call is in progress as part of
> > dereg_mr.
> >
> > 2) The peer memory API assumes that a peer client may take a lock to
> > protect its memory operations. Specifically, its invalidate callback
> > might be called under that lock, which could lead to an AB/BA
> > deadlock if the IB core called the get/put pages APIs while holding
> > its own peer client lock. For that reason,
> > ib_umem_activate_invalidation_notifier takes the lock and checks for
> > an inflight invalidation state before activating the notifier (see
> > the usage sketch after these notes).
> >
> > 3) Once a peer client states as part of its registration that it may
> > require invalidation support, it cannot be the owner of a memory
> > range which doesn't support it.
> >
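For the ordering described in note 2, the expected driver-side flow would
look roughly like the sketch below. This is illustrative only: my_mr,
my_mr_quiesce() and my_reg_user_mr() are hypothetical driver-side names,
and the ib_umem_get() parameters follow the extension made earlier in this
series.

static void my_umem_invalidate(void *cookie, struct ib_umem *umem,
                               unsigned long addr, size_t size)
{
        struct my_mr *mr = cookie;

        /* Quiesce HW access; once we return, the pages must not be touched. */
        my_mr_quiesce(mr);
}

static int my_reg_user_mr(struct my_mr *mr, struct ib_ucontext *ctx,
                          unsigned long start, size_t length, int access)
{
        int ret;

        /* Pin the pages; peer memory with invalidation support is allowed. */
        mr->umem = ib_umem_get(ctx, start, length, access, 0,
                               IB_PEER_MEM_ALLOW | IB_PEER_MEM_INVAL_SUPP);
        if (IS_ERR(mr->umem))
                return PTR_ERR(mr->umem);

        /* ... program the HW translation tables for mr ... */

        /* Activate the notifier only once the MR is ready to be invalidated.
         * ib_umem_activate_invalidation_notifier() takes the peer client lock
         * and returns -EINVAL if the peer already invalidated the pages, in
         * which case the registration must be unwound.
         */
        ret = ib_umem_activate_invalidation_notifier(mr->umem,
                                                     my_umem_invalidate, mr);
        if (ret)
                ib_umem_release(mr->umem);
        return ret;
}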
> > Signed-off-by: Yishai Hadas <[email protected]>
> > Signed-off-by: Shachar Raindel <[email protected]>
> > ---
> > drivers/infiniband/core/peer_mem.c | 86 +++++++++++++++++++++++++++++++++---
> > drivers/infiniband/core/umem.c | 51 ++++++++++++++++++---
> > include/rdma/ib_peer_mem.h | 4 +-
> > include/rdma/ib_umem.h | 17 +++++++
> > 4 files changed, 143 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/infiniband/core/peer_mem.c b/drivers/infiniband/core/peer_mem.c
> > index ad10672..d6bd192 100644
> > --- a/drivers/infiniband/core/peer_mem.c
> > +++ b/drivers/infiniband/core/peer_mem.c
> > @@ -38,10 +38,57 @@ static DEFINE_MUTEX(peer_memory_mutex);
> > static LIST_HEAD(peer_memory_list);
> > static int num_registered_peers;
> >
> > -static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
> > +/* Caller should be holding the peer client lock, ib_peer_client->lock */
> > +static struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client,
> > + unsigned long key)
> > +{
> > + struct core_ticket *core_ticket;
> > +
> > + list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list,
> > + ticket_list) {
> > + if (core_ticket->key == key)
> > + return core_ticket;
> > + }
> >
> > + return NULL;
> > +}
> > +
>
> You now have two functions that look up a key in the ticket list:
> see peer_ticket_exists().
>
As discussed in the previous e-mail, peer_ticket_exists will be deleted.
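Something along these lines (a sketch only; the wrapper below just
illustrates the replacement pattern, it is not proposed code):

/* Duplicate helper removed; callers use the single lookup function. */
static int ticket_is_taken(struct ib_peer_memory_client *ib_peer_client,
                           unsigned long key)
{
        /* was: peer_ticket_exists(ib_peer_client, key) */
        return ib_peer_search_context(ib_peer_client, key) != NULL;
}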
> > +static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
> > {
> > - return -ENOSYS;
> > + struct ib_peer_memory_client *ib_peer_client =
> > + (struct ib_peer_memory_client *)reg_handle;
> > + struct invalidation_ctx *invalidation_ctx;
> > + struct core_ticket *core_ticket;
> > + int need_unlock = 1;
> > +
> > + mutex_lock(&ib_peer_client->lock);
> > + core_ticket = ib_peer_search_context(ib_peer_client,
> > + (unsigned long)core_context);
> > + if (!core_ticket)
> > + goto out;
> > +
> > + invalidation_ctx = (struct invalidation_ctx *)core_ticket->context;
> > + /* If context is not ready yet, mark it to be invalidated */
> > + if (!invalidation_ctx->func) {
> > + invalidation_ctx->peer_invalidated = 1;
> > + goto out;
> > + }
> > + invalidation_ctx->func(invalidation_ctx->cookie,
> > + invalidation_ctx->umem, 0, 0);
> > + if (invalidation_ctx->inflight_invalidation) {
> > + /* init the completion to wait on before letting other thread to run */
> > + init_completion(&invalidation_ctx->comp);
> > + mutex_unlock(&ib_peer_client->lock);
> > + need_unlock = 0;
> > + wait_for_completion(&invalidation_ctx->comp);
> > + }
> > +
> > + kfree(invalidation_ctx);
> > +out:
> > + if (need_unlock)
> > + mutex_unlock(&ib_peer_client->lock);
> > +
> > + return 0;
> > }
> >
> > static int peer_ticket_exists(struct ib_peer_memory_client *ib_peer_client,
> > @@ -168,11 +215,30 @@ int ib_peer_create_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, s
> > void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem,
> > struct invalidation_ctx *invalidation_ctx)
> > {
> > - mutex_lock(&ib_peer_mem->lock);
> > - ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> > - mutex_unlock(&ib_peer_mem->lock);
> > + int peer_callback;
> > + int inflight_invalidation;
> >
> > - kfree(invalidation_ctx);
> > + /* If we are under peer callback lock was already taken.*/
> > + if (!invalidation_ctx->peer_callback)
> > + mutex_lock(&ib_peer_mem->lock);
> > + ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> > + /* make sure to check inflight flag after took the lock and remove from tree.
> > + * in addition, from that point using local variables for peer_callback and
> > + * inflight_invalidation as after the complete invalidation_ctx can't be accessed
> > + * any more as it may be freed by the callback.
> > + */
> > + peer_callback = invalidation_ctx->peer_callback;
> > + inflight_invalidation = invalidation_ctx->inflight_invalidation;
> > + if (inflight_invalidation)
> > + complete(&invalidation_ctx->comp);
> > +
> > + /* On peer callback lock is handled externally */
> > + if (!peer_callback)
> > + mutex_unlock(&ib_peer_mem->lock);
> > +
> > + /* in case under callback context or callback is pending let it free the invalidation context */
> > + if (!peer_callback && !inflight_invalidation)
> > + kfree(invalidation_ctx);
> > }
> > static int ib_memory_peer_check_mandatory(const struct peer_memory_client
> > *peer_client)
> > @@ -261,13 +327,19 @@ void ib_unregister_peer_memory_client(void *reg_handle)
> > EXPORT_SYMBOL(ib_unregister_peer_memory_client);
> >
> > struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> > - size_t size, void **peer_client_context)
> > + size_t size, unsigned long peer_mem_flags,
> > + void **peer_client_context)
> > {
> > struct ib_peer_memory_client *ib_peer_client;
> > int ret;
> >
> > mutex_lock(&peer_memory_mutex);
> > list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) {
> > + /* In case peer requires invalidation it can't own memory which doesn't support it */
> > + if (ib_peer_client->invalidation_required &&
> > + (!(peer_mem_flags & IB_PEER_MEM_INVAL_SUPP)))
> > + continue;
> > +
> > ret = ib_peer_client->peer_mem->acquire(addr, size,
> > context->peer_mem_private_data,
> > context->peer_mem_name,
> > diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> > index 0de9916..51f32a1 100644
> > --- a/drivers/infiniband/core/umem.c
> > +++ b/drivers/infiniband/core/umem.c
> > @@ -44,12 +44,19 @@
> >
> > static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
> > struct ib_umem *umem, unsigned long addr,
> > - int dmasync)
> > + int dmasync, unsigned long peer_mem_flags)
> > {
> > int ret;
> > const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> > + struct invalidation_ctx *invalidation_ctx = NULL;
> >
> > umem->ib_peer_mem = ib_peer_mem;
> > + if (peer_mem_flags & IB_PEER_MEM_INVAL_SUPP) {
> > + ret = ib_peer_create_invalidation_ctx(ib_peer_mem, umem, &invalidation_ctx);
> > + if (ret)
> > + goto end;
> > + }
> > +
> > /*
> > * We always request write permissions to the pages, to force breaking of any CoW
> > * during the registration of the MR. For read-only MRs we use the "force" flag to
> > @@ -60,7 +67,9 @@ static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
> > 1, !umem->writable,
> > &umem->sg_head,
> > umem->peer_mem_client_context,
> > - NULL);
> > + invalidation_ctx ?
> > + (void *)invalidation_ctx->context_ticket : NULL);
> > +
>
> NULL may be a valid "ticket" once converted to unsigned long and looked
> up in the ticket list.
>
We will move to a u64 ticket value and make sure the first ticket value
handed out is 1. A ticket value of 0 will then indicate that there is no
invalidation support for this memory.
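Roughly the following allocation scheme (a sketch of the planned change;
the last_ticket field and helper name are approximate, not final code):

/* Tickets become u64, start at 1 and are never 0, so a context_ticket of 0
 * can mean "no invalidation support for this umem".
 * Caller holds ib_peer_client->lock.
 */
static u64 ib_peer_alloc_ticket(struct ib_peer_memory_client *ib_peer_client)
{
        u64 key;

        do {
                /* first value handed out is 1; skip 0 on wraparound */
                key = ++ib_peer_client->last_ticket;
        } while (!key || ib_peer_search_context(ib_peer_client, key));

        return key;
}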
> > if (ret)
> > goto out;
> >
> > @@ -84,6 +93,9 @@ put_pages:
> > peer_mem->put_pages(umem->peer_mem_client_context,
> > &umem->sg_head);
> > out:
> > + if (invalidation_ctx)
> > + ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
> > +end:
> > ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
> > kfree(umem);
> > return ERR_PTR(ret);
> > @@ -91,15 +103,19 @@ out:
> >
> > static void peer_umem_release(struct ib_umem *umem)
> > {
> > - const struct peer_memory_client *peer_mem =
> > - umem->ib_peer_mem->peer_mem;
> > + struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem;
> > + const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> > + struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> > +
> > + if (invalidation_ctx)
> > + ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
> >
> > peer_mem->dma_unmap(&umem->sg_head,
> > umem->peer_mem_client_context,
> > umem->context->device->dma_device);
> > peer_mem->put_pages(&umem->sg_head,
> > umem->peer_mem_client_context);
> > - ib_put_peer_client(umem->ib_peer_mem, umem->peer_mem_client_context);
> > + ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
> > kfree(umem);
> > }
> >
> > @@ -127,6 +143,27 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
> >
> > }
> >
> > +int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> > + umem_invalidate_func_t func,
> > + void *cookie)
> > +{
> > + struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> > + int ret = 0;
> > +
> > + mutex_lock(&umem->ib_peer_mem->lock);
> > + if (invalidation_ctx->peer_invalidated) {
> > + pr_err("ib_umem_activate_invalidation_notifier: pages were invalidated by peer\n");
> > + ret = -EINVAL;
> > + goto end;
> > + }
> > + invalidation_ctx->func = func;
> > + invalidation_ctx->cookie = cookie;
> > + /* from that point any pending invalidations can be called */
> > +end:
> > + mutex_unlock(&umem->ib_peer_mem->lock);
> > + return ret;
> > +}
> > +EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
> > /**
> > * ib_umem_get - Pin and DMA map userspace memory.
> > * @context: userspace context to pin memory for
> > @@ -179,11 +216,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
> > if (peer_mem_flags & IB_PEER_MEM_ALLOW) {
> > struct ib_peer_memory_client *peer_mem_client;
> >
> > - peer_mem_client = ib_get_peer_client(context, addr, size,
> > + peer_mem_client = ib_get_peer_client(context, addr, size, peer_mem_flags,
> > &umem->peer_mem_client_context);
> > if (peer_mem_client)
> > return peer_umem_get(peer_mem_client, umem, addr,
> > - dmasync);
> > + dmasync, peer_mem_flags);
> > }
> >
> > /* We assume the memory is from hugetlb until proved otherwise */
> > diff --git a/include/rdma/ib_peer_mem.h b/include/rdma/ib_peer_mem.h
> > index d3fbb50..8f67aaf 100644
> > --- a/include/rdma/ib_peer_mem.h
> > +++ b/include/rdma/ib_peer_mem.h
> > @@ -22,6 +22,7 @@ struct ib_peer_memory_client {
> >
> > enum ib_peer_mem_flags {
> > IB_PEER_MEM_ALLOW = 1,
> > + IB_PEER_MEM_INVAL_SUPP = (1<<1),
> > };
> >
> > struct core_ticket {
> > @@ -31,7 +32,8 @@ struct core_ticket {
> > };
> >
> > struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> > - size_t size, void **peer_client_context);
> > + size_t size, unsigned long peer_mem_flags,
> > + void **peer_client_context);
> >
> > void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
> > void *peer_client_context);
> > diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> > index 4b8a042..83d6059 100644
> > --- a/include/rdma/ib_umem.h
> > +++ b/include/rdma/ib_umem.h
> > @@ -39,10 +39,21 @@
> > #include <rdma/ib_peer_mem.h>
> >
> > struct ib_ucontext;
> > +struct ib_umem;
> > +
> > +typedef void (*umem_invalidate_func_t)(void *invalidation_cookie,
> > + struct ib_umem *umem,
> > + unsigned long addr, size_t size);
> >
> > struct invalidation_ctx {
> > struct ib_umem *umem;
> > unsigned long context_ticket;
> > + umem_invalidate_func_t func;
> > + void *cookie;
> > + int peer_callback;
> > + int inflight_invalidation;
> > + int peer_invalidated;
> > + struct completion comp;
> > };
> >
> > struct ib_umem {
> > @@ -73,6 +84,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
> > unsigned long peer_mem_flags);
> > void ib_umem_release(struct ib_umem *umem);
> > int ib_umem_page_count(struct ib_umem *umem);
> > +int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> > + umem_invalidate_func_t func,
> > + void *cookie);
> >
> > #else /* CONFIG_INFINIBAND_USER_MEM */
> >
> > @@ -87,6 +101,9 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
> > static inline void ib_umem_release(struct ib_umem *umem) { }
> > static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
> >
> > +static inline int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> > + umem_invalidate_func_t func,
> > + void *cookie) {return 0; }
> > #endif /* CONFIG_INFINIBAND_USER_MEM */
> >
> > #endif /* IB_UMEM_H */
>