On Wednesday, 01 October 2014 at 18:18 +0300, Yishai Hadas wrote:
> Add the functionality required to invalidate a given peer memory
> region identified by a core context.
> 
> Each umem that was built over peer memory and supports invalidation is
> assigned an invalidation context holding the data required to manage
> it. Once the peer calls the invalidation callback, the following
> actions are taken:
> 
> 1) Take the lock on the peer client to synchronize with any in-flight
> dereg_mr on that memory.
> 2) With the lock held, look up the ticket ID to find the matching core
> context.
> 3) If a match is found, call the umem invalidation function; otherwise,
> return.
> 
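For readers following the flow above, the peer-side trigger would look
roughly like the sketch below. This is only an illustration under
assumptions: struct my_region and my_region_revoked are hypothetical
names standing in for whatever state the peer client saved at
registration and get_pages time.

	/* Hypothetical peer-client state (sketch, names are mine). */
	struct my_region {
		/* invalidate callback handed to the peer by the IB core */
		int (*invalidate_callback)(void *reg_handle, void *core_context);
		void *reg_handle;	/* from peer client registration */
		void *core_context;	/* ticket the IB core passed to get_pages() */
	};

	/* Called by the peer when it revokes the underlying memory. */
	static void my_region_revoked(struct my_region *region)
	{
		/* Blocks until the pages will not be accessed any more. */
		region->invalidate_callback(region->reg_handle,
					    region->core_context);
	}
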
> Some notes:
> 1) As the peer invalidate callback is defined to be blocking, it must
> return only once the pages are guaranteed not to be accessed any more.
> For that reason, ib_invalidate_peer_memory waits for a completion event
> when another in-flight call is in progress as part of dereg_mr.
> 
> 2) The peer memory API assumes that a lock might be taken by a peer
> client to protect its memory operations. Specifically, its invalidate
> callback might be called under that lock, which could lead to an AB/BA
> deadlock if the IB core called the get/put pages APIs with the IB core
> peer's lock held. For that reason,
> ib_umem_activate_invalidation_notifier takes that lock and then checks
> for an in-flight invalidation state before activating the notifier.
> 
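To make the ordering contract of note 2 concrete, a consumer (a
low-level driver's reg_mr path) would use the API roughly as sketched
below. my_mr, my_invalidate and err_unwind are made-up driver names,
and the exact ib_umem_get() argument list is assumed from earlier
patches in this series.

	struct ib_umem *umem;
	int err;

	umem = ib_umem_get(context, start, length, access, 0,
			   IB_PEER_MEM_ALLOW | IB_PEER_MEM_INVAL_SUPP);
	if (IS_ERR(umem))
		return ERR_CAST(umem);

	/* ... build the MR (my_mr) from umem->sg_head ... */

	/*
	 * Arm the notifier only once the MR can be torn down safely, and
	 * without holding any driver lock that my_invalidate may take.
	 * -EINVAL means the peer invalidated the pages in the meantime.
	 */
	err = ib_umem_activate_invalidation_notifier(umem, my_invalidate, my_mr);
	if (err)
		goto err_unwind;
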
> 3) Once a peer client admits as part of its registration that it may
> require invalidation support, it can't be an owner of a memory range
> which doesn't support it.
> 
> Signed-off-by: Yishai Hadas <[email protected]>
> Signed-off-by: Shachar Raindel <[email protected]>
> ---
>  drivers/infiniband/core/peer_mem.c |   86 +++++++++++++++++++++++++++++++++---
>  drivers/infiniband/core/umem.c     |   51 ++++++++++++++++++---
>  include/rdma/ib_peer_mem.h         |    4 +-
>  include/rdma/ib_umem.h             |   17 +++++++
>  4 files changed, 143 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/infiniband/core/peer_mem.c b/drivers/infiniband/core/peer_mem.c
> index ad10672..d6bd192 100644
> --- a/drivers/infiniband/core/peer_mem.c
> +++ b/drivers/infiniband/core/peer_mem.c
> @@ -38,10 +38,57 @@ static DEFINE_MUTEX(peer_memory_mutex);
>  static LIST_HEAD(peer_memory_list);
>  static int num_registered_peers;
>  
> -static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
> +/* Caller should be holding the peer client lock, ib_peer_client->lock */
> +static struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client,
> +                                               unsigned long key)
> +{
> +     struct core_ticket *core_ticket;
> +
> +     list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list,
> +                         ticket_list) {
> +             if (core_ticket->key == key)
> +                     return core_ticket;
> +     }
>  
> +     return NULL;
> +}
> +

You now have two functions that look up a key in the ticket list:
see peer_ticket_exists().

> +static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
>  {
> -     return -ENOSYS;
> +     struct ib_peer_memory_client *ib_peer_client =
> +             (struct ib_peer_memory_client *)reg_handle;
> +     struct invalidation_ctx *invalidation_ctx;
> +     struct core_ticket *core_ticket;
> +     int need_unlock = 1;
> +
> +     mutex_lock(&ib_peer_client->lock);
> +     core_ticket = ib_peer_search_context(ib_peer_client,
> +                                          (unsigned long)core_context);
> +     if (!core_ticket)
> +             goto out;
> +
> +     invalidation_ctx = (struct invalidation_ctx *)core_ticket->context;
> +     /* If context is not ready yet, mark it to be invalidated */
> +     if (!invalidation_ctx->func) {
> +             invalidation_ctx->peer_invalidated = 1;
> +             goto out;
> +     }
> +     invalidation_ctx->func(invalidation_ctx->cookie,
> +                                     invalidation_ctx->umem, 0, 0);
> +     if (invalidation_ctx->inflight_invalidation) {
> +             /* init the completion to wait on before letting other threads run */
> +             init_completion(&invalidation_ctx->comp);
> +             mutex_unlock(&ib_peer_client->lock);
> +             need_unlock = 0;
> +             wait_for_completion(&invalidation_ctx->comp);
> +     }
> +
> +     kfree(invalidation_ctx);
> +out:
> +     if (need_unlock)
> +             mutex_unlock(&ib_peer_client->lock);
> +
> +     return 0;
>  }
>  
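For context, func above is the driver-supplied umem_invalidate_func_t
armed via ib_umem_activate_invalidation_notifier(). A minimal sketch of
such a callback, under assumptions (my_mr and my_fence_mr are
hypothetical driver names):

	static void my_invalidate(void *cookie, struct ib_umem *umem,
				  unsigned long addr, size_t size)
	{
		struct my_mr *mr = cookie;	/* hypothetical driver MR */

		/*
		 * Fence all HW access to the pages before returning: the
		 * callback chain is defined to be blocking, and once it
		 * returns the peer may reclaim the memory.
		 */
		my_fence_mr(mr);
	}
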
>  static int peer_ticket_exists(struct ib_peer_memory_client *ib_peer_client,
> @@ -168,11 +215,30 @@ int ib_peer_create_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, s
>  void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem,
>                                     struct invalidation_ctx *invalidation_ctx)
>  {
> -     mutex_lock(&ib_peer_mem->lock);
> -     ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> -     mutex_unlock(&ib_peer_mem->lock);
> +     int peer_callback;
> +     int inflight_invalidation;
>  
> -     kfree(invalidation_ctx);
> +     /* If we are under the peer callback, the lock was already taken. */
> +     if (!invalidation_ctx->peer_callback)
> +             mutex_lock(&ib_peer_mem->lock);
> +     ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> +     /* Make sure to check the inflight flag after taking the lock and removing
> +      * from the tree. From that point on, use local variables for peer_callback
> +      * and inflight_invalidation: after the complete(), invalidation_ctx can't
> +      * be accessed any more as it may be freed by the callback.
> +      */
> +     peer_callback = invalidation_ctx->peer_callback;
> +     inflight_invalidation = invalidation_ctx->inflight_invalidation;
> +     if (inflight_invalidation)
> +             complete(&invalidation_ctx->comp);
> +
> +     /* Under the peer callback, the lock is handled externally */
> +     if (!peer_callback)
> +             mutex_unlock(&ib_peer_mem->lock);
> +
> +     /* If under the callback context or a callback is pending, let it free the invalidation context */
> +     if (!peer_callback && !inflight_invalidation)
> +             kfree(invalidation_ctx);
>  }
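
To summarize who ends up freeing the invalidation_ctx, as far as I can
tell from the two paths above:

  peer_callback  inflight_invalidation  freed by
  0              0                      ib_peer_destroy_invalidation_ctx()
  0              1                      ib_invalidate_peer_memory(), after comp
  1              any                    ib_invalidate_peer_memory()
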
>  static int ib_memory_peer_check_mandatory(const struct peer_memory_client
>                                                    *peer_client)
> @@ -261,13 +327,19 @@ void ib_unregister_peer_memory_client(void *reg_handle)
>  EXPORT_SYMBOL(ib_unregister_peer_memory_client);
>  
>  struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> -                                              size_t size, void **peer_client_context)
> +                                              size_t size, unsigned long peer_mem_flags,
> +                                              void **peer_client_context)
>  {
>       struct ib_peer_memory_client *ib_peer_client;
>       int ret;
>  
>       mutex_lock(&peer_memory_mutex);
>       list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) {
> +             /* If the peer requires invalidation, it can't own memory which doesn't support it */
> +             if (ib_peer_client->invalidation_required &&
> +                 (!(peer_mem_flags & IB_PEER_MEM_INVAL_SUPP)))
> +                     continue;
> +
>               ret = ib_peer_client->peer_mem->acquire(addr, size,
>                                                  context->peer_mem_private_data,
>                                                  context->peer_mem_name,
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index 0de9916..51f32a1 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -44,12 +44,19 @@
>  
>  static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
>                                    struct ib_umem *umem, unsigned long addr,
> -                                  int dmasync)
> +                                  int dmasync, unsigned long peer_mem_flags)
>  {
>       int ret;
>       const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> +     struct invalidation_ctx *invalidation_ctx = NULL;
>  
>       umem->ib_peer_mem = ib_peer_mem;
> +     if (peer_mem_flags & IB_PEER_MEM_INVAL_SUPP) {
> +             ret = ib_peer_create_invalidation_ctx(ib_peer_mem, umem, &invalidation_ctx);
> +             if (ret)
> +                     goto end;
> +     }
> +
>       /*
>        * We always request write permissions to the pages, to force breaking of any CoW
>        * during the registration of the MR. For read-only MRs we use the "force" flag to
> @@ -60,7 +67,9 @@ static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
>                                 1, !umem->writable,
>                                 &umem->sg_head,
>                                 umem->peer_mem_client_context,
> -                               NULL);
> +                               invalidation_ctx ?
> +                               (void *)invalidation_ctx->context_ticket : NULL);
> +

NULL may be a valid "ticket" once converted to unsigned long and looked
up in the ticket list.

>       if (ret)
>               goto out;
>  
> @@ -84,6 +93,9 @@ put_pages:
>       peer_mem->put_pages(umem->peer_mem_client_context,
>                                       &umem->sg_head);
>  out:
> +     if (invalidation_ctx)
> +             ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
> +end:
>       ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
>       kfree(umem);
>       return ERR_PTR(ret);
> @@ -91,15 +103,19 @@ out:
>  
>  static void peer_umem_release(struct ib_umem *umem)
>  {
> -     const struct peer_memory_client *peer_mem =
> -                             umem->ib_peer_mem->peer_mem;
> +     struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem;
> +     const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> +     struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> +
> +     if (invalidation_ctx)
> +             ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
>  
>       peer_mem->dma_unmap(&umem->sg_head,
>                           umem->peer_mem_client_context,
>                           umem->context->device->dma_device);
>       peer_mem->put_pages(&umem->sg_head,
>                           umem->peer_mem_client_context);
> -     ib_put_peer_client(umem->ib_peer_mem, umem->peer_mem_client_context);
> +     ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
>       kfree(umem);
>  }
>  
> @@ -127,6 +143,27 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
>  
>  }
>  
> +int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> +                                        umem_invalidate_func_t func,
> +                                        void *cookie)
> +{
> +     struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> +     int ret = 0;
> +
> +     mutex_lock(&umem->ib_peer_mem->lock);
> +     if (invalidation_ctx->peer_invalidated) {
> +             pr_err("ib_umem_activate_invalidation_notifier: pages were invalidated by peer\n");
> +             ret = -EINVAL;
> +             goto end;
> +     }
> +     invalidation_ctx->func = func;
> +     invalidation_ctx->cookie = cookie;
> +     /* from that point any pending invalidations can be called */
> +end:
> +     mutex_unlock(&umem->ib_peer_mem->lock);
> +     return ret;
> +}
> +EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
>  /**
>   * ib_umem_get - Pin and DMA map userspace memory.
>   * @context: userspace context to pin memory for
> @@ -179,11 +216,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
>       if (peer_mem_flags & IB_PEER_MEM_ALLOW) {
>               struct ib_peer_memory_client *peer_mem_client;
>  
> -             peer_mem_client =  ib_get_peer_client(context, addr, size,
> +             peer_mem_client =  ib_get_peer_client(context, addr, size, peer_mem_flags,
> +                                                   &umem->peer_mem_client_context);
>               if (peer_mem_client)
>                       return peer_umem_get(peer_mem_client, umem, addr,
> -                                     dmasync);
> +                                     dmasync, peer_mem_flags);
>       }
>  
>       /* We assume the memory is from hugetlb until proved otherwise */
> diff --git a/include/rdma/ib_peer_mem.h b/include/rdma/ib_peer_mem.h
> index d3fbb50..8f67aaf 100644
> --- a/include/rdma/ib_peer_mem.h
> +++ b/include/rdma/ib_peer_mem.h
> @@ -22,6 +22,7 @@ struct ib_peer_memory_client {
>  
>  enum ib_peer_mem_flags {
>       IB_PEER_MEM_ALLOW       = 1,
> +     IB_PEER_MEM_INVAL_SUPP = (1<<1),
>  };
>  
>  struct core_ticket {
> @@ -31,7 +32,8 @@ struct core_ticket {
>  };
>  
>  struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> -                                              size_t size, void **peer_client_context);
> +                                              size_t size, unsigned long peer_mem_flags,
> +                                              void **peer_client_context);
>  
>  void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
>                       void *peer_client_context);
> diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> index 4b8a042..83d6059 100644
> --- a/include/rdma/ib_umem.h
> +++ b/include/rdma/ib_umem.h
> @@ -39,10 +39,21 @@
>  #include <rdma/ib_peer_mem.h>
>  
>  struct ib_ucontext;
> +struct ib_umem;
> +
> +typedef void (*umem_invalidate_func_t)(void *invalidation_cookie,
> +                                         struct ib_umem *umem,
> +                                         unsigned long addr, size_t size);
>  
>  struct invalidation_ctx {
>       struct ib_umem *umem;
>       unsigned long context_ticket;
> +     umem_invalidate_func_t func;
> +     void *cookie;
> +     int peer_callback;
> +     int inflight_invalidation;
> +     int peer_invalidated;
> +     struct completion comp;
>  };
>  
>  struct ib_umem {
> @@ -73,6 +84,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
>                              unsigned long peer_mem_flags);
>  void ib_umem_release(struct ib_umem *umem);
>  int ib_umem_page_count(struct ib_umem *umem);
> +int  ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> +                                         umem_invalidate_func_t func,
> +                                         void *cookie);
>  
>  #else /* CONFIG_INFINIBAND_USER_MEM */
>  
> @@ -87,6 +101,9 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
>  static inline void ib_umem_release(struct ib_umem *umem) { }
>  static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
>  
> +static inline int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> +                                                      umem_invalidate_func_t func,
> +                                                      void *cookie) { return 0; }
>  #endif /* CONFIG_INFINIBAND_USER_MEM */
>  
>  #endif /* IB_UMEM_H */

