Add infrastructure to manage a core context for a given umem; it is needed for the invalidation flow.
Core context is supplied to peer clients as some opaque data for a given memory pages represented by a umem. If the peer client needs to invalidate memory it provided through the peer memory callbacks, it should call the invalidation callback, supplying the relevant core context. IB core will use this context to invalidate the relevant memory. To prevent cases when there are inflight invalidation calls in parallel to releasing this memory (e.g. by dereg_mr) we must ensure that context is valid before accessing it, that's why couldn't use the core context pointer directly. For that reason we added a lookup table to map between a ticket id to a core context. Peer client will get/supply the ticket id, core will check whether exists before accessing its corresponding context. Signed-off-by: Yishai Hadas <[email protected]> Signed-off-by: Shachar Raindel <[email protected]> --- drivers/infiniband/core/peer_mem.c | 132 ++++++++++++++++++++++++++++++++++++ include/rdma/ib_peer_mem.h | 19 +++++ include/rdma/ib_umem.h | 6 ++ 3 files changed, 157 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/core/peer_mem.c b/drivers/infiniband/core/peer_mem.c index 3936e13..ad10672 100644 --- a/drivers/infiniband/core/peer_mem.c +++ b/drivers/infiniband/core/peer_mem.c @@ -44,6 +44,136 @@ static int ib_invalidate_peer_memory(void *reg_handle, void *core_context) return -ENOSYS; } +static int peer_ticket_exists(struct ib_peer_memory_client *ib_peer_client, + unsigned long ticket) +{ + struct core_ticket *core_ticket; + + list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list, + ticket_list) { + if (core_ticket->key == ticket) + return 1; + } + + return 0; +} + +static int peer_get_free_ticket(struct ib_peer_memory_client *ib_peer_client, + unsigned long *new_ticket) +{ + unsigned long candidate_ticket = ib_peer_client->last_ticket + 1; + static int max_retries = 1000; + int i; + + for (i = 0; i < max_retries; i++) { + if (peer_ticket_exists(ib_peer_client, 
candidate_ticket)) { + candidate_ticket++; + continue; + } + *new_ticket = candidate_ticket; + return 0; + } + + return -EINVAL; +} + +static int ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client, + void *context, + unsigned long *context_ticket) +{ + struct core_ticket *core_ticket = kzalloc(sizeof(*core_ticket), GFP_KERNEL); + int ret; + + if (!core_ticket) + return -ENOMEM; + + mutex_lock(&ib_peer_client->lock); + if (ib_peer_client->last_ticket < ib_peer_client->last_ticket + 1 && + !ib_peer_client->ticket_wrapped) { + core_ticket->key = ib_peer_client->last_ticket++; + } else { + /* special/rare case when wrap around occurred, not expected on 64 bit machines */ + unsigned long new_ticket; + + ib_peer_client->ticket_wrapped = 1; + ret = peer_get_free_ticket(ib_peer_client, &new_ticket); + if (ret) { + mutex_unlock(&ib_peer_client->lock); + kfree(core_ticket); + return ret; + } + ib_peer_client->last_ticket = new_ticket; + core_ticket->key = ib_peer_client->last_ticket; + } + core_ticket->context = context; + list_add_tail(&core_ticket->ticket_list, + &ib_peer_client->core_ticket_list); + *context_ticket = core_ticket->key; + mutex_unlock(&ib_peer_client->lock); + return 0; +} + +/* Caller should be holding the peer client lock, specifically, the caller should hold ib_peer_client->lock */ +static int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client, + unsigned long key) +{ + struct core_ticket *core_ticket; + + list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list, + ticket_list) { + if (core_ticket->key == key) { + list_del(&core_ticket->ticket_list); + kfree(core_ticket); + return 0; + } + } + + return 1; +} + +/** +** ib_peer_create_invalidation_ctx - creates invalidation context for a given umem +** @ib_peer_mem: peer client to be used +** @umem: umem struct belongs to that context +** @invalidation_ctx: output context +**/ +int ib_peer_create_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, struct 
ib_umem *umem, + struct invalidation_ctx **invalidation_ctx) +{ + int ret; + struct invalidation_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ret = ib_peer_insert_context(ib_peer_mem, ctx, + &ctx->context_ticket); + if (ret) { + kfree(ctx); + return ret; + } + + ctx->umem = umem; + umem->invalidation_ctx = ctx; + *invalidation_ctx = ctx; + return 0; +} + +/** + * ** ib_peer_destroy_invalidation_ctx - destroy a given invalidation context + * ** @ib_peer_mem: peer client to be used + * ** @invalidation_ctx: context to be invalidated + * **/ +void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, + struct invalidation_ctx *invalidation_ctx) +{ + mutex_lock(&ib_peer_mem->lock); + ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket); + mutex_unlock(&ib_peer_mem->lock); + + kfree(invalidation_ctx); +} static int ib_memory_peer_check_mandatory(const struct peer_memory_client *peer_client) { @@ -94,6 +224,8 @@ void *ib_register_peer_memory_client(const struct peer_memory_client *peer_clien if (!ib_peer_client) return NULL; + INIT_LIST_HEAD(&ib_peer_client->core_ticket_list); + mutex_init(&ib_peer_client->lock); init_completion(&ib_peer_client->unload_comp); kref_init(&ib_peer_client->ref); ib_peer_client->peer_mem = peer_client; diff --git a/include/rdma/ib_peer_mem.h b/include/rdma/ib_peer_mem.h index 98056c5..d3fbb50 100644 --- a/include/rdma/ib_peer_mem.h +++ b/include/rdma/ib_peer_mem.h @@ -4,6 +4,8 @@ #include <rdma/peer_mem.h> struct ib_ucontext; +struct ib_umem; +struct invalidation_ctx; struct ib_peer_memory_client { const struct peer_memory_client *peer_mem; @@ -11,16 +13,33 @@ struct ib_peer_memory_client { int invalidation_required; struct kref ref; struct completion unload_comp; + /* lock is used via the invalidation flow */ + struct mutex lock; + struct list_head core_ticket_list; + unsigned long last_ticket; + int ticket_wrapped; }; enum ib_peer_mem_flags { 
IB_PEER_MEM_ALLOW = 1, }; +struct core_ticket { + unsigned long key; + void *context; + struct list_head ticket_list; +}; + struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr, size_t size, void **peer_client_context); void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client, void *peer_client_context); +int ib_peer_create_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, struct ib_umem *umem, + struct invalidation_ctx **invalidation_ctx); + +void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, + struct invalidation_ctx *invalidation_ctx); + #endif diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index a22dde0..4b8a042 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -40,6 +40,11 @@ struct ib_ucontext; +struct invalidation_ctx { + struct ib_umem *umem; + unsigned long context_ticket; +}; + struct ib_umem { struct ib_ucontext *context; size_t length; @@ -56,6 +61,7 @@ struct ib_umem { int npages; /* peer memory that manages this umem */ struct ib_peer_memory_client *ib_peer_mem; + struct invalidation_ctx *invalidation_ctx; /* peer memory private context */ void *peer_mem_client_context; }; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
