GPU use-cases for mmu_interval_notifiers with HMM often involve starting a
GPU operation and then waiting for it to complete. These operations are
typically context preemption or TLB flushing.
With single-pass notifiers per GPU this doesn't scale in multi-GPU
scenarios. In those scenarios we'd want to first start preemption or TLB
flushing on all GPUs and, as a second pass, wait for the operations to
complete on all GPUs. One can do this on a per-driver basis by
multiplexing per-driver notifiers, but that would mean sharing the
notifier "user" lock across all GPUs, which doesn't scale well either, so
adding support for multiple passes in the core appears to be the right
choice.

Implement multi-pass capability in the mmu_interval_notifier. Use a
linked list for the additional passes to minimize the impact on use-cases
that don't need the multi-pass functionality.

Cc: Jason Gunthorpe <j...@ziepe.ca>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Simona Vetter <simona.vet...@ffwll.ch>
Cc: Dave Airlie <airl...@gmail.com>
Cc: <dri-devel@lists.freedesktop.org>
Cc: <linux...@kvack.org>
Cc: <linux-ker...@vger.kernel.org>
Signed-off-by: Thomas Hellström <thomas.hellst...@linux.intel.com>
---
A sketch of the intended driver-side usage follows the diff.

 include/linux/mmu_notifier.h | 30 ++++++++++++++++
 mm/mmu_notifier.c            | 67 +++++++++++++++++++++++++++++++-----
 2 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index d1094c2d5fb6..1107a8eafd8a 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -233,6 +233,32 @@ struct mmu_notifier {
 	unsigned int users;
 };
 
+/**
+ * struct mmu_interval_notifier_pass - mmu_interval_notifier multi-pass abstraction
+ * @link: List link for the notifier's pending pass list
+ *
+ * Allocate, typically using GFP_NOWAIT, in the interval notifier's first pass.
+ * If allocation fails (which is not unlikely under memory pressure), fall back
+ * to single-pass operation.
+ */
+struct mmu_interval_notifier_pass {
+	struct list_head link;
+	/**
+	 * @pass: Driver callback for an additional pass.
+	 * @additional_pass: Pointer to the mmu_interval_notifier_pass structure.
+	 * @range: The mmu_notifier_range.
+	 * @cur_seq: The current sequence set by the first pass.
+	 *
+	 * Return: Either a pointer to a valid mmu_interval_notifier_pass for
+	 * another pass to be called, or %NULL if processing is complete for this
+	 * notifier. There is no error reporting mechanism for additional passes.
+	 */
+	struct mmu_interval_notifier_pass *
+	(*pass)(struct mmu_interval_notifier_pass *additional_pass,
+		const struct mmu_notifier_range *range,
+		unsigned long cur_seq);
+};
+
 /**
  * struct mmu_interval_notifier_ops
  * @invalidate: Upon return the caller must stop using any SPTEs within this
@@ -243,6 +269,10 @@ struct mmu_interval_notifier_ops {
 	bool (*invalidate)(struct mmu_interval_notifier *interval_sub,
 			   const struct mmu_notifier_range *range,
 			   unsigned long cur_seq);
+	bool (*invalidate_multipass)(struct mmu_interval_notifier *interval_sub,
+				     const struct mmu_notifier_range *range,
+				     unsigned long cur_seq,
+				     struct mmu_interval_notifier_pass **pass);
 };
 
 struct mmu_interval_notifier {
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 8e0125dc0522..dd6af87db103 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -260,6 +260,22 @@ mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub)
 }
 EXPORT_SYMBOL_GPL(mmu_interval_read_begin);
 
+static void mn_itree_additional_passes(struct list_head *additional_passes,
+				       const struct mmu_notifier_range *range,
+				       unsigned long cur_seq)
+{
+	struct mmu_interval_notifier_pass *p, *next;
+
+	while (!list_empty(additional_passes)) {
+		list_for_each_entry_safe(p, next, additional_passes, link) {
+			list_del_init(&p->link);
+			p = p->pass(p, range, cur_seq);
+			if (p)
+				list_add_tail(&p->link, additional_passes);
+		}
+	}
+}
+
 static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
 			     struct mm_struct *mm)
 {
@@ -272,17 +288,32 @@ static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
 	};
 	struct mmu_interval_notifier *interval_sub;
 	unsigned long cur_seq;
+	LIST_HEAD(additional_passes);
 	bool ret;
 
 	for (interval_sub =
 		     mn_itree_inv_start_range(subscriptions, &range, &cur_seq);
 	     interval_sub;
 	     interval_sub = mn_itree_inv_next(interval_sub, &range)) {
-		ret = interval_sub->ops->invalidate(interval_sub, &range,
-						    cur_seq);
+		if (interval_sub->ops->invalidate_multipass) {
+			struct mmu_interval_notifier_pass *second = NULL;
+
+			ret = interval_sub->ops->invalidate_multipass(interval_sub,
+								      &range,
+								      cur_seq,
+								      &second);
+			if (ret && second)
+				list_add_tail(&second->link, &additional_passes);
+
+		} else {
+			ret = interval_sub->ops->invalidate(interval_sub,
+							    &range,
+							    cur_seq);
+		}
 		WARN_ON(!ret);
 	}
 
+	mn_itree_additional_passes(&additional_passes, &range, cur_seq);
 	mn_itree_inv_end(subscriptions);
 }
 
@@ -431,6 +462,8 @@ static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
 {
 	struct mmu_interval_notifier *interval_sub;
 	unsigned long cur_seq;
+	LIST_HEAD(additional_passes);
+	int err = 0;
 
 	for (interval_sub =
 		     mn_itree_inv_start_range(subscriptions, range, &cur_seq);
@@ -438,23 +471,39 @@ static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
 	     interval_sub;
 	     interval_sub = mn_itree_inv_next(interval_sub, range)) {
 		bool ret;
 
-		ret = interval_sub->ops->invalidate(interval_sub, range,
-						    cur_seq);
+		if (interval_sub->ops->invalidate_multipass) {
+			struct mmu_interval_notifier_pass *second = NULL;
+
+			ret = interval_sub->ops->invalidate_multipass(interval_sub,
+								      range,
+								      cur_seq,
+								      &second);
+			if (ret && second)
+				list_add_tail(&second->link, &additional_passes);
+
+		} else {
+			ret = interval_sub->ops->invalidate(interval_sub,
+							    range,
+							    cur_seq);
+		}
 		if (!ret) {
 			if (WARN_ON(mmu_notifier_range_blockable(range)))
 				continue;
-			goto out_would_block;
+			err = -EAGAIN;
+			break;
 		}
 	}
-	return 0;
-out_would_block:
+
+	mn_itree_additional_passes(&additional_passes, range, cur_seq);
+
 	/*
 	 * On -EAGAIN the non-blocking caller is not allowed to call
 	 * invalidate_range_end()
 	 */
-	mn_itree_inv_end(subscriptions);
-	return -EAGAIN;
+	if (err)
+		mn_itree_inv_end(subscriptions);
+
+	return err;
 }
 
 static int mn_hlist_invalidate_range_start(
-- 
2.50.1
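
For illustration, here is a sketch of how a driver might implement the new
invalidate_multipass() callback to get the start-then-wait behaviour
described in the commit message. It is not part of the patch: all my_gpu_*
identifiers are hypothetical, and details such as locking and handling of
non-blockable ranges are driver-specific. The first pass kicks off the TLB
flush and queues a second pass that waits for it; if the GFP_NOWAIT
allocation fails, it falls back to single-pass operation as the kernel-doc
recommends.

#include <linux/container_of.h>
#include <linux/mmu_notifier.h>
#include <linux/slab.h>

/* Hypothetical per-GPU state; only what the example needs. */
struct my_gpu {
	struct mmu_interval_notifier notifier;
	/* ... */
};

/* Hypothetical helpers: kick off and wait for a GPU TLB flush. */
void my_gpu_start_tlb_flush(struct my_gpu *gpu);
void my_gpu_wait_tlb_flush(struct my_gpu *gpu);

struct my_gpu_pass {
	struct mmu_interval_notifier_pass base;
	struct my_gpu *gpu;
};

/* Second pass: the flush was started earlier; now just wait for it. */
static struct mmu_interval_notifier_pass *
my_gpu_wait_pass(struct mmu_interval_notifier_pass *additional_pass,
		 const struct mmu_notifier_range *range,
		 unsigned long cur_seq)
{
	struct my_gpu_pass *p = container_of(additional_pass, typeof(*p), base);

	my_gpu_wait_tlb_flush(p->gpu);
	kfree(p);

	/* No further passes needed for this notifier. */
	return NULL;
}

static bool
my_gpu_invalidate_multipass(struct mmu_interval_notifier *interval_sub,
			    const struct mmu_notifier_range *range,
			    unsigned long cur_seq,
			    struct mmu_interval_notifier_pass **pass)
{
	struct my_gpu *gpu = container_of(interval_sub, struct my_gpu, notifier);
	struct my_gpu_pass *p;

	mmu_interval_set_seq(interval_sub, cur_seq);

	/* First pass: start the flush, but don't wait for it yet. */
	my_gpu_start_tlb_flush(gpu);

	/*
	 * GFP_NOWAIT as the kernel-doc suggests; on allocation failure,
	 * fall back to waiting synchronously (single-pass operation).
	 */
	p = kmalloc(sizeof(*p), GFP_NOWAIT);
	if (!p) {
		my_gpu_wait_tlb_flush(gpu);
		return true;
	}

	p->base.pass = my_gpu_wait_pass;
	p->gpu = gpu;
	*pass = &p->base;
	return true;
}

static const struct mmu_interval_notifier_ops my_gpu_notifier_ops = {
	.invalidate_multipass = my_gpu_invalidate_multipass,
};

With several GPUs subscribed to the same range, the core calls every first
pass before running any queued second pass, so the per-GPU flushes execute
in parallel and the waits are batched, rather than each GPU flushing and
waiting serially as with the single-pass invalidate() callback.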