On Mon, 2017-08-28 at 10:47 +0200, Frederic Barrat wrote: > > > Signed-off-by: Frederic Barrat <fbar...@linux.vnet.ibm.com> > diff --git a/arch/powerpc/include/asm/mmu_context.h > b/arch/powerpc/include/asm/mmu_context.h > index 309592589e30..6447c0df7ec4 100644 > --- a/arch/powerpc/include/asm/mmu_context.h > +++ b/arch/powerpc/include/asm/mmu_context.h > @@ -77,6 +77,41 @@ extern void switch_cop(struct mm_struct *next); > extern int use_cop(unsigned long acop, struct mm_struct *mm); > extern void drop_cop(unsigned long acop, struct mm_struct *mm); > > +#ifdef CONFIG_PPC_BOOK3S_64 > +static inline void inc_mm_active_cpus(struct mm_struct *mm) > +{ > + atomic_inc(&mm->context.active_cpus); > +} > + > +static inline void dec_mm_active_cpus(struct mm_struct *mm) > +{ > + atomic_dec(&mm->context.active_cpus); > +} > + > +static inline void mm_context_add_copro(struct mm_struct *mm) > +{ > + inc_mm_active_cpus(mm); > +} > + > +static inline void mm_context_remove_copro(struct mm_struct *mm) > +{ > + /* > + * Need to broadcast a global flush of the full mm before > + * decrementing active_cpus count, as the next TLBI may be > + * local and the nMMU and/or PSL need to be cleaned up. > + * Should be rare enough so that it's acceptable. > + */ > + flush_tlb_mm(mm); > + dec_mm_active_cpus(mm); > +}
You probably need to kill the pwc (page walk cache) too. With my recent optimizations, flush_tlb_mm won't do that anymore. You need a bigger hammer (I don't have the code at hand right now to tell you exactly what :-) Basically, something that does a RIC_FLUSH_ALL. > +#else > +static inline void inc_mm_active_cpus(struct mm_struct *mm) { } > +static inline void dec_mm_active_cpus(struct mm_struct *mm) { } > +static inline void mm_context_add_copro(struct mm_struct *mm) { } > +static inline void mm_context_remove_copro(struct mm_struct *mm) { } > +#endif > + > + > extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct > *next, > struct task_struct *tsk); > > diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c > index 0f613bc63c50..d60a62bf4fc7 100644 > --- a/arch/powerpc/mm/mmu_context.c > +++ b/arch/powerpc/mm/mmu_context.c > @@ -34,15 +34,6 @@ static inline void switch_mm_pgdir(struct task_struct *tsk, > struct mm_struct *mm) { } > #endif > > -#ifdef CONFIG_PPC_BOOK3S_64 > -static inline void inc_mm_active_cpus(struct mm_struct *mm) > -{ > - atomic_inc(&mm->context.active_cpus); > -} > -#else > -static inline void inc_mm_active_cpus(struct mm_struct *mm) { } > -#endif > - > void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, > struct task_struct *tsk) > { > diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c > index e0dfd1eadd70..33daf33e0e05 100644 > --- a/drivers/misc/cxl/api.c > +++ b/drivers/misc/cxl/api.c > @@ -15,6 +15,7 @@ > #include <linux/module.h> > #include <linux/mount.h> > #include <linux/sched/mm.h> > +#include <linux/mmu_context.h> > > #include "cxl.h" > > @@ -332,8 +333,11 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, > cxl_context_mm_count_get(ctx); > > /* decrement the use count */ > - if (ctx->mm) > + if (ctx->mm) { > mmput(ctx->mm); > + /* make TLBIs for this context global */ > + mm_context_add_copro(ctx->mm); > + } > } > > /* > @@ -342,13 +346,25 @@ int cxl_start_context(struct 
cxl_context *ctx, u64 wed, > */ > cxl_ctx_get(); > > + /* > + * Barrier is needed to make sure all TLBIs are global before > + * we attach and the context starts being used by the adapter. > + * > + * Needed after mm_context_add_copro() for radix and > + * cxl_ctx_get() for hash/p8 > + */ > + smp_mb(); > + > if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { > put_pid(ctx->pid); > ctx->pid = NULL; > cxl_adapter_context_put(ctx->afu->adapter); > cxl_ctx_put(); > - if (task) > + if (task) { > cxl_context_mm_count_put(ctx); > + if (ctx->mm) > + mm_context_remove_copro(ctx->mm); > + } > goto out; > } > > diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c > index 8c32040b9c09..12a41b2753f0 100644 > --- a/drivers/misc/cxl/context.c > +++ b/drivers/misc/cxl/context.c > @@ -18,6 +18,7 @@ > #include <linux/slab.h> > #include <linux/idr.h> > #include <linux/sched/mm.h> > +#include <linux/mmu_context.h> > #include <asm/cputable.h> > #include <asm/current.h> > #include <asm/copro.h> > @@ -267,6 +268,8 @@ int __detach_context(struct cxl_context *ctx) > > /* Decrease the mm count on the context */ > cxl_context_mm_count_put(ctx); > + if (ctx->mm) > + mm_context_remove_copro(ctx->mm); > ctx->mm = NULL; > > return 0; > diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c > index b76a491a485d..411e83cbbd82 100644 > --- a/drivers/misc/cxl/file.c > +++ b/drivers/misc/cxl/file.c > @@ -19,6 +19,7 @@ > #include <linux/mm.h> > #include <linux/slab.h> > #include <linux/sched/mm.h> > +#include <linux/mmu_context.h> > #include <asm/cputable.h> > #include <asm/current.h> > #include <asm/copro.h> > @@ -220,9 +221,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, > /* ensure this mm_struct can't be freed */ > cxl_context_mm_count_get(ctx); > > - /* decrement the use count */ > - if (ctx->mm) > + if (ctx->mm) { > + /* decrement the use count */ > mmput(ctx->mm); > + /* make TLBIs for this context global */ > + mm_context_add_copro(ctx->mm); > + 
} > > /* > * Increment driver use count. Enables global TLBIs for hash > @@ -230,6 +234,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, > */ > cxl_ctx_get(); > > + /* > + * Barrier is needed to make sure all TLBIs are global before > + * we attach and the context starts being used by the adapter. > + * > + * Needed after mm_context_add_copro() for radix and > + * cxl_ctx_get() for hash/p8 > + */ > + smp_mb(); > + > trace_cxl_attach(ctx, work.work_element_descriptor, > work.num_interrupts, amr); > > if ((rc = cxl_ops->attach_process(ctx, false, > work.work_element_descriptor, > @@ -240,6 +253,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, > ctx->pid = NULL; > cxl_ctx_put(); > cxl_context_mm_count_put(ctx); > + if (ctx->mm) > + mm_context_remove_copro(ctx->mm); > goto out; > } >