serialize_against_pte_lookup() performs IPIs to all CPUs in mm_cpumask.
Take this opportunity to try to trim the CPU out of mm_cpumask. This can
reduce the cost of future serialize_against_pte_lookup() and/or the
cost of future TLB flushes.

Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 arch/powerpc/mm/book3s64/pgtable.c   | 13 ++++++++++---
 arch/powerpc/mm/book3s64/radix_tlb.c | 20 +++++++++++++-------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index e18ae50a275c..ec23faf102b2 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -79,10 +79,17 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
        return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
 }
 
-static void do_nothing(void *unused)
-{
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
 
+static void do_serialize(void *arg)
+{
+       /* We've taken the IPI, so try to trim the mask while here */
+       if (radix_enabled()) {
+               struct mm_struct *mm = arg;
+               exit_lazy_flush_tlb(mm, false);
+       }
 }
+
 /*
  * Serialize against find_current_mm_pte which does lock-less
  * lookup in page tables with local interrupts disabled. For huge pages
@@ -96,7 +103,7 @@ static void do_nothing(void *unused)
 void serialize_against_pte_lookup(struct mm_struct *mm)
 {
        smp_mb();
-       smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1);
+       smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1);
 }
 
 /*
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 4dca7cbf07e9..d04c80d6f52c 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -630,7 +630,11 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
        return false;
 }
 
-static void exit_lazy_flush_tlb(struct mm_struct *mm)
+/*
+ * If always_flush is true, then flush even if this CPU can't be removed
+ * from mm_cpumask.
+ */
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
 {
        unsigned long pid = mm->context.id;
        int cpu = smp_processor_id();
@@ -643,7 +647,7 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm)
         * done with interrupts off.
         */
        if (current->mm == mm)
-               goto out_flush;
+               goto out;
 
        if (current->active_mm == mm) {
                WARN_ON_ONCE(current->mm != NULL);
@@ -665,17 +669,19 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm)
        if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
                atomic_dec(&mm->context.active_cpus);
                cpumask_clear_cpu(cpu, mm_cpumask(mm));
+               always_flush = true;
        }
 
-out_flush:
-       _tlbiel_pid(pid, RIC_FLUSH_ALL);
+out:
+       if (always_flush)
+               _tlbiel_pid(pid, RIC_FLUSH_ALL);
 }
 
 #ifdef CONFIG_SMP
 static void do_exit_flush_lazy_tlb(void *arg)
 {
        struct mm_struct *mm = arg;
-       exit_lazy_flush_tlb(mm);
+       exit_lazy_flush_tlb(mm, true);
 }
 
 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
@@ -737,7 +743,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
                         * to trim.
                         */
                        if (tick_and_test_trim_clock()) {
-                               exit_lazy_flush_tlb(mm);
+                               exit_lazy_flush_tlb(mm, true);
                                return FLUSH_TYPE_NONE;
                        }
                }
@@ -783,7 +789,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
                if (current->mm == mm)
                        return FLUSH_TYPE_LOCAL;
                if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
-                       exit_lazy_flush_tlb(mm);
+                       exit_lazy_flush_tlb(mm, true);
                return FLUSH_TYPE_NONE;
        }
 
-- 
2.23.0

Reply via email to