On Fri, Feb 01, 2019 at 10:22:40AM +0100, Peter Zijlstra wrote:
> On Thu, Jan 31, 2019 at 12:27:54PM -0800, [email protected] wrote:
> > diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> > index 374a197..229a73b 100644
> > --- a/arch/x86/events/core.c
> > +++ b/arch/x86/events/core.c
> > @@ -2578,3 +2578,34 @@ void perf_get_x86_pmu_capability(struct 
> > x86_pmu_capability *cap)
> >     cap->events_mask_len    = x86_pmu.events_mask_len;
> >  }
> >  EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
> > +
> > +u64 perf_get_page_size(u64 virt)
> > +{
> > +   unsigned long flags;
> > +   unsigned int level;
> > +   pte_t *pte;
> > +
> > +   if (!virt)
> > +           return 0;
> > +
> > +   /*
> > +    * Interrupts are disabled, so it prevents any tear down
> > +    * of the page tables.
> > +    * See the comment near struct mmu_table_batch.
> > +    */
> > +   local_irq_save(flags);
> > +   if (virt >= TASK_SIZE)
> > +           pte = lookup_address(virt, &level);
> > +   else {
> > +           if (current->mm) {
> > +                   pte = lookup_address_in_pgd(pgd_offset(current->mm, 
> > virt),
> > +                                               virt, &level);
> > +           } else
> > +                   level = PG_LEVEL_NUM;
> > +   }
> > +   local_irq_restore(flags);
> > +   if (level >= PG_LEVEL_NUM)
> > +           return 0;
> > +
> > +   return (u64)page_level_size(level);
> > +}
> 
> *sigh* there really isn't anything x86 specific there.
> 
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index 236bb8d..d233f45 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -6352,6 +6358,12 @@ static u64 perf_virt_to_phys(u64 virt)
> >     return phys_addr;
> >  }
> >  
> > +/* Return page size of given virtual address. IRQ-safe required. */
> > +u64 __weak perf_get_page_size(u64 virt)
> > +{
> > +   return 0;
> > +}
> > +
> >  static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
> >  
> >  struct perf_callchain_entry *
> 
> How about something like so instead?
> 
> (completely untested, will likely make your grandma eat puppies)
> 
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -6357,10 +6357,72 @@ static u64 perf_virt_to_phys(u64 virt)
>       return phys_addr;
>  }
>  
> -/* Return page size of given virtual address. IRQ-safe required. */
> -u64 __weak perf_get_page_size(u64 virt)
> +static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
>  {
> -     return 0;
> +     pgd_t *pgd;
> +     p4d_t *p4d;
> +     pud_t *pud;
> +     pmd_t *pmd;
> +
> +     pgd = pgd_offset(mm, addr);
> +     if (pgd_none(*pgd))
> +             return 0;
> +
> +     p4d = p4d_offset(pgd, addr);
> +     if (p4d_none(*p4d))
> +             return 0;
> +
> +     if (p4d_large(*p4d));

We dont have 512GiB pages yet.

> +             return 1ULL << P4D_SHIFT;

                return P4D_SIZE;

And the same P?D_SIZE below.

> +
> +     if (!p4d_present(*p4d))
> +             return 0;

No need to check p4d_none() *and* p4d_present(). Just p4d_present() should
be enough. Large is still suppose to be present. The same for other levels.

> +
> +     pud = pud_offset(p4d, addr);
> +     if (pud_none(*pud))
> +             return 0;
> +
> +     if (pud_large(*pud))
> +             return 1ULL << PUD_SHIFT;
> +
> +     if (!pud_present(*pud))
> +             return 0;
> +
> +     pmd = pmd_offset(pud, addr);
> +     if (pmd_none(*pmd))
> +             return 0;
> +
> +     if (pmd_large(*pmd))
> +             return 1ULL << PMD_SHIFT;
> +
> +     if (!pmd_present(*pmd))
> +             return 0;
> +
> +     return 1ULL << PAGE_SHIFT;
> +}
> +
> +static u64 perf_get_page_size(unsigned long addr)
> +{
> +     struct mm_struct *mm;
> +     unsigned long flags;
> +     u64 ret;
> +
> +     /*
> +      * Software page-table walkers must disable IRQs, see asm-generic/tlb.h.
> +      */
> +     local_irq_save(flags);
> +     mm = current->mm;
> +     if (!mm) {
> +             /*
> +              * For kernel threads and the like, use init_mm so that
> +              * we can find kernel memory.
> +              */
> +             mm = &init_mm;
> +     }
> +     ret = __perf_get_page_size(mm, addr);
> +     local_irq_restore(flags);
> +
> +     return ret;
>  }
>  
>  static struct perf_callchain_entry __empty_callchain = { .nr = 0, };

-- 
 Kirill A. Shutemov

Reply via email to