This patch adds a set of new trace events to collect the reasons for
speculative page fault failures.
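
Six events are defined on a common class: spf_pte_lock, spf_vma_changed,
spf_vma_noanon, spf_vma_notsup, spf_vma_access and spf_pmd_changed. Each
event records the caller's ip, the VMA's boundaries and the faulting
address, printed as "ip:%lx vma:%lx-%lx address:%lx".

As a usage sketch (assuming the default tracefs mount point, which may
differ on your system), the whole set of events can be enabled with:

  # echo 1 > /sys/kernel/debug/tracing/events/pagefault/enable
  # cat /sys/kernel/debug/tracing/trace_pipe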

Signed-off-by: Laurent Dufour <lduf...@linux.vnet.ibm.com>
---
 include/trace/events/pagefault.h | 87 ++++++++++++++++++++++++++++++++++++++++
 mm/memory.c                      | 62 ++++++++++++++++++++++------
 2 files changed, 136 insertions(+), 13 deletions(-)
 create mode 100644 include/trace/events/pagefault.h

diff --git a/include/trace/events/pagefault.h b/include/trace/events/pagefault.h
new file mode 100644
index 000000000000..1d793f8c739b
--- /dev/null
+++ b/include/trace/events/pagefault.h
@@ -0,0 +1,87 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pagefault
+
+#if !defined(_TRACE_PAGEFAULT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGEFAULT_H
+
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+
+DECLARE_EVENT_CLASS(spf,
+
+       TP_PROTO(unsigned long caller,
+                struct vm_area_struct *vma, unsigned long address),
+
+       TP_ARGS(caller, vma, address),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, caller)
+               __field(unsigned long, vm_start)
+               __field(unsigned long, vm_end)
+               __field(unsigned long, address)
+       ),
+
+       TP_fast_assign(
+               __entry->caller         = caller;
+               __entry->vm_start       = vma->vm_start;
+               __entry->vm_end         = vma->vm_end;
+               __entry->address        = address;
+       ),
+
+       TP_printk("ip:%lx vma:%lx-%lx address:%lx",
+                 __entry->caller, __entry->vm_start, __entry->vm_end,
+                 __entry->address)
+);
+
+DEFINE_EVENT(spf, spf_pte_lock,
+
+       TP_PROTO(unsigned long caller,
+                struct vm_area_struct *vma, unsigned long address),
+
+       TP_ARGS(caller, vma, address)
+);
+
+DEFINE_EVENT(spf, spf_vma_changed,
+
+       TP_PROTO(unsigned long caller,
+                struct vm_area_struct *vma, unsigned long address),
+
+       TP_ARGS(caller, vma, address)
+);
+
+DEFINE_EVENT(spf, spf_vma_noanon,
+
+       TP_PROTO(unsigned long caller,
+                struct vm_area_struct *vma, unsigned long address),
+
+       TP_ARGS(caller, vma, address)
+);
+
+DEFINE_EVENT(spf, spf_vma_notsup,
+
+       TP_PROTO(unsigned long caller,
+                struct vm_area_struct *vma, unsigned long address),
+
+       TP_ARGS(caller, vma, address)
+);
+
+DEFINE_EVENT(spf, spf_vma_access,
+
+       TP_PROTO(unsigned long caller,
+                struct vm_area_struct *vma, unsigned long address),
+
+       TP_ARGS(caller, vma, address)
+);
+
+DEFINE_EVENT(spf, spf_pmd_changed,
+
+       TP_PROTO(unsigned long caller,
+                struct vm_area_struct *vma, unsigned long address),
+
+       TP_ARGS(caller, vma, address)
+);
+
+#endif /* _TRACE_PAGEFAULT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/memory.c b/mm/memory.c
index 83640079d407..6ccb1f45473a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -80,6 +80,9 @@
 
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/pagefault.h>
+
 #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
 #endif
@@ -2460,23 +2463,30 @@ static bool pte_spinlock(struct vm_fault *vmf)
        }
 
        local_irq_disable();
-       if (vma_has_changed(vmf))
+       if (vma_has_changed(vmf)) {
+               trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
                goto out;
+       }
 
        /*
         * We check if the pmd value is still the same to ensure that there
         * is not a huge collapse operation in progress behind our back.
         */
        pmdval = READ_ONCE(*vmf->pmd);
-       if (!pmd_same(pmdval, vmf->orig_pmd))
+       if (!pmd_same(pmdval, vmf->orig_pmd)) {
+               trace_spf_pmd_changed(_RET_IP_, vmf->vma, vmf->address);
                goto out;
+       }
 
        vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
-       if (unlikely(!spin_trylock(vmf->ptl)))
+       if (unlikely(!spin_trylock(vmf->ptl))) {
+               trace_spf_pte_lock(_RET_IP_, vmf->vma, vmf->address);
                goto out;
+       }
 
        if (vma_has_changed(vmf)) {
                spin_unlock(vmf->ptl);
+               trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
                goto out;
        }
 
@@ -2516,16 +2526,20 @@ static bool pte_map_lock(struct vm_fault *vmf)
         * block on the PTL and thus we're safe.
         */
        local_irq_disable();
-       if (vma_has_changed(vmf))
+       if (vma_has_changed(vmf)) {
+               trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
                goto out;
+       }
 
        /*
         * We check if the pmd value is still the same to ensure that there
         * is not a huge collapse operation in progress behind our back.
         */
        pmdval = READ_ONCE(*vmf->pmd);
-       if (!pmd_same(pmdval, vmf->orig_pmd))
+       if (!pmd_same(pmdval, vmf->orig_pmd)) {
+               trace_spf_pmd_changed(_RET_IP_, vmf->vma, vmf->address);
                goto out;
+       }
 
        /*
         * Same as pte_offset_map_lock() except that we call
@@ -2538,11 +2552,13 @@ static bool pte_map_lock(struct vm_fault *vmf)
        pte = pte_offset_map(vmf->pmd, vmf->address);
        if (unlikely(!spin_trylock(ptl))) {
                pte_unmap(pte);
+               trace_spf_pte_lock(_RET_IP_, vmf->vma, vmf->address);
                goto out;
        }
 
        if (vma_has_changed(vmf)) {
                pte_unmap_unlock(pte, ptl);
+               trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
                goto out;
        }
 
@@ -4297,47 +4313,60 @@ int handle_speculative_fault(struct mm_struct *mm, unsigned long address,
                return ret;
 
        seq = raw_read_seqcount(&vma->vm_sequence); /* rmb <-> seqlock,vma_rb_erase() */
-       if (seq & 1)
+       if (seq & 1) {
+               trace_spf_vma_changed(_RET_IP_, vma, address);
                goto out_put;
+       }
 
        /*
         * Can't call vm_ops services as we don't know what they would do
         * with the VMA.
         * This includes huge pages from hugetlbfs.
         */
-       if (vma->vm_ops)
+       if (vma->vm_ops) {
+               trace_spf_vma_notsup(_RET_IP_, vma, address);
                goto out_put;
+       }
 
        /*
         * __anon_vma_prepare() requires the mmap_sem to be held
         * because vm_next and vm_prev must be safe. This can't be guaranteed
         * in the speculative path.
         */
-       if (unlikely(!vma->anon_vma))
+       if (unlikely(!vma->anon_vma)) {
+               trace_spf_vma_notsup(_RET_IP_, vma, address);
                goto out_put;
+       }
 
        vmf.vma_flags = READ_ONCE(vma->vm_flags);
        vmf.vma_page_prot = READ_ONCE(vma->vm_page_prot);
 
        /* Can't call userland page fault handler in the speculative path */
-       if (unlikely(vmf.vma_flags & VM_UFFD_MISSING))
+       if (unlikely(vmf.vma_flags & VM_UFFD_MISSING)) {
+               trace_spf_vma_notsup(_RET_IP_, vma, address);
                goto out_put;
+       }
 
-       if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP)
+       if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP) {
+               /*
+                * This could be detected by checking the address against the
+                * VMA's boundaries but we want to trace it as not supported
+                * instead of changed.
+                */
+               trace_spf_vma_notsup(_RET_IP_, vma, address);
                goto out_put;
+       }
 
        if (address < READ_ONCE(vma->vm_start)
-           || READ_ONCE(vma->vm_end) <= address)
+           || READ_ONCE(vma->vm_end) <= address) {
+               trace_spf_vma_changed(_RET_IP_, vma, address);
                goto out_put;
+       }
 
        if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
                                       flags & FAULT_FLAG_INSTRUCTION,
                                       flags & FAULT_FLAG_REMOTE)) {
+               trace_spf_vma_access(_RET_IP_, vma, address);
                ret = VM_FAULT_SIGSEGV;
                goto out_put;
        }
@@ -4345,10 +4374,12 @@ int handle_speculative_fault(struct mm_struct *mm, unsigned long address,
        /* This one is required to check that the VMA has write access set */
        if (flags & FAULT_FLAG_WRITE) {
                if (unlikely(!(vmf.vma_flags & VM_WRITE))) {
+                       trace_spf_vma_access(_RET_IP_, vma, address);
                        ret = VM_FAULT_SIGSEGV;
                        goto out_put;
                }
        } else if (unlikely(!(vmf.vma_flags & (VM_READ|VM_EXEC|VM_WRITE)))) {
+               trace_spf_vma_access(_RET_IP_, vma, address);
                ret = VM_FAULT_SIGSEGV;
                goto out_put;
        }
@@ -4361,8 +4392,10 @@ int handle_speculative_fault(struct mm_struct *mm, unsigned long address,
        pol = __get_vma_policy(vma, address);
        if (!pol)
                pol = get_task_policy(current);
-       if (pol && pol->mode == MPOL_INTERLEAVE)
+       if (pol && pol->mode == MPOL_INTERLEAVE) {
+               trace_spf_vma_notsup(_RET_IP_, vma, address);
                goto out_put;
+       }
 #endif
 
        /*
@@ -4435,8 +4468,10 @@ int handle_speculative_fault(struct mm_struct *mm, unsigned long address,
         * We need to re-validate the VMA after checking the bounds, otherwise
         * we might have a false positive on the bounds.
         */
-       if (read_seqcount_retry(&vma->vm_sequence, seq))
+       if (read_seqcount_retry(&vma->vm_sequence, seq)) {
+               trace_spf_vma_changed(_RET_IP_, vma, address);
                goto out_put;
+       }
 
        mem_cgroup_oom_enable();
        ret = handle_pte_fault(&vmf);
@@ -4455,6 +4490,7 @@ int handle_speculative_fault(struct mm_struct *mm, unsigned long address,
        return ret;
 
 out_walk:
+       trace_spf_vma_notsup(_RET_IP_, vma, address);
        local_irq_enable();
 out_put:
        put_vma(vma);
-- 
2.7.4
