Introduce -t flag for kmem command to support getting page owner and
slab debug trace. Here is the user help manual:

1. Dump page_owner allocated stack trace for each allocated page in
buddy system when used with "kmem -pt":
crash> kmem -pt
Page allocated via order 0, mask 0x1112c4a, pid 1, ts 16155269152 ns
PFN 0x40000, type Movable, Flags 0xffff00000020836
set_page_owner+84
post_alloc_hook+308
prep_new_page+48
get_page_from_freelist+736
__alloc_pages+348
alloc_pages+280
__page_cache_alloc+120
page_cache_ra_unbounded+272
do_page_cache_ra+172
do_sync_mmap_readahead+492
filemap_fault+340
__do_fault+64
__handle_mm_fault+528
handle_mm_fault+208
__do_page_fault+232
do_page_fault+1264
......

2. Dump the page_owner allocated/freed stack traces for an allocated page
when "kmem -pt" is used with a page address.

3. Dump the slab debug trace when "-st" is used with an allocated slab object address:
crash> kmem -st ffff000007e79d00
CACHE             OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE  NAME
ffff000001c0ed00     3392         93       104     13    32k  task_struct
  SLAB              MEMORY            NODE  TOTAL  ALLOCATED  FREE
  fffffc00001f9e00  ffff000007e78000     0      8          6     2
  FREE / [ALLOCATED]
  [ffff000007e79d00]
object ffff000007e79d00 allocated in alloc_task_struct_node+36 age=8431 cpu=2 
pid=415
__slab_alloc+60
kmem_cache_alloc_node+528
alloc_task_struct_node+36
dup_task_struct+56
copy_process+724
kernel_clone+276
__do_sys_clone+152
__se_sys_clone+60
__arm64_sys_clone+88
__invoke_syscall+36
invoke_syscall+284
el0_svc_common+248
do_el0_svc+56
el0_svc+248
el0t_64_sync_handler+92
el0t_64_sync+344

object ffff000007e79d00 freed in free_task_struct+32 age=12132 cpu=1 pid=0
kmem_cache_free+780
free_task_struct+32
free_task+164
__put_task_struct+328
put_task_struct+44
delayed_put_task_struct+64
rcu_do_batch+972
rcu_core+592
rcu_core_si+24
__softirqentry_text_start+388
do_softirq_own_stack+12
invoke_softirq+216
__irq_exit_rcu+164
irq_exit+20
handle_domain_irq+120

4. Dump the slab debug trace for each allocated object belonging to a slab
when "-st" is used with a slab page address.

5. Dump the slab debug trace for each allocated object belonging to a slab
cache when "-S -t" is used with a slab cache address.

With this patch, the page allocation times can be sorted by page_owner_sort
tool easily, and the slab allocation/free times can be sorted by a script.

Signed-off-by: qiwu.chen <qiwu.c...@transsion.com>
---
 defs.h   |  50 ++++++
 help.c   |   8 +-
 memory.c | 538 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 572 insertions(+), 24 deletions(-)

diff --git a/defs.h b/defs.h
index dfbd241..e9eb9e3 100644
--- a/defs.h
+++ b/defs.h
@@ -206,6 +206,34 @@ static inline int string_exists(char *s) { return (s ? 
TRUE : FALSE); }
 #undef roundup
 #endif
 #define roundup(x, y)  ((((x)+((y)-1))/(y))*(y))
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define rounddown(x, y) ((x) & ~__round_mask(x, y))
+#define IS_ALIGNED(x, y) (((x) & ((typeof(x))(y) - 1)) == 0)
+
+/* stack depot macros before kernel commit 8151c7a35d8bd */
+#define STACK_ALLOC_ALIGN 4
+#define STACK_ALLOC_NULL_PROTECTION_BITS 1
+#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
+#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGESHIFT() - 
STACK_ALLOC_ALIGN)
+#define DEPOT_STACK_BITS (sizeof(uint) * 8)
+#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
+               STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
+
+/* stack depot macros since kernel commit 8151c7a35d8bd */
+#define STACK_DEPOT_EXTRA_BITS 5
+#define DEPOT_HANDLE_BITS (sizeof(uint) * 8)
+#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
+#define DEPOT_POOL_SIZE (1LL << (PAGESHIFT() + DEPOT_POOL_ORDER))
+#define DEPOT_STACK_ALIGN 4
+#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGESHIFT() - DEPOT_STACK_ALIGN)
+#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
+                              STACK_DEPOT_EXTRA_BITS)
+
+/* GFP flags */
+#define __GFP_RECLAIMABLE      0x10u
+#define __GFP_MOVABLE          0x08u
+#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+#define GFP_MOVABLE_SHIFT 3
 
 typedef uint64_t physaddr_t;
 
@@ -1626,6 +1654,7 @@ struct offset_table {                    /* stash of 
commonly-used offsets */
        long pglist_data_node_present_pages;
        long pglist_data_node_spanned_pages;
        long pglist_data_bdata;
+       long pglist_data_node_page_ext;
        long page_cache_bucket_chain;
         long zone_struct_free_pages;
         long zone_struct_free_area;
@@ -2243,6 +2272,23 @@ struct offset_table {                    /* stash of 
commonly-used offsets */
        long vmap_node_busy;
        long rb_list_head;
        long file_f_inode;
+       long stack_record_size;
+       long stack_record_entries;
+       long stack_record_count;
+       long page_owner_order;
+       long page_owner_gfp_mask;
+       long page_owner_ts_nsec;
+       long page_owner_free_ts_nsec;
+       long page_owner_pid;
+       long page_owner_handle;
+       long page_owner_free_handle;
+       long mem_section_page_ext;
+       long track_addr;
+       long track_addrs;
+       long track_pid;
+       long track_cpu;
+       long track_when;
+       long track_handle;
 };
 
 struct size_table {         /* stash of commonly-used sizes */
@@ -2419,6 +2465,10 @@ struct size_table {         /* stash of commonly-used 
sizes */
        long module_memory;
        long fred_frame;
        long vmap_node;
+       long page_ext;
+       long page_owner;
+       long stack_record;
+       long track;
 };
 
 struct array_table {
diff --git a/help.c b/help.c
index e95ac1d..6a59064 100644
--- a/help.c
+++ b/help.c
@@ -6815,8 +6815,8 @@ NULL
 char *help_kmem[] = {
 "kmem",
 "kernel memory",
-"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n"
-"       [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] 
address]]",
+"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n"
+"       [[-s|-S|-S=cpu[s]|-r|-t] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] 
address]]",
 "  This command displays information about the use of kernel memory.\n",
 "        -f  displays the contents of the system free memory headers.",
 "            also verifies that the page count equals nr_free_pages.",
@@ -6845,6 +6845,7 @@ char *help_kmem[] = {
 "            mem_map[] array, made up of the page struct address, its 
associated",
 "            physical address, the page.mapping, page.index, page._count and",
 "            page.flags fields.",
+"        -t  displays page_owner allocated stack trace for each allocated page 
in buddy system.",
 " -m member  similar to -p, but displays page structure contents specified by",
 "            a comma-separated list of one or more struct page members.  The",
 "            \"flags\" member will always be expressed in hexadecimal format, 
and",
@@ -6893,12 +6894,15 @@ char *help_kmem[] = {
 "   address  when used with -s or -S, searches the kmalloc() slab subsystem",
 "            for the slab containing of this virtual address, showing whether",
 "            it is in use or free.",
+"            when added extra -t, displays the slab debug trace for the 
allocated",
+"            object belongs to this slab",
 "   address  when used with -f, the address can be either a page pointer,",
 "            a physical address, or a kernel virtual address; the free_area",
 "            header containing the page (if any) is displayed.",
 "   address  when used with -p, the address can be either a page pointer, a",
 "            physical address, or a kernel virtual address; its basic mem_map",
 "            page information is displayed.",
+"            when added extra -t, display the page_owner traces for this page",
 "   address  when used with -m, the address can be either a page pointer, a",
 "            physical address, or a kernel virtual address; the specified",
 "            members of the associated page struct are displayed.",
diff --git a/memory.c b/memory.c
index a74ebaf..7bf8f86 100644
--- a/memory.c
+++ b/memory.c
@@ -323,6 +323,11 @@ static ulong kmem_cache_nodelists(ulong);
 static void dump_hstates(void);
 static ulong freelist_ptr(struct meminfo *, ulong, ulong);
 static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
+static void page_owner_init(void);
+static int page_owner_enabled(void);
+static void stack_depot_init(void);
+static void dump_page_owner(struct meminfo *, ulong, physaddr_t);
+enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_ALL };
 
 /*
  *  Memory display modes specific to this file.
@@ -860,6 +865,16 @@ vm_init(void)
                        "kmem_cache_node", "partial");
                MEMBER_OFFSET_INIT(kmem_cache_node_full, 
                        "kmem_cache_node", "full");
+
+               STRUCT_SIZE_INIT(track, "track");
+               MEMBER_OFFSET_INIT(track_addr, "track", "addr");
+               if (MEMBER_EXISTS("track", "addrs"))
+                       MEMBER_OFFSET_INIT(track_addrs, "track", "addrs");
+               if (MEMBER_EXISTS("track", "handle"))
+                       MEMBER_OFFSET_INIT(track_handle, "track", "handle");
+               MEMBER_OFFSET_INIT(track_when, "track", "when");
+               MEMBER_OFFSET_INIT(track_cpu, "track", "cpu");
+               MEMBER_OFFSET_INIT(track_pid, "track", "pid");
        } else {
                MEMBER_OFFSET_INIT(kmem_cache_s_c_nextp,  
                        "kmem_cache_s", "c_nextp");
@@ -983,6 +998,8 @@ vm_init(void)
                vt->flags |= DISCONTIGMEM;
 
        sparse_mem_init();
+       page_owner_init();
+       stack_depot_init();
 
        vt->vmalloc_start = machdep->vmalloc_start();
        if (IS_VMALLOC_ADDR(vt->mem_map))
@@ -1099,6 +1116,8 @@ vm_init(void)
                MEMBER_OFFSET_INIT(pglist_data_bdata, "pglist_data", "bdata");
                MEMBER_OFFSET_INIT(pglist_data_nr_zones, "pglist_data", 
                        "nr_zones");
+               MEMBER_OFFSET_INIT(pglist_data_node_page_ext, "pglist_data", 
+                       "node_page_ext");
                MEMBER_OFFSET_INIT(pglist_data_node_start_pfn, "pglist_data", 
                        "node_start_pfn");
                MEMBER_OFFSET_INIT(pglist_data_pgdat_next, "pglist_data", 
@@ -5037,6 +5056,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
 #define SLAB_BITFIELD          (ADDRESS_SPECIFIED << 25)
 #define SLAB_GATHER_FAILURE    (ADDRESS_SPECIFIED << 26)
 #define GET_SLAB_ROOT_CACHES   (ADDRESS_SPECIFIED << 27)
+#define GET_SLAB_DEBUG_TRACE   (ADDRESS_SPECIFIED << 28)
+#define GET_PAGE_OWNER        (ADDRESS_SPECIFIED << 29)
 
 #define GET_ALL \
        (GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
@@ -5048,7 +5069,7 @@ cmd_kmem(void)
        int c;
        int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag; 
        int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag;
-       int rflag;
+       int rflag, tflag;
        struct meminfo meminfo;
        ulonglong value[MAXARGS];
        char buf[BUFSIZE];
@@ -5061,13 +5082,13 @@ cmd_kmem(void)
        spec_addr = choose_cpu = 0;
         sflag =        Sflag = pflag = fflag = Fflag = Pflag = zflag = oflag = 
0;
        vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0;
-       gflag = hflag = rflag = 0;
+       gflag = hflag = rflag = tflag = 0;
        escape = FALSE;
        BZERO(&meminfo, sizeof(struct meminfo));
        BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
        pc->curcmd_flags &= ~HEADER_PRINTED;
 
-        while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoh")) != 
EOF) {
+        while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoht")) != 
EOF) {
                 switch(c)
                {
                case 'V':
@@ -5204,6 +5225,10 @@ cmd_kmem(void)
                        gflag = 1;
                        break;
 
+               case 't':
+                       tflag = 1;
+                       break;
+
                default:
                        argerrs++;
                        break;
@@ -5213,7 +5238,7 @@ cmd_kmem(void)
        if (argerrs)
                cmd_usage(pc->curcmd, SYNOPSIS);
 
-        if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag +
+        if ((fflag + Fflag + Vflag + oflag +
             vflag + Cflag + cflag + iflag + lflag + Lflag + gflag +
             hflag + rflag) > 1) {
                error(INFO, "only one flag allowed!\n");
@@ -5264,10 +5289,13 @@ cmd_kmem(void)
                 if (pflag) {
                        meminfo.spec_addr = value[i];
                        meminfo.flags = ADDRESS_SPECIFIED;
+                       if (tflag)
+                               meminfo.flags |= GET_PAGE_OWNER;
                         dump_mem_map(&meminfo);
                         pflag++;
                 }
 
+
                 if (sflag || Sflag) {
                        if (vt->flags & KMEM_CACHE_UNAVAIL) 
                                error(FATAL, 
@@ -5292,6 +5320,8 @@ cmd_kmem(void)
                                meminfo.reqname = p1;
                                meminfo.cache = value[i];
                                meminfo.flags |= CACHE_SET;
+                               if (tflag)
+                                       meminfo.flags |= GET_SLAB_DEBUG_TRACE;
                                if ((i+1) == spec_addr) { /* done? */ 
                                        if (meminfo.calls++)
                                                fprintf(fp, "\n");
@@ -5301,6 +5331,8 @@ cmd_kmem(void)
                        } else {
                                meminfo.spec_addr = value[i];
                                meminfo.flags = ADDRESS_SPECIFIED;
+                               if (tflag)
+                                       meminfo.flags |= GET_SLAB_DEBUG_TRACE;
                                if (Sflag && (vt->flags & KMALLOC_SLUB))
                                        meminfo.flags |= VERBOSE;
                                if (meminfo.calls++)
@@ -5346,25 +5378,28 @@ cmd_kmem(void)
                        gflag++;
                }
 
-                /* 
-                 * no value arguments allowed! 
-                 */
-                if (zflag || nflag || iflag || Fflag || Cflag || Lflag || 
+               if (tflag)
+                       tflag++;
+
+               /*
+                * no value arguments allowed!
+                */
+               if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
                    Vflag || oflag || hflag || rflag) {
                        error(INFO, 
                            "no address arguments allowed with this option\n");
                         cmd_usage(pc->curcmd, SYNOPSIS);
                }
 
-               if (!(sflag + Sflag + pflag + fflag + vflag + cflag + 
-                     lflag + Lflag + gflag)) {
+               if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
+                 lflag + Lflag + gflag + tflag)) {
                        meminfo.spec_addr = value[i];
-                        meminfo.flags = ADDRESS_SPECIFIED;
-                        if (meminfo.calls++)
-                                fprintf(fp, "\n");
+                                               meminfo.flags = 
ADDRESS_SPECIFIED;
+                                               if (meminfo.calls++)
+                                                               fprintf(fp, 
"\n");
                        else
                                kmem_cache_init();   
-                        kmem_search(&meminfo);
+                                               kmem_search(&meminfo);
                }
 
        }
@@ -5372,8 +5407,11 @@ cmd_kmem(void)
        if (iflag == 1)
                dump_kmeminfo();
 
-       if (pflag == 1)
+       if (pflag == 1) {
+               if (tflag)
+                       meminfo.flags = GET_PAGE_OWNER;
                dump_mem_map(&meminfo);
+       }
 
        if (fflag == 1)
                vt->dump_free_pages(&meminfo);
@@ -5457,7 +5495,7 @@ cmd_kmem(void)
        if (!(sflag + Sflag + pflag + fflag + Fflag + vflag + 
              Vflag + zflag + oflag + cflag + Cflag + iflag + 
              nflag + lflag + Lflag + gflag + hflag + rflag +
-             meminfo.calls))
+             tflag + meminfo.calls))
                cmd_usage(pc->curcmd, SYNOPSIS);
 
 }
@@ -5749,7 +5787,8 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
 
        switch (mi->flags)
        {
-       case ADDRESS_SPECIFIED: 
+       case ADDRESS_SPECIFIED:
+       case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
                switch (mi->memtype)
                {
                case KVADDR:
@@ -5774,6 +5813,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
                print_hdr = TRUE;
                break;
 
+       case GET_PAGE_OWNER:
+               print_hdr = FALSE;
+               break;
+
        case GET_ALL:
                shared = 0;
                 reserved = 0;
@@ -5926,6 +5969,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
 
                                /* FALLTHROUGH */
 
+                       case GET_PAGE_OWNER:
+                               dump_page_owner(mi, pp, phys);
+                               break;
+
                        case GET_SLAB_PAGES:
                                if (v22) {
                                        if ((flags >> v22_PG_Slab) & 1) 
@@ -6083,6 +6130,7 @@ display_members:
        
                        if (done)
                                break;
+
                }
 
                if (done)
@@ -6119,7 +6167,10 @@ display_members:
                break;
 
        case ADDRESS_SPECIFIED:
+       case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
                mi->retval = done;
+               if (mi->flags & GET_PAGE_OWNER)
+                       dump_page_owner(mi, pp, phys);
                break; 
        }
 
@@ -6129,6 +6180,331 @@ display_members:
        FREEBUF(page_cache);
 }
 
+/*
+ * Return TRUE if the kernel's stack depot is compiled in and enabled.
+ * Kernels name the knob stack_depot_disable (v5.12+) or
+ * stack_depot_disabled (v6.3+); older CONFIG_STACKDEPOT kernels have no
+ * knob and the depot is always on once stack_slabs is populated.
+ */
+static int stack_depot_enabled(void)
+{
+	struct syment *sp;
+	int disable = TRUE;
+	ulong first_slab;
+
+	if ((sp = symbol_search("stack_depot_disable")))
+		readmem(sp->value, KVADDR, &disable, sizeof(int),
+			"stack_depot_disable", RETURN_ON_ERROR);
+	else if ((sp = symbol_search("stack_depot_disabled")))
+		readmem(sp->value, KVADDR, &disable, sizeof(int),
+			"stack_depot_disabled", RETURN_ON_ERROR);
+	else if ((sp = symbol_search("stack_slabs"))) {
+		/*
+		 * Fix: sp->value is the symbol's *address* and is never zero,
+		 * so the original "sp->value ? FALSE : TRUE" unconditionally
+		 * returned FALSE on these kernels.  Check whether the first
+		 * depot slab pointer has actually been populated instead.
+		 */
+		if (readmem(sp->value, KVADDR, &first_slab, sizeof(void *),
+			"stack_slabs[0]", QUIET|RETURN_ON_ERROR))
+			return first_slab ? TRUE : FALSE;
+		return FALSE;
+	}
+
+	return !disable;
+}
+
+/* Cache struct stack_record size/offsets, but only when the depot is usable. */
+static void stack_depot_init(void)
+{
+	if (!stack_depot_enabled())
+		return;
+
+	STRUCT_SIZE_INIT(stack_record, "stack_record");
+	MEMBER_OFFSET_INIT(stack_record_size, "stack_record", "size");
+	MEMBER_OFFSET_INIT(stack_record_entries, "stack_record", "entries");
+	/* The "count" member only exists on newer stack depot layouts. */
+	if (MEMBER_EXISTS("stack_record", "count"))
+		MEMBER_OFFSET_INIT(stack_record_count, "stack_record", "count");
+}
+
+/*
+ * Fetch the stack entries referenced by a stack depot handle.  Sets
+ * *entries to the kernel address of the entry array and returns the
+ * number of entries, or 0 if the handle is invalid or unreadable.
+ */
+static unsigned int stack_depot_fetch(uint handle, ulong *entries)
+{
+	struct syment *sp;
+	uint valid, offset, slabindex, poolindex, pools_num, stack_record_count, stack_size = 0;
+	ulong stack_record_addr, sym_value;
+
+	if (!handle)
+		return 0;
+
+	if ((sp = symbol_search("stack_slabs"))) {
+		/* Layout before kernel commit 8151c7a35d8bd: valid/index/offset bitfields. */
+		valid = (handle >> (STACK_ALLOC_INDEX_BITS + STACK_ALLOC_OFFSET_BITS)) & STACK_ALLOC_NULL_PROTECTION_BITS;
+		if (!valid)
+			return 0;
+
+		slabindex = handle & ((1 << STACK_ALLOC_INDEX_BITS) - 1);
+		if (!readmem(sp->value + slabindex * sizeof(void *), KVADDR, &stack_record_addr,
+				sizeof(void *), "stack_record_addr", FAULT_ON_ERROR))
+			return 0;
+
+		offset = (handle >> STACK_ALLOC_INDEX_BITS) & ((1 << STACK_ALLOC_OFFSET_BITS) - 1);
+		stack_record_addr += (offset << STACK_ALLOC_ALIGN);
+		*entries = stack_record_addr + OFFSET(stack_record_entries);
+		if (!readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size,
+				sizeof(stack_size), "stack_record size", FAULT_ON_ERROR))
+			return 0;
+	} else if ((sp = symbol_search("stack_pools")) && symbol_exists("pools_num")) {
+		/* Layout since kernel commit 8151c7a35d8bd: pools[] + pools_num. */
+		sym_value = symbol_value("pools_num");
+		poolindex = handle & ((1 << DEPOT_POOL_INDEX_BITS) - 1);
+		/* Fix: don't compare pools_num if the read failed (it was uninitialized). */
+		if (!readmem(sym_value, KVADDR, &pools_num, sizeof(int),
+				"pools_num", RETURN_ON_ERROR))
+			return 0;
+		/* Fix: poolindex 0 would underflow the (poolindex-1) array index below. */
+		if (!poolindex || poolindex >= pools_num) {
+			error(INFO, "pool index %d out of bounds (%d) for stack id %08x\n", poolindex, pools_num, handle);
+			return 0;
+		}
+
+		/*
+		 * NOTE(review): the "-1" assumes the handle stores
+		 * pool_index_plus_1; confirm this matches the target kernel,
+		 * since older kernels stored the raw pool index.
+		 */
+		readmem(sp->value + (poolindex-1) * sizeof(void *), KVADDR, &stack_record_addr,
+				sizeof(void *), "stack_record_addr", FAULT_ON_ERROR);
+		if (!stack_record_addr)
+			return 0;
+
+		offset = (handle >> DEPOT_POOL_INDEX_BITS) & ((1 << DEPOT_OFFSET_BITS) - 1);
+		stack_record_addr += (offset << DEPOT_STACK_ALIGN);
+		if (!readmem(stack_record_addr + OFFSET(stack_record_count), KVADDR, &stack_record_count,
+				sizeof(stack_record_count), "stack_record_count", FAULT_ON_ERROR) || !stack_record_count)
+			return 0;
+
+		*entries = stack_record_addr + OFFSET(stack_record_entries);
+		if (!readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size,
+				sizeof(stack_size), "stack_record size", FAULT_ON_ERROR))
+			return 0;
+	}
+
+	return stack_size;
+}
+
+/*
+ * Print nr_entries stack frames, one symbol per line, starting at the
+ * kernel address "entries" (an array of ulong return addresses).
+ */
+static void stack_trace_print(ulong entries, uint nr_entries)
+{
+	uint i;	/* fix: was signed int, compared against uint nr_entries */
+	struct syment *sp;
+	ulong value, offset;
+	char buf[BUFSIZE];
+
+	if (!nr_entries)
+		return;
+
+	for (i = 0; i < nr_entries; i++) {
+		if (!readmem(entries, KVADDR, &value, sizeof(value),
+				"stack_trace", FAULT_ON_ERROR))
+			break;
+
+		entries += sizeof(ulong);
+		/*
+		 * Fix: an address that resolves to no symbol (e.g. from an
+		 * unloaded module) no longer truncates the rest of the trace;
+		 * print the raw address and continue.
+		 */
+		if ((sp = value_search(value, &offset)))
+			fprintf(fp, "%s\n", value_to_symstr(sp->value+offset, buf, 0));
+		else
+			fprintf(fp, "%lx\n", value);
+	}
+	fprintf(fp, "\n");
+}
+
+/*
+ * Mirror the kernel's gfp_migratetype(): derive the migrate type from the
+ * GFP flags, except that everything is MIGRATE_UNMOVABLE when mobility
+ * grouping is disabled.
+ */
+static ulong gfp_migratetype(ulong gfp_flags)
+{
+	struct syment *sp;
+	/* Fix: initialize, so a failed readmem doesn't leave this garbage. */
+	int page_group_by_mobility_disabled = 0;
+
+	if ((sp = symbol_search("page_group_by_mobility_disabled")) &&
+	    readmem(sp->value, KVADDR, &page_group_by_mobility_disabled, sizeof(int),
+			"page_group_by_mobility_disabled", RETURN_ON_ERROR) &&
+	    page_group_by_mobility_disabled) {
+		ulong migrate_unmovable = 0;	/* fix: initialized in case the lookup fails */
+
+		enumerator_value("MIGRATE_UNMOVABLE", &migrate_unmovable);
+		return migrate_unmovable;
+	}
+
+	return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
+}
+
+/*
+ * Copy the name of the given migrate type into buf (assumed BUFSIZE bytes).
+ * buf is always NUL-terminated, even on failure.
+ */
+static void migratetype_name(ulong migratetype, char *buf)
+{
+	struct syment *sp;
+	ulong migratetype_name_addr;
+
+	/* Fix: terminate buf up front so callers never print uninitialized stack data. */
+	buf[0] = '\0';
+
+	sp = symbol_search("migratetype_names");
+	if (!sp)
+		return;
+
+	/* migratetype_names[] is an array of char * in the kernel. */
+	if (!readmem(sp->value + migratetype * sizeof(ulong), KVADDR, &migratetype_name_addr,
+			sizeof(ulong), "migratetype_name", RETURN_ON_ERROR))
+		return;
+
+	read_string(migratetype_name_addr, buf, BUFSIZE-1);
+}
+
+/*
+ * Print the page_owner record for one page: the allocation stack (unless
+ * alloc == TRACK_FREE) and the last-free stack (unless alloc == TRACK_ALLOC).
+ * page_owner points at a local copy of struct page_owner.
+ */
+static void print_page_owner(ulong pfn, ulong page, char *page_owner, enum track_item alloc)
+{
+	int pid;
+	ushort order;
+	uint handle, free_handle, gfp_mask, nr_entries;
+	u64 ts_nsec, free_ts_nsec;
+	ulong entries, page_flags;
+	char buf[BUFSIZE];
+
+	order = USHORT(page_owner + OFFSET(page_owner_order));
+	gfp_mask = UINT(page_owner + OFFSET(page_owner_gfp_mask));
+	handle = UINT(page_owner + OFFSET(page_owner_handle));
+	free_handle = UINT(page_owner + OFFSET(page_owner_free_handle));
+	ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_ts_nsec));
+	free_ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_free_ts_nsec));
+	pid = INT(page_owner + OFFSET(page_owner_pid));
+
+	if (handle && (alloc != TRACK_FREE)) {
+		fprintf(fp, "Page allocated via order %u, mask %#x, pid %d, ts %llu ns\n",
+					order, gfp_mask, pid, ts_nsec);
+		migratetype_name(gfp_migratetype(gfp_mask), buf);
+		readmem(page+OFFSET(page_flags), KVADDR, &page_flags, sizeof(ulong),
+				"page.flags", FAULT_ON_ERROR);
+		fprintf(fp, "PFN %#lx, type %s, Flags %#lx\n", pfn, buf, page_flags);
+		nr_entries = stack_depot_fetch(handle, &entries);
+		stack_trace_print(entries, nr_entries);
+	}
+
+	if ((alloc != TRACK_ALLOC) && free_handle) {
+		/*
+		 * Fix: fetch the stack via free_handle; the original passed
+		 * the allocation handle here, printing the wrong trace.  The
+		 * redundant second read of free_handle is also dropped.
+		 */
+		nr_entries = stack_depot_fetch(free_handle, &entries);
+		fprintf(fp, "page last free ts %llu ns, stack trace:\n", free_ts_nsec);
+		stack_trace_print(entries, nr_entries);
+	}
+}
+
+/* Get the max order for the zoned buddy allocator */
+static inline ulong get_max_order(void)
+{
+	char *string;
+
+	if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
+		if (get_kernel_config("CONFIG_FORCE_MAX_ZONEORDER", &string) == IKCONFIG_STR)
+			return atol(string);
+		/* Fix: the option was renamed with an ARCH_ prefix in kernel v6.1. */
+		if (get_kernel_config("CONFIG_ARCH_FORCE_MAX_ZONEORDER", &string) == IKCONFIG_STR)
+			return atol(string);
+	}
+
+	/* Historic default when no ikconfig is available: MAX_ORDER == 11. */
+	return 11;
+}
+
+/*
+ * NOTE(review): kernel v6.4 redefined MAX_ORDER to be inclusive (the old
+ * MAX_ORDER - 1); confirm MAX_ORDER_NR_PAGES matches the target kernel.
+ */
+#define MAX_ORDER      get_max_order()
+#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
+
+/*
+ * Resolve the page_ext entry address for the given pfn/page.  Returns
+ * TRUE and stores the entry address in *page_ext on success.
+ */
+static int lookup_page_ext(ulong pfn, ulong pp, ulong *page_ext)
+{
+	int node;
+	ulong page_ext_size, section, section_nr, pgdat, node_page_ext, node_start_pfn, page_ext_idx;
+
+	/* page_ext_size scales the per-page index; absent means no CONFIG_PAGE_EXTENSION. */
+	if (!kernel_symbol_exists("page_ext_size") ||
+		!readmem(symbol_value("page_ext_size"), KVADDR, &page_ext_size, sizeof(page_ext_size),
+					"page_ext_size", QUIET|RETURN_ON_ERROR))
+		return FALSE;
+
+	if (IS_SPARSEMEM()) {
+		/*
+		 * With SPARSEMEM the kernel stores mem_section->page_ext
+		 * pre-biased by the section start pfn, so indexing by the raw
+		 * pfn is correct here.
+		 */
+		section_nr = pfn_to_section_nr(pfn);
+		if (!(section = valid_section_nr(section_nr)))
+			return FALSE;
+
+		if (!readmem(section + OFFSET(mem_section_page_ext), KVADDR, &node_page_ext, sizeof(ulong),
+				"mem_section page_ext", FAULT_ON_ERROR) || !node_page_ext)
+			return FALSE;
+
+		*page_ext = node_page_ext + pfn * page_ext_size;
+		return TRUE;
+	}
+
+	/*
+	 * Fix: the original "(node = page_to_nid(pp) >= 0)" assigned the
+	 * *comparison result* (0/1) to node due to operator precedence.
+	 */
+	if ((node = page_to_nid(pp)) >= 0) {
+		pgdat = vt->node_table[node].pgdat;
+		if (!VALID_MEMBER(pglist_data_node_page_ext) ||
+			!readmem(pgdat + OFFSET(pglist_data_node_page_ext), KVADDR, &node_page_ext,
+			sizeof(ulong), "pglist node_page_ext", FAULT_ON_ERROR) || !node_page_ext)
+			return FALSE;
+
+		if (!VALID_MEMBER(pglist_data_node_start_pfn) ||
+			!readmem(pgdat + OFFSET(pglist_data_node_start_pfn), KVADDR, &node_start_pfn,
+			sizeof(ulong),  "pglist node_start_pfn", FAULT_ON_ERROR) || !node_start_pfn)
+			return FALSE;
+
+		/*
+		 * Fix: index by the offset within the node, as the kernel's
+		 * lookup_page_ext() does; the original computed page_ext_idx
+		 * but then indexed with the raw pfn, overrunning the array.
+		 */
+		page_ext_idx = pfn - rounddown(node_start_pfn, MAX_ORDER_NR_PAGES);
+		*page_ext = node_page_ext + page_ext_idx * page_ext_size;
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Return the address of the page_owner data inside a page_ext entry, or
+ * FALSE (0) when page_owner_ops is absent or unreadable.  The first
+ * member of page_owner_ops holds the byte offset within page_ext.
+ */
+static ulong get_page_owner(ulong page_ext)
+{
+	struct syment *sp;
+	ulong ops_offset;
+
+	if (!(sp = symbol_search("page_owner_ops")))
+		return FALSE;
+
+	if (!readmem(sp->value, KVADDR, &ops_offset, sizeof(ulong),
+		"page_owner_ops_offset", RETURN_ON_ERROR))
+		return FALSE;
+
+	return page_ext + ops_offset;
+}
+
+/*
+ * Return whether page_owner tracking is active in the target kernel,
+ * probing the flag symbol under either of its historical names.
+ *
+ * NOTE(review): the kernel flag may be a bool rather than an int on some
+ * versions; reading sizeof(int) bytes assumes an int-sized flag -- confirm
+ * against the target kernel.
+ */
+static int page_owner_enabled(void)
+{
+	struct syment *sp;
+	int enabled;
+
+	if ((sp = symbol_search("page_owner_enabled")) &&
+	    readmem(sp->value, KVADDR, &enabled, sizeof(int),
+		"page_owner_enabled", RETURN_ON_ERROR))
+		return enabled;
+
+	if ((sp = symbol_search("page_owner_inited")) &&
+	    readmem(sp->value, KVADDR, &enabled, sizeof(int),
+		"page_owner_inited", RETURN_ON_ERROR))
+		return enabled;
+
+	return FALSE;
+}
+
+/* Cache page_ext/page_owner sizes and member offsets when page_owner is on. */
+static void page_owner_init(void)
+{
+	if (!page_owner_enabled())
+		return;
+
+	STRUCT_SIZE_INIT(page_ext, "page_ext");
+	STRUCT_SIZE_INIT(page_owner, "page_owner");
+	MEMBER_OFFSET_INIT(mem_section_page_ext, "mem_section", "page_ext");
+	MEMBER_OFFSET_INIT(page_owner_handle, "page_owner", "handle");
+	MEMBER_OFFSET_INIT(page_owner_free_handle, "page_owner", "free_handle");
+	MEMBER_OFFSET_INIT(page_owner_order, "page_owner", "order");
+	MEMBER_OFFSET_INIT(page_owner_gfp_mask, "page_owner", "gfp_mask");
+	MEMBER_OFFSET_INIT(page_owner_ts_nsec, "page_owner", "ts_nsec");
+	MEMBER_OFFSET_INIT(page_owner_free_ts_nsec, "page_owner", "free_ts_nsec");
+	MEMBER_OFFSET_INIT(page_owner_pid, "page_owner", "pid");
+}
+
+/*
+ * Dump page_owner information for one page.  In full-sweep mode
+ * (mi->flags == GET_PAGE_OWNER) only currently-allocated head pages are
+ * reported; in single-address mode both the allocation and free history
+ * are printed.
+ */
+static void dump_page_owner(struct meminfo *mi, ulong pp, physaddr_t phys)
+{
+	ulong pfn, page_ext_addr, page_owner_addr, page_ext;
+	long owner_bit, allocated_bit;
+	char *owner_buf;
+
+	pfn = BTOP(phys);
+	if (!lookup_page_ext(pfn, pp, &page_ext_addr))
+		return;
+
+	if (!(page_owner_addr = get_page_owner(page_ext_addr)))
+		return;
+
+	owner_buf = (char *)GETBUF(SIZE(page_owner));
+	if (!readmem(page_owner_addr, KVADDR, owner_buf, SIZE(page_owner),
+		"page_owner", FAULT_ON_ERROR))
+		goto exit;
+
+	/* page_ext.flags must have PAGE_EXT_OWNER set for the data to be valid. */
+	enumerator_value("PAGE_EXT_OWNER", &owner_bit);
+	if (!readmem(page_ext_addr, KVADDR, &page_ext, sizeof(ulong),
+		"page_ext", FAULT_ON_ERROR) || !(page_ext & (1 << owner_bit)))
+		goto exit;
+
+	enumerator_value("PAGE_EXT_OWNER_ALLOCATED", &allocated_bit);
+	if (mi->flags == GET_PAGE_OWNER) {
+		/* Skip freed pages and tail pages (pfn not aligned to the order). */
+		if (!(page_ext & (1 << allocated_bit)) ||
+		    !IS_ALIGNED(pfn, 1 << USHORT(owner_buf + OFFSET(page_owner_order))))
+			goto exit;
+
+		/* dump allocated page owner for current memory usage */
+		print_page_owner(pfn, pp, owner_buf, TRACK_ALLOC);
+	} else {
+		if (page_ext & (1 << allocated_bit))
+			fprintf(fp, "page_owner tracks the page 0x%lx as allocated\n", pp);
+		else
+			fprintf(fp, "page_owner tracks the page 0x%lx as freed\n", pp);
+		print_page_owner(pfn, pp, owner_buf, TRACK_ALL);
+	}
+
+exit:
+	FREEBUF(owner_buf);
+}
+
 static void
 dump_mem_map(struct meminfo *mi)
 {
@@ -6161,6 +6537,18 @@ dump_mem_map(struct meminfo *mi)
        char style3[100];
        char style4[100];
 
+       if (mi->flags & GET_PAGE_OWNER) {
+               if (!page_owner_enabled()) {
+                       error(INFO, "page_owner is disabled\n");
+                       return;
+               }
+
+               if (!stack_depot_enabled()) {
+                       error(INFO, "stack_depot is disabled\n");
+                       return;
+               }
+       }
+
        if (IS_SPARSEMEM()) {
                dump_mem_map_SPARSEMEM(mi);
                return;
@@ -6238,7 +6626,8 @@ dump_mem_map(struct meminfo *mi)
        
        switch (mi->flags)
        {
-       case ADDRESS_SPECIFIED: 
+       case ADDRESS_SPECIFIED:
+       case ADDRESS_SPECIFIED|GET_PAGE_OWNER: 
                switch (mi->memtype)
                {
                case KVADDR:
@@ -6263,6 +6652,10 @@ dump_mem_map(struct meminfo *mi)
                print_hdr = TRUE;
                break;
 
+       case GET_PAGE_OWNER:
+               print_hdr = FALSE;
+               break;
+
        case GET_ALL:
                shared = 0;
                 reserved = 0;
@@ -6376,6 +6769,10 @@ dump_mem_map(struct meminfo *mi)
 
                                /* FALLTHROUGH */
 
+                       case GET_PAGE_OWNER:
+                               dump_page_owner(mi, pp, phys);
+                               break;
+
                        case GET_SLAB_PAGES:
                                if (v22) {
                                        if ((flags >> v22_PG_Slab) & 1) 
@@ -6570,7 +6967,10 @@ display_members:
                break;
 
        case ADDRESS_SPECIFIED:
+       case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
                mi->retval = done;
+               if (mi->flags & GET_PAGE_OWNER)
+                       dump_page_owner(mi, pp, phys);
                break; 
        }
 
@@ -19618,6 +20018,99 @@ do_kmem_cache_slub(struct meminfo *si)
        FREEBUF(per_cpu);
 }
 
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline uint get_info_end(struct meminfo *si)
+{
+       uint inuse = UINT(si->cache_buf + OFFSET(kmem_cache_inuse));
+       uint offset = UINT(si->cache_buf + OFFSET(kmem_cache_offset));
+
+       if (offset >= inuse)
+               return inuse + sizeof(void *);
+       else
+               return inuse;
+}
+
+static inline u64 get_jiffies(void)
+{
+       ulong jiffies;
+       u64 jiffies_64;
+
+       if (symbol_exists("jiffies_64")) {
+               get_symbol_data("jiffies_64", sizeof(u64), &jiffies_64);
+               return jiffies_64;
+       } else {
+               get_symbol_data("jiffies", sizeof(ulong), &jiffies);
+               return (u64)jiffies;
+       }
+}
+
+#define TRACK_ADDRS_COUNT 16
+void print_track(struct meminfo *si, char *track, ulong object, enum 
track_item alloc)
+{
+       ulong track_addr, addr, addrs, when, entries, nr_entries;
+       uint i, cpu, pid, handle;
+       char buf[BUFSIZE];
+
+       track_addr = object + get_info_end(si) + alloc * STRUCT_SIZE("track");
+       if (!readmem(track_addr, KVADDR, track, SIZE(track), "track", 
FAULT_ON_ERROR))
+               return;
+
+       addr = ULONG(track + OFFSET(track_addr));
+       if (addr) {
+               when = ULONG(track + OFFSET(track_when));
+               cpu = UINT(track + OFFSET(track_cpu));
+               pid = UINT(track + OFFSET(track_pid));
+               fprintf(fp, "object %lx %s in %s age=%llu cpu=%u pid=%d\n",
+                       object, alloc ? "freed" : "allocated", 
value_to_symstr(addr, buf, 0),
+                       get_jiffies() - (u64)when, cpu, pid);
+               if (VALID_MEMBER(track_addrs)) {
+                       addrs = track_addr + OFFSET(track_addrs);
+                       stack_trace_print(addrs, TRACK_ADDRS_COUNT);
+               } else if (VALID_MEMBER(track_handle)) {
+                       handle = UINT(track + OFFSET(track_handle));
+                       nr_entries = stack_depot_fetch(handle, &entries);
+                       stack_trace_print(entries, nr_entries);
+               } else {
+                       fprintf(fp, "stack trace missing\n");
+                       handle = track_addr + OFFSET(track_handle);
+                       nr_entries = stack_depot_fetch(handle, &entries);
+                       stack_trace_print(entries, nr_entries);
+               }
+       }
+}
+
+#define SLAB_STORE_USER (0x00010000UL)
+static ulong get_slab_store_user_flag(void)
+{
+       ulong slab_store_user_flag;
+
+       if (enumerator_value("_SLAB_STORE_USER", &slab_store_user_flag))
+               return (1 << slab_store_user_flag);
+       else
+               return SLAB_STORE_USER;
+}
+
+static void slab_debug_trace_show(struct meminfo *si, ulong object)
+{
+       ulong flags;
+       char *track;
+
+       if (!(si->flags & GET_SLAB_DEBUG_TRACE))
+               return;
+
+       flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
+       if (!(flags & get_slab_store_user_flag()))
+               return;
+
+       track = (char *)GETBUF(SIZE(track));
+       print_track(si, track, object, TRACK_ALLOC);
+       print_track(si, track, object, TRACK_FREE);
+       FREEBUF(track);
+}
+
 #define DUMP_SLAB_INFO_SLUB() \
       { \
         char b1[BUFSIZE], b2[BUFSIZE]; \
@@ -19672,7 +20165,8 @@ do_slab_slub(struct meminfo *si, int verbose)
 
        if (!verbose) {
                DUMP_SLAB_INFO_SLUB();
-               return TRUE;
+               if (!(si->flags & GET_SLAB_DEBUG_TRACE))
+                       return TRUE;
        }
 
        cpu_freelist = 0;
@@ -19775,7 +20269,8 @@ do_slab_slub(struct meminfo *si, int verbose)
                if (is_free && (cpu_slab >= 0))
                        fprintf(fp, "(cpu %d cache)", cpu_slab);
                fprintf(fp, "\n");
-
+               if (!is_free)
+                       slab_debug_trace_show(si, p + red_left_pad);
        }
 
        return TRUE;
@@ -19886,11 +20381,10 @@ do_node_lists_slub(struct meminfo *si, ulong 
node_ptr, int node)
 
         }
 
-#define SLAB_STORE_USER (0x00010000UL)
        flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags));
        
        if (INVALID_MEMBER(kmem_cache_node_full) ||
-           !(flags & SLAB_STORE_USER)) {
+           !(flags & get_slab_store_user_flag())) {
                fprintf(fp, "NODE %d FULL:\n  (not tracked)\n", node);
                return;
        }
-- 
2.25.1
--
Crash-utility mailing list -- devel@lists.crash-utility.osci.io
To unsubscribe send an email to devel-leave@lists.crash-utility.osci.io
https://lists.crash-utility.osci.io/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

Reply via email to