On Thu, Sep 12, 2024 at 11:41 AM <devel-requ...@lists.crash-utility.osci.io>
wrote:

> Date: Thu, 12 Sep 2024 03:36:53 -0000
> From: qiwu.c...@transsion.com
> Subject: [Crash-utility] [PATCH v2 1/2] kmem: introduce -t flag to get
>         page owner
> To: devel@lists.crash-utility.osci.io
> Message-ID: <20240912033653.15869.73...@lists.crash-utility.osci.io>
> Content-Type: text/plain; charset="utf-8"
>
> Introduce -t flag for kmem command to get page owner.
>

Thank you for the patch, qiwu.

Kazu implemented an "owner" command (see: *page_owner.c
<https://raw.githubusercontent.com/k-hagio/crash-pageowner/main/page_owner.c>*),
which provides a similar feature. Please refer to the page_owner.c section here:
https://crash-utility.github.io/extensions.html


Thanks
Lianbo

Here is the user help manual:
>
> 1. Dump the page_owner allocated stack trace for each allocated page in
> the buddy system when used with "kmem -pt":
> crash> kmem -pt
> Page allocated via order 0, mask 0x1112c4a, pid 1, ts 16155269152 ns
> PFN 0x40000, type Movable, Flags 0xffff00000020836
> set_page_owner+84
> post_alloc_hook+308
> prep_new_page+48
> get_page_from_freelist+736
> __alloc_pages+348
> alloc_pages+280
> __page_cache_alloc+120
> page_cache_ra_unbounded+272
> do_page_cache_ra+172
> do_sync_mmap_readahead+492
> filemap_fault+340
> __do_fault+64
> __handle_mm_fault+528
> handle_mm_fault+208
> __do_page_fault+232
> do_page_fault+1264
> ......
>
> 2. Dump the page_owner allocated/freed trace for an allocated page when
> "kmem -pt" is used with a page address.
> crash> kmem -pt fffffc00001f9e40
>       PAGE       PHYSICAL      MAPPING       INDEX CNT FLAGS
> fffffc00001f9e40 47e79000 dead000000000008        0  0 ffff00000000000
> page_owner tracks the page 0xfffffc00001f9e40 as allocated
> Page allocated via order 3, mask 0xd20c0, pid 163, ts 39197221904 ns
> PFN 0x47e79, type Unmovable, Flags 0xffff00000000000
> set_page_owner+84
> post_alloc_hook+308
> prep_new_page+48
> get_page_from_freelist+736
> __alloc_pages+348
> alloc_pages+280
> alloc_slab_page+60
> allocate_slab+212
> new_slab+200
> ___slab_alloc+1432
> __slab_alloc+60
> kmem_cache_alloc_node+528
> alloc_task_struct_node+36
> dup_task_struct+56
> copy_process+724
> kernel_clone+276
>
> page last free ts 38730338480 ns, stack trace:
> set_page_owner+84
> post_alloc_hook+308
> prep_new_page+48
> get_page_from_freelist+736
> __alloc_pages+348
> alloc_pages+280
> alloc_slab_page+60
> allocate_slab+212
> new_slab+200
> ___slab_alloc+1432
> __slab_alloc+60
> kmem_cache_alloc_node+528
> alloc_task_struct_node+36
> dup_task_struct+56
> copy_process+724
> kernel_clone+276
>
> With this patch, the page allocation times can be sorted by page_owner_sort
> tool easily.
>
> Signed-off-by: qiwu.chen <qiwu.c...@transsion.com>
> ---
>  defs.h   |  43 ++++++
>  help.c   |   4 +-
>  memory.c | 434 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  3 files changed, 461 insertions(+), 20 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 2231cb6..3d729c8 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -206,6 +206,34 @@ static inline int string_exists(char *s) { return (s
> ? TRUE : FALSE); }
>  #undef roundup
>  #endif
>  #define roundup(x, y)  ((((x)+((y)-1))/(y))*(y))
> +#define __round_mask(x, y) ((__typeof__(x))((y)-1))
> +#define rounddown(x, y) ((x) & ~__round_mask(x, y))
> +#define IS_ALIGNED(x, y) (((x) & ((typeof(x))(y) - 1)) == 0)
> +
> +/* stack depot macros before kernel commit 8151c7a35d8bd */
> +#define STACK_ALLOC_ALIGN 4
> +#define STACK_ALLOC_NULL_PROTECTION_BITS 1
> +#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages
> */
> +#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGESHIFT() -
> STACK_ALLOC_ALIGN)
> +#define DEPOT_STACK_BITS (sizeof(uint) * 8)
> +#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
> +               STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
> +
> +/* stack depot macros since kernel commit 8151c7a35d8bd */
> +#define STACK_DEPOT_EXTRA_BITS 5
> +#define DEPOT_HANDLE_BITS (sizeof(uint) * 8)
> +#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
> +#define DEPOT_POOL_SIZE (1LL << (PAGESHIFT() + DEPOT_POOL_ORDER))
> +#define DEPOT_STACK_ALIGN 4
> +#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGESHIFT() -
> DEPOT_STACK_ALIGN)
> +#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
> +                              STACK_DEPOT_EXTRA_BITS)
> +
> +/* GFP flags */
> +#define __GFP_RECLAIMABLE      0x10u
> +#define __GFP_MOVABLE          0x08u
> +#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
> +#define GFP_MOVABLE_SHIFT 3
>
>  typedef uint64_t physaddr_t;
>
> @@ -2243,6 +2271,18 @@ struct offset_table {                    /* stash
> of commonly-used offsets */
>         long vmap_node_busy;
>         long rb_list_head;
>         long file_f_inode;
> +       long pglist_data_node_page_ext;
> +       long stack_record_size;
> +       long stack_record_entries;
> +       long stack_record_count;
> +       long page_owner_order;
> +       long page_owner_gfp_mask;
> +       long page_owner_ts_nsec;
> +       long page_owner_free_ts_nsec;
> +       long page_owner_pid;
> +       long page_owner_handle;
> +       long page_owner_free_handle;
> +       long mem_section_page_ext;
>  };
>
>  struct size_table {         /* stash of commonly-used sizes */
> @@ -2419,6 +2459,9 @@ struct size_table {         /* stash of
> commonly-used sizes */
>         long module_memory;
>         long fred_frame;
>         long vmap_node;
> +       long page_ext;
> +       long page_owner;
> +       long stack_record;
>  };
>
>  struct array_table {
> diff --git a/help.c b/help.c
> index e95ac1d..f8ec62f 100644
> --- a/help.c
> +++ b/help.c
> @@ -6815,7 +6815,7 @@ NULL
>  char *help_kmem[] = {
>  "kmem",
>  "kernel memory",
> -"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n"
> +"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n"
>  "       [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P]
> address]]",
>  "  This command displays information about the use of kernel memory.\n",
>  "        -f  displays the contents of the system free memory headers.",
> @@ -6845,6 +6845,7 @@ char *help_kmem[] = {
>  "            mem_map[] array, made up of the page struct address, its
> associated",
>  "            physical address, the page.mapping, page.index, page._count
> and",
>  "            page.flags fields.",
> +"        -t  displays page_owner allocated stack trace for each allocated
> page.",
>  " -m member  similar to -p, but displays page structure contents
> specified by",
>  "            a comma-separated list of one or more struct page members.
> The",
>  "            \"flags\" member will always be expressed in hexadecimal
> format, and",
> @@ -6899,6 +6900,7 @@ char *help_kmem[] = {
>  "   address  when used with -p, the address can be either a page pointer,
> a",
>  "            physical address, or a kernel virtual address; its basic
> mem_map",
>  "            page information is displayed.",
> +"            when added extra -t, display the page_owner traces for this
> page",
>  "   address  when used with -m, the address can be either a page pointer,
> a",
>  "            physical address, or a kernel virtual address; the
> specified",
>  "            members of the associated page struct are displayed.",
> diff --git a/memory.c b/memory.c
> index 967a9cf..6c69b6a 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -323,6 +323,11 @@ static ulong kmem_cache_nodelists(ulong);
>  static void dump_hstates(void);
>  static ulong freelist_ptr(struct meminfo *, ulong, ulong);
>  static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
> +static void page_owner_init(void);
> +static int page_owner_enabled(void);
> +static void stack_depot_init(void);
> +static void dump_page_owner(struct meminfo *, ulong, physaddr_t);
> +enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_ALL };
>
>  /*
>   *  Memory display modes specific to this file.
> @@ -983,6 +988,8 @@ vm_init(void)
>                 vt->flags |= DISCONTIGMEM;
>
>         sparse_mem_init();
> +       page_owner_init();
> +       stack_depot_init();
>
>         vt->vmalloc_start = machdep->vmalloc_start();
>         if (IS_VMALLOC_ADDR(vt->mem_map))
> @@ -1099,6 +1106,8 @@ vm_init(void)
>                 MEMBER_OFFSET_INIT(pglist_data_bdata, "pglist_data",
> "bdata");
>                 MEMBER_OFFSET_INIT(pglist_data_nr_zones, "pglist_data",
>                         "nr_zones");
> +               MEMBER_OFFSET_INIT(pglist_data_node_page_ext,
> "pglist_data",
> +                       "node_page_ext");
>                 MEMBER_OFFSET_INIT(pglist_data_node_start_pfn,
> "pglist_data",
>                         "node_start_pfn");
>                 MEMBER_OFFSET_INIT(pglist_data_pgdat_next, "pglist_data",
> @@ -5037,6 +5046,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage
> *tm)
>  #define SLAB_BITFIELD          (ADDRESS_SPECIFIED << 25)
>  #define SLAB_GATHER_FAILURE    (ADDRESS_SPECIFIED << 26)
>  #define GET_SLAB_ROOT_CACHES   (ADDRESS_SPECIFIED << 27)
> +#define GET_PAGE_OWNER        (ADDRESS_SPECIFIED << 28)
>
>  #define GET_ALL \
>
> (GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES)
> @@ -5048,7 +5058,7 @@ cmd_kmem(void)
>         int c;
>         int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag;
>         int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag;
> -       int rflag;
> +       int rflag, tflag;
>         struct meminfo meminfo;
>         ulonglong value[MAXARGS];
>         char buf[BUFSIZE];
> @@ -5061,13 +5071,13 @@ cmd_kmem(void)
>         spec_addr = choose_cpu = 0;
>          sflag =        Sflag = pflag = fflag = Fflag = Pflag = zflag =
> oflag = 0;
>         vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0;
> -       gflag = hflag = rflag = 0;
> +       gflag = hflag = rflag = tflag = 0;
>         escape = FALSE;
>         BZERO(&meminfo, sizeof(struct meminfo));
>         BZERO(&value[0], sizeof(ulonglong)*MAXARGS);
>         pc->curcmd_flags &= ~HEADER_PRINTED;
>
> -        while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoh"))
> != EOF) {
> +        while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoht"))
> != EOF) {
>                  switch(c)
>                 {
>                 case 'V':
> @@ -5204,6 +5214,10 @@ cmd_kmem(void)
>                         gflag = 1;
>                         break;
>
> +               case 't':
> +                       tflag = 1;
> +                       break;
> +
>                 default:
>                         argerrs++;
>                         break;
> @@ -5213,7 +5227,7 @@ cmd_kmem(void)
>         if (argerrs)
>                 cmd_usage(pc->curcmd, SYNOPSIS);
>
> -        if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag +
> +        if ((fflag + Fflag + Vflag + oflag +
>              vflag + Cflag + cflag + iflag + lflag + Lflag + gflag +
>              hflag + rflag) > 1) {
>                 error(INFO, "only one flag allowed!\n");
> @@ -5264,10 +5278,13 @@ cmd_kmem(void)
>                  if (pflag) {
>                         meminfo.spec_addr = value[i];
>                         meminfo.flags = ADDRESS_SPECIFIED;
> +                       if (tflag)
> +                               meminfo.flags |= GET_PAGE_OWNER;
>                          dump_mem_map(&meminfo);
>                          pflag++;
>                  }
>
> +
>                  if (sflag || Sflag) {
>                         if (vt->flags & KMEM_CACHE_UNAVAIL)
>                                 error(FATAL,
> @@ -5346,25 +5363,25 @@ cmd_kmem(void)
>                         gflag++;
>                 }
>
> -                /*
> -                 * no value arguments allowed!
> -                 */
> -                if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
> +               /*
> +                * no value arguments allowed!
> +                */
> +               if (zflag || nflag || iflag || Fflag || Cflag || Lflag ||
>                     Vflag || oflag || hflag || rflag) {
>                         error(INFO,
>                             "no address arguments allowed with this
> option\n");
>                          cmd_usage(pc->curcmd, SYNOPSIS);
>                 }
>
> -               if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
> -                     lflag + Lflag + gflag)) {
> +               if (!(sflag + Sflag + pflag + fflag + vflag + cflag +
> +                     lflag + Lflag + gflag + tflag)) {
>                         meminfo.spec_addr = value[i];
> -                        meminfo.flags = ADDRESS_SPECIFIED;
> -                        if (meminfo.calls++)
> -                                fprintf(fp, "\n");
> +                       meminfo.flags = ADDRESS_SPECIFIED;
> +                       if (meminfo.calls++)
> +                               fprintf(fp, "\n");
>                         else
>                                 kmem_cache_init();
> -                        kmem_search(&meminfo);
> +                       kmem_search(&meminfo);
>                 }
>
>         }
> @@ -5372,8 +5389,11 @@ cmd_kmem(void)
>         if (iflag == 1)
>                 dump_kmeminfo();
>
> -       if (pflag == 1)
> +       if (pflag == 1) {
> +               if (tflag)
> +                       meminfo.flags = GET_PAGE_OWNER;
>                 dump_mem_map(&meminfo);
> +       }
>
>         if (fflag == 1)
>                 vt->dump_free_pages(&meminfo);
> @@ -5457,7 +5477,7 @@ cmd_kmem(void)
>         if (!(sflag + Sflag + pflag + fflag + Fflag + vflag +
>               Vflag + zflag + oflag + cflag + Cflag + iflag +
>               nflag + lflag + Lflag + gflag + hflag + rflag +
> -             meminfo.calls))
> +             tflag + meminfo.calls))
>                 cmd_usage(pc->curcmd, SYNOPSIS);
>
>  }
> @@ -5749,7 +5769,8 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
>
>         switch (mi->flags)
>         {
> -       case ADDRESS_SPECIFIED:
> +       case ADDRESS_SPECIFIED:
> +       case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
>                 switch (mi->memtype)
>                 {
>                 case KVADDR:
> @@ -5774,6 +5795,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
>                 print_hdr = TRUE;
>                 break;
>
> +       case GET_PAGE_OWNER:
> +               print_hdr = FALSE;
> +               break;
> +
>         case GET_ALL:
>                 shared = 0;
>                  reserved = 0;
> @@ -5959,6 +5984,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi)
>                                                 shared++;
>                                 }
>                                 continue;
> +
> +                       case GET_PAGE_OWNER:
> +                               dump_page_owner(mi, pp, phys);
> +                               continue;
>                         }
>                         page_mapping = VALID_MEMBER(page_mapping);
>
> @@ -6083,6 +6112,7 @@ display_members:
>
>                         if (done)
>                                 break;
> +
>                 }
>
>                 if (done)
> @@ -6119,7 +6149,10 @@ display_members:
>                 break;
>
>         case ADDRESS_SPECIFIED:
> +       case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
>                 mi->retval = done;
> +               if (mi->flags & GET_PAGE_OWNER)
> +                       dump_page_owner(mi, pp, phys);
>                 break;
>         }
>
> @@ -6129,6 +6162,345 @@ display_members:
>         FREEBUF(page_cache);
>  }
>
> +static int stack_depot_enabled(void)
> +{
> +       struct syment *sp;
> +       int disable = TRUE;
> +
> +       if ((sp = symbol_search("stack_depot_disable")))
> +               readmem(sp->value, KVADDR, &disable, sizeof(int),
> +                       "stack_depot_disable", RETURN_ON_ERROR);
> +       else if ((sp = symbol_search("stack_depot_disabled")))
> +               readmem(sp->value, KVADDR, &disable, sizeof(int),
> +                       "stack_depot_disabled", RETURN_ON_ERROR);
> +       else if ((sp = symbol_search("stack_slabs")))
> +               return sp->value ? FALSE : TRUE;
> +
> +       return !disable;
> +}
> +
> +static void stack_depot_init(void)
> +{
> +       if (stack_depot_enabled()) {
> +               STRUCT_SIZE_INIT(stack_record, "stack_record");
> +               MEMBER_OFFSET_INIT(stack_record_size, "stack_record",
> "size");
> +               MEMBER_OFFSET_INIT(stack_record_entries, "stack_record",
> "entries");
> +               if (MEMBER_EXISTS("stack_record", "count"))
> +                       MEMBER_OFFSET_INIT(stack_record_count,
> "stack_record", "count");
> +       }
> +}
> +
> +/* Fetch stack entries from a depot. */
> +static unsigned int stack_depot_fetch(uint handle, ulong *entries)
> +{
> +       struct syment *sp;
> +       uint valid, offset, slabindex, poolindex, pools_num,
> stack_record_count;
> +       uint stack_size = 0;
> +       ulong stack_record_addr, sym_value;
> +
> +       if (!handle)
> +               return 0;
> +
> +       if ((sp = symbol_search("stack_slabs"))) {
> +               valid = (handle >> (STACK_ALLOC_INDEX_BITS +
> STACK_ALLOC_OFFSET_BITS))
> +                       & STACK_ALLOC_NULL_PROTECTION_BITS;
> +               if (!valid)
> +                       return 0;
> +
> +               slabindex = handle & ((1 << STACK_ALLOC_INDEX_BITS) - 1);
> +               readmem(sp->value + slabindex * sizeof(void *), KVADDR,
> &stack_record_addr,
> +                       sizeof(void *), "stack_record_addr",
> FAULT_ON_ERROR);
> +
> +               offset = (handle >> STACK_ALLOC_INDEX_BITS) &
> +                       ((1 << STACK_ALLOC_OFFSET_BITS) - 1);
> +               stack_record_addr += (offset << STACK_ALLOC_ALIGN);
> +               *entries = stack_record_addr +
> OFFSET(stack_record_entries);
> +               readmem(stack_record_addr + OFFSET(stack_record_size),
> KVADDR, &stack_size,
> +                       sizeof(stack_size), "stack_record_entries",
> FAULT_ON_ERROR);
> +       } else if ((sp = symbol_search("stack_pools")) &&
> +                       (sym_value = symbol_value("pools_num"))) {
> +               poolindex = handle & ((1 << DEPOT_POOL_INDEX_BITS) - 1);
> +               readmem(sym_value, KVADDR, &pools_num, sizeof(int),
> +                       "pools_num", RETURN_ON_ERROR);
> +               if (poolindex >= pools_num) {
> +                       error(INFO, "pool index %d out of bounds (%d) for
> stack id %08x\n",
> +                               poolindex, pools_num, handle);
> +                       return 0;
> +               }
> +
> +               readmem(sp->value + (poolindex-1) * sizeof(void *),
> KVADDR, &stack_record_addr,
> +                       sizeof(void *), "stack_record_addr",
> FAULT_ON_ERROR);
> +               if (!stack_record_addr)
> +                       return 0;
> +
> +               offset = (handle >> DEPOT_POOL_INDEX_BITS) & ((1 <<
> DEPOT_OFFSET_BITS) - 1);
> +               stack_record_addr += (offset << DEPOT_STACK_ALIGN);
> +               readmem(stack_record_addr + OFFSET(stack_record_count),
> KVADDR, &stack_record_count,
> +                       sizeof(stack_record_count), "stack_record_count",
> FAULT_ON_ERROR);
> +               if (!stack_record_count)
> +                       return 0;
> +
> +               *entries = stack_record_addr +
> OFFSET(stack_record_entries);
> +               readmem(stack_record_addr + OFFSET(stack_record_size),
> KVADDR, &stack_size,
> +                       sizeof(stack_size), "stack_record_entries",
> FAULT_ON_ERROR);
> +       }
> +
> +       return stack_size;
> +}
> +
> +static void stack_trace_print(ulong entries, uint nr_entries)
> +{
> +       int i;
> +       struct syment *sp;
> +       ulong value, offset;
> +       char buf[BUFSIZE];
> +
> +       if (!nr_entries)
> +               return;
> +
> +       for (i = 0; i < nr_entries; i++) {
> +               if (!readmem(entries, KVADDR, &value, sizeof(value),
> +                               "stack_trace", FAULT_ON_ERROR))
> +                       break;
> +
> +               entries += sizeof(ulong);
> +               sp = value_search(value, &offset);
> +               if (!sp)
> +                       break;
> +
> +               fprintf(fp, "%s\n", value_to_symstr(sp->value+offset, buf,
> 0));
> +       }
> +       fprintf(fp, "\n");
> +}
> +
> +static ulong gfp_migratetype(ulong gfp_flags)
> +{
> +       struct syment *sp;
> +       int page_group_by_mobility_disabled;
> +
> +       if ((sp = symbol_search("page_group_by_mobility_disabled"))) {
> +               readmem(sp->value, KVADDR,
> &page_group_by_mobility_disabled, sizeof(int),
> +                       "page_group_by_mobility_disabled",
> RETURN_ON_ERROR);
> +               if (page_group_by_mobility_disabled) {
> +                       ulong migrate_unmovable;
> +
> +                       enumerator_value("MIGRATE_UNMOVABLE",
> &migrate_unmovable);
> +                       return migrate_unmovable;
> +               }
> +       }
> +
> +       return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
> +}
> +
> +static void migratetype_name(ulong migratetype, char *buf)
> +{
> +       struct syment *sp;
> +       ulong migratetype_name_addr;
> +
> +       sp = symbol_search("migratetype_names");
> +       if (!sp)
> +               return;
> +
> +       readmem(sp->value + migratetype * sizeof(ulong), KVADDR,
> &migratetype_name_addr,
> +               sizeof(ulong), "migratetype_name", RETURN_ON_ERROR);
> +       read_string(migratetype_name_addr, buf, BUFSIZE-1);
> +}
> +
> +static void print_page_owner(ulong pfn, ulong page, char *page_owner,
> enum track_item alloc)
> +{
> +       int i, pid;
> +       ushort order;
> +       uint handle, free_handle, gfp_mask, nr_entries;
> +       u64 ts_nsec, free_ts_nsec;
> +       ulong entries, offset, page_flags;
> +       struct syment *sp;
> +       char buf[BUFSIZE];
> +
> +       order = USHORT(page_owner + OFFSET(page_owner_order));
> +       gfp_mask = UINT(page_owner + OFFSET(page_owner_gfp_mask));
> +       handle = UINT(page_owner + OFFSET(page_owner_handle));
> +       free_handle = UINT(page_owner + OFFSET(page_owner_free_handle));
> +       ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_ts_nsec));
> +       free_ts_nsec = ULONGLONG(page_owner +
> OFFSET(page_owner_free_ts_nsec));
> +       pid = INT(page_owner + OFFSET(page_owner_pid));
> +
> +       if (handle && (alloc != TRACK_FREE)) {
> +               fprintf(fp, "Page allocated via order %u, mask %#x, pid
> %d, ts %llu ns\n",
> +                               order, gfp_mask, pid, ts_nsec);
> +               migratetype_name(gfp_migratetype(gfp_mask), buf);
> +               readmem(page+OFFSET(page_flags), KVADDR, &page_flags,
> sizeof(ulong),
> +                               "page.flags", FAULT_ON_ERROR);
> +               fprintf(fp, "PFN %#lx, type %s, Flags %#lx\n", pfn, buf,
> page_flags);
> +               nr_entries = stack_depot_fetch(handle, &entries);
> +               stack_trace_print(entries, nr_entries);
> +       }
> +
> +       if (alloc != TRACK_ALLOC &&
> +               (free_handle = UINT(page_owner +
> OFFSET(page_owner_free_handle)))) {
> +               nr_entries = stack_depot_fetch(handle, &entries);
> +               fprintf(fp, "page last free ts %llu ns, stack trace:\n",
> free_ts_nsec);
> +               stack_trace_print(entries, nr_entries);
> +       }
> +}
> +
> +/* Get the max order for zoned buddy allocator */
> +static inline ulong get_max_order(void)
> +{
> +       char *string;
> +
> +       if ((kt->ikconfig_flags & IKCONFIG_AVAIL) &&
> +           get_kernel_config("CONFIG_FORCE_MAX_ZONEORDER", &string) ==
> IKCONFIG_STR)
> +               return atol(string);
> +
> +       return 11;
> +}
> +
> +#define MAX_ORDER      get_max_order()
> +#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
> +
> +static int lookup_page_ext(ulong pfn, ulong pp, ulong *page_ext)
> +{
> +       int node;
> +       ulong page_ext_size, section, section_nr, pgdat;
> +       ulong node_page_ext, node_start_pfn, page_ext_idx;
> +
> +       if (!kernel_symbol_exists("page_ext_size"))
> +               return FALSE;
> +
> +       readmem(symbol_value("page_ext_size"), KVADDR, &page_ext_size,
> +               sizeof(page_ext_size), "page_ext_size", FAULT_ON_ERROR);
> +
> +       if (IS_SPARSEMEM()) {
> +               section_nr = pfn_to_section_nr(pfn);
> +               if (!(section = valid_section_nr(section_nr)))
> +                       return FALSE;
> +
> +               readmem(section + OFFSET(mem_section_page_ext), KVADDR,
> &node_page_ext,
> +                       sizeof(ulong), "mem_section page_ext",
> FAULT_ON_ERROR);
> +               if (!node_page_ext)
> +                       return FALSE;
> +
> +               *page_ext = node_page_ext + pfn * page_ext_size;
> +               return TRUE;
> +       }
> +
> +       if ((node = page_to_nid(pp) >= 0)) {
> +               pgdat = vt->node_table[node].pgdat;
> +               if (!VALID_MEMBER(pglist_data_node_page_ext) ||
> +                   !VALID_MEMBER(pglist_data_node_start_pfn))
> +                       return FALSE;
> +
> +               readmem(pgdat + OFFSET(pglist_data_node_page_ext), KVADDR,
> &node_page_ext,
> +                       sizeof(ulong), "pglist node_page_ext",
> FAULT_ON_ERROR);
> +               if (!node_page_ext)
> +                       return FALSE;
> +
> +               readmem(pgdat + OFFSET(pglist_data_node_start_pfn),
> KVADDR, &node_start_pfn,
> +                       sizeof(ulong),  "pglist node_start_pfn",
> FAULT_ON_ERROR);
> +               if (!node_start_pfn)
> +                       return FALSE;
> +
> +               page_ext_idx = pfn - rounddown(node_start_pfn,
> MAX_ORDER_NR_PAGES);
> +               *page_ext = node_page_ext + pfn * page_ext_size;
> +               return TRUE;
> +       }
> +
> +       return FALSE;
> +}
> +
> +static ulong get_page_owner(ulong page_ext)
> +{
> +       struct syment *sp;
> +       ulong page_owner_ops_offset;
> +
> +       sp = symbol_search("page_owner_ops");
> +       if (!sp)
> +               return FALSE;
> +
> +       readmem(sp->value, KVADDR, &page_owner_ops_offset, sizeof(ulong),
> +               "page_owner_ops_offset", RETURN_ON_ERROR);
> +
> +       return page_ext + page_owner_ops_offset;
> +}
> +
> +static int page_owner_enabled(void)
> +{
> +       struct syment *sp;
> +       int enabled;
> +
> +       if ((sp = symbol_search("page_owner_enabled")) &&
> +            readmem(sp->value, KVADDR, &enabled, sizeof(int),
> +                    "page_owner_enabled", RETURN_ON_ERROR))
> +               return enabled;
> +
> +       if ((sp = symbol_search("page_owner_inited")) &&
> +            readmem(sp->value, KVADDR, &enabled, sizeof(int),
> +                    "page_owner_inited", RETURN_ON_ERROR))
> +               return enabled;
> +
> +       return FALSE;
> +}
> +
> +static void page_owner_init(void)
> +{
> +       if (page_owner_enabled()) {
> +               STRUCT_SIZE_INIT(page_ext, "page_ext");
> +               STRUCT_SIZE_INIT(page_owner, "page_owner");
> +               MEMBER_OFFSET_INIT(mem_section_page_ext, "mem_section",
> "page_ext");
> +               MEMBER_OFFSET_INIT(page_owner_handle, "page_owner",
> "handle");
> +               MEMBER_OFFSET_INIT(page_owner_free_handle, "page_owner",
> "free_handle");
> +               MEMBER_OFFSET_INIT(page_owner_order, "page_owner",
> "order");
> +               MEMBER_OFFSET_INIT(page_owner_gfp_mask, "page_owner",
> "gfp_mask");
> +               MEMBER_OFFSET_INIT(page_owner_ts_nsec, "page_owner",
> "ts_nsec");
> +               MEMBER_OFFSET_INIT(page_owner_free_ts_nsec, "page_owner",
> "free_ts_nsec");
> +               MEMBER_OFFSET_INIT(page_owner_pid, "page_owner", "pid");
> +       }
> +}
> +
> +static void dump_page_owner(struct meminfo *mi, ulong pp, physaddr_t phys)
> +{
> +       ulong pfn, page_ext_addr, page_owner_addr, page_ext;
> +       long page_ext_owner, page_ext_owner_allocated;
> +       char *page_owner;
> +
> +       pfn = BTOP(phys);
> +       if (!lookup_page_ext(pfn, pp, &page_ext_addr))
> +               return;
> +
> +       page_owner_addr = get_page_owner(page_ext_addr);
> +       if (!page_owner_addr)
> +               return;
> +
> +       page_owner = (char *)GETBUF(SIZE(page_owner));
> +       readmem(page_owner_addr, KVADDR, page_owner, SIZE(page_owner),
> +               "page_owner", FAULT_ON_ERROR);
> +
> +       enumerator_value("PAGE_EXT_OWNER", &page_ext_owner);
> +       readmem(page_ext_addr, KVADDR, &page_ext, sizeof(ulong),
> +               "page_ext", FAULT_ON_ERROR);
> +       if (!(page_ext & (1 << page_ext_owner)))
> +               goto exit;
> +
> +       enumerator_value("PAGE_EXT_OWNER_ALLOCATED",
> &page_ext_owner_allocated);
> +       if (mi->flags == GET_PAGE_OWNER) {
> +               if (!(page_ext & (1 << page_ext_owner_allocated)) ||
> +                   !IS_ALIGNED(pfn, 1 << USHORT(page_owner +
> OFFSET(page_owner_order))))
> +                       goto exit;
> +
> +               /* dump allocated page owner for current memory usage */
> +               print_page_owner(pfn, pp, page_owner, TRACK_ALLOC);
> +       } else {
> +               if (page_ext & (1 << page_ext_owner_allocated))
> +                       fprintf(fp, "page_owner tracks the page 0x%lx as
> allocated\n", pp);
> +               else
> +                       fprintf(fp, "page_owner tracks the page 0x%lx as
> freed\n", pp);
> +               print_page_owner(pfn, pp, page_owner, TRACK_ALL);
> +       }
> +
> +exit:
> +       FREEBUF(page_owner);
> +}
> +
>  static void
>  dump_mem_map(struct meminfo *mi)
>  {
> @@ -6161,6 +6533,19 @@ dump_mem_map(struct meminfo *mi)
>         char style3[100];
>         char style4[100];
>
> +       if (mi->flags & GET_PAGE_OWNER) {
> +               if (!page_owner_enabled()) {
> +                       error(INFO, "page_owner is disabled\n");
> +                       return;
> +               }
> +
> +               /* TODO: support page owner for early kernels without
> stack depot */
> +               if (!stack_depot_enabled()) {
> +                       error(INFO, "stack_depot is disabled\n");
> +                       return;
> +               }
> +       }
> +
>         if (IS_SPARSEMEM()) {
>                 dump_mem_map_SPARSEMEM(mi);
>                 return;
> @@ -6238,7 +6623,8 @@ dump_mem_map(struct meminfo *mi)
>
>         switch (mi->flags)
>         {
> -       case ADDRESS_SPECIFIED:
> +       case ADDRESS_SPECIFIED:
> +       case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
>                 switch (mi->memtype)
>                 {
>                 case KVADDR:
> @@ -6263,6 +6649,10 @@ dump_mem_map(struct meminfo *mi)
>                 print_hdr = TRUE;
>                 break;
>
> +       case GET_PAGE_OWNER:
> +               print_hdr = FALSE;
> +               break;
> +
>         case GET_ALL:
>                 shared = 0;
>                  reserved = 0;
> @@ -6376,6 +6766,10 @@ dump_mem_map(struct meminfo *mi)
>
>                                 /* FALLTHROUGH */
>
> +                       case GET_PAGE_OWNER:
> +                               dump_page_owner(mi, pp, phys);
> +                               break;
> +
>                         case GET_SLAB_PAGES:
>                                 if (v22) {
>                                         if ((flags >> v22_PG_Slab) & 1)
> @@ -6570,7 +6964,10 @@ display_members:
>                 break;
>
>         case ADDRESS_SPECIFIED:
> +       case ADDRESS_SPECIFIED|GET_PAGE_OWNER:
>                 mi->retval = done;
> +               if (mi->flags & GET_PAGE_OWNER)
> +                       dump_page_owner(mi, pp, phys);
>                 break;
>         }
>
> @@ -19776,7 +20173,6 @@ do_slab_slub(struct meminfo *si, int verbose)
>                 if (is_free && (cpu_slab >= 0))
>                         fprintf(fp, "(cpu %d cache)", cpu_slab);
>                 fprintf(fp, "\n");
> -
>         }
>
>         return TRUE;
> --
> 2.25.1
>
--
Crash-utility mailing list -- devel@lists.crash-utility.osci.io
To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

Reply via email to