Adding more people.

On Tue, Aug 08 2017, Daniel Colascione wrote:
> /proc/pid/smaps_rollup is a new proc file that improves the
> performance of user programs that determine aggregate memory
> statistics (e.g., total PSS) of a process.
>
> Android regularly "samples" the memory usage of various processes in
> order to balance its memory pool sizes. This sampling process involves
> opening /proc/pid/smaps and summing certain fields. For very large
> processes, sampling memory use this way can take several hundred
> milliseconds, due mostly to the overhead of the seq_printf calls in
> task_mmu.c.
>
> smaps_rollup improves the situation. It contains most of the fields of
> /proc/pid/smaps, but instead of a set of fields for each VMA,
> smaps_rollup contains one synthetic smaps-format entry
> representing the whole process. In the single smaps_rollup synthetic
> entry, each field is the summation of the corresponding field in all
> of the real-smaps VMAs. Using a common format for smaps_rollup and
> smaps allows userspace to repurpose parsers meant for use with
> non-rollup smaps for smaps_rollup, and it allows userspace to switch
> between smaps_rollup and smaps at runtime (say, based on the
> availability of smaps_rollup in a given kernel) with minimal fuss.
>
> By using smaps_rollup instead of smaps, a caller can avoid the
> significant overhead of formatting, reading, and parsing each of a
> large process's potentially very numerous memory mappings. For
> sampling system_server's PSS in Android, we measured a 12x speedup,
> representing a savings of several hundred milliseconds.
>
> One alternative to a new per-process proc file would have been
> including PSS information in /proc/pid/status. We considered this
> option but thought that PSS would be too expensive (by a few orders of
> magnitude) to collect relative to what's already emitted as part of
> /proc/pid/status, and slowing every user of /proc/pid/status for the
> sake of readers that happen to want PSS feels wrong.
>
> The code itself works by reusing the existing VMA-walking framework we
> use for regular smaps generation and keeping the mem_size_stats
> structure around between VMA walks instead of using a fresh one for
> each VMA.  In this way, summation happens automatically.  We let
> seq_file walk over the VMAs just as it does for regular smaps and just
> emit nothing to the seq_file until we hit the last VMA.
>
> Signed-off-by: Daniel Colascione <dan...@google.com>
> ---
>  fs/proc/base.c     |   2 +
>  fs/proc/internal.h |   3 +
>  fs/proc/task_mmu.c | 196 ++++++++++++++++++++++++++++++++++++-----------------
>  3 files changed, 139 insertions(+), 62 deletions(-)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 719c2e943ea1..a9587b9cace5 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2930,6 +2930,7 @@ static const struct pid_entry tgid_base_stuff[] = {
>  #ifdef CONFIG_PROC_PAGE_MONITOR
>       REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
>       REG("smaps",      S_IRUGO, proc_pid_smaps_operations),
> +     REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
>       REG("pagemap",    S_IRUSR, proc_pagemap_operations),
>  #endif
>  #ifdef CONFIG_SECURITY
> @@ -3323,6 +3324,7 @@ static const struct pid_entry tid_base_stuff[] = {
>  #ifdef CONFIG_PROC_PAGE_MONITOR
>       REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
>       REG("smaps",     S_IRUGO, proc_tid_smaps_operations),
> +     REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
>       REG("pagemap",    S_IRUSR, proc_pagemap_operations),
>  #endif
>  #ifdef CONFIG_SECURITY
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index aa2b89071630..2cbfcd32e884 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -269,10 +269,12 @@ extern int proc_remount(struct super_block *, int *, 
> char *);
>  /*
>   * task_[no]mmu.c
>   */
> +struct mem_size_stats;
>  struct proc_maps_private {
>       struct inode *inode;
>       struct task_struct *task;
>       struct mm_struct *mm;
> +     struct mem_size_stats *rollup;
>  #ifdef CONFIG_MMU
>       struct vm_area_struct *tail_vma;
>  #endif
> @@ -288,6 +290,7 @@ extern const struct file_operations 
> proc_tid_maps_operations;
>  extern const struct file_operations proc_pid_numa_maps_operations;
>  extern const struct file_operations proc_tid_numa_maps_operations;
>  extern const struct file_operations proc_pid_smaps_operations;
> +extern const struct file_operations proc_pid_smaps_rollup_operations;
>  extern const struct file_operations proc_tid_smaps_operations;
>  extern const struct file_operations proc_clear_refs_operations;
>  extern const struct file_operations proc_pagemap_operations;
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index b836fd61ed87..02b55df7291c 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -252,6 +252,7 @@ static int proc_map_release(struct inode *inode, struct 
> file *file)
>       if (priv->mm)
>               mmdrop(priv->mm);
>  
> +     kfree(priv->rollup);
>       return seq_release_private(inode, file);
>  }
>  
> @@ -278,6 +279,23 @@ static int is_stack(struct proc_maps_private *priv,
>               vma->vm_end >= vma->vm_mm->start_stack;
>  }
>  
> +static void show_vma_header_prefix(struct seq_file *m,
> +                                unsigned long start, unsigned long end,
> +                                vm_flags_t flags, unsigned long long pgoff,
> +                                dev_t dev, unsigned long ino)
> +{
> +     seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
> +     seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
> +                start,
> +                end,
> +                flags & VM_READ ? 'r' : '-',
> +                flags & VM_WRITE ? 'w' : '-',
> +                flags & VM_EXEC ? 'x' : '-',
> +                flags & VM_MAYSHARE ? 's' : 'p',
> +                pgoff,
> +                MAJOR(dev), MINOR(dev), ino);
> +}
> +
>  static void
>  show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
>  {
> @@ -300,17 +318,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct 
> *vma, int is_pid)
>  
>       start = vma->vm_start;
>       end = vma->vm_end;
> -
> -     seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
> -     seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
> -                     start,
> -                     end,
> -                     flags & VM_READ ? 'r' : '-',
> -                     flags & VM_WRITE ? 'w' : '-',
> -                     flags & VM_EXEC ? 'x' : '-',
> -                     flags & VM_MAYSHARE ? 's' : 'p',
> -                     pgoff,
> -                     MAJOR(dev), MINOR(dev), ino);
> +     show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
>  
>       /*
>        * Print the dentry name for named mappings, and a
> @@ -429,6 +437,7 @@ const struct file_operations proc_tid_maps_operations = {
>  
>  #ifdef CONFIG_PROC_PAGE_MONITOR
>  struct mem_size_stats {
> +     bool first;
>       unsigned long resident;
>       unsigned long shared_clean;
>       unsigned long shared_dirty;
> @@ -442,7 +451,9 @@ struct mem_size_stats {
>       unsigned long swap;
>       unsigned long shared_hugetlb;
>       unsigned long private_hugetlb;
> +     unsigned long first_vma_start;
>       u64 pss;
> +     u64 pss_locked;
>       u64 swap_pss;
>       bool check_shmem_swap;
>  };
> @@ -718,18 +729,36 @@ void __weak arch_show_smap(struct seq_file *m, struct 
> vm_area_struct *vma)
>  
>  static int show_smap(struct seq_file *m, void *v, int is_pid)
>  {
> +     struct proc_maps_private *priv = m->private;
>       struct vm_area_struct *vma = v;
> -     struct mem_size_stats mss;
> +     struct mem_size_stats mss_stack;
> +     struct mem_size_stats *mss;
>       struct mm_walk smaps_walk = {
>               .pmd_entry = smaps_pte_range,
>  #ifdef CONFIG_HUGETLB_PAGE
>               .hugetlb_entry = smaps_hugetlb_range,
>  #endif
>               .mm = vma->vm_mm,
> -             .private = &mss,
>       };
> +     int ret = 0;
> +     bool rollup_mode;
> +     bool last_vma;
> +
> +     if (priv->rollup) {
> +             rollup_mode = true;
> +             mss = priv->rollup;
> +             if (mss->first) {
> +                     mss->first_vma_start = vma->vm_start;
> +                     mss->first = false;
> +             }
> +             last_vma = !m_next_vma(priv, vma);
> +     } else {
> +             rollup_mode = false;
> +             memset(&mss_stack, 0, sizeof(mss_stack));
> +             mss = &mss_stack;
> +     }
>  
> -     memset(&mss, 0, sizeof mss);
> +     smaps_walk.private = mss;
>  
>  #ifdef CONFIG_SHMEM
>       if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
> @@ -747,9 +776,9 @@ static int show_smap(struct seq_file *m, void *v, int 
> is_pid)
>  
>               if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
>                                       !(vma->vm_flags & VM_WRITE)) {
> -                     mss.swap = shmem_swapped;
> +                     mss->swap = shmem_swapped;
>               } else {
> -                     mss.check_shmem_swap = true;
> +                     mss->check_shmem_swap = true;
>                       smaps_walk.pte_hole = smaps_pte_hole;
>               }
>       }
> @@ -757,54 +786,71 @@ static int show_smap(struct seq_file *m, void *v, int 
> is_pid)
>  
>       /* mmap_sem is held in m_start */
>       walk_page_vma(vma, &smaps_walk);
> +     if (vma->vm_flags & VM_LOCKED)
> +             mss->pss_locked += mss->pss;
> +
> +     if (!rollup_mode) {
> +             show_map_vma(m, vma, is_pid);
> +     } else if (last_vma) {
> +             show_vma_header_prefix(
> +                     m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
> +             seq_pad(m, ' ');
> +             seq_puts(m, "[rollup]\n");
> +     } else {
> +             ret = SEQ_SKIP;
> +     }
>  
> -     show_map_vma(m, vma, is_pid);
> -
> -     seq_printf(m,
> -                "Size:           %8lu kB\n"
> -                "Rss:            %8lu kB\n"
> -                "Pss:            %8lu kB\n"
> -                "Shared_Clean:   %8lu kB\n"
> -                "Shared_Dirty:   %8lu kB\n"
> -                "Private_Clean:  %8lu kB\n"
> -                "Private_Dirty:  %8lu kB\n"
> -                "Referenced:     %8lu kB\n"
> -                "Anonymous:      %8lu kB\n"
> -                "LazyFree:       %8lu kB\n"
> -                "AnonHugePages:  %8lu kB\n"
> -                "ShmemPmdMapped: %8lu kB\n"
> -                "Shared_Hugetlb: %8lu kB\n"
> -                "Private_Hugetlb: %7lu kB\n"
> -                "Swap:           %8lu kB\n"
> -                "SwapPss:        %8lu kB\n"
> -                "KernelPageSize: %8lu kB\n"
> -                "MMUPageSize:    %8lu kB\n"
> -                "Locked:         %8lu kB\n",
> -                (vma->vm_end - vma->vm_start) >> 10,
> -                mss.resident >> 10,
> -                (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
> -                mss.shared_clean  >> 10,
> -                mss.shared_dirty  >> 10,
> -                mss.private_clean >> 10,
> -                mss.private_dirty >> 10,
> -                mss.referenced >> 10,
> -                mss.anonymous >> 10,
> -                mss.lazyfree >> 10,
> -                mss.anonymous_thp >> 10,
> -                mss.shmem_thp >> 10,
> -                mss.shared_hugetlb >> 10,
> -                mss.private_hugetlb >> 10,
> -                mss.swap >> 10,
> -                (unsigned long)(mss.swap_pss >> (10 + PSS_SHIFT)),
> -                vma_kernel_pagesize(vma) >> 10,
> -                vma_mmu_pagesize(vma) >> 10,
> -                (vma->vm_flags & VM_LOCKED) ?
> -                     (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
> -
> -     arch_show_smap(m, vma);
> -     show_smap_vma_flags(m, vma);
> +     if (!rollup_mode)
> +             seq_printf(m,
> +                        "Size:           %8lu kB\n"
> +                        "KernelPageSize: %8lu kB\n"
> +                        "MMUPageSize:    %8lu kB\n",
> +                        (vma->vm_end - vma->vm_start) >> 10,
> +                        vma_kernel_pagesize(vma) >> 10,
> +                        vma_mmu_pagesize(vma) >> 10);
> +
> +
> +     if (!rollup_mode || last_vma)
> +             seq_printf(m,
> +                        "Rss:            %8lu kB\n"
> +                        "Pss:            %8lu kB\n"
> +                        "Shared_Clean:   %8lu kB\n"
> +                        "Shared_Dirty:   %8lu kB\n"
> +                        "Private_Clean:  %8lu kB\n"
> +                        "Private_Dirty:  %8lu kB\n"
> +                        "Referenced:     %8lu kB\n"
> +                        "Anonymous:      %8lu kB\n"
> +                        "LazyFree:       %8lu kB\n"
> +                        "AnonHugePages:  %8lu kB\n"
> +                        "ShmemPmdMapped: %8lu kB\n"
> +                        "Shared_Hugetlb: %8lu kB\n"
> +                        "Private_Hugetlb: %7lu kB\n"
> +                        "Swap:           %8lu kB\n"
> +                        "SwapPss:        %8lu kB\n"
> +                        "Locked:         %8lu kB\n",
> +                        mss->resident >> 10,
> +                        (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
> +                        mss->shared_clean  >> 10,
> +                        mss->shared_dirty  >> 10,
> +                        mss->private_clean >> 10,
> +                        mss->private_dirty >> 10,
> +                        mss->referenced >> 10,
> +                        mss->anonymous >> 10,
> +                        mss->lazyfree >> 10,
> +                        mss->anonymous_thp >> 10,
> +                        mss->shmem_thp >> 10,
> +                        mss->shared_hugetlb >> 10,
> +                        mss->private_hugetlb >> 10,
> +                        mss->swap >> 10,
> +                        (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
> +                        (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
> +
> +     if (!rollup_mode) {
> +             arch_show_smap(m, vma);
> +             show_smap_vma_flags(m, vma);
> +     }
>       m_cache_vma(m, vma);
> -     return 0;
> +     return ret;
>  }
>  
>  static int show_pid_smap(struct seq_file *m, void *v)
> @@ -836,6 +882,25 @@ static int pid_smaps_open(struct inode *inode, struct 
> file *file)
>       return do_maps_open(inode, file, &proc_pid_smaps_op);
>  }
>  
> +static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
> +{
> +     struct seq_file *seq;
> +     struct proc_maps_private *priv;
> +     int ret = do_maps_open(inode, file, &proc_pid_smaps_op);
> +
> +     if (ret < 0)
> +             return ret;
> +     seq = file->private_data;
> +     priv = seq->private;
> +     priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
> +     if (!priv->rollup) {
> +             proc_map_release(inode, file);
> +             return -ENOMEM;
> +     }
> +     priv->rollup->first = true;
> +     return 0;
> +}
> +
>  static int tid_smaps_open(struct inode *inode, struct file *file)
>  {
>       return do_maps_open(inode, file, &proc_tid_smaps_op);
> @@ -848,6 +913,13 @@ const struct file_operations proc_pid_smaps_operations = 
> {
>       .release        = proc_map_release,
>  };
>  
> +const struct file_operations proc_pid_smaps_rollup_operations = {
> +     .open           = pid_smaps_rollup_open,
> +     .read           = seq_read,
> +     .llseek         = seq_lseek,
> +     .release        = proc_map_release,
> +};
> +
>  const struct file_operations proc_tid_smaps_operations = {
>       .open           = tid_smaps_open,
>       .read           = seq_read,

Reply via email to