Andi reported that maps cloning is eating lot of memory and
it's probably unnecessary, because they keep the same data.

Changing 'struct map_shared' to be a pointer inside 'struct map',
so it can be shared on fork. Changing the map__clone function to
actually share 'struct map_shared' for cloned maps.

The 'struct map_shared' carries its own refcnt counter, which is
incremented when it's assigned to new 'struct map' and decremented
when 'struct map' gets deleted in map__delete (its refcnt is 0).

This 'maps sharing' seems to save lot of heap for reports with
many forks/cloned mmaps (over 60% in example below).

Profile kernel build:

  $ perf record make -j 40

Get heap profile (tools/perf directory):

  $ <install gperftools>
  $ make TCMALLOC=1
  $ HEAPPROFILE=/tmp/heapprof ./perf report -i perf.data --stdio > out
  $ pprof ./perf /tmp/heapprof.000*

Before:

  (pprof) top
  Total: 2335.5 MB
    1735.1  74.3%  74.3%   1735.1  74.3% memdup
     402.0  17.2%  91.5%    402.0  17.2% zalloc
     140.2   6.0%  97.5%    145.8   6.2% map__new
      33.6   1.4%  98.9%     33.6   1.4% symbol__new
      12.4   0.5%  99.5%     12.4   0.5% alloc_event
       6.2   0.3%  99.7%      6.2   0.3% nsinfo__new
       5.5   0.2% 100.0%      5.5   0.2% nsinfo__copy
       0.3   0.0% 100.0%      0.3   0.0% dso__new
       0.1   0.0% 100.0%      0.1   0.0% do_read_string
       0.0   0.0% 100.0%      0.0   0.0% __GI__IO_file_doallocate

After:

  (pprof) top
  Total: 758.5 MB
     385.7  50.8%  50.8%    385.7  50.8% memdup
     248.4  32.7%  83.6%    248.4  32.7% zalloc
      91.2  12.0%  95.6%     94.8  12.5% map__new
      19.3   2.5%  98.2%     19.3   2.5% symbol__new
       6.2   0.8%  99.0%      6.2   0.8% alloc_event
       3.9   0.5%  99.5%      3.9   0.5% nsinfo__new
       3.5   0.5% 100.0%      3.5   0.5% nsinfo__copy
       0.2   0.0% 100.0%      0.2   0.0% dso__new
       0.0   0.0% 100.0%      0.0   0.0% do_read_string
       0.0   0.0% 100.0%      0.0   0.0% elf_fill

The perf report output is the same.

Reported-by: Andi Kleen <a...@linux.intel.com>
Link: http://lkml.kernel.org/n/tip-9x8utaeeqwyktzhruhrpp...@git.kernel.org
Signed-off-by: Jiri Olsa <jo...@kernel.org>
---
 tools/perf/util/map.c    | 62 ++++++++++++++++++++++++++++++++--------
 tools/perf/util/map.h    |  7 +++--
 tools/perf/util/symbol.c |  2 +-
 3 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 98c56714f1ae..9df18ef76241 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -145,6 +145,7 @@ void map__init(struct map *map, u64 start, u64 end, u64 
pgoff, struct dso *dso)
        map->groups   = NULL;
        sh->erange_warned = false;
        refcount_set(&map->refcnt, 1);
+       refcount_set(&sh->refcnt, 1);
 }
 
 struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -153,14 +154,16 @@ struct map *map__new(struct machine *machine, u64 start, 
u64 len,
                     struct thread *thread)
 {
        struct map *map = malloc(sizeof(*map));
+       struct map_shared *sh = malloc(sizeof(*sh));
        struct nsinfo *nsi = NULL;
        struct nsinfo *nnsi;
 
-       if (map != NULL) {
+       if (map != NULL && sh != NULL) {
                char newfilename[PATH_MAX];
                struct dso *dso;
                int anon, no_dso, vdso, android;
-               struct map_shared *sh = map_sh(map);
+
+               map->shared = sh;
 
                android = is_android_lib(filename);
                anon = is_anon_memory(filename, flags);
@@ -220,11 +223,12 @@ struct map *map__new(struct machine *machine, u64 start, 
u64 len,
                }
                dso->nsinfo = nsi;
                dso__put(dso);
+               return map;
        }
-       return map;
 out_delete:
        nsinfo__put(nsi);
        free(map);
+       free(sh);
        return NULL;
 }
 
@@ -235,13 +239,19 @@ struct map *map__new(struct machine *machine, u64 start, 
u64 len,
  */
 struct map *map__new2(u64 start, struct dso *dso)
 {
+       struct map_shared *sh = zalloc(sizeof(*sh));
        struct map *map = calloc(1, (sizeof(*map) +
                                     (dso->kernel ? sizeof(struct kmap) : 0)));
-       if (map != NULL) {
+
+       if (map != NULL && sh != NULL) {
+               map->shared = sh;
                /*
                 * ->end will be filled after we load all the symbols
                 */
                map__init(map, start, 0, 0, dso);
+       } else {
+               zfree(&map);
+               free(sh);
        }
 
        return map;
@@ -289,15 +299,29 @@ bool map__has_symbols(const struct map *map)
        return dso__has_symbols(map_sh(map)->dso);
 }
 
-static void map__exit(struct map *map)
+static void map_shared__delete(struct map_shared *sh)
 {
-       BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
-       dso__zput(map_sh(map)->dso);
+       dso__put(sh->dso);
+       free(sh);
+}
+
+static struct map_shared *map_shared__get(struct map_shared *sh)
+{
+       if (sh)
+               refcount_inc(&sh->refcnt);
+       return sh;
+}
+
+static void map_shared__put(struct map_shared *sh)
+{
+       if (sh && refcount_dec_and_test(&sh->refcnt))
+               map_shared__delete(sh);
 }
 
 void map__delete(struct map *map)
 {
-       map__exit(map);
+       BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
+       map_shared__put(map->shared);
        free(map);
 }
 
@@ -390,12 +414,26 @@ struct symbol *map__find_symbol_by_name(struct map *map, 
const char *name)
        return dso__find_symbol_by_name(map_sh(map)->dso, name);
 }
 
-struct map *map__clone(struct map *from)
+struct map *map__clone(struct map *from, bool share)
 {
        struct map *map = memdup(from, sizeof(*map));
+       struct map_shared *sh;
 
        if (map != NULL) {
                refcount_set(&map->refcnt, 1);
+
+               if (share) {
+                       sh = map_shared__get(map->shared);
+               } else {
+                       sh = memdup(map->shared, sizeof(*sh));
+                       if (!sh) {
+                               free(map);
+                               return NULL;
+                       }
+                       map->shared = sh;
+                       refcount_set(&sh->refcnt, 1);
+               }
+
                RB_CLEAR_NODE(&map->rb_node);
                dso__get(map_sh(map)->dso);
                map->groups = NULL;
@@ -840,7 +878,7 @@ static int maps__fixup_overlappings(struct maps *maps, 
struct map *map, FILE *fp
                 * overlapped by the new map:
                 */
                if (map_sh(map)->start > map_sh(pos)->start) {
-                       struct map *before = map__clone(pos);
+                       struct map *before = map__clone(pos, false);
 
                        if (before == NULL) {
                                err = -ENOMEM;
@@ -855,7 +893,7 @@ static int maps__fixup_overlappings(struct maps *maps, 
struct map *map, FILE *fp
                }
 
                if (map_sh(map)->end < map_sh(pos)->end) {
-                       struct map *after = map__clone(pos);
+                       struct map *after = map__clone(pos, false);
 
                        if (after == NULL) {
                                err = -ENOMEM;
@@ -902,7 +940,7 @@ int map_groups__clone(struct thread *thread, struct 
map_groups *parent)
        down_read(&maps->lock);
 
        for (map = maps__first(maps); map; map = map__next(map)) {
-               struct map *new = map__clone(map);
+               struct map *new = map__clone(map, true);
                if (new == NULL)
                        goto out_unlock;
 
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 297e8f9d1c4c..2788894bdd1c 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -38,6 +38,7 @@ struct map_shared {
        u64                     (*unmap_ip)(struct map *, u64);
 
        struct dso              *dso;
+       refcount_t               refcnt;
 };
 
 struct map {
@@ -46,12 +47,12 @@ struct map {
                struct list_head node;
        };
        struct rb_node           rb_node_name;
-       struct map_shared        shared;
+       struct map_shared       *shared;
        struct map_groups       *groups;
        refcount_t               refcnt;
 };
 
-#define map_sh(__m)  (&((__m)->shared))
+#define map_sh(__m)  ((__m)->shared)
 
 struct kmap;
 
@@ -123,7 +124,7 @@ struct map *map__new(struct machine *machine, u64 start, 
u64 len,
                     char *filename, struct thread *thread);
 struct map *map__new2(u64 start, struct dso *dso);
 void map__delete(struct map *map);
-struct map *map__clone(struct map *map);
+struct map *map__clone(struct map *map, bool share);
 
 static inline struct map *map__get(struct map *map)
 {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 80c4429304f0..bca6cbdd7eea 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1213,7 +1213,7 @@ int map_groups__merge_in(struct map_groups *kmaps, struct 
map *new_map)
                                 * |new.............| -> |new..|       |new..|
                                 *       |old....|    ->       |old....|
                                 */
-                               struct map *m = map__clone(new_map);
+                               struct map *m = map__clone(new_map, false);
 
                                if (!m)
                                        return -ENOMEM;
-- 
2.21.0

Reply via email to