[tip:perf/core] perf map: Remove extra indirection from map__find()
Commit-ID: b18e088825883bcb8dc4c4a641494049cf8ccec3 Gitweb: https://git.kernel.org/tip/b18e088825883bcb8dc4c4a641494049cf8ccec3 Author: Eric Saint-Etienne AuthorDate: Fri, 23 Nov 2018 02:42:39 -0800 Committer: Arnaldo Carvalho de Melo CommitDate: Mon, 17 Dec 2018 14:53:57 -0300 perf map: Remove extra indirection from map__find() A double pointer is used in map__find() where a single pointer is enough because the function doesn't affect the rbtree and the rbtree is locked. Signed-off-by: Eric Saint-Etienne Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1542969759-24346-1-git-send-email-eric.saint.etie...@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 781eed8e3265..a0d58b4d9c32 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -873,19 +873,18 @@ void maps__remove(struct maps *maps, struct map *map) struct map *maps__find(struct maps *maps, u64 ip) { - struct rb_node **p, *parent = NULL; + struct rb_node *p; struct map *m; down_read(&maps->lock); - p = &maps->entries.rb_node; - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct map, rb_node); + p = maps->entries.rb_node; + while (p != NULL) { + m = rb_entry(p, struct map, rb_node); if (ip < m->start) - p = &(*p)->rb_left; + p = p->rb_left; else if (ip >= m->end) - p = &(*p)->rb_right; + p = p->rb_right; else goto out; }
[tip:perf/core] perf map: Remove extra indirection from map__find()
Commit-ID: 97dfc3423ca701212802a02994d1ec3e688f4891 Gitweb: https://git.kernel.org/tip/97dfc3423ca701212802a02994d1ec3e688f4891 Author: Eric Saint-Etienne AuthorDate: Fri, 23 Nov 2018 02:42:39 -0800 Committer: Arnaldo Carvalho de Melo CommitDate: Thu, 29 Nov 2018 20:42:46 -0300 perf map: Remove extra indirection from map__find() A double pointer is used in map__find() where a single pointer is enough because the function doesn't affect the rbtree and the rbtree is locked. Signed-off-by: Eric Saint-Etienne Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1542969759-24346-1-git-send-email-eric.saint.etie...@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 781eed8e3265..a0d58b4d9c32 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -873,19 +873,18 @@ void maps__remove(struct maps *maps, struct map *map) struct map *maps__find(struct maps *maps, u64 ip) { - struct rb_node **p, *parent = NULL; + struct rb_node *p; struct map *m; down_read(&maps->lock); - p = &maps->entries.rb_node; - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct map, rb_node); + p = maps->entries.rb_node; + while (p != NULL) { + m = rb_entry(p, struct map, rb_node); if (ip < m->start) - p = &(*p)->rb_left; + p = p->rb_left; else if (ip >= m->end) - p = &(*p)->rb_right; + p = p->rb_right; else goto out; }
[PATCH UEK5] perf symbols: Cannot disassemble some routines when debuginfo present
When the kernel is compiled with -ffunction-sections and perf uses the kernel debuginfo, perf fails the very first symbol lookup and ends up with an offset in [kernel.vmlinux]. It's due to how perf loads the .text map first then loads the other maps in the kernel debuginfo image. This issue is specific to the debuginfo when using -ffunction-sections and it doesn't happen with kallsyms because there's not multiple stages of loading /proc/kallsyms. This patch makes sure that the event address we're looking up is indeed within the map we've found, otherwise we look another map up again. Only one extra lookup at most is required for the proper map to be found, if it exists. Orabug: 28543586 Reviewed-by: Eric Saint-Etienne --- tools/perf/util/event.c | 13 + 1 file changed, 13 insertions(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index fc690fe..1fbbaf1 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1447,6 +1447,19 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode, */ if (load_map) map__load(al->map); + + /* +* Sometimes we find the wrong map: one whose end address is +* ~0ULL, in particular when map__load() set a proper end +* address for the map we're considering. +* +* So we make sure that it's actually the right map that we've +* found, and if not, we try again and within at most one +* attempt we'll get the right map, if it exists. +*/ + if (al->addr < al->map->start || al->addr >= al->map->end) + goto try_again; + al->addr = al->map->map_ip(al->map, al->addr); } } -- 1.8.3.1
[PATCH v3] perf symbols: Cannot disassemble some routines when debuginfo present
When the kernel is compiled with -ffunction-sections and perf uses the kernel debuginfo, perf fails the very first symbol lookup and ends up with an hex offset inside [kernel.vmlinux]. It's due to how perf loads the maps. Indeed only .text gets loaded by map_groups__find() into al->map. Consequently al->map address range encompass the whole kernel image. But then map__load() loads many function maps by splitting al->map, which reduces al->map range drastically. Very likely the target address is then in one of those newly created function maps, so we need to lookup the map again to find that new map. I'm not sure if this issue is only specific to the kernel but at least it occurs withe the kernel dso, and when we're not using the kernel debuginfo, perf will fallback to using kallsyms and then the first lookup will work. The split of .text section happens in dso_process_kernel_symbol() where we call map_groups__find_by_name() to find an existing map, but with -ffunction-sections and a symbol belonging to a new (function) map, such map doesn't exist yet so we end up creating one and adjusting existing maps accordingly because adjust_kernel_syms is set there. This patch makes sure that the event address we're looking-up is indeed within the map we've found, otherwise we lookup another map again. Only one extra lookup at most is required for the proper map to be found, if it exists. Signed-off-by: Eric Saint-Etienne Reviewed-by: Darren Kenny --- tools/perf/util/event.c | 53 +++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e9c108a..f7cad1a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1569,9 +1569,58 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, * Kernel maps might be changed when loading symbols so loading * must be done prior to using kernel maps. 
*/ - if (load_map) + if (load_map) { + /* +* Note when using -ffunction-sections on the kernel: +* +* Only .text got loaded into al->map at this point. +* Consequently al->map address range encompass the +* whole image. +* +* map__load() will split this map into many function +* maps by shrinking al->map accordingly. +* +* The split happens in dso_process_kernel_symbol() +* where we call map_groups__find_by_name() to find an +* existing map, but with -ffunction-sections and a +* symbol belonging to a new (function) map, such map +* doesn't exist yet so we end up creating one and +* adjusting existing maps accordingly because +* adjust_kernel_syms is set there. +*/ + map__load(al->map); - al->addr = al->map->map_ip(al->map, al->addr); + + /* +* Note when using -ffunction-sections on the kernel: +* +* Very likely the target address will now be in one of +* the newly created function maps but al->map still +* points to .text which has been drastically shrank by +* the split done in map__load() +*/ + if (al->addr < al->map->start || + al->addr >= al->map->end) { + al->map = map_groups__find(mg, al->addr); + + /* +* map_groups__find() should always find a map +* because the target address was initially +* found in .text which got split by map__load() +* *WITHOUT INTRODUCING ANY GAP* +*/ + WARN_ONCE(al->map == NULL, + "map__load created unexpected gaps!"); + } + } + + /* +* In case the later call to map_groups__find() didn't find a +* suitable map (it should always, but better be safe) we make +* sure that al->map is still valid before deferencing it. +*/ + if (al->map != NULL) + al->addr = al->map->map_ip(al->map, al->addr); } return al->map; -- 1.8.3.1
[PATCH v3] perf symbols: Cannot disassemble some routines when debuginfo present
When the kernel is compiled with -ffunction-sections and perf uses the kernel debuginfo, perf fails the very first symbol lookup and ends up with an hex offset inside [kernel.vmlinux]. It's due to how perf loads the maps. Indeed only .text gets loaded by map_groups__find() into al->map. Consequently al->map address range encompass the whole kernel image. But then map__load() loads many function maps by splitting al->map, which reduces al->map range drastically. Very likely the target address is then in one of those newly created function maps, so we need to lookup the map again to find that new map. I'm not sure if this issue is only specific to the kernel but at least it occurs withe the kernel dso, and when we're not using the kernel debuginfo, perf will fallback to using kallsyms and then the first lookup will work. The split of .text section happens in dso_process_kernel_symbol() where we call map_groups__find_by_name() to find an existing map, but with -ffunction-sections and a symbol belonging to a new (function) map, such map doesn't exist yet so we end up creating one and adjusting existing maps accordingly because adjust_kernel_syms is set there. This patch makes sure that the event address we're looking-up is indeed within the map we've found, otherwise we lookup another map again. Only one extra lookup at most is required for the proper map to be found, if it exists. Signed-off-by: Eric Saint-Etienne Reviewed-by: Darren Kenny --- tools/perf/util/event.c | 53 +++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e9c108a..f7cad1a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1569,9 +1569,58 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, * Kernel maps might be changed when loading symbols so loading * must be done prior to using kernel maps. 
*/ - if (load_map) + if (load_map) { + /* +* Note when using -ffunction-sections on the kernel: +* +* Only .text got loaded into al->map at this point. +* Consequently al->map address range encompass the +* whole image. +* +* map__load() will split this map into many function +* maps by shrinking al->map accordingly. +* +* The split happens in dso_process_kernel_symbol() +* where we call map_groups__find_by_name() to find an +* existing map, but with -ffunction-sections and a +* symbol belonging to a new (function) map, such map +* doesn't exist yet so we end up creating one and +* adjusting existing maps accordingly because +* adjust_kernel_syms is set there. +*/ + map__load(al->map); - al->addr = al->map->map_ip(al->map, al->addr); + + /* +* Note when using -ffunction-sections on the kernel: +* +* Very likely the target address will now be in one of +* the newly created function maps but al->map still +* points to .text which has been drastically shrank by +* the split done in map__load() +*/ + if (al->addr < al->map->start || + al->addr >= al->map->end) { + al->map = map_groups__find(mg, al->addr); + + /* +* map_groups__find() should always find a map +* because the target address was initially +* found in .text which got split by map__load() +* *WITHOUT INTRODUCING ANY GAP* +*/ + WARN_ONCE(al->map == NULL, + "map__load created unexpected gaps!"); + } + } + + /* +* In case the later call to map_groups__find() didn't find a +* suitable map (it should always, but better be safe) we make +* sure that al->map is still valid before deferencing it. +*/ + if (al->map != NULL) + al->addr = al->map->map_ip(al->map, al->addr); } return al->map; -- 1.8.3.1
[PATCH v2] perf symbols: Cannot disassemble some routines when debuginfo present
When the kernel is compiled with -ffunction-sections and perf uses the kernel debuginfo, perf fails the very first symbol lookup and ends up with an hex offset inside [kernel.vmlinux]. It's due to how perf loads the maps. Indeed only .text gets loaded by map_groups__find() into al->map. Consequently al->map address range encompass the whole kernel image. But then map__load() loads many function maps by splitting al->map, which reduces al->map range drastically. Very likely the target address is then in one of those newly created function maps, so we need to lookup the map again to find that new map. I'm not sure if this issue is only specific to the kernel but at least it occurs withe the kernel dso, and when we're not using the kernel debuginfo, perf will fallback to using kallsyms and then the first lookup will work. The split of .text section happens in dso_process_kernel_symbol() where we call map_groups__find_by_name() to find an existing map, but with -ffunction-sections and a symbol belonging to a new (function) map, such map doesn't exist yet so we end up creating one and adjusting existing maps accordingly because adjust_kernel_syms is set there. This patch makes sure that the event address we're looking-up is indeed within the map we've found, otherwise we lookup another map again. Only one extra lookup at most is required for the proper map to be found, if it exists. Signed-off-by: Eric Saint-Etienne Reviewed-by: Darren Kenny --- tools/perf/util/event.c | 49 +++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e9c108a..be333a9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1569,9 +1569,54 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, * Kernel maps might be changed when loading symbols so loading * must be done prior to using kernel maps. 
*/ - if (load_map) + if (load_map) { + /* +* Note when using -ffunction-sections on the kernel: +* +* Only .text got loaded into al->map at this point. +* Consequently al->map address range encompass the +* whole image. +* +* map__load() will split this map into many function +* maps by shrinking al->map accordingly. +* +* The split happens in dso_process_kernel_symbol() +* where we call map_groups__find_by_name() to find an +* existing map, but with -ffunction-sections and a +* symbol belonging to a new (function) map, such map +* doesn't exist yet so we end up creating one and +* adjusting existing maps accordingly because +* adjust_kernel_syms is set there. +*/ + map__load(al->map); - al->addr = al->map->map_ip(al->map, al->addr); + + /* +* Note when using -ffunction-sections on the kernel: +* +* Very likely the target address will now be in one of +* the newly created function maps but al->map still +* points to .text which has been drastically shrank by +* the split done in map__load() +*/ + if (al->addr < al->map->start || + al->addr >= al->map->end) + al->map = map_groups__find(mg, al->addr); + + /* +* map_groups__find() should always find a map because +* the target address was initially found in .text which +* got split *without introducing gaps* by map__load() +*/ + } + + /* +* In case the later call to map_groups__find() didn't find a +* suitable map (it should always, but better be safe) we make +* sure that al->map is still valid before deferencing it. +*/ + if (al->map != NULL) + al->addr = al->map->map_ip(al->map, al->addr); } return al->map; -- 1.8.3.1
[PATCH v2] perf symbols: Cannot disassemble some routines when debuginfo present
When the kernel is compiled with -ffunction-sections and perf uses the kernel debuginfo, perf fails the very first symbol lookup and ends up with an hex offset inside [kernel.vmlinux]. It's due to how perf loads the maps. Indeed only .text gets loaded by map_groups__find() into al->map. Consequently al->map address range encompass the whole kernel image. But then map__load() loads many function maps by splitting al->map, which reduces al->map range drastically. Very likely the target address is then in one of those newly created function maps, so we need to lookup the map again to find that new map. I'm not sure if this issue is only specific to the kernel but at least it occurs withe the kernel dso, and when we're not using the kernel debuginfo, perf will fallback to using kallsyms and then the first lookup will work. The split of .text section happens in dso_process_kernel_symbol() where we call map_groups__find_by_name() to find an existing map, but with -ffunction-sections and a symbol belonging to a new (function) map, such map doesn't exist yet so we end up creating one and adjusting existing maps accordingly because adjust_kernel_syms is set there. This patch makes sure that the event address we're looking-up is indeed within the map we've found, otherwise we lookup another map again. Only one extra lookup at most is required for the proper map to be found, if it exists. Signed-off-by: Eric Saint-Etienne Reviewed-by: Darren Kenny --- tools/perf/util/event.c | 49 +++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e9c108a..be333a9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1569,9 +1569,54 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, * Kernel maps might be changed when loading symbols so loading * must be done prior to using kernel maps. 
*/ - if (load_map) + if (load_map) { + /* +* Note when using -ffunction-sections on the kernel: +* +* Only .text got loaded into al->map at this point. +* Consequently al->map address range encompass the +* whole image. +* +* map__load() will split this map into many function +* maps by shrinking al->map accordingly. +* +* The split happens in dso_process_kernel_symbol() +* where we call map_groups__find_by_name() to find an +* existing map, but with -ffunction-sections and a +* symbol belonging to a new (function) map, such map +* doesn't exist yet so we end up creating one and +* adjusting existing maps accordingly because +* adjust_kernel_syms is set there. +*/ + map__load(al->map); - al->addr = al->map->map_ip(al->map, al->addr); + + /* +* Note when using -ffunction-sections on the kernel: +* +* Very likely the target address will now be in one of +* the newly created function maps but al->map still +* points to .text which has been drastically shrank by +* the split done in map__load() +*/ + if (al->addr < al->map->start || + al->addr >= al->map->end) + al->map = map_groups__find(mg, al->addr); + + /* +* map_groups__find() should always find a map because +* the target address was initially found in .text which +* got split *without introducing gaps* by map__load() +*/ + } + + /* +* In case the later call to map_groups__find() didn't find a +* suitable map (it should always, but better be safe) we make +* sure that al->map is still valid before deferencing it. +*/ + if (al->map != NULL) + al->addr = al->map->map_ip(al->map, al->addr); } return al->map; -- 1.8.3.1
RE: [PATCH] perf symbols: Cannot disassemble some routines when debuginfo present
> > + /* > > +* When using -ffunction-sections, only .text gets loaded by > > +* map_groups__find() into al->map. Consequently al->map address > > +* range encompass the whole code. > > +* > > +* But map__load() has just loaded many function maps by > > +* splitting al->map, which reduced al->map range drastically. > > +* Very likely the target address is now in one of those newly > > +* created function maps, so we need to lookup the map again > > +* to find that new map. > > +*/ > > hum, so map__load actualy can split the map to create new maps? > > cold you please point me to that code? I haven't touch this area for some > time and I can't find it The split happens in dso_process_kernel_symbol() in symbol-elf.c where we call map_groups__find_by_name() to find an existing map, but with -ffunction-sections and a symbol belonging to new (function) map, such map doesn't exist yet so we end up creating one and adjusting existing maps accordingly because adjust_kernel_syms is set. Makes sense? As of 4.20-rc3 the call chain is as follows: event:c:1573 map__load() map.c:315 dso__load() symobl.c:1528 dso__load_kernel_sym() symbol.c:1896 dso__load_vmlinux_path() (or we directly call dso__load_vmlinux() at line 1892) symbol.c:1744 dso__load_vmlinux() symbol.c:1719 dso__load_sym() symbol-elf.c:1090 dso_process_kernel_symbol() -eric
RE: [PATCH] perf symbols: Cannot disassemble some routines when debuginfo present
> > + /* > > +* When using -ffunction-sections, only .text gets loaded by > > +* map_groups__find() into al->map. Consequently al->map address > > +* range encompass the whole code. > > +* > > +* But map__load() has just loaded many function maps by > > +* splitting al->map, which reduced al->map range drastically. > > +* Very likely the target address is now in one of those newly > > +* created function maps, so we need to lookup the map again > > +* to find that new map. > > +*/ > > hum, so map__load actualy can split the map to create new maps? > > cold you please point me to that code? I haven't touch this area for some > time and I can't find it The split happens in dso_process_kernel_symbol() in symbol-elf.c where we call map_groups__find_by_name() to find an existing map, but with -ffunction-sections and a symbol belonging to new (function) map, such map doesn't exist yet so we end up creating one and adjusting existing maps accordingly because adjust_kernel_syms is set. Makes sense? As of 4.20-rc3 the call chain is as follows: event:c:1573 map__load() map.c:315 dso__load() symobl.c:1528 dso__load_kernel_sym() symbol.c:1896 dso__load_vmlinux_path() (or we directly call dso__load_vmlinux() at line 1892) symbol.c:1744 dso__load_vmlinux() symbol.c:1719 dso__load_sym() symbol-elf.c:1090 dso_process_kernel_symbol() -eric
[PATCH] perf map: remove extra indirection from map__find()
A double pointer is used in map__find() where a single pointer is enough because the function doesn't affect the rbtree and the rbtree is locked. Signed-off-by: Eric Saint-Etienne --- tools/perf/util/map.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 354e545..3dac766 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -846,19 +846,18 @@ void maps__remove(struct maps *maps, struct map *map) struct map *maps__find(struct maps *maps, u64 ip) { - struct rb_node **p, *parent = NULL; + struct rb_node *p; struct map *m; down_read(&maps->lock); - p = &maps->entries.rb_node; - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct map, rb_node); + p = maps->entries.rb_node; + while (p != NULL) { + m = rb_entry(p, struct map, rb_node); if (ip < m->start) - p = &(*p)->rb_left; + p = p->rb_left; else if (ip >= m->end) - p = &(*p)->rb_right; + p = p->rb_right; else goto out; } -- 1.8.3.1
[PATCH] perf map: remove extra indirection from map__find()
A double pointer is used in map__find() where a single pointer is enough because the function doesn't affect the rbtree and the rbtree is locked. Signed-off-by: Eric Saint-Etienne --- tools/perf/util/map.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 354e545..3dac766 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -846,19 +846,18 @@ void maps__remove(struct maps *maps, struct map *map) struct map *maps__find(struct maps *maps, u64 ip) { - struct rb_node **p, *parent = NULL; + struct rb_node *p; struct map *m; down_read(&maps->lock); - p = &maps->entries.rb_node; - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct map, rb_node); + p = maps->entries.rb_node; + while (p != NULL) { + m = rb_entry(p, struct map, rb_node); if (ip < m->start) - p = &(*p)->rb_left; + p = p->rb_left; else if (ip >= m->end) - p = &(*p)->rb_right; + p = p->rb_right; else goto out; } -- 1.8.3.1
[PATCH] perf symbols: Cannot disassemble some routines when debuginfo present
When the kernel is compiled with -ffunction-sections and perf uses the kernel debuginfo, perf fails the very first symbol lookup and ends up with an hex offset inside [kernel.vmlinux]. It's due to how perf loads the maps. Indeed only .text gets loaded by map_groups__find() into al->map. Consequently al->map address range encompass the whole code. But map__load() has just loaded many function maps by splitting al->map, which reduced al->map range drastically. Very likely the target address is now in one of those newly created function maps, so we need to lookup the map again to find that new map. This issue is not specific to the kernel but to how the image is linked. For the kernel, when we're not using the kernel debuginfo, perf will fallback to using kallsyms and then the first lookup will work. This patch makes sure that the event address we're looking-up is indeed within the map we've found, otherwise we lookup another map again. Only one extra lookup at most is required for the proper map to be found, if it exists. Signed-off-by: Eric Saint-Etienne Reviewed-by: Darren Kenny --- tools/perf/util/event.c | 23 ++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e9c108a..a69ef52 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1571,7 +1571,28 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, */ if (load_map) map__load(al->map); - al->addr = al->map->map_ip(al->map, al->addr); + + /* +* When using -ffunction-sections, only .text gets loaded by +* map_groups__find() into al->map. Consequently al->map address +* range encompass the whole code. +* +* But map__load() has just loaded many function maps by +* splitting al->map, which reduced al->map range drastically. +* Very likely the target address is now in one of those newly +* created function maps, so we need to lookup the map again +* to find that new map. 
+*/ + if (al->addr < al->map->start || al->addr >= al->map->end) + al->map = map_groups__find(mg, al->addr); + + /* +* The new map *ought* to exist because the initial al->map +* contained that address and subsequently has been split into +* many *contiguous* maps. +*/ + if (al->map != NULL) + al->addr = al->map->map_ip(al->map, al->addr); } return al->map; -- 1.8.3.1
[PATCH] perf symbols: Cannot disassemble some routines when debuginfo present
When the kernel is compiled with -ffunction-sections and perf uses the kernel debuginfo, perf fails the very first symbol lookup and ends up with an hex offset inside [kernel.vmlinux]. It's due to how perf loads the maps. Indeed only .text gets loaded by map_groups__find() into al->map. Consequently al->map address range encompass the whole code. But map__load() has just loaded many function maps by splitting al->map, which reduced al->map range drastically. Very likely the target address is now in one of those newly created function maps, so we need to lookup the map again to find that new map. This issue is not specific to the kernel but to how the image is linked. For the kernel, when we're not using the kernel debuginfo, perf will fallback to using kallsyms and then the first lookup will work. This patch makes sure that the event address we're looking-up is indeed within the map we've found, otherwise we lookup another map again. Only one extra lookup at most is required for the proper map to be found, if it exists. Signed-off-by: Eric Saint-Etienne Reviewed-by: Darren Kenny --- tools/perf/util/event.c | 23 ++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e9c108a..a69ef52 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1571,7 +1571,28 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, */ if (load_map) map__load(al->map); - al->addr = al->map->map_ip(al->map, al->addr); + + /* +* When using -ffunction-sections, only .text gets loaded by +* map_groups__find() into al->map. Consequently al->map address +* range encompass the whole code. +* +* But map__load() has just loaded many function maps by +* splitting al->map, which reduced al->map range drastically. +* Very likely the target address is now in one of those newly +* created function maps, so we need to lookup the map again +* to find that new map. 
+*/ + if (al->addr < al->map->start || al->addr >= al->map->end) + al->map = map_groups__find(mg, al->addr); + + /* +* The new map *ought* to exist because the initial al->map +* contained that address and subsequently has been split into +* many *contiguous* maps. +*/ + if (al->map != NULL) + al->addr = al->map->map_ip(al->map, al->addr); } return al->map; -- 1.8.3.1
[tip:perf/core] perf symbols: Fix slowness due to -ffunction-section
Commit-ID: 1e6285699b3034e6f4d1f091edd46d717580bf7c Gitweb: https://git.kernel.org/tip/1e6285699b3034e6f4d1f091edd46d717580bf7c Author: Eric Saint-Etienne AuthorDate: Wed, 21 Nov 2018 09:51:19 -0800 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 21 Nov 2018 22:39:59 -0300 perf symbols: Fix slowness due to -ffunction-section Perf can take minutes to parse an image when -ffunction-section is used. This is especially true with the kernel image when it is compiled this way, which is the arm64 default since the patcheset "Enable deadcode elimination at link time". Perf organize maps using a rbtree. Whenever perf finds a new symbols, it first searches this rbtree for the map it belongs to, by strcmp()'aring section names. When it finds the map with the right name, it uses it to add the symbol. With a usual image there aren't so many maps but when using -ffunction-section there's basically one map per function. With the kernel image that's north of 40,000 maps. For most symbols perf has to parses the entire rbtree to eventually create a new map and add it. Consequently perf spends most of the time browsing a rbtree that keeps getting larger. This performance fix introduces a secondary rbtree that indexes maps based on the section name. 
Signed-off-by: Eric Saint-Etienne Reviewed-by: Dave Kleikamp Reviewed-by: David Aldridge Reviewed-by: Rob Gardner Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1542822679-25591-1-git-send-email-eric.saint.etie...@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c| 27 +++ tools/perf/util/map.h| 2 ++ tools/perf/util/symbol.c | 15 +-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 354e54550d2b..781eed8e3265 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -21,6 +21,7 @@ #include "unwind.h" static void __maps__insert(struct maps *maps, struct map *map); +static void __maps__insert_name(struct maps *maps, struct map *map); static inline int is_anon_memory(const char *filename, u32 flags) { @@ -496,6 +497,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) static void maps__init(struct maps *maps) { maps->entries = RB_ROOT; + maps->names = RB_ROOT; init_rwsem(&maps->lock); } @@ -664,6 +666,7 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) static void __map_groups__insert(struct map_groups *mg, struct map *map) { __maps__insert(&mg->maps, map); + __maps__insert_name(&mg->maps, map); map->groups = mg; } @@ -824,10 +827,34 @@ static void __maps__insert(struct maps *maps, struct map *map) map__get(map); } +static void __maps__insert_name(struct maps *maps, struct map *map) +{ + struct rb_node **p = &maps->names.rb_node; + struct rb_node *parent = NULL; + struct map *m; + int rc; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node_name); + rc = strcmp(m->dso->short_name, map->dso->short_name); + if (rc < 0) + p = &(*p)->rb_left; + else if (rc > 0) + p = &(*p)->rb_right; + else + return; + } + rb_link_node(&map->rb_node_name, parent, p); + rb_insert_color(&map->rb_node_name, &maps->names); + map__get(map); +} + void maps__insert(struct maps *maps, struct map *map) { down_write(&maps->lock); 
__maps__insert(maps, map); + __maps__insert_name(maps, map); up_write(&maps->lock); } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index e0f327b51e66..5c792c90fc4c 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -25,6 +25,7 @@ struct map { struct rb_node rb_node; struct list_head node; }; + struct rb_node rb_node_name; u64 start; u64 end; bool erange_warned; @@ -57,6 +58,7 @@ struct kmap { struct maps { struct rb_root entries; + struct rb_root names; struct rw_semaphore lock; }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index d188b7588152..dcce74bae6de 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1680,11 +1680,22 @@ struct map *map_groups__find_by_name(struct map_groups *mg, const char *name) { struct maps *maps = &mg->maps; struct map *map; + struct rb_node *node; down_read(&maps->lock); - for (map = maps__first(maps); map; map = map__next(map)) { - if (map->dso && strcmp(map->dso->short_name, name) == 0) + for (node = maps->
[tip:perf/core] perf symbols: Fix slowness due to -ffunction-section
Commit-ID: 1e6285699b3034e6f4d1f091edd46d717580bf7c Gitweb: https://git.kernel.org/tip/1e6285699b3034e6f4d1f091edd46d717580bf7c Author: Eric Saint-Etienne AuthorDate: Wed, 21 Nov 2018 09:51:19 -0800 Committer: Arnaldo Carvalho de Melo CommitDate: Wed, 21 Nov 2018 22:39:59 -0300 perf symbols: Fix slowness due to -ffunction-section Perf can take minutes to parse an image when -ffunction-section is used. This is especially true with the kernel image when it is compiled this way, which is the arm64 default since the patchset "Enable deadcode elimination at link time". Perf organizes maps using an rbtree. Whenever perf finds a new symbol, it first searches this rbtree for the map it belongs to, by strcmp()'ing section names. When it finds the map with the right name, it uses it to add the symbol. With a usual image there aren't so many maps but when using -ffunction-section there's basically one map per function. With the kernel image that's north of 40,000 maps. For most symbols perf has to parse the entire rbtree to eventually create a new map and add it. Consequently perf spends most of the time browsing an rbtree that keeps getting larger. This performance fix introduces a secondary rbtree that indexes maps based on the section name. 
Signed-off-by: Eric Saint-Etienne Reviewed-by: Dave Kleikamp Reviewed-by: David Aldridge Reviewed-by: Rob Gardner Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1542822679-25591-1-git-send-email-eric.saint.etie...@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c| 27 +++ tools/perf/util/map.h| 2 ++ tools/perf/util/symbol.c | 15 +-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 354e54550d2b..781eed8e3265 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -21,6 +21,7 @@ #include "unwind.h" static void __maps__insert(struct maps *maps, struct map *map); +static void __maps__insert_name(struct maps *maps, struct map *map); static inline int is_anon_memory(const char *filename, u32 flags) { @@ -496,6 +497,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) static void maps__init(struct maps *maps) { maps->entries = RB_ROOT; + maps->names = RB_ROOT; init_rwsem(&maps->lock); } @@ -664,6 +666,7 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) static void __map_groups__insert(struct map_groups *mg, struct map *map) { __maps__insert(&mg->maps, map); + __maps__insert_name(&mg->maps, map); map->groups = mg; } @@ -824,10 +827,34 @@ static void __maps__insert(struct maps *maps, struct map *map) map__get(map); } +static void __maps__insert_name(struct maps *maps, struct map *map) +{ + struct rb_node **p = &maps->names.rb_node; + struct rb_node *parent = NULL; + struct map *m; + int rc; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node_name); + rc = strcmp(m->dso->short_name, map->dso->short_name); + if (rc < 0) + p = &(*p)->rb_left; + else if (rc > 0) + p = &(*p)->rb_right; + else + return; + } + rb_link_node(&map->rb_node_name, parent, p); + rb_insert_color(&map->rb_node_name, &maps->names); + map__get(map); +} + void maps__insert(struct maps *maps, struct map *map) { down_write(&maps->lock); 
__maps__insert(maps, map); + __maps__insert_name(maps, map); up_write(&maps->lock); } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index e0f327b51e66..5c792c90fc4c 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -25,6 +25,7 @@ struct map { struct rb_node rb_node; struct list_head node; }; + struct rb_node rb_node_name; u64 start; u64 end; bool erange_warned; @@ -57,6 +58,7 @@ struct kmap { struct maps { struct rb_root entries; + struct rb_root names; struct rw_semaphore lock; }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index d188b7588152..dcce74bae6de 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1680,11 +1680,22 @@ struct map *map_groups__find_by_name(struct map_groups *mg, const char *name) { struct maps *maps = &mg->maps; struct map *map; + struct rb_node *node; down_read(&maps->lock); - for (map = maps__first(maps); map; map = map__next(map)) { - if (map->dso && strcmp(map->dso->short_name, name) == 0) + for (node = maps->
[PATCH] perf symbols: fix slowness due to -ffunction-section
Perf can take minutes to parse an image when -ffunction-section is used. This is especially true with the kernel image when it is compiled this way, which is the arm64 default since the patchset "Enable deadcode elimination at link time". Perf organizes maps using an rbtree. Whenever perf finds a new symbol, it first searches this rbtree for the map it belongs to, by strcmp()'ing section names. When it finds the map with the right name, it uses it to add the symbol. With a usual image there aren't so many maps but when using -ffunction-section there's basically one map per function. With the kernel image that's north of 40,000 maps. For most symbols perf has to parse the entire rbtree to eventually create a new map and add it. Consequently perf spends most of the time browsing an rbtree that keeps getting larger. This performance fix introduces a secondary rbtree that indexes maps based on the section name. Signed-off-by: Eric Saint-Etienne Reviewed-by: Dave Kleikamp Reviewed-by: David Aldridge Reviewed-by: Rob Gardner --- tools/perf/util/map.c| 27 +++ tools/perf/util/map.h| 2 ++ tools/perf/util/symbol.c | 15 +-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 354e545..781eed8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -21,6 +21,7 @@ #include "unwind.h" static void __maps__insert(struct maps *maps, struct map *map); +static void __maps__insert_name(struct maps *maps, struct map *map); static inline int is_anon_memory(const char *filename, u32 flags) { @@ -496,6 +497,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) static void maps__init(struct maps *maps) { maps->entries = RB_ROOT; + maps->names = RB_ROOT; init_rwsem(&maps->lock); } @@ -664,6 +666,7 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) static void __map_groups__insert(struct map_groups *mg, struct map *map) { __maps__insert(&mg->maps, map); + __maps__insert_name(&mg->maps, map); map->groups = mg; } @@ -824,10 
+827,34 @@ static void __maps__insert(struct maps *maps, struct map *map) map__get(map); } +static void __maps__insert_name(struct maps *maps, struct map *map) +{ + struct rb_node **p = &maps->names.rb_node; + struct rb_node *parent = NULL; + struct map *m; + int rc; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node_name); + rc = strcmp(m->dso->short_name, map->dso->short_name); + if (rc < 0) + p = &(*p)->rb_left; + else if (rc > 0) + p = &(*p)->rb_right; + else + return; + } + rb_link_node(&map->rb_node_name, parent, p); + rb_insert_color(&map->rb_node_name, &maps->names); + map__get(map); +} + void maps__insert(struct maps *maps, struct map *map) { down_write(&maps->lock); __maps__insert(maps, map); + __maps__insert_name(maps, map); up_write(&maps->lock); } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index e0f327b..5c792c9 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -25,6 +25,7 @@ struct map { struct rb_node rb_node; struct list_head node; }; + struct rb_node rb_node_name; u64 start; u64 end; bool erange_warned; @@ -57,6 +58,7 @@ struct kmap { struct maps { struct rb_root entries; + struct rb_root names; struct rw_semaphore lock; }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index d188b75..dcce74b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1680,11 +1680,22 @@ struct map *map_groups__find_by_name(struct map_groups *mg, const char *name) { struct maps *maps = &mg->maps; struct map *map; + struct rb_node *node; down_read(&maps->lock); - for (map = maps__first(maps); map; map = map__next(map)) { - if (map->dso && strcmp(map->dso->short_name, name) == 0) + for (node = maps->names.rb_node; node; ) { + int rc; + + map = rb_entry(node, struct map, rb_node_name); + + rc = strcmp(map->dso->short_name, name); + if (rc < 0) + node = node->rb_left; + else if (rc > 0) + node = node->rb_right; + else + goto out_unlock; } -- 1.8.3.1
[PATCH] perf symbols: fix slowness due to -ffunction-section
Perf can take minutes to parse an image when -ffunction-section is used. This is especially true with the kernel image when it is compiled this way, which is the arm64 default since the patchset "Enable deadcode elimination at link time". Perf organizes maps using an rbtree. Whenever perf finds a new symbol, it first searches this rbtree for the map it belongs to, by strcmp()'ing section names. When it finds the map with the right name, it uses it to add the symbol. With a usual image there aren't so many maps but when using -ffunction-section there's basically one map per function. With the kernel image that's north of 40,000 maps. For most symbols perf has to parse the entire rbtree to eventually create a new map and add it. Consequently perf spends most of the time browsing an rbtree that keeps getting larger. This performance fix introduces a secondary rbtree that indexes maps based on the section name. Signed-off-by: Eric Saint-Etienne Reviewed-by: Dave Kleikamp Reviewed-by: David Aldridge Reviewed-by: Rob Gardner --- tools/perf/util/map.c| 27 +++ tools/perf/util/map.h| 2 ++ tools/perf/util/symbol.c | 15 +-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 354e545..781eed8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -21,6 +21,7 @@ #include "unwind.h" static void __maps__insert(struct maps *maps, struct map *map); +static void __maps__insert_name(struct maps *maps, struct map *map); static inline int is_anon_memory(const char *filename, u32 flags) { @@ -496,6 +497,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) static void maps__init(struct maps *maps) { maps->entries = RB_ROOT; + maps->names = RB_ROOT; init_rwsem(&maps->lock); } @@ -664,6 +666,7 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) static void __map_groups__insert(struct map_groups *mg, struct map *map) { __maps__insert(&mg->maps, map); + __maps__insert_name(&mg->maps, map); map->groups = mg; } @@ -824,10 
+827,34 @@ static void __maps__insert(struct maps *maps, struct map *map) map__get(map); } +static void __maps__insert_name(struct maps *maps, struct map *map) +{ + struct rb_node **p = &maps->names.rb_node; + struct rb_node *parent = NULL; + struct map *m; + int rc; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node_name); + rc = strcmp(m->dso->short_name, map->dso->short_name); + if (rc < 0) + p = &(*p)->rb_left; + else if (rc > 0) + p = &(*p)->rb_right; + else + return; + } + rb_link_node(&map->rb_node_name, parent, p); + rb_insert_color(&map->rb_node_name, &maps->names); + map__get(map); +} + void maps__insert(struct maps *maps, struct map *map) { down_write(&maps->lock); __maps__insert(maps, map); + __maps__insert_name(maps, map); up_write(&maps->lock); } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index e0f327b..5c792c9 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -25,6 +25,7 @@ struct map { struct rb_node rb_node; struct list_head node; }; + struct rb_node rb_node_name; u64 start; u64 end; bool erange_warned; @@ -57,6 +58,7 @@ struct kmap { struct maps { struct rb_root entries; + struct rb_root names; struct rw_semaphore lock; }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index d188b75..dcce74b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1680,11 +1680,22 @@ struct map *map_groups__find_by_name(struct map_groups *mg, const char *name) { struct maps *maps = &mg->maps; struct map *map; + struct rb_node *node; down_read(&maps->lock); - for (map = maps__first(maps); map; map = map__next(map)) { - if (map->dso && strcmp(map->dso->short_name, name) == 0) + for (node = maps->names.rb_node; node; ) { + int rc; + + map = rb_entry(node, struct map, rb_node_name); + + rc = strcmp(map->dso->short_name, name); + if (rc < 0) + node = node->rb_left; + else if (rc > 0) + node = node->rb_right; + else + goto out_unlock; } -- 1.8.3.1