From: Andi Kleen <a...@linux.intel.com>

I was tired of seeing ~10% locking overhead in perf script while
doing larger PT decodes. perf script doesn't need any locking
because it is single threaded. Nearly all of it comes from
looking at the maps cache. I added a global variable to indicate
perf is running single threaded, and make perf script set that.
I only did it for locks that actually show up in profiles,
not for ones that do not. This is currently only the mapping lock.
Potentially other tools (probably all except for top/report)
could use that.

Signed-off-by: Andi Kleen <a...@linux.intel.com>
---
 tools/perf/builtin-stat.c | 2 ++
 tools/perf/perf.h         | 3 +++
 tools/perf/util/map.c     | 7 +++++--
 tools/perf/util/util.c    | 7 +++++++
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f53f449d864d..b404644f559a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2393,6 +2393,8 @@ int cmd_stat(int argc, const char **argv, const char 
*prefix __maybe_unused)
 
        setlocale(LC_ALL, "");
 
+       perf_set_singlethreaded();
+
        evsel_list = perf_evlist__new();
        if (evsel_list == NULL)
                return -ENOMEM;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 806c216a1078..d0fbbd168987 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -77,4 +77,7 @@ struct record_opts {
 struct option;
 extern const char * const *record_usage;
 extern struct option *record_options;
+extern int perf_st;
+extern void perf_set_singlethreaded(void);
+
 #endif
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 1d9ebcf9e38e..cd8146323d8a 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -1,3 +1,4 @@
+#include "perf.h"
 #include "symbol.h"
 #include <errno.h>
 #include <inttypes.h>
@@ -825,7 +826,8 @@ struct map *maps__find(struct maps *maps, u64 ip)
        struct rb_node **p, *parent = NULL;
        struct map *m;
 
-       pthread_rwlock_rdlock(&maps->lock);
+       if (!perf_st)
+               pthread_rwlock_rdlock(&maps->lock);
 
        p = &maps->entries.rb_node;
        while (*p != NULL) {
@@ -841,7 +843,8 @@ struct map *maps__find(struct maps *maps, u64 ip)
 
        m = NULL;
 out:
-       pthread_rwlock_unlock(&maps->lock);
+       if (!perf_st)
+               pthread_rwlock_unlock(&maps->lock);
        return m;
 }
 
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d8b45cea54d0..6a3a8854da63 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -802,3 +802,10 @@ int unit_number__scnprintf(char *buf, size_t size, u64 n)
 
        return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]);
 }
+
+int perf_st;
+
+void perf_set_singlethreaded(void)
+{
+       perf_st = 1;
+}
-- 
2.9.3

Reply via email to