[PATCH 14/22] perf bpf: Use ERR_CAST instead of ERR_PTR(PTR_ERR())

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Wen Yang 

Use the ERR_CAST inline function instead of ERR_PTR(PTR_ERR(...)).  This
makes the code more readable and also fixes this warning detected by
err_cast.cocci:

  tools/perf/util/bpf-loader.c:1606:11-18: WARNING: ERR_CAST can be used with op

Signed-off-by: Wen Yang 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Julia Lawall 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Wen Yang 
Cc: zhong.weid...@zte.com.cn
Link: http://lkml.kernel.org/r/20181127090610.28488-1-wen.yan...@zte.com.cn
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/bpf-loader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f9ae1a993806..9a280647d829 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -1603,7 +1603,7 @@ struct perf_evsel *bpf__setup_output_event(struct 
perf_evlist *evlist, const cha
 
op = bpf_map__add_newop(map, NULL);
if (IS_ERR(op))
-   return ERR_PTR(PTR_ERR(op));
+   return ERR_CAST(op);
op->op_type = BPF_MAP_OP_SET_EVSEL;
op->v.evsel = evsel;
}
-- 
2.19.1



[PATCH 21/22] perf beauty mmap_flags: Check if the arch has a mmap.h file

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

If not, then just use what is in asm-generic. This fixes the build for
my sh4, m68k and riscv64 perf test build containers, which were failing
due to 80ee5668b8a7 ("perf beauty: Add a generator for MAP_ mmap's flag
constants") because those architectures were not covered in the cset
introducing the tools/arch/*/include/uapi/asm/mman.h files:

  f3539c12d819 ("tools include: Add uapi mman.h for each architecture")

Cc: Adrian Hunter 
Cc: David Ahern 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Wang Nan 
Fixes: 80ee5668b8a7 ("perf beauty: Add a generator for MAP_ mmap's flag 
constants")
Link: https://lkml.kernel.org/n/tip-rpy9t2e0wxpnum1yvxhre...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Makefile.perf  | 2 +-
 tools/perf/trace/beauty/mmap_flags.sh | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 67e9adbe6ee8..bfdaefd500ab 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -474,7 +474,7 @@ $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h 
$(madvise_behavior_t
 mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
 mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
 
-$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h 
$(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h 
$(mmap_flags_tbl)
+$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h 
$(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
$(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) 
$(arch_asm_uapi_dir) > $@
 
 mount_flags_array := $(beauty_outdir)/mount_flags_array.c
diff --git a/tools/perf/trace/beauty/mmap_flags.sh 
b/tools/perf/trace/beauty/mmap_flags.sh
index 22c3fdca8975..cd41023107d7 100755
--- a/tools/perf/trace/beauty/mmap_flags.sh
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -20,12 +20,12 @@ egrep -q $regex ${arch_mman} && \
 (egrep $regex ${arch_mman} | \
sed -r "s/$regex/\2 \1/g"   | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
-egrep -q '#[[:space:]]*include[[:space:]]+.*' 
${arch_mman} &&
+[ ! -f ${arch_mman} || egrep -q 
'#[[:space:]]*include[[:space:]]+.*' ${arch_mman} ] &&
 (egrep $regex ${header_dir}/mman.h | \
sed -r "s/$regex/\2 \1/g"   | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
-- 
2.19.1



[PATCH 09/22] perf script: Use fallbacks for branch stacks

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

Branch stacks do not necessarily have the same cpumode as the 'ip'. Use
the fallback functions in those cases.

This patch depends on patch "perf tools: Add fallback functions for cases
where cpumode is insufficient".

Signed-off-by: Adrian Hunter 
Cc: Andi Kleen 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Leo Yan 
Cc: Mathieu Poirier 
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/20181106210712.12098-4-adrian.hun...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/builtin-script.c  | 12 ++--
 .../util/scripting-engines/trace-event-python.c  | 16 
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 04913136bac9..3ea98fe72f7f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -724,8 +724,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample 
*sample,
if (PRINT_FIELD(DSO)) {
memset(, 0, sizeof(alf));
memset(, 0, sizeof(alt));
-   thread__find_map(thread, sample->cpumode, from, );
-   thread__find_map(thread, sample->cpumode, to, );
+   thread__find_map_fb(thread, sample->cpumode, from, 
);
+   thread__find_map_fb(thread, sample->cpumode, to, );
}
 
printed += fprintf(fp, " 0x%"PRIx64, from);
@@ -771,8 +771,8 @@ static int perf_sample__fprintf_brstacksym(struct 
perf_sample *sample,
from = br->entries[i].from;
to   = br->entries[i].to;
 
-   thread__find_symbol(thread, sample->cpumode, from, );
-   thread__find_symbol(thread, sample->cpumode, to, );
+   thread__find_symbol_fb(thread, sample->cpumode, from, );
+   thread__find_symbol_fb(thread, sample->cpumode, to, );
 
printed += symbol__fprintf_symname_offs(alf.sym, , fp);
if (PRINT_FIELD(DSO)) {
@@ -816,11 +816,11 @@ static int perf_sample__fprintf_brstackoff(struct 
perf_sample *sample,
from = br->entries[i].from;
to   = br->entries[i].to;
 
-   if (thread__find_map(thread, sample->cpumode, from, ) &&
+   if (thread__find_map_fb(thread, sample->cpumode, from, ) &&
!alf.map->dso->adjust_symbols)
from = map__map_ip(alf.map, from);
 
-   if (thread__find_map(thread, sample->cpumode, to, ) &&
+   if (thread__find_map_fb(thread, sample->cpumode, to, ) &&
!alt.map->dso->adjust_symbols)
to = map__map_ip(alt.map, to);
 
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c 
b/tools/perf/util/scripting-engines/trace-event-python.c
index 69aa93d4ee99..0c4b050f6fc2 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -494,14 +494,14 @@ static PyObject *python_process_brstack(struct 
perf_sample *sample,
pydict_set_item_string_decref(pyelem, "cycles",
PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
 
-   thread__find_map(thread, sample->cpumode,
-br->entries[i].from, );
+   thread__find_map_fb(thread, sample->cpumode,
+   br->entries[i].from, );
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "from_dsoname",
  _PyUnicode_FromString(dsoname));
 
-   thread__find_map(thread, sample->cpumode,
-br->entries[i].to, );
+   thread__find_map_fb(thread, sample->cpumode,
+   br->entries[i].to, );
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "to_dsoname",
  _PyUnicode_FromString(dsoname));
@@ -576,14 +576,14 @@ static PyObject *python_process_brstacksym(struct 
perf_sample *sample,
if (!pyelem)
Py_FatalError("couldn't create Python dictionary");
 
-   thread__find_symbol(thread, sample->cpumode,
-   br->entries[i].from, );
+   thread__find_symbol_fb(thread, sample->cpumode,
+  br->entries[i].from, );
get_symoff(al.sym, , true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "from",
  _PyUnicode_FromString(bf));
 
-   thread__find_symbol(thread, sample->cpumode,
-   br->entries[i].to, );
+   thread__find_symbol_fb(thread, sample->cpumode,
+ 

[PATCH 13/22] tools include: Adopt ERR_CAST() from the kernel err.h header

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

Add ERR_CAST(), so that tools can use it, just like the kernel.

This addresses coccinelle checks that are being performed on tools/ in
addition to the kernel sources, so let's add this to cover that and to
get tools code closer to kernel coding standards.

This originally was introduced in the kernel headers in this cset:

  d1bc8e954452 ("Add an ERR_CAST() function to complement ERR_PTR and co.")

Cc: Adrian Hunter 
Cc: Alexander Shishkin 
Cc: David Ahern 
Cc: David Howells 
Cc: Jiri Olsa 
Cc: Julia Lawall 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Wang Nan 
Cc: Wen Yang 
Cc: zhong.weid...@zte.com.cn
Link: https://lkml.kernel.org/n/tip-tlt97p066zyhzqhl5jt86...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/include/linux/err.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h
index 094649667bae..2f5a12b88a86 100644
--- a/tools/include/linux/err.h
+++ b/tools/include/linux/err.h
@@ -59,4 +59,17 @@ static inline int __must_check PTR_ERR_OR_ZERO(__force const 
void *ptr)
else
return 0;
 }
+
+/**
+ * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type
+ * @ptr: The pointer to cast.
+ *
+ * Explicitly cast an error-valued pointer to another pointer type in such a
+ * way as to make it clear that's what's going on.
+ */
+static inline void * __must_check ERR_CAST(__force const void *ptr)
+{
+   /* cast away the const */
+   return (void *) ptr;
+}
 #endif /* _LINUX_ERR_H */
-- 
2.19.1



[PATCH 17/22] tools build feature: Check if libaio is available

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Alexey Budankov 

This will be used by 'perf record' to speed up reading the perf ring
buffer.

Committer testing:

  $ make -C tools/perf O=/tmp/build/perf
  make: Entering directory '/home/acme/git/perf/tools/perf'
BUILD:   Doing 'make -j8' parallel build

  Auto-detecting system features:
  ... dwarf: [ on  ]
  ...dwarf_getlocations: [ on  ]
  ... glibc: [ on  ]
  ...  gtk2: [ OFF ]
  ...  libaudit: [ OFF ]
  ...libbfd: [ OFF ]
  ...libelf: [ on  ]
  ...   libnuma: [ OFF ]
  ...numa_num_possible_cpus: [ OFF ]
  ...   libperl: [ OFF ]
  ... libpython: [ OFF ]
  ...  libslang: [ on  ]
  ... libcrypto: [ on  ]
  ... libunwind: [ on  ]
  ...libdw-dwarf-unwind: [ on  ]
  ...  zlib: [ on  ]
  ...  lzma: [ on  ]
  ... get_cpuid: [ on  ]
  ...   bpf: [ on  ]
  ...libaio: [ on  ]

  $ ls -la /tmp/build/perf/feature/test-libaio.*
  -rwxrwxr-x. 1 acme acme 18296 Nov 26 08:49 
/tmp/build/perf/feature/test-libaio.bin
  -rw-rw-r--. 1 acme acme  1165 Nov 26 08:49 
/tmp/build/perf/feature/test-libaio.d
  -rw-rw-r--. 1 acme acme 0 Nov 26 08:49 
/tmp/build/perf/feature/test-libaio.make.output
  $
  $ grep -i aio /tmp/build/perf/FEATURE-DUMP
  feature-libaio=1
  $

Signed-off-by: Alexey Budankov 
Tested-by: Arnaldo Carvalho de Melo 
Reviewed-by: Jiri Olsa 
Acked-by: Namhyung Kim 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/5fcda10c-6c63-68df-383a-c6d9e5d1f...@linux.intel.com
[ split from a larger patch ]
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/build/Makefile.feature  |  6 --
 tools/build/feature/Makefile  |  6 +-
 tools/build/feature/test-all.c|  5 +
 tools/build/feature/test-libaio.c | 16 
 tools/perf/Makefile.config|  6 ++
 tools/perf/Makefile.perf  |  7 ++-
 6 files changed, 42 insertions(+), 4 deletions(-)
 create mode 100644 tools/build/feature/test-libaio.c

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 8a123834a2a3..d47b8f73e2e7 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -70,7 +70,8 @@ FEATURE_TESTS_BASIC :=  \
 sched_getcpu   \
 sdt\
 setns  \
-libopencsd
+libopencsd \
+libaio
 
 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
 # of all feature tests
@@ -116,7 +117,8 @@ FEATURE_DISPLAY ?=  \
  zlib   \
  lzma   \
  get_cpuid  \
- bpf
+ bpf   \
+ libaio
 
 # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
 # If in the future we need per-feature checks/flags for features not
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 38c22e122cb0..2dbcc0d00f52 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -61,7 +61,8 @@ FILES=  \
  test-libopencsd.bin   \
  test-clang.bin\
  test-llvm.bin \
- test-llvm-version.bin
+ test-llvm-version.bin \
+ test-libaio.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
@@ -297,6 +298,9 @@ $(OUTPUT)test-clang.bin:
 
 -include $(OUTPUT)*.d
 
+$(OUTPUT)test-libaio.bin:
+   $(BUILD) -lrt
+
 ###
 
 clean:
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 58f01b950195..20cdaa4fc112 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -174,6 +174,10 @@
 # include "test-libopencsd.c"
 #undef main
 
+#define main main_test_libaio
+# include "test-libaio.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
main_test_libpython();
@@ -214,6 +218,7 @@ int main(int argc, char *argv[])
main_test_sdt();
main_test_setns();
main_test_libopencsd();
+   main_test_libaio();
 
return 0;
 }
diff --git a/tools/build/feature/test-libaio.c 
b/tools/build/feature/test-libaio.c
new file mode 100644
index ..932133c9a265
--- /dev/null
+++ b/tools/build/feature/test-libaio.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+
+int main(void)
+{
+   struct aiocb aiocb;
+
+   aiocb.aio_fildes  = 0;
+   aiocb.aio_offset  = 0;
+   aiocb.aio_buf = 0;
+   aiocb.aio_nbytes  = 0;
+   

[PATCH 19/22] perf record: Enable asynchronous trace writing

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Alexey Budankov 

The trace file offset is read once before the loop iterating over the
mmaps and written back after all performance data is enqueued for aio
writing.

The trace file offset is incremented linearly after every successful aio
write operation.

record__aio_sync() blocks till completion of the started AIO operation
and then proceeds.

record__aio_mmap_read_sync() implements a barrier for all incomplete
aio write requests.

Signed-off-by: Alexey Budankov 
Reviewed-by: Jiri Olsa 
Acked-by: Namhyung Kim 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/ce2d45e9-d236-871c-7c8f-1bed2d37e...@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-record.txt |   5 +
 tools/perf/builtin-record.c  | 218 ++-
 tools/perf/perf.h|   1 +
 tools/perf/util/evlist.c |   6 +-
 tools/perf/util/evlist.h |   2 +-
 tools/perf/util/mmap.c   |  77 +++-
 tools/perf/util/mmap.h   |  14 ++
 7 files changed, 314 insertions(+), 9 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index 246dee081efd..7efb4af88a68 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -435,6 +435,11 @@ Specify vmlinux path which has debuginfo.
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
+--aio::
+Enable asynchronous (Posix AIO) trace writing mode.
+Asynchronous mode is supported only when linking Perf tool with libc library
+providing implementation for Posix AIO API.
+
 --all-kernel::
 Configure all used events to run in kernel space.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 488779bc4c8d..408d6477c960 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -124,6 +124,183 @@ static int record__write(struct record *rec, struct 
perf_mmap *map __maybe_unuse
return 0;
 }
 
+#ifdef HAVE_AIO_SUPPORT
+static int record__aio_write(struct aiocb *cblock, int trace_fd,
+   void *buf, size_t size, off_t off)
+{
+   int rc;
+
+   cblock->aio_fildes = trace_fd;
+   cblock->aio_buf= buf;
+   cblock->aio_nbytes = size;
+   cblock->aio_offset = off;
+   cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
+
+   do {
+   rc = aio_write(cblock);
+   if (rc == 0) {
+   break;
+   } else if (errno != EAGAIN) {
+   cblock->aio_fildes = -1;
+   pr_err("failed to queue perf data, error: %m\n");
+   break;
+   }
+   } while (1);
+
+   return rc;
+}
+
+static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
+{
+   void *rem_buf;
+   off_t rem_off;
+   size_t rem_size;
+   int rc, aio_errno;
+   ssize_t aio_ret, written;
+
+   aio_errno = aio_error(cblock);
+   if (aio_errno == EINPROGRESS)
+   return 0;
+
+   written = aio_ret = aio_return(cblock);
+   if (aio_ret < 0) {
+   if (aio_errno != EINTR)
+   pr_err("failed to write perf data, error: %m\n");
+   written = 0;
+   }
+
+   rem_size = cblock->aio_nbytes - written;
+
+   if (rem_size == 0) {
+   cblock->aio_fildes = -1;
+   /*
+* md->refcount is incremented in perf_mmap__push() for
+* every enqueued aio write request so decrement it because
+* the request is now complete.
+*/
+   perf_mmap__put(md);
+   rc = 1;
+   } else {
+   /*
+* aio write request may require restart with the
+* reminder if the kernel didn't write whole
+* chunk at once.
+*/
+   rem_off = cblock->aio_offset + written;
+   rem_buf = (void *)(cblock->aio_buf + written);
+   record__aio_write(cblock, cblock->aio_fildes,
+   rem_buf, rem_size, rem_off);
+   rc = 0;
+   }
+
+   return rc;
+}
+
+static void record__aio_sync(struct perf_mmap *md)
+{
+   struct aiocb *cblock = >aio.cblock;
+   struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
+
+   do {
+   if (cblock->aio_fildes == -1 || record__aio_complete(md, 
cblock))
+   return;
+
+   while (aio_suspend((const struct aiocb**), 1, )) 
{
+   if (!(errno == EAGAIN || errno == EINTR))
+   pr_err("failed to sync perf data, error: %m\n");
+   }
+   } while (1);
+}
+
+static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t 
size, off_t off)
+{
+   struct record 

[PATCH 17/22] tools build feature: Check if libaio is available

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Alexey Budankov 

This will be used by 'perf record' to speed up reading the perf ring
buffer.

Committer testing:

  $ make -C tools/perf O=/tmp/build/perf
  make: Entering directory '/home/acme/git/perf/tools/perf'
BUILD:   Doing 'make -j8' parallel build

  Auto-detecting system features:
  ... dwarf: [ on  ]
  ...dwarf_getlocations: [ on  ]
  ... glibc: [ on  ]
  ...  gtk2: [ OFF ]
  ...  libaudit: [ OFF ]
  ...libbfd: [ OFF ]
  ...libelf: [ on  ]
  ...   libnuma: [ OFF ]
  ...numa_num_possible_cpus: [ OFF ]
  ...   libperl: [ OFF ]
  ... libpython: [ OFF ]
  ...  libslang: [ on  ]
  ... libcrypto: [ on  ]
  ... libunwind: [ on  ]
  ...libdw-dwarf-unwind: [ on  ]
  ...  zlib: [ on  ]
  ...  lzma: [ on  ]
  ... get_cpuid: [ on  ]
  ...   bpf: [ on  ]
  ...libaio: [ on  ]

  $ ls -la /tmp/build/perf/feature/test-libaio.*
  -rwxrwxr-x. 1 acme acme 18296 Nov 26 08:49 
/tmp/build/perf/feature/test-libaio.bin
  -rw-rw-r--. 1 acme acme  1165 Nov 26 08:49 
/tmp/build/perf/feature/test-libaio.d
  -rw-rw-r--. 1 acme acme 0 Nov 26 08:49 
/tmp/build/perf/feature/test-libaio.make.output
  $
  $ grep -i aio /tmp/build/perf/FEATURE-DUMP
  feature-libaio=1
  $

Signed-off-by: Alexey Budankov 
Tested-by: Arnaldo Carvalho de Melo 
Reviewed-by: Jiri Olsa 
Acked-by: Namhyung Kim 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/5fcda10c-6c63-68df-383a-c6d9e5d1f...@linux.intel.com
[ split from a larger patch ]
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/build/Makefile.feature  |  6 --
 tools/build/feature/Makefile  |  6 +-
 tools/build/feature/test-all.c|  5 +
 tools/build/feature/test-libaio.c | 16 
 tools/perf/Makefile.config|  6 ++
 tools/perf/Makefile.perf  |  7 ++-
 6 files changed, 42 insertions(+), 4 deletions(-)
 create mode 100644 tools/build/feature/test-libaio.c

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 8a123834a2a3..d47b8f73e2e7 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -70,7 +70,8 @@ FEATURE_TESTS_BASIC :=  \
 sched_getcpu   \
 sdt\
 setns  \
-libopencsd
+libopencsd \
+libaio
 
 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
 # of all feature tests
@@ -116,7 +117,8 @@ FEATURE_DISPLAY ?=  \
  zlib   \
  lzma   \
  get_cpuid  \
- bpf
+ bpf   \
+ libaio
 
 # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
 # If in the future we need per-feature checks/flags for features not
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 38c22e122cb0..2dbcc0d00f52 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -61,7 +61,8 @@ FILES=  \
  test-libopencsd.bin   \
  test-clang.bin\
  test-llvm.bin \
- test-llvm-version.bin
+ test-llvm-version.bin \
+ test-libaio.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
@@ -297,6 +298,9 @@ $(OUTPUT)test-clang.bin:
 
 -include $(OUTPUT)*.d
 
+$(OUTPUT)test-libaio.bin:
+   $(BUILD) -lrt
+
 ###
 
 clean:
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 58f01b950195..20cdaa4fc112 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -174,6 +174,10 @@
 # include "test-libopencsd.c"
 #undef main
 
+#define main main_test_libaio
+# include "test-libaio.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
main_test_libpython();
@@ -214,6 +218,7 @@ int main(int argc, char *argv[])
main_test_sdt();
main_test_setns();
main_test_libopencsd();
+   main_test_libaio();
 
return 0;
 }
diff --git a/tools/build/feature/test-libaio.c 
b/tools/build/feature/test-libaio.c
new file mode 100644
index ..932133c9a265
--- /dev/null
+++ b/tools/build/feature/test-libaio.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+
+int main(void)
+{
+   struct aiocb aiocb;
+
+   aiocb.aio_fildes  = 0;
+   aiocb.aio_offset  = 0;
+   aiocb.aio_buf = 0;
+   aiocb.aio_nbytes  = 0;
+   

[PATCH 19/22] perf record: Enable asynchronous trace writing

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Alexey Budankov 

The trace file offset is read once before the loop iterating over the
mmaps and written back after all performance data is enqueued for aio
writing.

The trace file offset is incremented linearly after every successful aio
write operation.

record__aio_sync() blocks till completion of the started AIO operation
and then proceeds.

record__aio_mmap_read_sync() implements a barrier for all incomplete
aio write requests.

Signed-off-by: Alexey Budankov 
Reviewed-by: Jiri Olsa 
Acked-by: Namhyung Kim 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/ce2d45e9-d236-871c-7c8f-1bed2d37e...@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-record.txt |   5 +
 tools/perf/builtin-record.c  | 218 ++-
 tools/perf/perf.h|   1 +
 tools/perf/util/evlist.c |   6 +-
 tools/perf/util/evlist.h |   2 +-
 tools/perf/util/mmap.c   |  77 +++-
 tools/perf/util/mmap.h   |  14 ++
 7 files changed, 314 insertions(+), 9 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index 246dee081efd..7efb4af88a68 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -435,6 +435,11 @@ Specify vmlinux path which has debuginfo.
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
+--aio::
+Enable asynchronous (Posix AIO) trace writing mode.
+Asynchronous mode is supported only when linking Perf tool with libc library
+providing implementation for Posix AIO API.
+
 --all-kernel::
 Configure all used events to run in kernel space.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 488779bc4c8d..408d6477c960 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -124,6 +124,183 @@ static int record__write(struct record *rec, struct 
perf_mmap *map __maybe_unuse
return 0;
 }
 
+#ifdef HAVE_AIO_SUPPORT
+static int record__aio_write(struct aiocb *cblock, int trace_fd,
+   void *buf, size_t size, off_t off)
+{
+   int rc;
+
+   cblock->aio_fildes = trace_fd;
+   cblock->aio_buf= buf;
+   cblock->aio_nbytes = size;
+   cblock->aio_offset = off;
+   cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
+
+   do {
+   rc = aio_write(cblock);
+   if (rc == 0) {
+   break;
+   } else if (errno != EAGAIN) {
+   cblock->aio_fildes = -1;
+   pr_err("failed to queue perf data, error: %m\n");
+   break;
+   }
+   } while (1);
+
+   return rc;
+}
+
+static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
+{
+   void *rem_buf;
+   off_t rem_off;
+   size_t rem_size;
+   int rc, aio_errno;
+   ssize_t aio_ret, written;
+
+   aio_errno = aio_error(cblock);
+   if (aio_errno == EINPROGRESS)
+   return 0;
+
+   written = aio_ret = aio_return(cblock);
+   if (aio_ret < 0) {
+   if (aio_errno != EINTR)
+   pr_err("failed to write perf data, error: %m\n");
+   written = 0;
+   }
+
+   rem_size = cblock->aio_nbytes - written;
+
+   if (rem_size == 0) {
+   cblock->aio_fildes = -1;
+   /*
+* md->refcount is incremented in perf_mmap__push() for
+* every enqueued aio write request so decrement it because
+* the request is now complete.
+*/
+   perf_mmap__put(md);
+   rc = 1;
+   } else {
+   /*
+* aio write request may require restart with the
+* reminder if the kernel didn't write whole
+* chunk at once.
+*/
+   rem_off = cblock->aio_offset + written;
+   rem_buf = (void *)(cblock->aio_buf + written);
+   record__aio_write(cblock, cblock->aio_fildes,
+   rem_buf, rem_size, rem_off);
+   rc = 0;
+   }
+
+   return rc;
+}
+
+static void record__aio_sync(struct perf_mmap *md)
+{
+   struct aiocb *cblock = >aio.cblock;
+   struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
+
+   do {
+   if (cblock->aio_fildes == -1 || record__aio_complete(md, 
cblock))
+   return;
+
+   while (aio_suspend((const struct aiocb**), 1, )) 
{
+   if (!(errno == EAGAIN || errno == EINTR))
+   pr_err("failed to sync perf data, error: %m\n");
+   }
+   } while (1);
+}
+
+static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t 
size, off_t off)
+{
+   struct record 

[PATCH 18/22] perf mmap: Map data buffer for preserving collected data

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Alexey Budankov 

The map->data buffer is used to preserve map->base profiling data for
writing to disk. AIO map->cblock is used to queue corresponding
map->data buffer for asynchronous writing.

Signed-off-by: Alexey Budankov 
Reviewed-by: Jiri Olsa 
Acked-by: Namhyung Kim 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/5fcda10c-6c63-68df-383a-c6d9e5d1f...@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/evlist.c |  2 +-
 tools/perf/util/mmap.c   | 49 +++-
 tools/perf/util/mmap.h   | 11 -
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 36526d229315..6f010b9f0a81 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1028,7 +1028,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, 
unsigned int pages,
 * Its value is decided by evsel's write_backward.
 * So  should not be passed through const pointer.
 */
-   struct mmap_params mp;
+   struct mmap_params mp = { .nr_cblocks = 0 };
 
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index cdb95b3a1213..47cdc3ad6546 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -153,8 +153,55 @@ void __weak auxtrace_mmap_params__set_idx(struct 
auxtrace_mmap_params *mp __mayb
 {
 }
 
+#ifdef HAVE_AIO_SUPPORT
+static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
+{
+   int delta_max;
+
+   if (mp->nr_cblocks) {
+   map->aio.data = malloc(perf_mmap__mmap_len(map));
+   if (!map->aio.data) {
+   pr_debug2("failed to allocate data buffer, error %m\n");
+   return -1;
+   }
+   /*
+* Use cblock.aio_fildes value different from -1
+* to denote started aio write operation on the
+* cblock so it requires explicit record__aio_sync()
+* call prior the cblock may be reused again.
+*/
+   map->aio.cblock.aio_fildes = -1;
+   /*
+* Allocate cblock with max priority delta to
+* have faster aio write system calls.
+*/
+   delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
+   map->aio.cblock.aio_reqprio = delta_max;
+   }
+
+   return 0;
+}
+
+static void perf_mmap__aio_munmap(struct perf_mmap *map)
+{
+   if (map->aio.data)
+   zfree(>aio.data);
+}
+#else
+static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
+  struct mmap_params *mp __maybe_unused)
+{
+   return 0;
+}
+
+static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
+{
+}
+#endif
+
 void perf_mmap__munmap(struct perf_mmap *map)
 {
+   perf_mmap__aio_munmap(map);
if (map->base != NULL) {
munmap(map->base, perf_mmap__mmap_len(map));
map->base = NULL;
@@ -197,7 +244,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct 
mmap_params *mp, int fd, int c
>auxtrace_mp, map->base, fd))
return -1;
 
-   return 0;
+   return perf_mmap__aio_mmap(map, mp);
 }
 
 static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index cc5e2d6d17a9..3f10ad030c5e 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -6,6 +6,9 @@
 #include 
 #include 
 #include 
+#ifdef HAVE_AIO_SUPPORT
+#include 
+#endif
 #include "auxtrace.h"
 #include "event.h"
 
@@ -26,6 +29,12 @@ struct perf_mmap {
bool overwrite;
struct auxtrace_mmap auxtrace_mmap;
char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+#ifdef HAVE_AIO_SUPPORT
+   struct {
+   void *data;
+   struct aiocb cblock;
+   } aio;
+#endif
 };
 
 /*
@@ -57,7 +66,7 @@ enum bkw_mmap_state {
 };
 
 struct mmap_params {
-   int prot, mask;
+   int prot, mask, nr_cblocks;
struct auxtrace_mmap_params auxtrace_mp;
 };
 
-- 
2.19.1



[PATCH 20/22] perf record: Extend trace writing to multi AIO

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Alexey Budankov 

Multi AIO trace writing allows caching more kernel data in userspace
memory, postponing trace writing for the sake of increasing overall
profiling data throughput. It can be seen as an extension of the kernel
data buffer into userspace memory.

With an --aio option value different from 0 (the default value is 1),
the tool is able to cache more and more data in user space while
delegating the spill to AIO.

That avoids suspending at record__aio_sync() between calls to
record__mmap_read_evlist() and increases profiling data throughput at
the cost of userspace memory.

Signed-off-by: Alexey Budankov 
Reviewed-by: Jiri Olsa 
Acked-by: Namhyung Kim 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/050bb053-e7f3-aa83-fde7-f27ff90be...@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-record.txt |  4 +-
 tools/perf/builtin-record.c  | 67 ++--
 tools/perf/util/mmap.c   | 64 ++
 tools/perf/util/mmap.h   |  9 ++--
 4 files changed, 102 insertions(+), 42 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index 7efb4af88a68..d232b13ea713 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -435,8 +435,8 @@ Specify vmlinux path which has debuginfo.
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
---aio::
-Enable asynchronous (Posix AIO) trace writing mode.
+--aio[=n]::
+Use <n> control blocks in asynchronous (Posix AIO) trace writing mode 
(default: 1, max: 4).
 Asynchronous mode is supported only when linking Perf tool with libc library
 providing implementation for Posix AIO API.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 408d6477c960..4736dc96c4ca 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, 
struct aiocb *cblock)
return rc;
 }
 
-static void record__aio_sync(struct perf_mmap *md)
+static int record__aio_sync(struct perf_mmap *md, bool sync_all)
 {
-   struct aiocb *cblock = >aio.cblock;
+   struct aiocb **aiocb = md->aio.aiocb;
+   struct aiocb *cblocks = md->aio.cblocks;
struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
+   int i, do_suspend;
 
do {
-   if (cblock->aio_fildes == -1 || record__aio_complete(md, 
cblock))
-   return;
+   do_suspend = 0;
+   for (i = 0; i < md->aio.nr_cblocks; ++i) {
+   if (cblocks[i].aio_fildes == -1 || 
record__aio_complete(md, [i])) {
+   if (sync_all)
+   aiocb[i] = NULL;
+   else
+   return i;
+   } else {
+   /*
+* Started aio write is not complete yet
+* so it has to be waited before the
+* next allocation.
+*/
+   aiocb[i] = [i];
+   do_suspend = 1;
+   }
+   }
+   if (!do_suspend)
+   return -1;
 
-   while (aio_suspend((const struct aiocb**), 1, )) 
{
+   while (aio_suspend((const struct aiocb **)aiocb, 
md->aio.nr_cblocks, )) {
if (!(errno == EAGAIN || errno == EINTR))
pr_err("failed to sync perf data, error: %m\n");
}
@@ -252,28 +271,36 @@ static void record__aio_mmap_read_sync(struct record *rec)
struct perf_mmap *map = [i];
 
if (map->base)
-   record__aio_sync(map);
+   record__aio_sync(map, true);
}
 }
 
 static int nr_cblocks_default = 1;
+static int nr_cblocks_max = 4;
 
 static int record__aio_parse(const struct option *opt,
-const char *str __maybe_unused,
+const char *str,
 int unset)
 {
struct record_opts *opts = (struct record_opts *)opt->value;
 
-   if (unset)
+   if (unset) {
opts->nr_cblocks = 0;
-   else
-   opts->nr_cblocks = nr_cblocks_default;
+   } else {
+   if (str)
+   opts->nr_cblocks = strtol(str, NULL, 0);
+   if (!opts->nr_cblocks)
+   opts->nr_cblocks = nr_cblocks_default;
+   }
 
return 0;
 }
 #else /* HAVE_AIO_SUPPORT */
-static void record__aio_sync(struct perf_mmap *md __maybe_unused)
+static int nr_cblocks_max = 0;
+
+static int 

[PATCH 18/22] perf mmap: Map data buffer for preserving collected data

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Alexey Budankov 

The map->data buffer is used to preserve map->base profiling data for
writing to disk. AIO map->cblock is used to queue corresponding
map->data buffer for asynchronous writing.

Signed-off-by: Alexey Budankov 
Reviewed-by: Jiri Olsa 
Acked-by: Namhyung Kim 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/5fcda10c-6c63-68df-383a-c6d9e5d1f...@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/evlist.c |  2 +-
 tools/perf/util/mmap.c   | 49 +++-
 tools/perf/util/mmap.h   | 11 -
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 36526d229315..6f010b9f0a81 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1028,7 +1028,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, 
unsigned int pages,
 * Its value is decided by evsel's write_backward.
 * So &mp should not be passed through const pointer.
 */
-   struct mmap_params mp;
+   struct mmap_params mp = { .nr_cblocks = 0 };
 
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index cdb95b3a1213..47cdc3ad6546 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -153,8 +153,55 @@ void __weak auxtrace_mmap_params__set_idx(struct 
auxtrace_mmap_params *mp __mayb
 {
 }
 
+#ifdef HAVE_AIO_SUPPORT
+static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
+{
+   int delta_max;
+
+   if (mp->nr_cblocks) {
+   map->aio.data = malloc(perf_mmap__mmap_len(map));
+   if (!map->aio.data) {
+   pr_debug2("failed to allocate data buffer, error %m\n");
+   return -1;
+   }
+   /*
+* Use cblock.aio_fildes value different from -1
+* to denote started aio write operation on the
+* cblock so it requires explicit record__aio_sync()
+* call prior the cblock may be reused again.
+*/
+   map->aio.cblock.aio_fildes = -1;
+   /*
+* Allocate cblock with max priority delta to
+* have faster aio write system calls.
+*/
+   delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
+   map->aio.cblock.aio_reqprio = delta_max;
+   }
+
+   return 0;
+}
+
+static void perf_mmap__aio_munmap(struct perf_mmap *map)
+{
+   if (map->aio.data)
+   zfree(>aio.data);
+}
+#else
+static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
+  struct mmap_params *mp __maybe_unused)
+{
+   return 0;
+}
+
+static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
+{
+}
+#endif
+
 void perf_mmap__munmap(struct perf_mmap *map)
 {
+   perf_mmap__aio_munmap(map);
if (map->base != NULL) {
munmap(map->base, perf_mmap__mmap_len(map));
map->base = NULL;
@@ -197,7 +244,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct 
mmap_params *mp, int fd, int c
>auxtrace_mp, map->base, fd))
return -1;
 
-   return 0;
+   return perf_mmap__aio_mmap(map, mp);
 }
 
 static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index cc5e2d6d17a9..3f10ad030c5e 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -6,6 +6,9 @@
 #include 
 #include 
 #include 
+#ifdef HAVE_AIO_SUPPORT
+#include 
+#endif
 #include "auxtrace.h"
 #include "event.h"
 
@@ -26,6 +29,12 @@ struct perf_mmap {
bool overwrite;
struct auxtrace_mmap auxtrace_mmap;
char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+#ifdef HAVE_AIO_SUPPORT
+   struct {
+   void *data;
+   struct aiocb cblock;
+   } aio;
+#endif
 };
 
 /*
@@ -57,7 +66,7 @@ enum bkw_mmap_state {
 };
 
 struct mmap_params {
-   int prot, mask;
+   int prot, mask, nr_cblocks;
struct auxtrace_mmap_params auxtrace_mp;
 };
 
-- 
2.19.1



[PATCH 20/22] perf record: Extend trace writing to multi AIO

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Alexey Budankov 

Multi AIO trace writing allows caching more kernel data into userspace
memory postponing trace writing for the sake of overall profiling data
thruput increase. It could be seen as kernel data buffer extension into
userspace memory.

With an --aio option value different from 0 (the default value is 1), the
tool can cache more and more data in user space while delegating the
spill to AIO.

That avoids suspending in record__aio_sync() between calls of
record__mmap_read_evlist() and increases profiling data throughput at
the cost of userspace memory.

Signed-off-by: Alexey Budankov 
Reviewed-by: Jiri Olsa 
Acked-by: Namhyung Kim 
Cc: Alexander Shishkin 
Cc: Andi Kleen 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/050bb053-e7f3-aa83-fde7-f27ff90be...@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-record.txt |  4 +-
 tools/perf/builtin-record.c  | 67 ++--
 tools/perf/util/mmap.c   | 64 ++
 tools/perf/util/mmap.h   |  9 ++--
 4 files changed, 102 insertions(+), 42 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index 7efb4af88a68..d232b13ea713 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -435,8 +435,8 @@ Specify vmlinux path which has debuginfo.
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
---aio::
-Enable asynchronous (Posix AIO) trace writing mode.
+--aio[=n]::
+Use <n> control blocks in asynchronous (Posix AIO) trace writing mode 
(default: 1, max: 4).
 Asynchronous mode is supported only when linking Perf tool with libc library
 providing implementation for Posix AIO API.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 408d6477c960..4736dc96c4ca 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, 
struct aiocb *cblock)
return rc;
 }
 
-static void record__aio_sync(struct perf_mmap *md)
+static int record__aio_sync(struct perf_mmap *md, bool sync_all)
 {
-   struct aiocb *cblock = >aio.cblock;
+   struct aiocb **aiocb = md->aio.aiocb;
+   struct aiocb *cblocks = md->aio.cblocks;
struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
+   int i, do_suspend;
 
do {
-   if (cblock->aio_fildes == -1 || record__aio_complete(md, 
cblock))
-   return;
+   do_suspend = 0;
+   for (i = 0; i < md->aio.nr_cblocks; ++i) {
+   if (cblocks[i].aio_fildes == -1 || 
record__aio_complete(md, [i])) {
+   if (sync_all)
+   aiocb[i] = NULL;
+   else
+   return i;
+   } else {
+   /*
+* Started aio write is not complete yet
+* so it has to be waited before the
+* next allocation.
+*/
+   aiocb[i] = [i];
+   do_suspend = 1;
+   }
+   }
+   if (!do_suspend)
+   return -1;
 
-   while (aio_suspend((const struct aiocb**), 1, )) 
{
+   while (aio_suspend((const struct aiocb **)aiocb, 
md->aio.nr_cblocks, )) {
if (!(errno == EAGAIN || errno == EINTR))
pr_err("failed to sync perf data, error: %m\n");
}
@@ -252,28 +271,36 @@ static void record__aio_mmap_read_sync(struct record *rec)
struct perf_mmap *map = [i];
 
if (map->base)
-   record__aio_sync(map);
+   record__aio_sync(map, true);
}
 }
 
 static int nr_cblocks_default = 1;
+static int nr_cblocks_max = 4;
 
 static int record__aio_parse(const struct option *opt,
-const char *str __maybe_unused,
+const char *str,
 int unset)
 {
struct record_opts *opts = (struct record_opts *)opt->value;
 
-   if (unset)
+   if (unset) {
opts->nr_cblocks = 0;
-   else
-   opts->nr_cblocks = nr_cblocks_default;
+   } else {
+   if (str)
+   opts->nr_cblocks = strtol(str, NULL, 0);
+   if (!opts->nr_cblocks)
+   opts->nr_cblocks = nr_cblocks_default;
+   }
 
return 0;
 }
 #else /* HAVE_AIO_SUPPORT */
-static void record__aio_sync(struct perf_mmap *md __maybe_unused)
+static int nr_cblocks_max = 0;
+
+static int 

[PATCH 16/22] perf intel-pt: Fix error with config term "pt=0"

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

Users should never use 'pt=0', but if they do it may give a meaningless
error:

$ perf record -e intel_pt/pt=0/u uname
Error:
The sys_perf_event_open() syscall returned with 22 (Invalid argument) 
for
event (intel_pt/pt=0/u).

Fix that by forcing 'pt=1'.

Committer testing:

  # perf record -e intel_pt/pt=0/u uname
  Error:
  The sys_perf_event_open() syscall returned with 22 (Invalid argument) for 
event (intel_pt/pt=0/u).
  /bin/dmesg | grep -i perf may provide additional information.

  # perf record -e intel_pt/pt=0/u uname
  pt=0 doesn't make sense, forcing pt=1
  Linux
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.020 MB perf.data ]
  #

Signed-off-by: Adrian Hunter 
Tested-by: Arnaldo Carvalho de Melo 
Cc: Jiri Olsa 
Link: http://lkml.kernel.org/r/b7c5b4e5-9497-10e5-fd43-5f3e4a0fe...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/arch/x86/util/intel-pt.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/tools/perf/arch/x86/util/intel-pt.c 
b/tools/perf/arch/x86/util/intel-pt.c
index db0ba8caf5a2..ba8ecaf52200 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -524,10 +524,21 @@ static int intel_pt_validate_config(struct perf_pmu 
*intel_pt_pmu,
struct perf_evsel *evsel)
 {
int err;
+   char c;
 
if (!evsel)
return 0;
 
+   /*
+* If supported, force pass-through config term (pt=1) even if user
+* sets pt=0, which avoids senseless kernel errors.
+*/
+   if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", ) == 1 &&
+   !(evsel->attr.config & 1)) {
+   pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
+   evsel->attr.config |= 1;
+   }
+
err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
   "cyc_thresh", "caps/psb_cyc",
   evsel->attr.config);
-- 
2.19.1



[PATCH 06/22] perf machine: Record if a arch has a single user/kernel address space

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

Some architectures have a single address space for kernel and user
addresses, which makes it possible to determine if an address is in
kernel space or user space. Some don't, e.g.: sparc.

Cache that info in perf_env so that, for instance, code needing to
fallback failed symbol lookups at the kernel space in single address
space arches can lookup at userspace.

Signed-off-by: Adrian Hunter 
Cc: Andi Kleen 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Leo Yan 
Cc: Mathieu Poirier 
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/20181106210712.12098-2-adrian.hun...@intel.com
[ split from a larger patch ]
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/arch/common.c  | 10 ++
 tools/perf/arch/common.h  |  1 +
 tools/perf/util/machine.h |  1 +
 tools/perf/util/session.c |  4 
 4 files changed, 16 insertions(+)

diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 82657c01a3b8..5f69fd0b745a 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -200,3 +200,13 @@ int perf_env__lookup_objdump(struct perf_env *env, const 
char **path)
 
return perf_env__lookup_binutils_path(env, "objdump", path);
 }
+
+/*
+ * Some architectures have a single address space for kernel and user 
addresses,
+ * which makes it possible to determine if an address is in kernel space or 
user
+ * space.
+ */
+bool perf_env__single_address_space(struct perf_env *env)
+{
+   return strcmp(perf_env__arch(env), "sparc");
+}
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 2167001b18c5..c298a446d1f6 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,5 +5,6 @@
 #include "../util/env.h"
 
 int perf_env__lookup_objdump(struct perf_env *env, const char **path);
+bool perf_env__single_address_space(struct perf_env *env);
 
 #endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index d856b85862e2..ca897a73014c 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -42,6 +42,7 @@ struct machine {
u16   id_hdr_size;
bool  comm_exec;
bool  kptr_restrict_warned;
+   bool  single_address_space;
char  *root_dir;
char  *mmap_name;
struct threadsthreads[THREADS__TABLE_SIZE];
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7d2c8ce6cfad..f8eab197f35c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -24,6 +24,7 @@
 #include "thread.h"
 #include "thread-stack.h"
 #include "stat.h"
+#include "arch/common.h"
 
 static int perf_session__deliver_event(struct perf_session *session,
   union perf_event *event,
@@ -150,6 +151,9 @@ struct perf_session *perf_session__new(struct perf_data 
*data,
session->machines.host.env = _env;
}
 
+   session->machines.host.single_address_space =
+   perf_env__single_address_space(session->machines.host.env);
+
if (!data || perf_data__is_write(data)) {
/*
 * In O_RDONLY mode this will be performed when reading the
-- 
2.19.1



[PATCH 12/22] perf test: Fix perf_event_attr test failure

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

Fix inconsistent use of tabs and spaces error:

  # perf test 16 -v
  16: Setup struct perf_event_attr  :
  --- start ---
  test child forked, pid 20224
File "/usr/libexec/perf-core/tests/attr.py", line 119
  log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
 ^
  TabError: inconsistent use of tabs and spaces in indentation
  test child finished with -1
   end 
  Setup struct perf_event_attr: FAILED!

Signed-off-by: Adrian Hunter 
Cc: Jiri Olsa 
Link: http://lkml.kernel.org/r/20181122140456.16817-1-adrian.hun...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/tests/attr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index ff9b60b99f52..44090a9a19f3 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -116,7 +116,7 @@ class Event(dict):
 if not self.has_key(t) or not other.has_key(t):
 continue
 if not data_equal(self[t], other[t]):
-   log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
+log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
 
 # Test file description needs to have following sections:
 # [config]
-- 
2.19.1



[GIT PULL 00/22] perf/core improvements and fixes

2018-11-30 Thread Arnaldo Carvalho de Melo
Hi Ingo,

Please consider pulling, more to come,

Regards,

- Arnaldo

Test results at the end of this message, as usual.

The following changes since commit b1a9d7b0190119dad5b9b7841751b5a7586bbc8b:

  Merge tag 'perf-urgent-for-mingo-4.20-20181121' of 
git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent 
(2018-11-21 15:57:21 +0100)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git 
tags/perf-core-for-mingo-4.21-20181130

for you to fetch changes up to 09d3f015d1e1b4fee7e9bbdcf54201d239393391:

  uprobes: Fix handle_swbp() vs. unregister() + register() race once more 
(2018-11-23 08:31:19 +0100)


perf/core improvements and fixes:

- Introduce 'perf record --aio' to use asynchronous IO trace writing in
  'perf record' disabled by default, i.e. one needs to explicitly use
  'perf record --aio' to use it, in which case the number of AIO aiocb
  structs will be one, specify 'perf record --aio=N' to ask for more,
  according to your needs, related to the number of processors in your
  machine. Reports about the effectiveness of this option are welcome
  so that we can decide on making it the default mode of operation. Read
  the respective patches commit logs for further information (Alexey Budankov)

- Add fallback routines to be used in places where we don't have the cpu mode
  (kernel/user space/hypervisor) and thus must first fallback lookups looking
  at all map trees when trying to resolve symbols (Adrian Hunter)

- Introduce 'perf top --kallsyms file' to match 'perf report --kallsyms', useful
  when dealing with BPF, where symbol resolution happens via kallsyms, not via
  the default vmlinux ELF symtabs (Arnaldo Carvalho de Melo)

- Fix CSV mode column output for non-cgroup events in 'perf stat' (Stephane 
Eranian)

- Fix 'perf stat' shadow stats for clock events. (Ravi Bangoria)

- Fix error with config term "pt=0", where we should just force "pt=1" and
  warn the user about the former being non-sensical (Adrian Hunter)

- Fix 'perf test' entry where we expect 'sleep' to come in a PERF_RECORD_COMM
  but instead we get 'coreutils' when sleep is provided by some versions of
  the 'coreutils' package (Adrian Hunter)

- Remove needless rb_tree extra indirection from map__find() (Eric 
Saint-Etienne)

- Add sanity check to libtraceevent's is_timestamp_in_us() (Tzvetomir Stoyanov)

- Use ERR_CAST instead of ERR_PTR(PTR_ERR()) (Wen Yang)

Signed-off-by: Arnaldo Carvalho de Melo 


Andrea Parri (1):
  uprobes: Fix handle_swbp() vs. unregister() + register() race once more

Jiri Olsa (3):
  perf/x86/intel: Move branch tracing setup to the Intel-specific source 
file
  perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts()
  perf/x86/intel: Disallow precise_ip on BTS events

 arch/x86/events/core.c   | 20 
 arch/x86/events/intel/core.c | 56 ++--
 arch/x86/events/perf_event.h | 13 ++
 kernel/events/uprobes.c  | 12 --
 4 files changed, 63 insertions(+), 38 deletions(-)

Test results:

XXX: Investigation of the watchpoint and breakpoint 'perf test' failures is
 underway; they don't look related to the patches in this batch.

The first ones are container (docker) based builds of tools/perf with
and without libelf support.  Where clang is available, it is also used
to build perf with/without libelf, and building with LIBCLANGLLVM=1
(built-in clang) with gcc and clang when clang and its devel libraries
are installed.

The objtool and samples/bpf/ builds are disabled now that I'm switching from
using the sources in a local volume to fetching them from a http server to
build it inside the container, to make it easier to build in a container 
cluster.
Those will come back later.

Several are cross builds, the ones with -x-ARCH and the android one, and those
may not have all the features built, due to lack of multi-arch devel packages,
available and being used so far on just a few, like
debian:experimental-x-{arm64,mipsel}.

The 'perf test' one will perform a variety of tests exercising
tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands
with a variety of command line event specifications to then intercept the
sys_perf_event syscall to check that the perf_event_attr fields are set up as
expected, among a variety of other unit tests.

Then there is the 'make -C tools/perf build-test' ones, that build tools/perf/
with a variety of feature sets, exercising the build with an incomplete set of
features as well as with a complete one. It is planned to have it run on each
of the containers mentioned above, using some container orchestration
infrastructure. Get in contact if interested in helping having this in place.

  # dm
   1 alpine:3.4  

[GIT PULL 00/22] perf/core improvements and fixes

2018-11-30 Thread Arnaldo Carvalho de Melo
Hi Ingo,

Please consider pulling, more to come,

Regards,

- Arnaldo

Test results at the end of this message, as usual.

The following changes since commit b1a9d7b0190119dad5b9b7841751b5a7586bbc8b:

  Merge tag 'perf-urgent-for-mingo-4.20-20181121' of 
git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent 
(2018-11-21 15:57:21 +0100)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git 
tags/perf-core-for-mingo-4.21-20181130

for you to fetch changes up to 09d3f015d1e1b4fee7e9bbdcf54201d239393391:

  uprobes: Fix handle_swbp() vs. unregister() + register() race once more 
(2018-11-23 08:31:19 +0100)


perf/core improvements and fixes:

- Introduce 'perf record --aio' to use asynchronous IO trace writing in
  'perf record' disabled by default, i.e. one needs to explicitly use
  'perf record --aio' to use it, in which case the number of AIO aiocb
  structs will be one, specify 'perf record --aio=N' to ask for more,
  according to your needs, related to the number of processors in your
  machine. Reports about the effectiveness of this option are welcome
  so that we can decide on making it the default mode of operation. Read
  the respective patches commit logs for further information (Alexey Budankov)

- Add fallback routines to be used in places where we don't have the cpu mode
  (kernel/user space/hypervisor) and thus must first fallback lookups looking
  at all map trees when trying to resolve symbols (Adrian Hunter)

- Introduce 'perf top --kallsyms file' to match 'perf report --kallsyms', useful
  when dealing with BPF, where symbol resolution happens via kallsyms, not via
  the default vmlinux ELF symtabs (Arnaldo Carvalho de Melo)

- Fix CSV mode column output for non-cgroup events in 'perf stat' (Stephane 
Eranian)

- Fix 'perf stat' shadow stats for clock events. (Ravi Bangoria)

- Fix error with config term "pt=0", where we should just force "pt=1" and
  warn the user about the former being non-sensical (Adrian Hunter)

- Fix 'perf test' entry where we expect 'sleep' to come in a PERF_RECORD_COMM
  but instead we get 'coreutils' when sleep is provided by some versions of
  the 'coreutils' package (Adrian Hunter)

- Remove needless rb_tree extra indirection from map__find() (Eric 
Saint-Etienne)

- Add sanity check to libtraceevent's is_timestamp_in_us() (Tzvetomir Stoyanov)

- Use ERR_CAST instead of ERR_PTR(PTR_ERR()) (Wen Yang)

Signed-off-by: Arnaldo Carvalho de Melo 


Andrea Parri (1):
  uprobes: Fix handle_swbp() vs. unregister() + register() race once more

Jiri Olsa (3):
  perf/x86/intel: Move branch tracing setup to the Intel-specific source 
file
  perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts()
  perf/x86/intel: Disallow precise_ip on BTS events

 arch/x86/events/core.c   | 20 
 arch/x86/events/intel/core.c | 56 ++--
 arch/x86/events/perf_event.h | 13 ++
 kernel/events/uprobes.c  | 12 --
 4 files changed, 63 insertions(+), 38 deletions(-)

Test results:

XXX: Investigation of the watchpoint and breakpoint 'perf test' failures is
 underway; they don't look related to the patches in this batch.

The first ones are container (docker) based builds of tools/perf with
and without libelf support.  Where clang is available, it is also used
to build perf with/without libelf, and building with LIBCLANGLLVM=1
(built-in clang) with gcc and clang when clang and its devel libraries
are installed.

The objtool and samples/bpf/ builds are disabled now that I'm switching from
using the sources in a local volume to fetching them from a http server to
build it inside the container, to make it easier to build in a container 
cluster.
Those will come back later.

Several are cross builds, the ones with -x-ARCH and the android one, and those
may not have all the features built, due to lack of multi-arch devel packages,
available and being used so far on just a few, like
debian:experimental-x-{arm64,mipsel}.

The 'perf test' one will perform a variety of tests exercising
tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands
with a variety of command line event specifications to then intercept the
sys_perf_event syscall to check that the perf_event_attr fields are set up as
expected, among a variety of other unit tests.

Then there is the 'make -C tools/perf build-test' ones, that build tools/perf/
with a variety of feature sets, exercising the build with an incomplete set of
features as well as with a complete one. It is planned to have it run on each
of the containers mentioned above, using some container orchestration
infrastructure. Get in contact if interested in helping having this in place.

  # dm
   1 alpine:3.4  

[PATCH 16/22] perf intel-pt: Fix error with config term "pt=0"

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

Users should never use 'pt=0', but if they do it may give a meaningless
error:

$ perf record -e intel_pt/pt=0/u uname
Error:
The sys_perf_event_open() syscall returned with 22 (Invalid argument) 
for
event (intel_pt/pt=0/u).

Fix that by forcing 'pt=1'.

Committer testing:

  # perf record -e intel_pt/pt=0/u uname
  Error:
  The sys_perf_event_open() syscall returned with 22 (Invalid argument) for 
event (intel_pt/pt=0/u).
  /bin/dmesg | grep -i perf may provide additional information.

  # perf record -e intel_pt/pt=0/u uname
  pt=0 doesn't make sense, forcing pt=1
  Linux
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.020 MB perf.data ]
  #

Signed-off-by: Adrian Hunter 
Tested-by: Arnaldo Carvalho de Melo 
Cc: Jiri Olsa 
Link: http://lkml.kernel.org/r/b7c5b4e5-9497-10e5-fd43-5f3e4a0fe...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/arch/x86/util/intel-pt.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/tools/perf/arch/x86/util/intel-pt.c 
b/tools/perf/arch/x86/util/intel-pt.c
index db0ba8caf5a2..ba8ecaf52200 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -524,10 +524,21 @@ static int intel_pt_validate_config(struct perf_pmu 
*intel_pt_pmu,
struct perf_evsel *evsel)
 {
int err;
+   char c;
 
if (!evsel)
return 0;
 
+   /*
+* If supported, force pass-through config term (pt=1) even if user
+* sets pt=0, which avoids senseless kernel errors.
+*/
+   if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", ) == 1 &&
+   !(evsel->attr.config & 1)) {
+   pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
+   evsel->attr.config |= 1;
+   }
+
err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
   "cyc_thresh", "caps/psb_cyc",
   evsel->attr.config);
-- 
2.19.1



[PATCH 06/22] perf machine: Record if a arch has a single user/kernel address space

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

Some architectures have a single address space for kernel and user
addresses, which makes it possible to determine if an address is in
kernel space or user space. Some don't, e.g.: sparc.

Cache that info in perf_env so that, for instance, code needing to
fallback failed symbol lookups at the kernel space in single address
space arches can lookup at userspace.

Signed-off-by: Adrian Hunter 
Cc: Andi Kleen 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Leo Yan 
Cc: Mathieu Poirier 
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/20181106210712.12098-2-adrian.hun...@intel.com
[ split from a larger patch ]
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/arch/common.c  | 10 ++
 tools/perf/arch/common.h  |  1 +
 tools/perf/util/machine.h |  1 +
 tools/perf/util/session.c |  4 
 4 files changed, 16 insertions(+)

diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 82657c01a3b8..5f69fd0b745a 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -200,3 +200,13 @@ int perf_env__lookup_objdump(struct perf_env *env, const 
char **path)
 
return perf_env__lookup_binutils_path(env, "objdump", path);
 }
+
+/*
+ * Some architectures have a single address space for kernel and user 
addresses,
+ * which makes it possible to determine if an address is in kernel space or 
user
+ * space.
+ */
+bool perf_env__single_address_space(struct perf_env *env)
+{
+   return strcmp(perf_env__arch(env), "sparc");
+}
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 2167001b18c5..c298a446d1f6 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,5 +5,6 @@
 #include "../util/env.h"
 
 int perf_env__lookup_objdump(struct perf_env *env, const char **path);
+bool perf_env__single_address_space(struct perf_env *env);
 
 #endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index d856b85862e2..ca897a73014c 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -42,6 +42,7 @@ struct machine {
u16   id_hdr_size;
bool  comm_exec;
bool  kptr_restrict_warned;
+   bool  single_address_space;
char  *root_dir;
char  *mmap_name;
struct threadsthreads[THREADS__TABLE_SIZE];
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7d2c8ce6cfad..f8eab197f35c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -24,6 +24,7 @@
 #include "thread.h"
 #include "thread-stack.h"
 #include "stat.h"
+#include "arch/common.h"
 
 static int perf_session__deliver_event(struct perf_session *session,
   union perf_event *event,
@@ -150,6 +151,9 @@ struct perf_session *perf_session__new(struct perf_data 
*data,
session->machines.host.env = _env;
}
 
+   session->machines.host.single_address_space =
+   perf_env__single_address_space(session->machines.host.env);
+
if (!data || perf_data__is_write(data)) {
/*
 * In O_RDONLY mode this will be performed when reading the
-- 
2.19.1



[PATCH 12/22] perf test: Fix perf_event_attr test failure

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

Fix inconsistent use of tabs and spaces error:

  # perf test 16 -v
  16: Setup struct perf_event_attr  :
  --- start ---
  test child forked, pid 20224
File "/usr/libexec/perf-core/tests/attr.py", line 119
  log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
 ^
  TabError: inconsistent use of tabs and spaces in indentation
  test child finished with -1
   end 
  Setup struct perf_event_attr: FAILED!

Signed-off-by: Adrian Hunter 
Cc: Jiri Olsa 
Link: http://lkml.kernel.org/r/20181122140456.16817-1-adrian.hun...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/tests/attr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index ff9b60b99f52..44090a9a19f3 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -116,7 +116,7 @@ class Event(dict):
 if not self.has_key(t) or not other.has_key(t):
 continue
 if not data_equal(self[t], other[t]):
-   log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
+log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
 
 # Test file description needs to have following sections:
 # [config]
-- 
2.19.1



[PATCH 01/22] perf build: Give better hint about devel package for libssl

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

On Debian/Ubuntu it's libssl-dev, but on Fedora/RHEL/CentOS/etc. it's
openssl-devel; fix the hint accordingly.

Cc: Adrian Hunter 
Cc: David Ahern 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Stephane Eranian 
Cc: Wang Nan 
Fixes: 8ee4646038e4 ("perf build: Add libcrypto feature detection")
Link: https://lkml.kernel.org/n/tip-lnxqszts6aq2c9jy4b7ml...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Makefile.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index e110010e7faa..c643d5e0c26b 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -588,7 +588,7 @@ endif
 
 ifndef NO_LIBCRYPTO
   ifneq ($(feature-libcrypto), 1)
-msg := $(warning No libcrypto.h found, disables jitted code injection, 
please install libssl-devel or libssl-dev);
+msg := $(warning No libcrypto.h found, disables jitted code injection, 
please install openssl-devel or libssl-dev);
 NO_LIBCRYPTO := 1
   else
 CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
-- 
2.19.1



[PATCH 07/22] perf thread: Add fallback functions for cases where cpumode is insufficient

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

For branch stacks or branch samples, the sample cpumode might not be
correct because it applies only to the sample 'ip' and not necessarily to
'addr' or branch stack addresses. Add fallback functions that can be
used to deal with those cases.

Signed-off-by: Adrian Hunter 
Cc: Andi Kleen 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Leo Yan 
Cc: Mathieu Poirier 
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/20181106210712.12098-2-adrian.hun...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/event.c   | 27 +++
 tools/perf/util/machine.c | 27 +++
 tools/perf/util/machine.h |  2 ++
 tools/perf/util/thread.h  |  4 
 4 files changed, 60 insertions(+)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index e9c108a6b1c3..9431b20c1337 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1577,6 +1577,24 @@ struct map *thread__find_map(struct thread *thread, u8 
cpumode, u64 addr,
return al->map;
 }
 
+/*
+ * For branch stacks or branch samples, the sample cpumode might not be correct
+ * because it applies only to the sample 'ip' and not necessary to 'addr' or
+ * branch stack addresses. If possible, use a fallback to deal with those 
cases.
+ */
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+   struct addr_location *al)
+{
+   struct map *map = thread__find_map(thread, cpumode, addr, al);
+   struct machine *machine = thread->mg->machine;
+   u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr);
+
+   if (map || addr_cpumode == cpumode)
+   return map;
+
+   return thread__find_map(thread, addr_cpumode, addr, al);
+}
+
 struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
   u64 addr, struct addr_location *al)
 {
@@ -1586,6 +1604,15 @@ struct symbol *thread__find_symbol(struct thread 
*thread, u8 cpumode,
return al->sym;
 }
 
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al)
+{
+   al->sym = NULL;
+   if (thread__find_map_fb(thread, cpumode, addr, al))
+   al->sym = map__find_symbol(al->map, al->addr);
+   return al->sym;
+}
+
 /*
  * Callers need to drop the reference to al->thread, obtained in
  * machine__findnew_thread()
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8f36ce813bc5..9397e3f2444d 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2592,6 +2592,33 @@ int machine__get_kernel_start(struct machine *machine)
return err;
 }
 
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr)
+{
+   u8 addr_cpumode = cpumode;
+   bool kernel_ip;
+
+   if (!machine->single_address_space)
+   goto out;
+
+   kernel_ip = machine__kernel_ip(machine, addr);
+   switch (cpumode) {
+   case PERF_RECORD_MISC_KERNEL:
+   case PERF_RECORD_MISC_USER:
+   addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL :
+  PERF_RECORD_MISC_USER;
+   break;
+   case PERF_RECORD_MISC_GUEST_KERNEL:
+   case PERF_RECORD_MISC_GUEST_USER:
+   addr_cpumode = kernel_ip ? PERF_RECORD_MISC_GUEST_KERNEL :
+  PERF_RECORD_MISC_GUEST_USER;
+   break;
+   default:
+   break;
+   }
+out:
+   return addr_cpumode;
+}
+
 struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
 {
return dsos__findnew(>dsos, filename);
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index ca897a73014c..ebde3ea70225 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -100,6 +100,8 @@ static inline bool machine__kernel_ip(struct machine 
*machine, u64 ip)
return ip >= kernel_start;
 }
 
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr);
+
 struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid);
 struct comm *machine__thread_exec_comm(struct machine *machine,
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 30e2b4c165fe..5920c3bb8ffe 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -96,9 +96,13 @@ struct thread *thread__main_thread(struct machine *machine, 
struct thread *threa
 
 struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
 struct addr_location *al);
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+   struct addr_location *al);
 
 struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
   u64 addr, struct addr_location *al);

[PATCH 15/22] perf top: Allow passing a kallsyms file

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

This basically replicates what was done for 'perf report' in:

   b226a5a72901 ("perf report: Allow user to specify path to kallsyms file")

This should help with resolving eBPF symbols, which are in kallsyms but,
of course, not in vmlinux.

Reported-by: Ivan Babrou 
Tested-by: Ivan Babrou 
Cc: Adrian Hunter 
Cc: Alexei Starovoitov 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Wang Nan 
Link: https://lkml.kernel.org/n/tip-x52mx1ybq8128rtg9hjrj...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-top.txt | 3 +++
 tools/perf/builtin-top.c  | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/tools/perf/Documentation/perf-top.txt 
b/tools/perf/Documentation/perf-top.txt
index 808b664343c9..44d89fb9c788 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -70,6 +70,9 @@ Default is to monitor all CPUS.
 --ignore-vmlinux::
Ignore vmlinux files.
 
+--kallsyms=::
+   kallsyms pathname
+
 -m ::
 --mmap-pages=::
Number of mmap data pages (must be a power of two) or size
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index aa0c73e57924..1252d1759064 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1289,6 +1289,8 @@ int cmd_top(int argc, const char **argv)
   "file", "vmlinux pathname"),
OPT_BOOLEAN(0, "ignore-vmlinux", _conf.ignore_vmlinux,
"don't load vmlinux even if found"),
+   OPT_STRING(0, "kallsyms", _conf.kallsyms_name,
+  "file", "kallsyms pathname"),
OPT_BOOLEAN('K', "hide_kernel_symbols", _kernel_symbols,
"hide kernel symbols"),
OPT_CALLBACK('m', "mmap-pages", >mmap_pages, "pages",
-- 
2.19.1



[PATCH 11/22] perf tests record: Allow for 'sleep' being 'coreutils'

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

If the 'sleep' command is provided by coreutils, then the "PERF_RECORD_*
events & perf_sample fields" test will fail because the MMAP name is
'coreutils' not 'sleep', and there is an extra COMM event. Fix the test
to detect that case.

Signed-off-by: Adrian Hunter 
Cc: Jiri Olsa 
Link: http://lkml.kernel.org/r/20181122135545.16295-1-adrian.hun...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/tests/perf-record.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 34394cc05077..07f6bd8ed719 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -58,6 +58,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int 
subtest __maybe_unus
char *bname, *mmap_filename;
u64 prev_time = 0;
bool found_cmd_mmap = false,
+found_coreutils_mmap = false,
 found_libc_mmap = false,
 found_vdso_mmap = false,
 found_ld_mmap = false;
@@ -254,6 +255,8 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int 
subtest __maybe_unus
if (bname != NULL) {
if (!found_cmd_mmap)
found_cmd_mmap = 
!strcmp(bname + 1, cmd);
+   if (!found_coreutils_mmap)
+   found_coreutils_mmap = 
!strcmp(bname + 1, "coreutils");
if (!found_libc_mmap)
found_libc_mmap = 
!strncmp(bname + 1, "libc", 4);
if (!found_ld_mmap)
@@ -292,7 +295,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int 
subtest __maybe_unus
}
 
 found_exit:
-   if (nr_events[PERF_RECORD_COMM] > 1) {
+   if (nr_events[PERF_RECORD_COMM] > 1 + !!found_coreutils_mmap) {
pr_debug("Excessive number of PERF_RECORD_COMM events!\n");
++errs;
}
@@ -302,7 +305,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int 
subtest __maybe_unus
++errs;
}
 
-   if (!found_cmd_mmap) {
+   if (!found_cmd_mmap && !found_coreutils_mmap) {
pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd);
++errs;
}
-- 
2.19.1



[PATCH 08/22] perf tools: Use fallback for sample_addr_correlates_sym() cases

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

thread__resolve() is used in the sample_addr_correlates_sym() cases
where 'addr' is a destination of a branch which does not necessarily
have the same cpumode as the 'ip'. Use the fallback function in that
case.

This patch depends on patch "perf tools: Add fallback functions for
cases where cpumode is insufficient".

Signed-off-by: Adrian Hunter 
Cc: Andi Kleen 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Leo Yan 
Cc: Mathieu Poirier 
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/20181106210712.12098-3-adrian.hun...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 9431b20c1337..24493200cf80 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1706,7 +1706,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr 
*attr)
 void thread__resolve(struct thread *thread, struct addr_location *al,
 struct perf_sample *sample)
 {
-   thread__find_map(thread, sample->cpumode, sample->addr, al);
+   thread__find_map_fb(thread, sample->cpumode, sample->addr, al);
 
al->cpu = sample->cpu;
al->sym = NULL;
-- 
2.19.1



[PATCH 11/22] perf tests record: Allow for 'sleep' being 'coreutils'

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

If the 'sleep' command is provided by coreutils, then the "PERF_RECORD_*
events & perf_sample fields" test will fail because the MMAP name is
'coreutils' not 'sleep', and there is an extra COMM event. Fix the test
to detect that case.

Signed-off-by: Adrian Hunter 
Cc: Jiri Olsa 
Link: http://lkml.kernel.org/r/20181122135545.16295-1-adrian.hun...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/tests/perf-record.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 34394cc05077..07f6bd8ed719 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -58,6 +58,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int 
subtest __maybe_unus
char *bname, *mmap_filename;
u64 prev_time = 0;
bool found_cmd_mmap = false,
+found_coreutils_mmap = false,
 found_libc_mmap = false,
 found_vdso_mmap = false,
 found_ld_mmap = false;
@@ -254,6 +255,8 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int 
subtest __maybe_unus
if (bname != NULL) {
if (!found_cmd_mmap)
found_cmd_mmap = 
!strcmp(bname + 1, cmd);
+   if (!found_coreutils_mmap)
+   found_coreutils_mmap = 
!strcmp(bname + 1, "coreutils");
if (!found_libc_mmap)
found_libc_mmap = 
!strncmp(bname + 1, "libc", 4);
if (!found_ld_mmap)
@@ -292,7 +295,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int 
subtest __maybe_unus
}
 
 found_exit:
-   if (nr_events[PERF_RECORD_COMM] > 1) {
+   if (nr_events[PERF_RECORD_COMM] > 1 + !!found_coreutils_mmap) {
pr_debug("Excessive number of PERF_RECORD_COMM events!\n");
++errs;
}
@@ -302,7 +305,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int 
subtest __maybe_unus
++errs;
}
 
-   if (!found_cmd_mmap) {
+   if (!found_cmd_mmap && !found_coreutils_mmap) {
pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd);
++errs;
}
-- 
2.19.1



[PATCH 08/22] perf tools: Use fallback for sample_addr_correlates_sym() cases

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

thread__resolve() is used in the sample_addr_correlates_sym() cases
where 'addr' is a destination of a branch which does not necessarily
have the same cpumode as the 'ip'. Use the fallback function in that
case.

This patch depends on patch "perf tools: Add fallback functions for
cases where cpumode is insufficient".

Signed-off-by: Adrian Hunter 
Cc: Andi Kleen 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Leo Yan 
Cc: Mathieu Poirier 
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/20181106210712.12098-3-adrian.hun...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 9431b20c1337..24493200cf80 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1706,7 +1706,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr 
*attr)
 void thread__resolve(struct thread *thread, struct addr_location *al,
 struct perf_sample *sample)
 {
-   thread__find_map(thread, sample->cpumode, sample->addr, al);
+   thread__find_map_fb(thread, sample->cpumode, sample->addr, al);
 
al->cpu = sample->cpu;
al->sym = NULL;
-- 
2.19.1



[PATCH 01/22] perf build: Give better hint about devel package for libssl

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

On Debian/Ubuntu it's libssl-dev, but on Fedora/RHEL/CentOS/etc. it's
openssl-devel; fix the hint accordingly.

Cc: Adrian Hunter 
Cc: David Ahern 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Stephane Eranian 
Cc: Wang Nan 
Fixes: 8ee4646038e4 ("perf build: Add libcrypto feature detection")
Link: https://lkml.kernel.org/n/tip-lnxqszts6aq2c9jy4b7ml...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Makefile.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index e110010e7faa..c643d5e0c26b 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -588,7 +588,7 @@ endif
 
 ifndef NO_LIBCRYPTO
   ifneq ($(feature-libcrypto), 1)
-msg := $(warning No libcrypto.h found, disables jitted code injection, 
please install libssl-devel or libssl-dev);
+msg := $(warning No libcrypto.h found, disables jitted code injection, 
please install openssl-devel or libssl-dev);
 NO_LIBCRYPTO := 1
   else
 CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
-- 
2.19.1



[PATCH 07/22] perf thread: Add fallback functions for cases where cpumode is insufficient

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Adrian Hunter 

For branch stacks or branch samples, the sample cpumode might not be
correct because it applies only to the sample 'ip' and not necessarily to
'addr' or branch stack addresses. Add fallback functions that can be
used to deal with those cases.

Signed-off-by: Adrian Hunter 
Cc: Andi Kleen 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Leo Yan 
Cc: Mathieu Poirier 
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/20181106210712.12098-2-adrian.hun...@intel.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/event.c   | 27 +++
 tools/perf/util/machine.c | 27 +++
 tools/perf/util/machine.h |  2 ++
 tools/perf/util/thread.h  |  4 
 4 files changed, 60 insertions(+)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index e9c108a6b1c3..9431b20c1337 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1577,6 +1577,24 @@ struct map *thread__find_map(struct thread *thread, u8 
cpumode, u64 addr,
return al->map;
 }
 
+/*
+ * For branch stacks or branch samples, the sample cpumode might not be correct
+ * because it applies only to the sample 'ip' and not necessary to 'addr' or
+ * branch stack addresses. If possible, use a fallback to deal with those 
cases.
+ */
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+   struct addr_location *al)
+{
+   struct map *map = thread__find_map(thread, cpumode, addr, al);
+   struct machine *machine = thread->mg->machine;
+   u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr);
+
+   if (map || addr_cpumode == cpumode)
+   return map;
+
+   return thread__find_map(thread, addr_cpumode, addr, al);
+}
+
 struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
   u64 addr, struct addr_location *al)
 {
@@ -1586,6 +1604,15 @@ struct symbol *thread__find_symbol(struct thread 
*thread, u8 cpumode,
return al->sym;
 }
 
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al)
+{
+   al->sym = NULL;
+   if (thread__find_map_fb(thread, cpumode, addr, al))
+   al->sym = map__find_symbol(al->map, al->addr);
+   return al->sym;
+}
+
 /*
  * Callers need to drop the reference to al->thread, obtained in
  * machine__findnew_thread()
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8f36ce813bc5..9397e3f2444d 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2592,6 +2592,33 @@ int machine__get_kernel_start(struct machine *machine)
return err;
 }
 
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr)
+{
+   u8 addr_cpumode = cpumode;
+   bool kernel_ip;
+
+   if (!machine->single_address_space)
+   goto out;
+
+   kernel_ip = machine__kernel_ip(machine, addr);
+   switch (cpumode) {
+   case PERF_RECORD_MISC_KERNEL:
+   case PERF_RECORD_MISC_USER:
+   addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL :
+  PERF_RECORD_MISC_USER;
+   break;
+   case PERF_RECORD_MISC_GUEST_KERNEL:
+   case PERF_RECORD_MISC_GUEST_USER:
+   addr_cpumode = kernel_ip ? PERF_RECORD_MISC_GUEST_KERNEL :
+  PERF_RECORD_MISC_GUEST_USER;
+   break;
+   default:
+   break;
+   }
+out:
+   return addr_cpumode;
+}
+
 struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
 {
return dsos__findnew(>dsos, filename);
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index ca897a73014c..ebde3ea70225 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -100,6 +100,8 @@ static inline bool machine__kernel_ip(struct machine 
*machine, u64 ip)
return ip >= kernel_start;
 }
 
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr);
+
 struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid);
 struct comm *machine__thread_exec_comm(struct machine *machine,
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 30e2b4c165fe..5920c3bb8ffe 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -96,9 +96,13 @@ struct thread *thread__main_thread(struct machine *machine, 
struct thread *threa
 
 struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
 struct addr_location *al);
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+   struct addr_location *al);
 
 struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
   u64 addr, struct addr_location *al);

[PATCH 15/22] perf top: Allow passing a kallsyms file

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

This basically replicates what was done for 'perf report' in:

   b226a5a72901 ("perf report: Allow user to specify path to kallsyms file")

This should help with resolving eBPF symbols, which are in kallsyms but,
of course, not in vmlinux.

Reported-by: Ivan Babrou 
Tested-by: Ivan Babrou 
Cc: Adrian Hunter 
Cc: Alexei Starovoitov 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Namhyung Kim 
Cc: Wang Nan 
Link: https://lkml.kernel.org/n/tip-x52mx1ybq8128rtg9hjrj...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Documentation/perf-top.txt | 3 +++
 tools/perf/builtin-top.c  | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/tools/perf/Documentation/perf-top.txt 
b/tools/perf/Documentation/perf-top.txt
index 808b664343c9..44d89fb9c788 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -70,6 +70,9 @@ Default is to monitor all CPUS.
 --ignore-vmlinux::
Ignore vmlinux files.
 
+--kallsyms=::
+   kallsyms pathname
+
 -m ::
 --mmap-pages=::
Number of mmap data pages (must be a power of two) or size
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index aa0c73e57924..1252d1759064 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1289,6 +1289,8 @@ int cmd_top(int argc, const char **argv)
   "file", "vmlinux pathname"),
OPT_BOOLEAN(0, "ignore-vmlinux", _conf.ignore_vmlinux,
"don't load vmlinux even if found"),
+   OPT_STRING(0, "kallsyms", _conf.kallsyms_name,
+  "file", "kallsyms pathname"),
OPT_BOOLEAN('K', "hide_kernel_symbols", _kernel_symbols,
"hide kernel symbols"),
OPT_CALLBACK('m', "mmap-pages", >mmap_pages, "pages",
-- 
2.19.1



[PATCH 04/22] perf map: Remove extra indirection from map__find()

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Eric Saint-Etienne 

A double pointer is used in maps__find() where a single pointer is enough
because the function doesn't affect the rbtree and the rbtree is locked.

Signed-off-by: Eric Saint-Etienne 
Acked-by: Jiri Olsa 
Cc: Alexander Shishkin 
Cc: Eric Saint-Etienne 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/1542969759-24346-1-git-send-email-eric.saint.etie...@oracle.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/map.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 781eed8e3265..a0d58b4d9c32 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -873,19 +873,18 @@ void maps__remove(struct maps *maps, struct map *map)
 
 struct map *maps__find(struct maps *maps, u64 ip)
 {
-   struct rb_node **p, *parent = NULL;
+   struct rb_node *p;
struct map *m;
 
down_read(>lock);
 
-   p = >entries.rb_node;
-   while (*p != NULL) {
-   parent = *p;
-   m = rb_entry(parent, struct map, rb_node);
+   p = maps->entries.rb_node;
+   while (p != NULL) {
+   m = rb_entry(p, struct map, rb_node);
if (ip < m->start)
-   p = &(*p)->rb_left;
+   p = p->rb_left;
else if (ip >= m->end)
-   p = &(*p)->rb_right;
+   p = p->rb_right;
else
goto out;
}
-- 
2.19.1



[PATCH 04/22] perf map: Remove extra indirection from map__find()

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Eric Saint-Etienne 

A double pointer is used in maps__find() where a single pointer is enough
because the function doesn't affect the rbtree and the rbtree is locked.

Signed-off-by: Eric Saint-Etienne 
Acked-by: Jiri Olsa 
Cc: Alexander Shishkin 
Cc: Eric Saint-Etienne 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/1542969759-24346-1-git-send-email-eric.saint.etie...@oracle.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/map.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 781eed8e3265..a0d58b4d9c32 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -873,19 +873,18 @@ void maps__remove(struct maps *maps, struct map *map)
 
 struct map *maps__find(struct maps *maps, u64 ip)
 {
-   struct rb_node **p, *parent = NULL;
+   struct rb_node *p;
struct map *m;
 
down_read(>lock);
 
-   p = >entries.rb_node;
-   while (*p != NULL) {
-   parent = *p;
-   m = rb_entry(parent, struct map, rb_node);
+   p = maps->entries.rb_node;
+   while (p != NULL) {
+   m = rb_entry(p, struct map, rb_node);
if (ip < m->start)
-   p = &(*p)->rb_left;
+   p = p->rb_left;
else if (ip >= m->end)
-   p = &(*p)->rb_right;
+   p = p->rb_right;
else
goto out;
}
-- 
2.19.1



[PATCH 03/22] perf stat: Fix CSV mode column output for non-cgroup events

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Stephane Eranian 

When using the -x option, perf stat prints CSV-style output with one
event per line.  For each event, it prints the count, the unit, the
event name, the cgroup, and a bunch of other event specific fields (such
as insn per cycles).

When you use CSV-style mode, you expect a normalized output where each
event is printed with the same number of fields regardless of what it is
so it can easily be imported into a spreadsheet or parsed.

For instance, if an event does not have a unit, then print an empty
field for it.

Although this approach was implemented for the unit, it was not for the
cgroup.

When mixing cgroup and non-cgroup events, non-cgroup events would
not show an empty field; instead the next field was printed, so the
columns did not line up correctly.

This patch fixes the cgroup output issues by forcing an empty field
for non-cgroup events as soon as one event has cgroup.

Before:

   @ @cycles @foo@ 0@100.00@@
  2531614   @ @cycles @6420922@100.00@@

foo cgroup lines up with time_running!

After:

   @ @cycles @foo @0   @100.00@@
  2594834   @ @cycles @@5287372 @100.00@@

Fields line up.

Signed-off-by: Stephane Eranian 
Acked-by: Jiri Olsa 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/1541587845-9150-1-git-send-email-eran...@google.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/stat-display.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index e7b4c44ebb62..665ee374fc01 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -59,6 +59,15 @@ static void print_noise(struct perf_stat_config *config,
print_noise_pct(config, stddev_stats(>res_stats[0]), avg);
 }
 
+static void print_cgroup(struct perf_stat_config *config, struct perf_evsel 
*evsel)
+{
+   if (nr_cgroups) {
+   const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name  : "";
+   fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
+   }
+}
+
+
 static void aggr_printout(struct perf_stat_config *config,
  struct perf_evsel *evsel, int id, int nr)
 {
@@ -336,8 +345,7 @@ static void abs_printout(struct perf_stat_config *config,
 
fprintf(output, "%-*s", config->csv_output ? 0 : 25, 
perf_evsel__name(evsel));
 
-   if (evsel->cgrp)
-   fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name);
+   print_cgroup(config, evsel);
 }
 
 static bool is_mixed_hw_group(struct perf_evsel *counter)
@@ -431,9 +439,7 @@ static void printout(struct perf_stat_config *config, int 
id, int nr,
config->csv_output ? 0 : -25,
perf_evsel__name(counter));
 
-   if (counter->cgrp)
-   fprintf(config->output, "%s%s",
-   config->csv_sep, counter->cgrp->name);
+   print_cgroup(config, counter);
 
if (!config->csv_output)
pm(config, , NULL, NULL, "", 0);
-- 
2.19.1



[PATCH 05/22] perf env: Also consider env->arch == NULL as local operation

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

We'll set a new machine field based on env->arch, which for live mode,
as with 'perf top', means we need to use uname() to figure out the name
of the arch. Fix perf_env__arch() to consider both (env == NULL) and
(env->arch == NULL) as local operation.

Cc: Adrian Hunter 
Cc: Andi Kleen 
Cc: David Ahern 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Leo Yan 
Cc: Mathieu Poirier 
Cc: Namhyung Kim 
Cc: Wang Nan 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/n/tip-vcz4ufzdon7cwy8dm2ua5...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/env.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 59f38c7693f8..4c23779e271a 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -166,7 +166,7 @@ const char *perf_env__arch(struct perf_env *env)
struct utsname uts;
char *arch_name;
 
-   if (!env) { /* Assume local operation */
+   if (!env || !env->arch) { /* Assume local operation */
if (uname() < 0)
return NULL;
arch_name = uts.machine;
-- 
2.19.1



[PATCH 02/22] perf stat: Fix shadow stats for clock events

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Ravi Bangoria 

Commit 0aa802a79469 ("perf stat: Get rid of extra clock display
function") introduced scale and unit for clock events. Thus,
perf_stat__update_shadow_stats() now saves scaled values of clock events
in msecs, instead of original nsecs. But while calculating values of
shadow stats we still consider clock event values in nsecs. This results
in wrong shadow stat values. For example:

  # ./perf stat -e task-clock,cycles ls

  2.60 msec task-clock:u#0.877 CPUs utilized
 2,430,564  cycles:u# 1215282.000 GHz

Fix this by saving original nsec values for clock events in
perf_stat__update_shadow_stats(). After patch:

  # ./perf stat -e task-clock,cycles ls

  3.14 msec task-clock:u#0.839 CPUs utilized
 3,094,528  cycles:u#0.985 GHz

Suggested-by: Jiri Olsa 
Reported-by: Anton Blanchard 
Signed-off-by: Ravi Bangoria 
Reviewed-by: Jiri Olsa 
Cc: Alexander Shishkin 
Cc: Jin Yao 
Cc: Namhyung Kim 
Cc: Thomas Richter 
Cc: yuzhouj...@didichuxing.com
Fixes: 0aa802a79469 ("perf stat: Get rid of extra clock display function")
Link: 
http://lkml.kernel.org/r/20181116042843.24067-1-ravi.bango...@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/stat-shadow.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index f0a8cec55c47..3c22c58b3e90 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -209,11 +209,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel 
*counter, u64 count,
int cpu, struct runtime_stat *st)
 {
int ctx = evsel_context(counter);
+   u64 count_ns = count;
 
count *= counter->scale;
 
if (perf_evsel__is_clock(counter))
-   update_runtime_stat(st, STAT_NSECS, 0, cpu, count);
+   update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
-- 
2.19.1



[PATCH 03/22] perf stat: Fix CSV mode column output for non-cgroup events

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Stephane Eranian 

When using the -x option, perf stat prints CSV-style output with one
event per line.  For each event, it prints the count, the unit, the
event name, the cgroup, and a bunch of other event specific fields (such
as insn per cycles).

When you use CSV-style mode, you expect a normalized output where each
event is printed with the same number of fields regardless of what it is
so it can easily be imported into a spreadsheet or parsed.

For instance, if an event does not have a unit, then print an empty
field for it.

Although this approach was implemented for the unit, it was not for the
cgroup.

When mixing cgroup and non-cgroup events, non-cgroup events would
not show an empty field; instead the next field was printed, making
columns not line up correctly.

This patch fixes the cgroup output issues by forcing an empty field
for non-cgroup events as soon as one event has cgroup.

Before:

  <not counted> @ @cycles @foo@ 0@100.00@@
  2531614   @ @cycles @6420922@100.00@@

foo cgroup lines up with time_running!

After:

  <not counted> @ @cycles @foo @0   @100.00@@
  2594834   @ @cycles @@5287372 @100.00@@

Fields line up.

Signed-off-by: Stephane Eranian 
Acked-by: Jiri Olsa 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/1541587845-9150-1-git-send-email-eran...@google.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/stat-display.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index e7b4c44ebb62..665ee374fc01 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -59,6 +59,15 @@ static void print_noise(struct perf_stat_config *config,
print_noise_pct(config, stddev_stats(>res_stats[0]), avg);
 }
 
+static void print_cgroup(struct perf_stat_config *config, struct perf_evsel 
*evsel)
+{
+   if (nr_cgroups) {
+   const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name  : "";
+   fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
+   }
+}
+
+
 static void aggr_printout(struct perf_stat_config *config,
  struct perf_evsel *evsel, int id, int nr)
 {
@@ -336,8 +345,7 @@ static void abs_printout(struct perf_stat_config *config,
 
fprintf(output, "%-*s", config->csv_output ? 0 : 25, 
perf_evsel__name(evsel));
 
-   if (evsel->cgrp)
-   fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name);
+   print_cgroup(config, evsel);
 }
 
 static bool is_mixed_hw_group(struct perf_evsel *counter)
@@ -431,9 +439,7 @@ static void printout(struct perf_stat_config *config, int 
id, int nr,
config->csv_output ? 0 : -25,
perf_evsel__name(counter));
 
-   if (counter->cgrp)
-   fprintf(config->output, "%s%s",
-   config->csv_sep, counter->cgrp->name);
+   print_cgroup(config, counter);
 
if (!config->csv_output)
pm(config, , NULL, NULL, "", 0);
-- 
2.19.1



[PATCH 05/22] perf env: Also consider env->arch == NULL as local operation

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

We'll set a new machine field based on env->arch, which for live mode,
like with 'perf top', means we need to use uname() to figure out the name
of the arch. Fix perf_env__arch() to consider both (env == NULL) and
(env->arch == NULL) as local operation.

Cc: Adrian Hunter 
Cc: Andi Kleen 
Cc: David Ahern 
Cc: David S. Miller 
Cc: Jiri Olsa 
Cc: Leo Yan 
Cc: Mathieu Poirier 
Cc: Namhyung Kim 
Cc: Wang Nan 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/n/tip-vcz4ufzdon7cwy8dm2ua5...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/env.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 59f38c7693f8..4c23779e271a 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -166,7 +166,7 @@ const char *perf_env__arch(struct perf_env *env)
struct utsname uts;
char *arch_name;
 
-   if (!env) { /* Assume local operation */
+   if (!env || !env->arch) { /* Assume local operation */
if (uname() < 0)
return NULL;
arch_name = uts.machine;
-- 
2.19.1



[PATCH 02/22] perf stat: Fix shadow stats for clock events

2018-11-30 Thread Arnaldo Carvalho de Melo
From: Ravi Bangoria 

Commit 0aa802a79469 ("perf stat: Get rid of extra clock display
function") introduced scale and unit for clock events. Thus,
perf_stat__update_shadow_stats() now saves scaled values of clock events
in msecs, instead of original nsecs. But while calculating values of
shadow stats we still consider clock event values in nsecs. This results
in wrong shadow stat values. For example:

  # ./perf stat -e task-clock,cycles ls

  2.60 msec task-clock:u#0.877 CPUs utilized
 2,430,564  cycles:u# 1215282.000 GHz

Fix this by saving original nsec values for clock events in
perf_stat__update_shadow_stats(). After patch:

  # ./perf stat -e task-clock,cycles ls

  3.14 msec task-clock:u#0.839 CPUs utilized
 3,094,528  cycles:u#0.985 GHz

Suggested-by: Jiri Olsa 
Reported-by: Anton Blanchard 
Signed-off-by: Ravi Bangoria 
Reviewed-by: Jiri Olsa 
Cc: Alexander Shishkin 
Cc: Jin Yao 
Cc: Namhyung Kim 
Cc: Thomas Richter 
Cc: yuzhouj...@didichuxing.com
Fixes: 0aa802a79469 ("perf stat: Get rid of extra clock display function")
Link: 
http://lkml.kernel.org/r/20181116042843.24067-1-ravi.bango...@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/stat-shadow.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index f0a8cec55c47..3c22c58b3e90 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -209,11 +209,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel 
*counter, u64 count,
int cpu, struct runtime_stat *st)
 {
int ctx = evsel_context(counter);
+   u64 count_ns = count;
 
count *= counter->scale;
 
if (perf_evsel__is_clock(counter))
-   update_runtime_stat(st, STAT_NSECS, 0, cpu, count);
+   update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
-- 
2.19.1



[PATCH v11 1/3] dt-bindings: clock: Update GCC bindings for protected-clocks

2018-11-30 Thread Taniya Das
Add protected-clocks list which could be used to specify the clocks to be
bypassed on certain devices.

Reviewed-by: Rob Herring 
Signed-off-by: Taniya Das 
---
 Documentation/devicetree/bindings/clock/qcom,gcc.txt | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt 
b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
index 52d9345..5e37de9 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
@@ -35,6 +35,8 @@ be part of GCC and hence the TSENS properties can also be
 part of the GCC/clock-controller node.
 For more details on the TSENS properties please refer
 Documentation/devicetree/bindings/thermal/qcom-tsens.txt
+- protected-clocks : Protected clock specifier list as per common clock
+ binding.

 Example:
clock-controller@90 {
@@ -55,3 +57,15 @@ Example of GCC with TSENS properties:
#reset-cells = <1>;
#thermal-sensor-cells = <1>;
};
+
+Example of GCC with protected-clocks properties:
+   clock-controller@10 {
+   compatible = "qcom,gcc-sdm845";
+   reg = <0x10 0x1f>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   #power-domain-cells = <1>;
+   protected-clocks = ,
+  ,
+  ;
+   };
--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of the Code Aurora Forum, hosted by the Linux Foundation.



[PATCH v11 1/3] dt-bindings: clock: Update GCC bindings for protected-clocks

2018-11-30 Thread Taniya Das
Add protected-clocks list which could be used to specify the clocks to be
bypassed on certain devices.

Reviewed-by: Rob Herring 
Signed-off-by: Taniya Das 
---
 Documentation/devicetree/bindings/clock/qcom,gcc.txt | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt 
b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
index 52d9345..5e37de9 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
@@ -35,6 +35,8 @@ be part of GCC and hence the TSENS properties can also be
 part of the GCC/clock-controller node.
 For more details on the TSENS properties please refer
 Documentation/devicetree/bindings/thermal/qcom-tsens.txt
+- protected-clocks : Protected clock specifier list as per common clock
+ binding.

 Example:
clock-controller@90 {
@@ -55,3 +57,15 @@ Example of GCC with TSENS properties:
#reset-cells = <1>;
#thermal-sensor-cells = <1>;
};
+
+Example of GCC with protected-clocks properties:
+   clock-controller@10 {
+   compatible = "qcom,gcc-sdm845";
+   reg = <0x10 0x1f>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   #power-domain-cells = <1>;
+   protected-clocks = ,
+  ,
+  ;
+   };
--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of the Code Aurora Forum, hosted by the Linux Foundation.



[PATCH v11 2/3] dt-bindings: clock: Introduce QCOM LPASS clock bindings

2018-11-30 Thread Taniya Das
Add device tree bindings for Low Power Audio subsystem clock controller for
Qualcomm Technology Inc's SDM845 SoCs.

Reviewed-by: Rob Herring 
Signed-off-by: Taniya Das 
---
 .../devicetree/bindings/clock/qcom,gcc.txt |  4 +++-
 .../devicetree/bindings/clock/qcom,lpasscc.txt | 26 ++
 include/dt-bindings/clock/qcom,gcc-sdm845.h|  2 ++
 include/dt-bindings/clock/qcom,lpass-sdm845.h  | 15 +
 4 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,lpasscc.txt
 create mode 100644 include/dt-bindings/clock/qcom,lpass-sdm845.h

diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt 
b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
index 5e37de9..8661c3c 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
@@ -67,5 +67,7 @@ Example of GCC with protected-clocks properties:
#power-domain-cells = <1>;
protected-clocks = ,
   ,
-  ;
+  ,
+  ,
+  ;
};
diff --git a/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt 
b/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt
new file mode 100644
index 000..b9e9787
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt
@@ -0,0 +1,26 @@
+Qualcomm LPASS Clock Controller Binding
+---
+
+Required properties :
+- compatible   : shall contain "qcom,sdm845-lpasscc"
+- #clock-cells : from common clock binding, shall contain 1.
+- reg  : shall contain base register address and size,
+ in the order
+   Index-0 maps to LPASS_CC register region
+   Index-1 maps to LPASS_QDSP6SS register region
+
+Optional properties :
+- reg-names: register names of LPASS domain
+"cc", "qdsp6ss".
+
+Example:
+
+The below node has to be defined in the cases where the LPASS peripheral loader
+would bring the subsystem out of reset.
+
+   lpasscc: clock-controller@17014000 {
+   compatible = "qcom,sdm845-lpasscc";
+   reg = <0x17014000 0x1f004>, <0x1730 0x200>;
+   reg-names = "cc", "qdsp6ss";
+   #clock-cells = <1>;
+   };
diff --git a/include/dt-bindings/clock/qcom,gcc-sdm845.h 
b/include/dt-bindings/clock/qcom,gcc-sdm845.h
index b8eae5a..968fa65 100644
--- a/include/dt-bindings/clock/qcom,gcc-sdm845.h
+++ b/include/dt-bindings/clock/qcom,gcc-sdm845.h
@@ -197,6 +197,8 @@
 #define GCC_QSPI_CORE_CLK_SRC  187
 #define GCC_QSPI_CORE_CLK  188
 #define GCC_QSPI_CNOC_PERIPH_AHB_CLK   189
+#define GCC_LPASS_Q6_AXI_CLK   190
+#define GCC_LPASS_SWAY_CLK 191

 /* GCC Resets */
 #define GCC_MMSS_BCR   0
diff --git a/include/dt-bindings/clock/qcom,lpass-sdm845.h 
b/include/dt-bindings/clock/qcom,lpass-sdm845.h
new file mode 100644
index 000..6590508
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,lpass-sdm845.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_SDM_LPASS_SDM845_H
+#define _DT_BINDINGS_CLK_SDM_LPASS_SDM845_H
+
+#define LPASS_Q6SS_AHBM_AON_CLK0
+#define LPASS_Q6SS_AHBS_AON_CLK1
+#define LPASS_QDSP6SS_XO_CLK   2
+#define LPASS_QDSP6SS_SLEEP_CLK3
+#define LPASS_QDSP6SS_CORE_CLK 4
+
+#endif
--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of the Code Aurora Forum, hosted by the Linux Foundation.



[PATCH v11 3/3] clk: qcom: Add lpass clock controller driver for SDM845

2018-11-30 Thread Taniya Das
Add support for the lpass clock controller found on SDM845 based devices.
This would allow lpass peripheral loader drivers to control the clocks to
bring the subsystem out of reset.
LPASS clocks present on the global clock controller would be registered
with the clock framework based on the protected-clock flag. Also do not
gate these clocks if they are left unused, as the lpass clocks require
the global clock controller lpass clocks to be enabled before they are
accessed. Mark the GCC lpass clocks as CRITICAL, for the LPASS clock
access.

Signed-off-by: Taniya Das 
---
 drivers/clk/qcom/Kconfig  |   9 ++
 drivers/clk/qcom/Makefile |   1 +
 drivers/clk/qcom/gcc-sdm845.c |  32 +++
 drivers/clk/qcom/lpasscc-sdm845.c | 179 ++
 4 files changed, 221 insertions(+)
 create mode 100644 drivers/clk/qcom/lpasscc-sdm845.c

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index 6f3e466..d87a22e 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -302,6 +302,15 @@ config SDM_DISPCC_845
  Say Y if you want to support display devices and functionality such as
  splash screen.

+config SDM_LPASSCC_845
+   tristate "SDM845 Low Power Audio Subsystem (LPAAS) Clock Controller"
+   depends on COMMON_CLK_QCOM
+   select SDM_GCC_845
+   help
+ Support for the LPASS clock controller on SDM845 devices.
+ Say Y if you want to use the LPASS branch clocks of the LPASS clock
+ controller to reset the LPASS subsystem.
+
 config SPMI_PMIC_CLKDIV
tristate "SPMI PMIC clkdiv Support"
depends on (COMMON_CLK_QCOM && SPMI) || COMPILE_TEST
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 6ed2827..ee8d069 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_SDM_DISPCC_845) += dispcc-sdm845.o
 obj-$(CONFIG_SDM_GCC_660) += gcc-sdm660.o
 obj-$(CONFIG_SDM_GCC_845) += gcc-sdm845.o
 obj-$(CONFIG_SDM_GPUCC_845) += gpucc-sdm845.o
+obj-$(CONFIG_SDM_LPASSCC_845) += lpasscc-sdm845.o
 obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o
 obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o
 obj-$(CONFIG_KPSS_XCC) += kpss-xcc.o
diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c
index f133b7f..db90f9b 100644
--- a/drivers/clk/qcom/gcc-sdm845.c
+++ b/drivers/clk/qcom/gcc-sdm845.c
@@ -3153,6 +3153,34 @@ enum {
},
 };

+static struct clk_branch gcc_lpass_q6_axi_clk = {
+   .halt_reg = 0x47000,
+   .halt_check = BRANCH_HALT,
+   .clkr = {
+   .enable_reg = 0x47000,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "gcc_lpass_q6_axi_clk",
+   .flags = CLK_IS_CRITICAL,
+   .ops = _branch2_ops,
+   },
+   },
+};
+
+static struct clk_branch gcc_lpass_sway_clk = {
+   .halt_reg = 0x47008,
+   .halt_check = BRANCH_HALT,
+   .clkr = {
+   .enable_reg = 0x47008,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "gcc_lpass_sway_clk",
+   .flags = CLK_IS_CRITICAL,
+   .ops = _branch2_ops,
+   },
+   },
+};
+
 static struct gdsc pcie_0_gdsc = {
.gdscr = 0x6b004,
.pd = {
@@ -3453,6 +3481,10 @@ enum {
[GCC_QSPI_CORE_CLK_SRC] = _qspi_core_clk_src.clkr,
[GCC_QSPI_CORE_CLK] = _qspi_core_clk.clkr,
[GCC_QSPI_CNOC_PERIPH_AHB_CLK] = _qspi_cnoc_periph_ahb_clk.clkr,
+#ifdef CONFIG_SDM_LPASSCC_845
+   [GCC_LPASS_Q6_AXI_CLK] = _lpass_q6_axi_clk.clkr,
+   [GCC_LPASS_SWAY_CLK] = _lpass_sway_clk.clkr,
+#endif
 };

 static const struct qcom_reset_map gcc_sdm845_resets[] = {
diff --git a/drivers/clk/qcom/lpasscc-sdm845.c 
b/drivers/clk/qcom/lpasscc-sdm845.c
new file mode 100644
index 000..e246b99
--- /dev/null
+++ b/drivers/clk/qcom/lpasscc-sdm845.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "clk-regmap.h"
+#include "clk-branch.h"
+#include "common.h"
+
+static struct clk_branch lpass_q6ss_ahbm_aon_clk = {
+   .halt_reg = 0x12000,
+   .halt_check = BRANCH_VOTED,
+   .clkr = {
+   .enable_reg = 0x12000,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "lpass_q6ss_ahbm_aon_clk",
+   .ops = _branch2_ops,
+   },
+   },
+};
+
+static struct clk_branch lpass_q6ss_ahbs_aon_clk = {
+   .halt_reg = 0x1f000,
+   .halt_check = BRANCH_VOTED,
+   .clkr = {
+   .enable_reg = 0x1f000,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+  

Re: [RFC v3 06/19] arch: um: enable running kunit from User Mode Linux

2018-11-30 Thread Luis Chamberlain
On Fri, Nov 30, 2018 at 08:05:34AM -0600, Rob Herring wrote:
> On Thu, Nov 29, 2018 at 9:37 PM Luis Chamberlain  wrote:
> >
> > On Wed, Nov 28, 2018 at 03:26:03PM -0600, Rob Herring wrote:
> > > On Wed, Nov 28, 2018 at 1:37 PM Brendan Higgins
> > >  wrote:
> > > >
> > > > Make minimum number of changes outside of the KUnit directories for
> > > > KUnit to build and run using UML.
> > >
> > > There's nothing in this patch limiting this to UML.
> >
> > Not that one, but the abort thing segv thing is, eventually.
> > To support other architectures we'd need to make a wrapper to that
> > hack which Brendan added, and then allow each os to implement
> > its own call, and add an asm-generic helper.
> 
> I've not looked into why this is needed, but can't you make the abort
> support optional and arches can select it when they support it.

It's why I have asked for it to be properly documented. The patches in no
way illustrate *why* such a thing is done. And if we are going to
potentially have other archs do something similar, it is best to make it
explicit.

> At
> least before, the DT unittests didn't need this to run and shouldn't
> depend on it after converting to kunit.

  Luis


[PATCH v11 2/3] dt-bindings: clock: Introduce QCOM LPASS clock bindings

2018-11-30 Thread Taniya Das
Add device tree bindings for Low Power Audio subsystem clock controller for
Qualcomm Technology Inc's SDM845 SoCs.

Reviewed-by: Rob Herring 
Signed-off-by: Taniya Das 
---
 .../devicetree/bindings/clock/qcom,gcc.txt |  4 +++-
 .../devicetree/bindings/clock/qcom,lpasscc.txt | 26 ++
 include/dt-bindings/clock/qcom,gcc-sdm845.h|  2 ++
 include/dt-bindings/clock/qcom,lpass-sdm845.h  | 15 +
 4 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,lpasscc.txt
 create mode 100644 include/dt-bindings/clock/qcom,lpass-sdm845.h

diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt 
b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
index 5e37de9..8661c3c 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
@@ -67,5 +67,7 @@ Example of GCC with protected-clocks properties:
#power-domain-cells = <1>;
protected-clocks = ,
   ,
-  ;
+  ,
+  ,
+  ;
};
diff --git a/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt 
b/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt
new file mode 100644
index 000..b9e9787
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt
@@ -0,0 +1,26 @@
+Qualcomm LPASS Clock Controller Binding
+---
+
+Required properties :
+- compatible   : shall contain "qcom,sdm845-lpasscc"
+- #clock-cells : from common clock binding, shall contain 1.
+- reg  : shall contain base register address and size,
+ in the order
+   Index-0 maps to LPASS_CC register region
+   Index-1 maps to LPASS_QDSP6SS register region
+
+Optional properties :
+- reg-names: register names of LPASS domain
+"cc", "qdsp6ss".
+
+Example:
+
+The below node has to be defined in the cases where the LPASS peripheral loader
+would bring the subsystem out of reset.
+
+   lpasscc: clock-controller@17014000 {
+   compatible = "qcom,sdm845-lpasscc";
+   reg = <0x17014000 0x1f004>, <0x1730 0x200>;
+   reg-names = "cc", "qdsp6ss";
+   #clock-cells = <1>;
+   };
diff --git a/include/dt-bindings/clock/qcom,gcc-sdm845.h 
b/include/dt-bindings/clock/qcom,gcc-sdm845.h
index b8eae5a..968fa65 100644
--- a/include/dt-bindings/clock/qcom,gcc-sdm845.h
+++ b/include/dt-bindings/clock/qcom,gcc-sdm845.h
@@ -197,6 +197,8 @@
 #define GCC_QSPI_CORE_CLK_SRC  187
 #define GCC_QSPI_CORE_CLK  188
 #define GCC_QSPI_CNOC_PERIPH_AHB_CLK   189
+#define GCC_LPASS_Q6_AXI_CLK   190
+#define GCC_LPASS_SWAY_CLK 191

 /* GCC Resets */
 #define GCC_MMSS_BCR   0
diff --git a/include/dt-bindings/clock/qcom,lpass-sdm845.h 
b/include/dt-bindings/clock/qcom,lpass-sdm845.h
new file mode 100644
index 000..6590508
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,lpass-sdm845.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_SDM_LPASS_SDM845_H
+#define _DT_BINDINGS_CLK_SDM_LPASS_SDM845_H
+
+#define LPASS_Q6SS_AHBM_AON_CLK0
+#define LPASS_Q6SS_AHBS_AON_CLK1
+#define LPASS_QDSP6SS_XO_CLK   2
+#define LPASS_QDSP6SS_SLEEP_CLK3
+#define LPASS_QDSP6SS_CORE_CLK 4
+
+#endif
--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of the Code Aurora Forum, hosted by the Linux Foundation.



[PATCH v11 3/3] clk: qcom: Add lpass clock controller driver for SDM845

2018-11-30 Thread Taniya Das
Add support for the lpass clock controller found on SDM845 based devices.
This would allow lpass peripheral loader drivers to control the clocks to
bring the subsystem out of reset.
LPASS clocks present on the global clock controller would be registered
with the clock framework based on the protected-clock flag. Also do not
gate these clocks if they are left unused, as the lpass clocks require
the global clock controller lpass clocks to be enabled before they are
accessed. Mark the GCC lpass clocks as CRITICAL, for the LPASS clock
access.

Signed-off-by: Taniya Das 
---
 drivers/clk/qcom/Kconfig  |   9 ++
 drivers/clk/qcom/Makefile |   1 +
 drivers/clk/qcom/gcc-sdm845.c |  32 +++
 drivers/clk/qcom/lpasscc-sdm845.c | 179 ++
 4 files changed, 221 insertions(+)
 create mode 100644 drivers/clk/qcom/lpasscc-sdm845.c

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index 6f3e466..d87a22e 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -302,6 +302,15 @@ config SDM_DISPCC_845
  Say Y if you want to support display devices and functionality such as
  splash screen.

+config SDM_LPASSCC_845
+   tristate "SDM845 Low Power Audio Subsystem (LPAAS) Clock Controller"
+   depends on COMMON_CLK_QCOM
+   select SDM_GCC_845
+   help
+ Support for the LPASS clock controller on SDM845 devices.
+ Say Y if you want to use the LPASS branch clocks of the LPASS clock
+ controller to reset the LPASS subsystem.
+
 config SPMI_PMIC_CLKDIV
tristate "SPMI PMIC clkdiv Support"
depends on (COMMON_CLK_QCOM && SPMI) || COMPILE_TEST
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 6ed2827..ee8d069 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_SDM_DISPCC_845) += dispcc-sdm845.o
 obj-$(CONFIG_SDM_GCC_660) += gcc-sdm660.o
 obj-$(CONFIG_SDM_GCC_845) += gcc-sdm845.o
 obj-$(CONFIG_SDM_GPUCC_845) += gpucc-sdm845.o
+obj-$(CONFIG_SDM_LPASSCC_845) += lpasscc-sdm845.o
 obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o
 obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o
 obj-$(CONFIG_KPSS_XCC) += kpss-xcc.o
diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c
index f133b7f..db90f9b 100644
--- a/drivers/clk/qcom/gcc-sdm845.c
+++ b/drivers/clk/qcom/gcc-sdm845.c
@@ -3153,6 +3153,34 @@ enum {
},
 };

+static struct clk_branch gcc_lpass_q6_axi_clk = {
+   .halt_reg = 0x47000,
+   .halt_check = BRANCH_HALT,
+   .clkr = {
+   .enable_reg = 0x47000,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "gcc_lpass_q6_axi_clk",
+   .flags = CLK_IS_CRITICAL,
+   .ops = _branch2_ops,
+   },
+   },
+};
+
+static struct clk_branch gcc_lpass_sway_clk = {
+   .halt_reg = 0x47008,
+   .halt_check = BRANCH_HALT,
+   .clkr = {
+   .enable_reg = 0x47008,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "gcc_lpass_sway_clk",
+   .flags = CLK_IS_CRITICAL,
+   .ops = _branch2_ops,
+   },
+   },
+};
+
 static struct gdsc pcie_0_gdsc = {
.gdscr = 0x6b004,
.pd = {
@@ -3453,6 +3481,10 @@ enum {
[GCC_QSPI_CORE_CLK_SRC] = _qspi_core_clk_src.clkr,
[GCC_QSPI_CORE_CLK] = _qspi_core_clk.clkr,
[GCC_QSPI_CNOC_PERIPH_AHB_CLK] = _qspi_cnoc_periph_ahb_clk.clkr,
+#ifdef CONFIG_SDM_LPASSCC_845
+   [GCC_LPASS_Q6_AXI_CLK] = _lpass_q6_axi_clk.clkr,
+   [GCC_LPASS_SWAY_CLK] = _lpass_sway_clk.clkr,
+#endif
 };

 static const struct qcom_reset_map gcc_sdm845_resets[] = {
diff --git a/drivers/clk/qcom/lpasscc-sdm845.c 
b/drivers/clk/qcom/lpasscc-sdm845.c
new file mode 100644
index 000..e246b99
--- /dev/null
+++ b/drivers/clk/qcom/lpasscc-sdm845.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "clk-regmap.h"
+#include "clk-branch.h"
+#include "common.h"
+
+static struct clk_branch lpass_q6ss_ahbm_aon_clk = {
+   .halt_reg = 0x12000,
+   .halt_check = BRANCH_VOTED,
+   .clkr = {
+   .enable_reg = 0x12000,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "lpass_q6ss_ahbm_aon_clk",
+   .ops = &clk_branch2_ops,
+   },
+   },
+};
+
+static struct clk_branch lpass_q6ss_ahbs_aon_clk = {
+   .halt_reg = 0x1f000,
+   .halt_check = BRANCH_VOTED,
+   .clkr = {
+   .enable_reg = 0x1f000,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+  

Re: [RFC v3 06/19] arch: um: enable running kunit from User Mode Linux

2018-11-30 Thread Luis Chamberlain
On Fri, Nov 30, 2018 at 08:05:34AM -0600, Rob Herring wrote:
> On Thu, Nov 29, 2018 at 9:37 PM Luis Chamberlain  wrote:
> >
> > On Wed, Nov 28, 2018 at 03:26:03PM -0600, Rob Herring wrote:
> > > On Wed, Nov 28, 2018 at 1:37 PM Brendan Higgins
> > >  wrote:
> > > >
> > > > Make minimum number of changes outside of the KUnit directories for
> > > > KUnit to build and run using UML.
> > >
> > > There's nothing in this patch limiting this to UML.
> >
> > Not that one, but the abort thing segv thing is, eventually.
> > To support other architectures we'd need to make a wrapper to that
> > hack which Brendan added, and then allow each os to implement
> > its own call, and add an asm-generic helper.
> 
> I've not looked into why this is needed, but can't you make the abort
> support optional and arches can select it when they support it.

That's why I have asked for it to be properly documented. The patches in no
way illustrate *why* such a thing is done. And if we are going to
potentially have other archs do something similar, it is best to make it
explicit.

> At
> least before, the DT unittests didn't need this to run and shouldn't
> depend on it after converting to kunit.

  Luis


[PATCH v11 0/3] Add support for LPASS clock controller for SDM845

2018-11-30 Thread Taniya Das
 [v11]
  * Add the GCC LPASS clocks only if LPASSCC config is present.
  * Update the comment in lpasscc driver.

 [v10]
  * Separate change to add  protected-clocks list in GCC binding.
  * Remove the clock support 'LPASS_AUDIO_WRAPPER_AON_CLK' as it is always ON
  clock.
  * Add few comments for module description and match table.

 [v9]
  * Update GCC documentation binding with the protected-clocks list.
  * Update the GCC code to add the GCC lpass clocks.
  * This depends on the acceptance of
  https://lore.kernel.org/lkml/20181105194011.43770-1-swb...@chromium.org/

 [v8]
  * Add CLK_IS_CRITICAL for GCC lpass clocks for lpass clocks access to go
  through always.

 [v7]
  * Cleanup header file inclusions.
  * Move the comments along with the flags.
  * Update the commit with details for CLK_IGNORE_UNUSED.

 [v6]
  * Update the logic to register the lpass clocks when the device tree property
   is not present.
  * Add the CLK_IGNORE_UNUSED flag for the lpass clocks to not gate the clocks
   at late_init.

 [v5]
  * Address the comments in device tree binding to update the reg-names,
update the unit address in lpass clock node example and also
add reg property for the gcc clock node.
  * Update the lpass driver to take care of the reg-names.

 [v4]
  * Update the description in GCC Documentation binding for
  'qcom,lpass-protected'.
  * Remove 'qcom,lpass-protected' from LPASS Documentation binding.
  * Update KConfig to use Low Power Audio Subsystem.
  * Add module_exit() and also update return value for
devm_ioremap_resource failure.

 [v3]
  * Add a device tree property to identify lpass protected GCC clocks.
  * Update the GCC driver code to register the lpass clocks when the flag is
   defined.
  * Add comment for clocks using the BRANCH_HALT_SKIP flag.
  * Use platform APIs instead of of_address_to_resource.
  * Replace devm_ioremap with devm_ioremap_resource.
  * Use fixed index for 'lpass_cc' & 'lpass_qdsp6ss' in probe.

 [v2]
  * Make gcc_lpass_sway_clk static.
  * Remove using child nodes and use reg-names to differentiate various
domains of LPASS CC.

Add support for the lpass clock controller found on SDM845 based devices.
This would allow lpass peripheral loader drivers to control the clocks to
bring the subsystem out of reset.

Taniya Das (3):
  dt-bindings: clock: Update GCC bindings for protected-clocks
  dt-bindings: clock: Introduce QCOM LPASS clock bindings
  clk: qcom: Add lpass clock controller driver for SDM845

 .../devicetree/bindings/clock/qcom,gcc.txt |  16 ++
 .../devicetree/bindings/clock/qcom,lpasscc.txt |  26 +++
 drivers/clk/qcom/Kconfig   |   9 ++
 drivers/clk/qcom/Makefile  |   1 +
 drivers/clk/qcom/gcc-sdm845.c  |  32 
 drivers/clk/qcom/lpasscc-sdm845.c  | 179 +
 include/dt-bindings/clock/qcom,gcc-sdm845.h|   2 +
 include/dt-bindings/clock/qcom,lpass-sdm845.h  |  15 ++
 8 files changed, 280 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,lpasscc.txt
 create mode 100644 drivers/clk/qcom/lpasscc-sdm845.c
 create mode 100644 include/dt-bindings/clock/qcom,lpass-sdm845.h

--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc.is a member
of the Code Aurora Forum, hosted by the  Linux Foundation.



[PATCH v11 0/3] Add support for LPASS clock controller for SDM845

2018-11-30 Thread Taniya Das
 [v11]
  * Add the GCC LPASS clocks only if LPASSCC config is present.
  * Update the comment in lpasscc driver.

 [v10]
  * Separate change to add  protected-clocks list in GCC binding.
  * Remove the clock support 'LPASS_AUDIO_WRAPPER_AON_CLK' as it is always ON
  clock.
  * Add few comments for module description and match table.

 [v9]
  * Update GCC documentation binding with the protected-clocks list.
  * Update the GCC code to add the GCC lpass clocks.
  * This depends on the acceptance of
  https://lore.kernel.org/lkml/20181105194011.43770-1-swb...@chromium.org/

 [v8]
  * Add CLK_IS_CRITICAL for GCC lpass clocks for lpass clocks access to go
  through always.

 [v7]
  * Cleanup header file inclusions.
  * Move the comments along with the flags.
  * Update the commit with details for CLK_IGNORE_UNUSED.

 [v6]
  * Update the logic to register the lpass clocks when the device tree property
   is not present.
  * Add the CLK_IGNORE_UNUSED flag for the lpass clocks to not gate the clocks
   at late_init.

 [v5]
  * Address the comments in device tree binding to update the reg-names,
update the unit address in lpass clock node example and also
add reg property for the gcc clock node.
  * Update the lpass driver to take care of the reg-names.

 [v4]
  * Update the description in GCC Documentation binding for
  'qcom,lpass-protected'.
  * Remove 'qcom,lpass-protected' from LPASS Documentation binding.
  * Update KConfig to use Low Power Audio Subsystem.
  * Add module_exit() and also update return value for
devm_ioremap_resource failure.

 [v3]
  * Add a device tree property to identify lpass protected GCC clocks.
  * Update the GCC driver code to register the lpass clocks when the flag is
   defined.
  * Add comment for clocks using the BRANCH_HALT_SKIP flag.
  * Use platform APIs instead of of_address_to_resource.
  * Replace devm_ioremap with devm_ioremap_resource.
  * Use fixed index for 'lpass_cc' & 'lpass_qdsp6ss' in probe.

 [v2]
  * Make gcc_lpass_sway_clk static.
  * Remove using child nodes and use reg-names to differentiate various
domains of LPASS CC.

Add support for the lpass clock controller found on SDM845 based devices.
This would allow lpass peripheral loader drivers to control the clocks to
bring the subsystem out of reset.

Taniya Das (3):
  dt-bindings: clock: Update GCC bindings for protected-clocks
  dt-bindings: clock: Introduce QCOM LPASS clock bindings
  clk: qcom: Add lpass clock controller driver for SDM845

 .../devicetree/bindings/clock/qcom,gcc.txt |  16 ++
 .../devicetree/bindings/clock/qcom,lpasscc.txt |  26 +++
 drivers/clk/qcom/Kconfig   |   9 ++
 drivers/clk/qcom/Makefile  |   1 +
 drivers/clk/qcom/gcc-sdm845.c  |  32 
 drivers/clk/qcom/lpasscc-sdm845.c  | 179 +
 include/dt-bindings/clock/qcom,gcc-sdm845.h|   2 +
 include/dt-bindings/clock/qcom,lpass-sdm845.h  |  15 ++
 8 files changed, 280 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,lpasscc.txt
 create mode 100644 drivers/clk/qcom/lpasscc-sdm845.c
 create mode 100644 include/dt-bindings/clock/qcom,lpass-sdm845.h

--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc.is a member
of the Code Aurora Forum, hosted by the  Linux Foundation.



Re: [PATCH v2] kmemleak: Turn kmemleak_lock to raw spinlock on RT

2018-11-30 Thread Sebastian Andrzej Siewior
On 2018-11-24 22:26:46 [+0800], He Zhe wrote:
> On latest v4.19.1-rt3, both of the call traces can be reproduced with kmemleak
> enabled. And none can be reproduced with kmemleak disabled.
okay. So it needs attention.

> On latest mainline tree, none can be reproduced no matter kmemleak is enabled
> or disabled.
> 
> I don't get why kfree from a preempt-disabled section should cause a warning
> without kmemleak, since kfree can't sleep.

It might. It will acquire a sleeping lock if it has to go down to the
memory allocator to actually give memory back.

> If I understand correctly, the call trace above is caused by trying to 
> schedule
> after preemption is disabled, which cannot be reached in mainline kernel. So
> we might need to turn to use raw lock to keep preemption disabled.

The buddy-allocator runs with spin locks so it is okay on !RT. So you
can use kfree() with disabled preemption or disabled interrupts.
I don't think that we want to use raw-locks in the buddy-allocator.

> From what I reached above, this is RT-only and happens on v4.18 and v4.19.
> 
> The call trace above is caused by grabbing kmemleak_lock and then getting
> scheduled and then re-grabbing kmemleak_lock. Using raw lock can also solve
> this problem.

But this is a reader / writer lock. And if I understand the other part
of the thread then it needs multiple readers.
Couldn't we just get rid of that kfree() or move it somewhere else?
I mean if they free() the memory on CPU-down and allocate it again on CPU-up
then we could skip that, right? Just allocate it and don't free it
because the CPU will likely get up again.

> Thanks,
> Zhe

Sebastian


Re: [PATCH v2] kmemleak: Turn kmemleak_lock to raw spinlock on RT

2018-11-30 Thread Sebastian Andrzej Siewior
On 2018-11-24 22:26:46 [+0800], He Zhe wrote:
> On latest v4.19.1-rt3, both of the call traces can be reproduced with kmemleak
> enabled. And none can be reproduced with kmemleak disabled.
okay. So it needs attention.

> On latest mainline tree, none can be reproduced no matter kmemleak is enabled
> or disabled.
> 
> I don't get why kfree from a preempt-disabled section should cause a warning
> without kmemleak, since kfree can't sleep.

It might. It will acquire a sleeping lock if it has to go down to the
memory allocator to actually give memory back.

> If I understand correctly, the call trace above is caused by trying to 
> schedule
> after preemption is disabled, which cannot be reached in mainline kernel. So
> we might need to turn to use raw lock to keep preemption disabled.

The buddy-allocator runs with spin locks so it is okay on !RT. So you
can use kfree() with disabled preemption or disabled interrupts.
I don't think that we want to use raw-locks in the buddy-allocator.

> From what I reached above, this is RT-only and happens on v4.18 and v4.19.
> 
> The call trace above is caused by grabbing kmemleak_lock and then getting
> scheduled and then re-grabbing kmemleak_lock. Using raw lock can also solve
> this problem.

But this is a reader / writer lock. And if I understand the other part
of the thread then it needs multiple readers.
Couldn't we just get rid of that kfree() or move it somewhere else?
I mean if they free() the memory on CPU-down and allocate it again on CPU-up
then we could skip that, right? Just allocate it and don't free it
because the CPU will likely get up again.

> Thanks,
> Zhe

Sebastian


Re: [PATCH] fs: Make /proc/sys inodes be owned by global root.

2018-11-30 Thread Luis Chamberlain
On Fri, Nov 30, 2018 at 08:48:11AM -0600, Eric W. Biederman wrote:
> Luis Chamberlain  writes:
> 
> > The logic seems sensible then, but are we implicating what a container
> > does with its sysctl values onto the entire system? If so, sure, it
> > seems you want this for networking purposes as there are a series of
> > sysctl values a container may want to muck with, but are we sure we
> > want the same for *all* sysctl entries?
> 
> No.  Please look at the patch again.  It sets the default uid and gid
> for sysctl entries to 0.  AKA GLOBAL_ROOT_UID and GLOBAL_ROOT_GID
> because there is a bug and they were not set to that value.
> 
> Those are the uids and gids that are tested against.  It just happens
> you have to be in a weird configuration for this bug to become a problem.

Thanks, then provided the commit log is modified:

Acked-by: Luis Chamberlain 

  Luis


Re: [PATCH] fs: Make /proc/sys inodes be owned by global root.

2018-11-30 Thread Luis Chamberlain
On Fri, Nov 30, 2018 at 08:48:11AM -0600, Eric W. Biederman wrote:
> Luis Chamberlain  writes:
> 
> > The logic seems sensible then, but are we implicating what a container
> > does with its sysctl values onto the entire system? If so, sure, it
> > seems you want this for networking purposes as there are a series of
> > sysctl values a container may want to muck with, but are we sure we
> > want the same for *all* sysctl entries?
> 
> No.  Please look at the patch again.  It sets the default uid and gid
> for sysctl entries to 0.  AKA GLOBAL_ROOT_UID and GLOBAL_ROOT_GID
> because there is a bug and they were not set to that value.
> 
> Those are the uids and gids that are tested against.  It just happens
> you have to be in a weird configuration for this bug to become a problem.

Thanks, then provided the commit log is modified:

Acked-by: Luis Chamberlain 

  Luis


Re: [PATCH v10 3/3] clk: qcom: Add lpass clock controller driver for SDM845

2018-11-30 Thread Taniya Das

Hello Stephen,

On 11/29/2018 2:40 AM, Stephen Boyd wrote:

Quoting Taniya Das (2018-11-21 23:53:41)

diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c
index f133b7f..ba8ff99 100644
--- a/drivers/clk/qcom/gcc-sdm845.c
+++ b/drivers/clk/qcom/gcc-sdm845.c
@@ -3153,6 +3153,34 @@ enum {
 },
  };

+static struct clk_branch gcc_lpass_q6_axi_clk = {
+   .halt_reg = 0x47000,
+   .halt_check = BRANCH_HALT,
+   .clkr = {
+   .enable_reg = 0x47000,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "gcc_lpass_q6_axi_clk",
+   .flags = CLK_IS_CRITICAL,
+   .ops = &clk_branch2_ops,
+   },
+   },
+};
+
+static struct clk_branch gcc_lpass_sway_clk = {
+   .halt_reg = 0x47008,
+   .halt_check = BRANCH_HALT,
+   .clkr = {
+   .enable_reg = 0x47008,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "gcc_lpass_sway_clk",
+   .flags = CLK_IS_CRITICAL,
+   .ops = &clk_branch2_ops,
+   },
+   },
+};
+
  static struct gdsc pcie_0_gdsc = {
 .gdscr = 0x6b004,
 .pd = {
@@ -3453,6 +3481,8 @@ enum {
 [GCC_QSPI_CORE_CLK_SRC] = &gcc_qspi_core_clk_src.clkr,
 [GCC_QSPI_CORE_CLK] = &gcc_qspi_core_clk.clkr,
 [GCC_QSPI_CNOC_PERIPH_AHB_CLK] = &gcc_qspi_cnoc_periph_ahb_clk.clkr,
+   [GCC_LPASS_Q6_AXI_CLK] = &gcc_lpass_q6_axi_clk.clkr,
+   [GCC_LPASS_SWAY_CLK] = &gcc_lpass_sway_clk.clkr,


I have one single idea to avoid the integration nightmare with dts
needing another update for this on platforms where these can't be
touched. It's not perfect, but we can throw these clks and usage of the
clks behind an #ifdef CONFIG_SDM_LPASSCC_845 and then let the dts parts
match up with the clk driver parts in linux-next. After everything is
merged together, someone can turn on the knobs for LPASS clk controller
and make sure they have the right dts bits to mark them as protected.



Sure, I will keep it under the ifdef and clean it up later.

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.

--


Re: [PATCH v10 3/3] clk: qcom: Add lpass clock controller driver for SDM845

2018-11-30 Thread Taniya Das

Hello Stephen,

On 11/29/2018 2:40 AM, Stephen Boyd wrote:

Quoting Taniya Das (2018-11-21 23:53:41)

diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c
index f133b7f..ba8ff99 100644
--- a/drivers/clk/qcom/gcc-sdm845.c
+++ b/drivers/clk/qcom/gcc-sdm845.c
@@ -3153,6 +3153,34 @@ enum {
 },
  };

+static struct clk_branch gcc_lpass_q6_axi_clk = {
+   .halt_reg = 0x47000,
+   .halt_check = BRANCH_HALT,
+   .clkr = {
+   .enable_reg = 0x47000,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "gcc_lpass_q6_axi_clk",
+   .flags = CLK_IS_CRITICAL,
+   .ops = &clk_branch2_ops,
+   },
+   },
+};
+
+static struct clk_branch gcc_lpass_sway_clk = {
+   .halt_reg = 0x47008,
+   .halt_check = BRANCH_HALT,
+   .clkr = {
+   .enable_reg = 0x47008,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "gcc_lpass_sway_clk",
+   .flags = CLK_IS_CRITICAL,
+   .ops = &clk_branch2_ops,
+   },
+   },
+};
+
  static struct gdsc pcie_0_gdsc = {
 .gdscr = 0x6b004,
 .pd = {
@@ -3453,6 +3481,8 @@ enum {
 [GCC_QSPI_CORE_CLK_SRC] = &gcc_qspi_core_clk_src.clkr,
 [GCC_QSPI_CORE_CLK] = &gcc_qspi_core_clk.clkr,
 [GCC_QSPI_CNOC_PERIPH_AHB_CLK] = &gcc_qspi_cnoc_periph_ahb_clk.clkr,
+   [GCC_LPASS_Q6_AXI_CLK] = &gcc_lpass_q6_axi_clk.clkr,
+   [GCC_LPASS_SWAY_CLK] = &gcc_lpass_sway_clk.clkr,


I have one single idea to avoid the integration nightmare with dts
needing another update for this on platforms where these can't be
touched. It's not perfect, but we can throw these clks and usage of the
clks behind an #ifdef CONFIG_SDM_LPASSCC_845 and then let the dts parts
match up with the clk driver parts in linux-next. After everything is
merged together, someone can turn on the knobs for LPASS clk controller
and make sure they have the right dts bits to mark them as protected.



Sure, I will keep it under the ifdef and clean it up later.

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.

--


Re: [PATCH v10 3/3] clk: qcom: Add lpass clock controller driver for SDM845

2018-11-30 Thread Taniya Das

Hello Stephen,

On 11/27/2018 2:44 PM, Stephen Boyd wrote:

Quoting Taniya Das (2018-11-21 23:53:41)

+
+static struct clk_branch lpass_qdsp6ss_core_clk = {
+   .halt_reg = 0x20,
+   /* CLK_OFF would not toggle until LPASS is not out of reset */


Is this really "CLK_OFF won't toggle until LPASS is out of reset"?



Would take care of it, in the next series.

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.

--


Re: [PATCH v10 3/3] clk: qcom: Add lpass clock controller driver for SDM845

2018-11-30 Thread Taniya Das

Hello Stephen,

On 11/27/2018 2:44 PM, Stephen Boyd wrote:

Quoting Taniya Das (2018-11-21 23:53:41)

+
+static struct clk_branch lpass_qdsp6ss_core_clk = {
+   .halt_reg = 0x20,
+   /* CLK_OFF would not toggle until LPASS is not out of reset */


Is this really "CLK_OFF won't toggle until LPASS is out of reset"?



Would take care of it, in the next series.

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.

--


Re: [PATCH 0/2] [GIT PULL] tracing: More fixes for 4.20

2018-11-30 Thread Steven Rostedt
On Fri, 30 Nov 2018 09:41:00 -0800
Linus Torvalds  wrote:

> On Thu, Nov 29, 2018 at 7:19 PM Steven Rostedt  wrote:
> >
> > Note, this is on top of a previous git pull that I have submitted:
> >
> >   http://lkml.kernel.org/r/20181127224031.76681...@vmware.local.home  
> 
> Hmm.
> 
> I had dismissed that, because the patch descriptors for that series
> had had "for-next" in them.
> 
> https://lore.kernel.org/lkml/20181122002801.501220...@goodmis.org/
> 
> so I dismissed that pull request entirely as being not for this
> release entirely.
> 
> I went back and merged things, but in general, please try to avoid
> confusing me. I'm easily confused when I get mixed messages about the
> patches and the pull requests, and will then generally default to
> "ignore, this is informational".
>

My apologies. I used my scripts to push them into my linux-next repo,
and it added the [for-next] when doing so in the series. I wanted it to
sit in next for a week (because I modified a bunch of architecture code
that I could only compile test, but not run).

I'll be more careful next time.

Thanks!

-- Steve


Re: [PATCH 0/2] [GIT PULL] tracing: More fixes for 4.20

2018-11-30 Thread Steven Rostedt
On Fri, 30 Nov 2018 09:41:00 -0800
Linus Torvalds  wrote:

> On Thu, Nov 29, 2018 at 7:19 PM Steven Rostedt  wrote:
> >
> > Note, this is on top of a previous git pull that I have submitted:
> >
> >   http://lkml.kernel.org/r/20181127224031.76681...@vmware.local.home  
> 
> Hmm.
> 
> I had dismissed that, because the patch descriptors for that series
> had had "for-next" in them.
> 
> https://lore.kernel.org/lkml/20181122002801.501220...@goodmis.org/
> 
> so I dismissed that pull request entirely as being not for this
> release entirely.
> 
> I went back and merged things, but in general, please try to avoid
> confusing me. I'm easily confused when I get mixed messages about the
> patches and the pull requests, and will then generally default to
> "ignore, this is informational".
>

My apologies. I used my scripts to push them into my linux-next repo,
and it added the [for-next] when doing so in the series. I wanted it to
sit in next for a week (because I modified a bunch of architecture code
that I could only compile test, but not run).

I'll be more careful next time.

Thanks!

-- Steve


RE: [PATCH] [repost] Drivers: hv: vmbus: Offload the handling of channels to two workqueues

2018-11-30 Thread Dexuan Cui
> From: KY Srinivasan 
> Sent: Friday, November 30, 2018 9:31 AM
> > From: Dexuan Cui 
> > Sent: Thursday, November 29, 2018 12:17 AM
> > To: gre...@linuxfoundation.org
> > Cc: KY Srinivasan ; Haiyang Zhang
> > ; Stephen Hemminger
> > ; linux-kernel@vger.kernel.org;
> > de...@linuxdriverproject.org; a...@canonical.com; vkuznets
> > ; o...@aepfle.de; jasow...@redhat.com; Michael
> > Kelley 
> > Subject: RE: [PATCH] [repost] Drivers: hv: vmbus: Offload the handling of
> > channels to two workqueues
> >
> > > From: gre...@linuxfoundation.org 
> > > Sent: Wednesday, November 28, 2018 11:45 PM
> > > >
> > > > There is no change in this repost. I just rebased this patch to today's
> > > > char-misc's char-misc-next branch. Previously KY posted the patch with
> > his
> > > > Signed-off-by (which is kept in this repost), but there was a conflict 
> > > > issue.
> > > >
> > > > Note: the patch can't be cleanly applied to char-misc's char-misc-linus
> > branch
> > > --
> > > > to do that, we need to cherry-pick the supporting patch first:
> > > > 4d3c5c69191f ("Drivers: hv: vmbus: Remove the useless API
> > > vmbus_get_outgoing_channel()")
> > >
> > > That is not going to work for the obvious reason that this dependant
> > > patch is not going to be merged into 4.20-final.
> >
> > It looks like the dependent patch (4d3c5c69191f) is going to miss the v4.20
> > release.
> > This is not a big issue, as the dependent patch isn't really important.
> >
> > > So, what do you expect us to do here?  The only way this can be accepted
> > > is to have it go into my -next branch, which means it will show up in
> > > 4.21-rc1, is that ok?
> >
> > Is there any chance for this patch ("Drivers: hv: vmbus: Offload the 
> > handling
> > ...") to
> > go into v4.20?
> >
> > If yes, I can quickly do a rebase to char-misc's char-misc-linus branch,
> > because actually the conflict can be very easily fixed. And I can help to 
> > fix any
> > conflict when the dependent patch is backported to v4.20.1.
> 
> This patch fixes an important bug while the patch this depends on is not
> critical.
> I suggest we revert the patch that this patch depends on
> and we can submit a new version of this patch that can go in now - into 4.20
> release.
> 
> K. Y

I agree.

Hi Greg,
Please let us know what we can do to try to push this important fix into v4.20.

Actually it's straightforward, though it looks big. And we have done full
testing with the patch.

Thanks,
--Dexuan


RE: [PATCH] [repost] Drivers: hv: vmbus: Offload the handling of channels to two workqueues

2018-11-30 Thread Dexuan Cui
> From: KY Srinivasan 
> Sent: Friday, November 30, 2018 9:31 AM
> > From: Dexuan Cui 
> > Sent: Thursday, November 29, 2018 12:17 AM
> > To: gre...@linuxfoundation.org
> > Cc: KY Srinivasan ; Haiyang Zhang
> > ; Stephen Hemminger
> > ; linux-kernel@vger.kernel.org;
> > de...@linuxdriverproject.org; a...@canonical.com; vkuznets
> > ; o...@aepfle.de; jasow...@redhat.com; Michael
> > Kelley 
> > Subject: RE: [PATCH] [repost] Drivers: hv: vmbus: Offload the handling of
> > channels to two workqueues
> >
> > > From: gre...@linuxfoundation.org 
> > > Sent: Wednesday, November 28, 2018 11:45 PM
> > > >
> > > > There is no change in this repost. I just rebased this patch to today's
> > > > char-misc's char-misc-next branch. Previously KY posted the patch with
> > his
> > > > Signed-off-by (which is kept in this repost), but there was a conflict 
> > > > issue.
> > > >
> > > > Note: the patch can't be cleanly applied to char-misc's char-misc-linus
> > branch
> > > --
> > > > to do that, we need to cherry-pick the supporting patch first:
> > > > 4d3c5c69191f ("Drivers: hv: vmbus: Remove the useless API
> > > vmbus_get_outgoing_channel()")
> > >
> > > That is not going to work for the obvious reason that this dependant
> > > patch is not going to be merged into 4.20-final.
> >
> > It looks like the dependent patch (4d3c5c69191f) is going to miss the v4.20
> > release.
> > This is not a big issue, as the dependent patch isn't really important.
> >
> > > So, what do you expect us to do here?  The only way this can be accepted
> > > is to have it go into my -next branch, which means it will show up in
> > > 4.21-rc1, is that ok?
> >
> > Is there any chance for this patch ("Drivers: hv: vmbus: Offload the 
> > handling
> > ...") to
> > go into v4.20?
> >
> > If yes, I can quickly do a rebase to char-misc's char-misc-linus branch,
> > because actually the conflict can be very easily fixed. And I can help to 
> > fix any
> > conflict when the dependent patch is backported to v4.20.1.
> 
> This patch fixes an important bug while the patch this depends on is not
> critical.
> I suggest we revert the patch that this patch depends on
> and we can submit a new version of this patch that can go in now - into 4.20
> release.
> 
> K. Y

I agree.

Hi Greg,
Please let us know what we can do to try to push this important fix into v4.20.

Actually it's straightforward, though it looks big. And we have done full
testing with the patch.

Thanks,
--Dexuan


Re: [PATCH v6 04/24] arm/arm64: gic-v3: Add PMR and RPR accessors

2018-11-30 Thread Catalin Marinas
On Mon, Nov 12, 2018 at 11:56:55AM +, Julien Thierry wrote:
> Add helper functions to access system registers related to interrupt
> priorities: PMR and RPR.
>
> Signed-off-by: Julien Thierry 
> Cc: Russell King 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Marc Zyngier 

Acked-by: Catalin Marinas 
IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.


Re: [PATCH v6 04/24] arm/arm64: gic-v3: Add PMR and RPR accessors

2018-11-30 Thread Catalin Marinas
On Mon, Nov 12, 2018 at 11:56:55AM +, Julien Thierry wrote:
> Add helper functions to access system registers related to interrupt
> priorities: PMR and RPR.
>
> Signed-off-by: Julien Thierry 
> Cc: Russell King 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Marc Zyngier 

Acked-by: Catalin Marinas 
IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.


Re: BUG: corrupted list in freeary

2018-11-30 Thread Dmitry Vyukov
On Fri, Nov 30, 2018 at 5:58 PM, Dmitry Vyukov  wrote:
> On Thu, Nov 29, 2018 at 9:13 AM, Manfred Spraul
>  wrote:
>> Hello together,
>>
>> On 11/27/18 4:52 PM, syzbot wrote:
>>
>> Hello,
>>
>> syzbot found the following crash on:
>>
>> HEAD commit:e195ca6cb6f2 Merge branch 'for-linus' of git://git.kernel...
>> git tree:   upstream
>> console output: https://syzkaller.appspot.com/x/log.txt?x=10d3e6a340
>>
>> From the console output:
>>
>> 20:36:14 executing program 4:
>> semget$private(0x1200, 0x39d0, 0x0)
>>
>>
>> I don't understand the 0x1200.
>>
>> What does that mean? What is the actual syscall?
>
> Hi Manfred,
>
> The syscall is semget with the first argument 0x1200.
>
>
>>
>> Is 0x39d0 the number of semaphores in the array, i.e. create ~13.000
>> semaphores?
>
> If the second argument of 0x39d0 relates to creation of 0x39d0
> semaphores, then yes.
>
>
>
>> kernel config:  https://syzkaller.appspot.com/x/.config?x=73e2bc0cb6463446
>> dashboard link: https://syzkaller.appspot.com/bug?extid=c92d3646e35bc5d1a909
>> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>>
>> Unfortunately, I don't have any reproducer for this crash yet.
>>
>> IMPORTANT: if you fix the bug, please add the following tag to the commit:
>> Reported-by: syzbot+c92d3646e35bc5d1a...@syzkaller.appspotmail.com
>>
>> input: syz1 as /devices/virtual/input/input670
>> input: syz1 as /devices/virtual/input/input671
>> list_del corruption. prev->next should be 8881dae2cdb8, but was
>> 0010
>> [ cut here ]
>> kernel BUG at lib/list_debug.c:53!
>> invalid opcode:  [#1] PREEMPT SMP KASAN
>> CPU: 0 PID: 6194 Comm: syz-executor5 Not tainted 4.20.0-rc3+ #348
>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>> Google 01/01/2011
>> RIP: 0010:__list_del_entry_valid.cold.1+0x48/0x4a lib/list_debug.c:51
>> Code: d0 60 88 e8 b2 31 d2 fd 0f 0b 48 89 de 48 c7 c7 00 d2 60 88 e8 a1 31
>> d2 fd 0f 0b 48 89 de 48 c7 c7 a0 d1 60 88 e8 90 31 d2 fd <0f> 0b 48 89 d9 48
>> c7 c7 60 d2 60 88 e8 7f 31 d2 fd 0f 0b 48 89 f1
>> RSP: 0018:8881848fee80 EFLAGS: 00010286
>> RAX: 0054 RBX: 8881dae2cdb8 RCX: 
>> RDX:  RSI: 8165eaf5 RDI: 0005
>> RBP: 8881848fee98 R08: 8881848f26c0 R09: 0006
>> R10:  R11: 8881848f26c0 R12: 8881c3173a00
>> R13: 8881be118118 R14: 8881848ff280 R15: dc00
>> FS:  020b2940() GS:8881dae0() knlGS:
>> CS:  0010 DS:  ES:  CR0: 80050033
>> CR2: 00625208 CR3: 0001c10d3000 CR4: 001406f0
>> DR0:  DR1:  DR2: 
>> DR3:  DR6: fffe0ff0 DR7: 0400
>> Call Trace:
>>  __list_del_entry include/linux/list.h:117 [inline]
>>  list_del include/linux/list.h:125 [inline]
>>  unlink_queue ipc/sem.c:786 [inline]
>>
>> Unlink_queue means transfer all waiting threads to the wake-q.
>>
>> There are 2*(1+) linked lists in an array.
>>
>> And this fails, because one linked list contains 0x10 instead of a real
>> pointer.
>>
>> I could not find any semop() in the log --> all lists must be empty.
>>
>> Actually, the lists were initialized in newary(), and then never touched.
>>
>>  freeary+0xbd1/0x1a40 ipc/sem.c:1160
>>
>> Free a semaphore array
>>
>>  free_ipcs+0x9f/0x1c0 ipc/namespace.c:112
>>  sem_exit_ns+0x20/0x40 ipc/sem.c:237
>>  free_ipc_ns ipc/namespace.c:120 [inline]
>>
>> Free all ipc ids in the name space
>>
>>  put_ipc_ns+0x66/0x180 ipc/namespace.c:152
>>  free_nsproxy+0xcf/0x220 kernel/nsproxy.c:180
>>
>> Free the name space
>>
>>  switch_task_namespaces+0xb3/0xd0 kernel/nsproxy.c:229
>>  exit_task_namespaces+0x17/0x20 kernel/nsproxy.c:234
>>  do_exit+0x1ad1/0x26d0 kernel/exit.c:866
>>  do_group_exit+0x177/0x440 kernel/exit.c:970
>>  get_signal+0x8b0/0x1980 kernel/signal.c:2517
>>  do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816
>>  exit_to_usermode_loop+0x2e5/0x380 arch/x86/entry/common.c:162
>>  prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
>>  syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
>>  do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
>>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
>> RIP: 0033:0x410fa0
>>
>> This is time code 604.599748 in the console output:
>>
>> [  604.599748] RIP: 0033:0x410fa0
>>
>>
>> Questions:
>>
>> 1) What is this?
>>
>> [  600.924691]  entry_SYSCALL_64_after_hwframe+0x49/0xbe^M
>> [  600.929872] RIP: 0033:0x7f3e597d0120^M
>> [  600.933576] Code: Bad RIP value.^M
>> [  600.936920] RSP: 002b:7ffc2d83e008 EFLAGS: 0246 ORIG_RAX:
>> 0002^M
>> [  600.944608] RAX: ffda RBX: 55ca2995b436 RCX:
>> 7f3e597d0120^M
>> [  600.951856] RDX: 7ffc2d83e244 RSI: 0008 RDI:
>> 7ffc2d83e220^M
>> [  600.959107] RBP: 55ca2995b1e0 R08:  R09:
>> 

Re: BUG: corrupted list in freeary

2018-11-30 Thread Dmitry Vyukov
On Fri, Nov 30, 2018 at 5:58 PM, Dmitry Vyukov  wrote:
> On Thu, Nov 29, 2018 at 9:13 AM, Manfred Spraul
>  wrote:
>> Hello together,
>>
>> On 11/27/18 4:52 PM, syzbot wrote:
>>
>> Hello,
>>
>> syzbot found the following crash on:
>>
>> HEAD commit:e195ca6cb6f2 Merge branch 'for-linus' of git://git.kernel...
>> git tree:   upstream
>> console output: https://syzkaller.appspot.com/x/log.txt?x=10d3e6a340
>>
>> From the console output:
>>
>> 20:36:14 executing program 4:
>> semget$private(0x1200, 0x39d0, 0x0)
>>
>>
>> I don't understand the 0x1200.
>>
>> What does that mean? What is the actual syscall?
>
> Hi Manfred,
>
> The syscall is semget with the first argument 0x1200.
>
>
>>
>> Is 0x39d0 the number of semaphores in the array, i.e. create ~13.000
>> semaphores?
>
> If the second argument of 0x39d0 relates to creation of 0x39d0
> semaphores, then yes.
>
>
>
>> kernel config:  https://syzkaller.appspot.com/x/.config?x=73e2bc0cb6463446
>> dashboard link: https://syzkaller.appspot.com/bug?extid=c92d3646e35bc5d1a909
>> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>>
>> Unfortunately, I don't have any reproducer for this crash yet.
>>
>> IMPORTANT: if you fix the bug, please add the following tag to the commit:
>> Reported-by: syzbot+c92d3646e35bc5d1a...@syzkaller.appspotmail.com
>>
>> input: syz1 as /devices/virtual/input/input670
>> input: syz1 as /devices/virtual/input/input671
>> list_del corruption. prev->next should be 8881dae2cdb8, but was
>> 0010
>> [ cut here ]
>> kernel BUG at lib/list_debug.c:53!
>> invalid opcode:  [#1] PREEMPT SMP KASAN
>> CPU: 0 PID: 6194 Comm: syz-executor5 Not tainted 4.20.0-rc3+ #348
>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>> Google 01/01/2011
>> RIP: 0010:__list_del_entry_valid.cold.1+0x48/0x4a lib/list_debug.c:51
>> Code: d0 60 88 e8 b2 31 d2 fd 0f 0b 48 89 de 48 c7 c7 00 d2 60 88 e8 a1 31
>> d2 fd 0f 0b 48 89 de 48 c7 c7 a0 d1 60 88 e8 90 31 d2 fd <0f> 0b 48 89 d9 48
>> c7 c7 60 d2 60 88 e8 7f 31 d2 fd 0f 0b 48 89 f1
>> RSP: 0018:8881848fee80 EFLAGS: 00010286
>> RAX: 0054 RBX: 8881dae2cdb8 RCX: 
>> RDX:  RSI: 8165eaf5 RDI: 0005
>> RBP: 8881848fee98 R08: 8881848f26c0 R09: 0006
>> R10:  R11: 8881848f26c0 R12: 8881c3173a00
>> R13: 8881be118118 R14: 8881848ff280 R15: dc00
>> FS:  020b2940() GS:8881dae0() knlGS:
>> CS:  0010 DS:  ES:  CR0: 80050033
>> CR2: 00625208 CR3: 0001c10d3000 CR4: 001406f0
>> DR0:  DR1:  DR2: 
>> DR3:  DR6: fffe0ff0 DR7: 0400
>> Call Trace:
>>  __list_del_entry include/linux/list.h:117 [inline]
>>  list_del include/linux/list.h:125 [inline]
>>  unlink_queue ipc/sem.c:786 [inline]
>>
>> Unlink_queue means transfer all waiting threads to the wake-q.
>>
>> There are 2*(1+) linked lists in an array.
>>
>> And this fails, because one linked list contains 0x10 instead of a real
>> pointer.
>>
>> I could not find any semop() in the log --> all lists must be empty.
>>
>> Actually, the lists were initialized in newary(), and then never touched.
>>
>>  freeary+0xbd1/0x1a40 ipc/sem.c:1160
>>
>> Free a semaphore array
>>
>>  free_ipcs+0x9f/0x1c0 ipc/namespace.c:112
>>  sem_exit_ns+0x20/0x40 ipc/sem.c:237
>>  free_ipc_ns ipc/namespace.c:120 [inline]
>>
>> Free all ipc ids in the name space
>>
>>  put_ipc_ns+0x66/0x180 ipc/namespace.c:152
>>  free_nsproxy+0xcf/0x220 kernel/nsproxy.c:180
>>
>> Free the name space
>>
>>  switch_task_namespaces+0xb3/0xd0 kernel/nsproxy.c:229
>>  exit_task_namespaces+0x17/0x20 kernel/nsproxy.c:234
>>  do_exit+0x1ad1/0x26d0 kernel/exit.c:866
>>  do_group_exit+0x177/0x440 kernel/exit.c:970
>>  get_signal+0x8b0/0x1980 kernel/signal.c:2517
>>  do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816
>>  exit_to_usermode_loop+0x2e5/0x380 arch/x86/entry/common.c:162
>>  prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
>>  syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
>>  do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
>>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
>> RIP: 0033:0x410fa0
>>
>> This is time code 604.599748 in the console output:
>>
>> [  604.599748] RIP: 0033:0x410fa0
>>
>>
>> Questions:
>>
>> 1) What is this?
>>
>> [  600.924691]  entry_SYSCALL_64_after_hwframe+0x49/0xbe^M
>> [  600.929872] RIP: 0033:0x7f3e597d0120^M
>> [  600.933576] Code: Bad RIP value.^M
>> [  600.936920] RSP: 002b:7ffc2d83e008 EFLAGS: 0246 ORIG_RAX:
>> 0002^M
>> [  600.944608] RAX: ffda RBX: 55ca2995b436 RCX:
>> 7f3e597d0120^M
>> [  600.951856] RDX: 7ffc2d83e244 RSI: 0008 RDI:
>> 7ffc2d83e220^M
>> [  600.959107] RBP: 55ca2995b1e0 R08:  R09:
>> 

Re: [PATCH v6 03/24] arm64: cpufeature: Add cpufeature for IRQ priority masking

2018-11-30 Thread Catalin Marinas
On Mon, Nov 12, 2018 at 11:56:54AM +, Julien Thierry wrote:
> Add a cpufeature indicating whether a cpu supports masking interrupts
> by priority.
>
> The feature will be properly enabled in a later patch.
>
> Signed-off-by: Julien Thierry 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Marc Zyngier 
> Cc: Suzuki K Poulose 

Acked-by: Catalin Marinas 
IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.


Re: [PATCH v6 03/24] arm64: cpufeature: Add cpufeature for IRQ priority masking

2018-11-30 Thread Catalin Marinas
On Mon, Nov 12, 2018 at 11:56:54AM +, Julien Thierry wrote:
> Add a cpufeature indicating whether a cpu supports masking interrupts
> by priority.
>
> The feature will be properly enabled in a later patch.
>
> Signed-off-by: Julien Thierry 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Marc Zyngier 
> Cc: Suzuki K Poulose 

Acked-by: Catalin Marinas 
IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.


Re: [PATCH v6 02/24] arm64: cpufeature: Set SYSREG_GIC_CPUIF as a boot system feature

2018-11-30 Thread Catalin Marinas
On Mon, Nov 12, 2018 at 11:56:53AM +, Julien Thierry wrote:
> It is not supported to have some CPUs using GICv3 sysreg CPU interface
> while some others do not.
>
> Once ICC_SRE_EL1.SRE is set on a CPU, the bit cannot be cleared. Since
> matching this feature requires setting ICC_SRE_EL1.SRE, it cannot be
> turned off if found on a CPU.
>
> Set the feature as STRICT_BOOT, if boot CPU has it, all other CPUs are
> required to have it.
>
> Signed-off-by: Julien Thierry 
> Suggested-by: Daniel Thompson 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Suzuki K Poulose 
> Cc: Marc Zyngier 

Acked-by: Catalin Marinas 
IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.


Re: [PATCH v6 02/24] arm64: cpufeature: Set SYSREG_GIC_CPUIF as a boot system feature

2018-11-30 Thread Catalin Marinas
On Mon, Nov 12, 2018 at 11:56:53AM +, Julien Thierry wrote:
> It is not supported to have some CPUs using GICv3 sysreg CPU interface
> while some others do not.
>
> Once ICC_SRE_EL1.SRE is set on a CPU, the bit cannot be cleared. Since
> matching this feature requires setting ICC_SRE_EL1.SRE, it cannot be
> turned off if found on a CPU.
>
> Set the feature as STRICT_BOOT, if boot CPU has it, all other CPUs are
> required to have it.
>
> Signed-off-by: Julien Thierry 
> Suggested-by: Daniel Thompson 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Suzuki K Poulose 
> Cc: Marc Zyngier 

Acked-by: Catalin Marinas 
IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.


Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation

2018-11-30 Thread Peter Zijlstra
On Fri, Nov 30, 2018 at 05:49:34PM +, StDenis, Tom wrote:
> On 2018-11-30 12:48 p.m., Peter Zijlstra wrote:
> > On Fri, Nov 30, 2018 at 04:19:46PM +, StDenis, Tom wrote:
> >> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote:
> > 
> >>> I pushed them out to:
> >>>
> >>> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm
> >>>
> >>> I hope that works; I'm out for a few hours, but should check on email
> >>> again tonight.
> >>>
> >>
> >> NAK I get a failure in TTM on init with your x86/mm branch (see attached
> >> dmesg).
> > 
> > *sigh*, it's been one of those days. Ok, I'll go write some cpa
> > selftests or something so that I have code that uses this stuff.
> > 
> 
> Well the ttm crash could be completely unrelated; the problem is your 
> x86/mm branch is not up to date with master and doesn't include drm fixes.

Well, it crashes right in the middle of the CPA code, and I'm having a
horrible day, so I'm thinking I screwed up rather than anything else.

Also, some level of selftests would be good to have in any case I
figure.


Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation

2018-11-30 Thread Peter Zijlstra
On Fri, Nov 30, 2018 at 05:49:34PM +, StDenis, Tom wrote:
> On 2018-11-30 12:48 p.m., Peter Zijlstra wrote:
> > On Fri, Nov 30, 2018 at 04:19:46PM +, StDenis, Tom wrote:
> >> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote:
> > 
> >>> I pushed them out to:
> >>>
> >>> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm
> >>>
> >>> I hope that works; I'm out for a few hours, but should check on email
> >>> again tonight.
> >>>
> >>
> >> NAK I get a failure in TTM on init with your x86/mm branch (see attached
> >> dmesg).
> > 
> > *sigh*, it's been one of those days. Ok, I'll go write some cpa
> > selftests or something so that I have code that uses this stuff.
> > 
> 
> Well the ttm crash could be completely unrelated; the problem is your 
> x86/mm branch is not up to date with master and doesn't include drm fixes.

Well, it crashes right in the middle of the CPA code, and I'm having a
horrible day, so I'm thinking I screwed up rather than anything else.

Also, some level of selftests would be good to have in any case I
figure.


Re: [PATCH v4 4/6] coresight: Use PMU driver configuration for sink selection

2018-11-30 Thread Mathieu Poirier
On Fri, 30 Nov 2018 at 00:42, Greg KH  wrote:
>
> On Thu, Nov 29, 2018 at 04:09:15PM -0700, Mathieu Poirier wrote:
> > Hi Greg,
> >
> > On Thu, Nov 29, 2018 at 08:49:36AM +0100, Greg KH wrote:
> > > On Wed, Nov 28, 2018 at 03:01:16PM -0700, Mathieu Poirier wrote:
> > > > This patch uses the PMU driver configuration held in 
> > > > event::hw::drv_config
> > > > to select a sink for each event that is created (the old sysFS way of
> > > > working is kept around for backward compatibility).
> > >
> > > It is "sysfs", no InterCaps please, I've never called it that in the
> > > past.
> > >
> > > And just use sysfs, if that does not work properly, then fix that, don't
> > > create yet-another-way-to-configure-this-thing to just confuse people.
> >
> > Thanks for the review, you've provided useful comments.
> >
> > Regarding the "char *" argument for the ioctl, I followed an example that
> > currently exists but I can proceed differently.
>
> What driver currently uses a char * on an ioctl to parse arbitrary
> userspace information to set its configuration?  That should be fixed...
>

Perf filters [1] are communicated to the kernel as a char *.  Given
the dynamic nature of event creation I really don't know how else it
could have been done.

[1]. 
https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/perf_event.h#L459

> > My goal with this patchset was specifically to fix what is wrong with sysfs 
> > and
> > completely take it out of the equation.  The only reason to keep the kernel
> > interface alive was to prevent breaking older user space perf tools currently
> > using it.
>
> That's fine, just don't create a new syscall that takes arbitrary data
> and parses it in the kernel, that's not ok.

You got it.

>
> > I chose to use an ioctl() because it is flexible and well suited for the 
> > dynamic
> > nature of perf events.  It is also currently used to set various event 
> > specific
> > configuration so doing the same adds to the established pattern and avoids
> > creating a new way of doing things, something the perf crew would have been
> > quick to point out.
> >
> > Was my approach wrong?
>
> I don't know how the perf interface works, so perhaps work with those
> developers to sync up and match what they use today?

I have already done a fair amount of work with them.

>
> But step back, what exactly are you trying to do here?  You have an
> implementation of a solution but I don't see the problem stated anywhere
> here.

In the coresight world there can be more than one sink to aggregate
trace data generated by CPUs, hence the need for users to select which
one to use from the perf command line.

Up until now sysfs was used to communicate sink information to the
kernel but that was never the right way to proceed because it breaks
when more than one perf session is created at the same time.  The
situation was manageable when working with per-thread scenarios where
a single HW trace event is created but in CPU-wide mode a HW trace
event is created for each CPU that is specified on the perf command
line, taking us back to the concurrency problem we have when dealing
with multiple per-thread sessions.

Since my goal is to add coresight support for CPU-wide trace
scenarios, the issue with sysfs concurrency needs to be addressed
first, which this set is aiming at.  Sysfs is a problem so I'm
removing it in favour of an ioctl() where a specific sink can be
assigned to each event.

The above should probably go in the cover letter.  Let me know if you
want more information.

Thanks,
Mathieu


Re: [PATCH v4 4/6] coresight: Use PMU driver configuration for sink selection

2018-11-30 Thread Mathieu Poirier
On Fri, 30 Nov 2018 at 00:42, Greg KH  wrote:
>
> On Thu, Nov 29, 2018 at 04:09:15PM -0700, Mathieu Poirier wrote:
> > Hi Greg,
> >
> > On Thu, Nov 29, 2018 at 08:49:36AM +0100, Greg KH wrote:
> > > On Wed, Nov 28, 2018 at 03:01:16PM -0700, Mathieu Poirier wrote:
> > > > This patch uses the PMU driver configuration held in 
> > > > event::hw::drv_config
> > > > to select a sink for each event that is created (the old sysFS way of
> > > > working is kept around for backward compatibility).
> > >
> > > It is "sysfs", no InterCaps please, I've never called it that in the
> > > past.
> > >
> > > And just use sysfs, if that does not work properly, then fix that, don't
> > > create yet-another-way-to-configure-this-thing to just confuse people.
> >
> > Thanks for the review, you've provided useful comments.
> >
> > Regarding the "char *" argument for the ioctl, I followed an example that
> > currently exists but I can proceed differently.
>
> What driver currently uses a char * on an ioctl to parse arbitrary
> userspace information to set its configuration?  That should be fixed...
>

Perf filters [1] are communicated to the kernel as a char *.  Given
the dynamic nature of event creation I really don't know how else it
could have been done.

[1]. 
https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/perf_event.h#L459

> > My goal with this patchset was specifically to fix what is wrong with sysfs 
> > and
> > completely take it out of the equation.  The only reason to keep the kernel
> > interface alive was to prevent breaking older user space perf tools currently
> > using it.
>
> That's fine, just don't create a new syscall that takes arbitrary data
> and parses it in the kernel, that's not ok.

You got it.

>
> > I chose to use an ioctl() because it is flexible and well suited for the 
> > dynamic
> > nature of perf events.  It is also currently used to set various event 
> > specific
> > configuration so doing the same adds to the established pattern and avoids
> > creating a new way of doing things, something the perf crew would have been
> > quick to point out.
> >
> > Was my approach wrong?
>
> I don't know how the perf interface works, so perhaps work with those
> developers to sync up and match what they use today?

I have already done a fair amount of work with them.

>
> But step back, what exactly are you trying to do here?  You have an
> implementation of a solution but I don't see the problem stated anywhere
> here.

In the coresight world there can be more than one sink to aggregate
trace data generated by CPUs, hence the need for users to select which
one to use from the perf command line.

Up until now sysfs was used to communicate sink information to the
kernel but that was never the right way to proceed because it breaks
when more than one perf session is created at the same time.  The
situation was manageable when working with per-thread scenarios where
a single HW trace event is created but in CPU-wide mode a HW trace
event is created for each CPU that is specified on the perf command
line, taking us back to the concurrency problem we have when dealing
with multiple per-thread sessions.

Since my goal is to add coresight support for CPU-wide trace
scenarios, the issue with sysfs concurrency needs to be addressed
first, which this set is aiming at.  Sysfs is a problem so I'm
removing it in favour of an ioctl() where a specific sink can be
assigned to each event.

The above should probably go in the cover letter.  Let me know if you
want more information.

Thanks,
Mathieu


[PATCH] ARM: dts: s5pv210: Add s5p-jpeg codec node.

2018-11-30 Thread Paweł Chmiel
This commit adds node for s5p-jpeg codec,
which is present in S5PV210 SOC.

Signed-off-by: Paweł Chmiel 
---
 arch/arm/boot/dts/s5pv210.dtsi | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi
index 75f454a210d6..12eac8930eac 100644
--- a/arch/arm/boot/dts/s5pv210.dtsi
+++ b/arch/arm/boot/dts/s5pv210.dtsi
@@ -627,6 +627,15 @@
samsung,lcd-wb;
};
};
+
+   jpeg_codec: jpeg-codec@fb600000 {
+   compatible = "samsung,s5pv210-jpeg";
+   reg = <0xfb600000 0x1000>;
+   interrupt-parent = <&vic2>;
+   interrupts = <8>;
+   clocks = <&clocks CLK_JPEG>;
+   clock-names = "jpeg";
+   };
};
 };
 
-- 
2.17.1



[PATCH] ARM: dts: s5pv210: Add s5p-jpeg codec node.

2018-11-30 Thread Paweł Chmiel
This commit adds node for s5p-jpeg codec,
which is present in S5PV210 SOC.

Signed-off-by: Paweł Chmiel 
---
 arch/arm/boot/dts/s5pv210.dtsi | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi
index 75f454a210d6..12eac8930eac 100644
--- a/arch/arm/boot/dts/s5pv210.dtsi
+++ b/arch/arm/boot/dts/s5pv210.dtsi
@@ -627,6 +627,15 @@
samsung,lcd-wb;
};
};
+
+   jpeg_codec: jpeg-codec@fb600000 {
+   compatible = "samsung,s5pv210-jpeg";
+   reg = <0xfb600000 0x1000>;
+   interrupt-parent = <&vic2>;
+   interrupts = <8>;
+   clocks = <&clocks CLK_JPEG>;
+   clock-names = "jpeg";
+   };
};
 };
 
-- 
2.17.1



Re: [PATCH v6 01/24] arm64: Remove unused daif related functions/macros

2018-11-30 Thread Catalin Marinas
On Mon, Nov 12, 2018 at 11:56:52AM +, Julien Thierry wrote:
> There are some helpers to modify PSR.[DAIF] bits that are not referenced
> anywhere. The less these bits are available outside of local_irq_*
> functions the better.
>
> Get rid of those unused helpers.
>
> Signed-off-by: Julien Thierry 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: James Morse 

Acked-by: Catalin Marinas 
IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.


Re: [PATCH v6 01/24] arm64: Remove unused daif related functions/macros

2018-11-30 Thread Catalin Marinas
On Mon, Nov 12, 2018 at 11:56:52AM +, Julien Thierry wrote:
> There are some helpers to modify PSR.[DAIF] bits that are not referenced
> anywhere. The less these bits are available outside of local_irq_*
> functions the better.
>
> Get rid of those unused helpers.
>
> Signed-off-by: Julien Thierry 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: James Morse 

Acked-by: Catalin Marinas 
IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.


Re: BUG: corrupted list in freeary

2018-11-30 Thread Dmitry Vyukov
On Thu, Nov 29, 2018 at 9:13 AM, Manfred Spraul
 wrote:
> Hello together,
>
> On 11/27/18 4:52 PM, syzbot wrote:
>
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:e195ca6cb6f2 Merge branch 'for-linus' of git://git.kernel...
> git tree:   upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=10d3e6a340
>
> From the console output:
>
> 20:36:14 executing program 4:
> semget$private(0x1200, 0x39d0, 0x0)
>
>
> I don't understand the 0x1200.
>
> What does that mean? What is the actual syscall?

Hi Manfred,

The syscall is semget with the first argument 0x1200.


>
> Is 0x39d0 the number of semaphores in the array, i.e. create ~13.000
> semaphores?

If the second argument of 0x39d0 relates to creation of 0x39d0
semaphores, then yes.



> kernel config:  https://syzkaller.appspot.com/x/.config?x=73e2bc0cb6463446
> dashboard link: https://syzkaller.appspot.com/bug?extid=c92d3646e35bc5d1a909
> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+c92d3646e35bc5d1a...@syzkaller.appspotmail.com
>
> input: syz1 as /devices/virtual/input/input670
> input: syz1 as /devices/virtual/input/input671
> list_del corruption. prev->next should be 8881dae2cdb8, but was
> 0010
> [ cut here ]
> kernel BUG at lib/list_debug.c:53!
> invalid opcode:  [#1] PREEMPT SMP KASAN
> CPU: 0 PID: 6194 Comm: syz-executor5 Not tainted 4.20.0-rc3+ #348
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> RIP: 0010:__list_del_entry_valid.cold.1+0x48/0x4a lib/list_debug.c:51
> Code: d0 60 88 e8 b2 31 d2 fd 0f 0b 48 89 de 48 c7 c7 00 d2 60 88 e8 a1 31
> d2 fd 0f 0b 48 89 de 48 c7 c7 a0 d1 60 88 e8 90 31 d2 fd <0f> 0b 48 89 d9 48
> c7 c7 60 d2 60 88 e8 7f 31 d2 fd 0f 0b 48 89 f1
> RSP: 0018:8881848fee80 EFLAGS: 00010286
> RAX: 0054 RBX: 8881dae2cdb8 RCX: 
> RDX:  RSI: 8165eaf5 RDI: 0005
> RBP: 8881848fee98 R08: 8881848f26c0 R09: 0006
> R10:  R11: 8881848f26c0 R12: 8881c3173a00
> R13: 8881be118118 R14: 8881848ff280 R15: dc00
> FS:  020b2940() GS:8881dae0() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 00625208 CR3: 0001c10d3000 CR4: 001406f0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> Call Trace:
>  __list_del_entry include/linux/list.h:117 [inline]
>  list_del include/linux/list.h:125 [inline]
>  unlink_queue ipc/sem.c:786 [inline]
>
> Unlink_queue means transfer all waiting threads to the wake-q.
>
> There are 2*(1+) linked lists in an array.
>
> And this fails, because one linked list contains 0x10 instead of a real
> pointer.
>
> I could not find any semop() in the log --> all lists must be empty.
>
> Actually, the lists were initialized in newary(), and then never touched.
>
>  freeary+0xbd1/0x1a40 ipc/sem.c:1160
>
> Free a semaphore array
>
>  free_ipcs+0x9f/0x1c0 ipc/namespace.c:112
>  sem_exit_ns+0x20/0x40 ipc/sem.c:237
>  free_ipc_ns ipc/namespace.c:120 [inline]
>
> Free all ipc ids in the name space
>
>  put_ipc_ns+0x66/0x180 ipc/namespace.c:152
>  free_nsproxy+0xcf/0x220 kernel/nsproxy.c:180
>
> Free the name space
>
>  switch_task_namespaces+0xb3/0xd0 kernel/nsproxy.c:229
>  exit_task_namespaces+0x17/0x20 kernel/nsproxy.c:234
>  do_exit+0x1ad1/0x26d0 kernel/exit.c:866
>  do_group_exit+0x177/0x440 kernel/exit.c:970
>  get_signal+0x8b0/0x1980 kernel/signal.c:2517
>  do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816
>  exit_to_usermode_loop+0x2e5/0x380 arch/x86/entry/common.c:162
>  prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
>  syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
>  do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x410fa0
>
> This is time code 604.599748 in the console output:
>
> [  604.599748] RIP: 0033:0x410fa0
>
>
> Questions:
>
> 1) What is this?
>
> [  600.924691]  entry_SYSCALL_64_after_hwframe+0x49/0xbe^M
> [  600.929872] RIP: 0033:0x7f3e597d0120^M
> [  600.933576] Code: Bad RIP value.^M
> [  600.936920] RSP: 002b:7ffc2d83e008 EFLAGS: 0246 ORIG_RAX:
> 0002^M
> [  600.944608] RAX: ffda RBX: 55ca2995b436 RCX:
> 7f3e597d0120^M
> [  600.951856] RDX: 7ffc2d83e244 RSI: 0008 RDI:
> 7ffc2d83e220^M
> [  600.959107] RBP: 55ca2995b1e0 R08:  R09:
> 55ca2995b099^M
> [  600.966355] R10:  R11: 0246 R12:
> 0001^M
> [  600.973628] R13: 55ca2995b090 R14: 55ca2995b190 R15:
> 7ffc2d83e220^M
>
> 

Re: BUG: corrupted list in freeary

2018-11-30 Thread Dmitry Vyukov
On Thu, Nov 29, 2018 at 9:13 AM, Manfred Spraul
 wrote:
> Hello together,
>
> On 11/27/18 4:52 PM, syzbot wrote:
>
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit:e195ca6cb6f2 Merge branch 'for-linus' of git://git.kernel...
> git tree:   upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=10d3e6a340
>
> From the console output:
>
> 20:36:14 executing program 4:
> semget$private(0x1200, 0x39d0, 0x0)
>
>
> I don't understand the 0x1200.
>
> What does that mean? What is the actual syscall?

Hi Manfred,

The syscall is semget with the first argument 0x1200.


>
> Is 0x39d0 the number of semaphores in the array, i.e. create ~13.000
> semaphores?

If the second argument of 0x39d0 relates to creation of 0x39d0
semaphores, then yes.



> kernel config:  https://syzkaller.appspot.com/x/.config?x=73e2bc0cb6463446
> dashboard link: https://syzkaller.appspot.com/bug?extid=c92d3646e35bc5d1a909
> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+c92d3646e35bc5d1a...@syzkaller.appspotmail.com
>
> input: syz1 as /devices/virtual/input/input670
> input: syz1 as /devices/virtual/input/input671
> list_del corruption. prev->next should be 8881dae2cdb8, but was
> 0010
> [ cut here ]
> kernel BUG at lib/list_debug.c:53!
> invalid opcode:  [#1] PREEMPT SMP KASAN
> CPU: 0 PID: 6194 Comm: syz-executor5 Not tainted 4.20.0-rc3+ #348
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> RIP: 0010:__list_del_entry_valid.cold.1+0x48/0x4a lib/list_debug.c:51
> Code: d0 60 88 e8 b2 31 d2 fd 0f 0b 48 89 de 48 c7 c7 00 d2 60 88 e8 a1 31
> d2 fd 0f 0b 48 89 de 48 c7 c7 a0 d1 60 88 e8 90 31 d2 fd <0f> 0b 48 89 d9 48
> c7 c7 60 d2 60 88 e8 7f 31 d2 fd 0f 0b 48 89 f1
> RSP: 0018:8881848fee80 EFLAGS: 00010286
> RAX: 0054 RBX: 8881dae2cdb8 RCX: 
> RDX:  RSI: 8165eaf5 RDI: 0005
> RBP: 8881848fee98 R08: 8881848f26c0 R09: 0006
> R10:  R11: 8881848f26c0 R12: 8881c3173a00
> R13: 8881be118118 R14: 8881848ff280 R15: dc00
> FS:  020b2940() GS:8881dae0() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 00625208 CR3: 0001c10d3000 CR4: 001406f0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> Call Trace:
>  __list_del_entry include/linux/list.h:117 [inline]
>  list_del include/linux/list.h:125 [inline]
>  unlink_queue ipc/sem.c:786 [inline]
>
> Unlink_queue means transfer all waiting threads to the wake-q.
>
> There are 2*(1+) linked lists in an array.
>
> And this fails, because one linked list contains 0x10 instead of a real
> pointer.
>
> I could not find any semop() in the log --> all lists must be empty.
>
> Actually, the lists were initialized in newary(), and then never touched.
>
>  freeary+0xbd1/0x1a40 ipc/sem.c:1160
>
> Free a semaphore array
>
>  free_ipcs+0x9f/0x1c0 ipc/namespace.c:112
>  sem_exit_ns+0x20/0x40 ipc/sem.c:237
>  free_ipc_ns ipc/namespace.c:120 [inline]
>
> Free all ipc ids in the name space
>
>  put_ipc_ns+0x66/0x180 ipc/namespace.c:152
>  free_nsproxy+0xcf/0x220 kernel/nsproxy.c:180
>
> Free the name space
>
>  switch_task_namespaces+0xb3/0xd0 kernel/nsproxy.c:229
>  exit_task_namespaces+0x17/0x20 kernel/nsproxy.c:234
>  do_exit+0x1ad1/0x26d0 kernel/exit.c:866
>  do_group_exit+0x177/0x440 kernel/exit.c:970
>  get_signal+0x8b0/0x1980 kernel/signal.c:2517
>  do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816
>  exit_to_usermode_loop+0x2e5/0x380 arch/x86/entry/common.c:162
>  prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
>  syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
>  do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x410fa0
>
> This is time code 604.599748 in the console output:
>
> [  604.599748] RIP: 0033:0x410fa0
>
>
> Questions:
>
> 1) What is this?
>
> [  600.924691]  entry_SYSCALL_64_after_hwframe+0x49/0xbe^M
> [  600.929872] RIP: 0033:0x7f3e597d0120^M
> [  600.933576] Code: Bad RIP value.^M
> [  600.936920] RSP: 002b:7ffc2d83e008 EFLAGS: 0246 ORIG_RAX:
> 0002^M
> [  600.944608] RAX: ffda RBX: 55ca2995b436 RCX:
> 7f3e597d0120^M
> [  600.951856] RDX: 7ffc2d83e244 RSI: 0008 RDI:
> 7ffc2d83e220^M
> [  600.959107] RBP: 55ca2995b1e0 R08:  R09:
> 55ca2995b099^M
> [  600.966355] R10:  R11: 0246 R12:
> 0001^M
> [  600.973628] R13: 55ca2995b090 R14: 55ca2995b190 R15:
> 7ffc2d83e220^M
>
> 

Re: [PATCH 2/2] arm64: dts: marvell: armada-37xx: Enable emmc on espressobin

2018-11-30 Thread Gregory CLEMENT
Hi Ding,
 
 On ven., oct. 26 2018, Ding Tao  wrote:

> The ESPRESSObin board has an eMMC interface available on U11, let's
> enable it.


Applied on mvebu/dt64 for now, but what happens if U11 is not populated?

Thanks,

Gregory

>
> Signed-off-by: Ding Tao 
> ---
>  .../dts/marvell/armada-3720-espressobin.dts| 18 ++
>  1 file changed, 18 insertions(+)
>
> diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts 
> b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts
> index 3ab25ad402b9..ee05aabbba88 100644
> --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts
> +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts
> @@ -60,6 +60,24 @@
>   cd-gpios = < 3 GPIO_ACTIVE_LOW>;
>   marvell,pad-type = "sd";
>   vqmmc-supply = <_sd_reg1>;
> +
> + pinctrl-names = "default";
> + pinctrl-0 = <_pins>;
> + status = "okay";
> +};
> +
> +/* U11 */
> + {
> + non-removable;
> + bus-width = <8>;
> + mmc-ddr-1_8v;
> + mmc-hs400-1_8v;
> + marvell,xenon-emmc;
> + marvell,xenon-tun-count = <9>;
> + marvell,pad-type = "fixed-1-8v";
> +
> + pinctrl-names = "default";
> + pinctrl-0 = <_pins>;
>   status = "okay";
>  };
>  
> -- 
> 2.17.1
>
>
>

-- 
Gregory Clement, Bootlin
Embedded Linux and Kernel engineering
http://bootlin.com


Re: [PATCH 2/2] arm64: dts: marvell: armada-37xx: Enable emmc on espressobin

2018-11-30 Thread Gregory CLEMENT
Hi Ding,
 
 On ven., oct. 26 2018, Ding Tao  wrote:

> The ESPRESSObin board has a emmc interface available on U11, let's
> enable it.


Applied on mvebu/dt64 for now, but what happens if U11 is not populated?

Thanks,

Gregory

>
> Signed-off-by: Ding Tao 
> ---
>  .../dts/marvell/armada-3720-espressobin.dts| 18 ++
>  1 file changed, 18 insertions(+)
>
> diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts 
> b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts
> index 3ab25ad402b9..ee05aabbba88 100644
> --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts
> +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts
> @@ -60,6 +60,24 @@
>   cd-gpios = < 3 GPIO_ACTIVE_LOW>;
>   marvell,pad-type = "sd";
>   vqmmc-supply = <_sd_reg1>;
> +
> + pinctrl-names = "default";
> + pinctrl-0 = <_pins>;
> + status = "okay";
> +};
> +
> +/* U11 */
> + {
> + non-removable;
> + bus-width = <8>;
> + mmc-ddr-1_8v;
> + mmc-hs400-1_8v;
> + marvell,xenon-emmc;
> + marvell,xenon-tun-count = <9>;
> + marvell,pad-type = "fixed-1-8v";
> +
> + pinctrl-names = "default";
> + pinctrl-0 = <_pins>;
>   status = "okay";
>  };
>  
> -- 
> 2.17.1
>
>
>

-- 
Gregory Clement, Bootlin
Embedded Linux and Kernel engineering
http://bootlin.com


Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation

2018-11-30 Thread Peter Zijlstra
On Fri, Nov 30, 2018 at 03:27:02PM +0000, StDenis, Tom wrote:
> I can apply the patch you attached but the inline patches just don't 
> apply.  Could be my imap client (thunderbird) mangled them but I've 
> applied patches this way before.  could you attach them instead please?

That's arguably a bug in Thunderbird; but there's already upstream quilt
changes (that I used to have before Debian helpfully updated my quilt
package) that should remedy this as well.

It seems some MUA's get horribly confused about the
"Content-Disposition: inline; filename=$patch" header quilt-mail adds.

I've once again removed that from my local copy; hopefully the next time
Debian updates that package it will actually be with a new enough
version to also include those changes :/


Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation

2018-11-30 Thread Peter Zijlstra
On Fri, Nov 30, 2018 at 03:27:02PM +0000, StDenis, Tom wrote:
> I can apply the patch you attached but the inline patches just don't 
> apply.  Could be my imap client (thunderbird) mangled them but I've 
> applied patches this way before.  could you attach them instead please?

That's arguably a bug in Thunderbird; but there's already upstream quilt
changes (that I used to have before Debian helpfully updated my quilt
package) that should remedy this as well.

It seems some MUA's get horribly confused about the
"Content-Disposition: inline; filename=$patch" header quilt-mail adds.

I've once again removed that from my local copy; hopefully the next time
Debian updates that package it will actually be with a new enough
version to also include those changes :/


Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation

2018-11-30 Thread StDenis, Tom
On 2018-11-30 12:48 p.m., Peter Zijlstra wrote:
> On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote:
>> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote:
> 
>>> I pushed them out to:
>>>
>>> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm
>>>
>>> I hope that works; I'm out for a few hours, but should check on email
>>> again tonight.
>>>
>>
>> NAK I get a failure in TTM on init with your x86/mm branch (see attached
>> dmesg).
> 
> *sigh*, it's been one of those days. Ok, I'll go write some cpa
> selftests or something so that I have code that uses this stuff.
> 

Well, the ttm crash could be completely unrelated; the problem is that your
x86/mm branch is not up to date with master and doesn't include drm fixes.

Tom


Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation

2018-11-30 Thread StDenis, Tom
On 2018-11-30 12:48 p.m., Peter Zijlstra wrote:
> On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote:
>> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote:
> 
>>> I pushed them out to:
>>>
>>> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm
>>>
>>> I hope that works; I'm out for a few hours, but should check on email
>>> again tonight.
>>>
>>
>> NAK I get a failure in TTM on init with your x86/mm branch (see attached
>> dmesg).
> 
> *sigh*, it's been one of those days. Ok, I'll go write some cpa
> selftests or something so that I have code that uses this stuff.
> 

Well, the ttm crash could be completely unrelated; the problem is that your
x86/mm branch is not up to date with master and doesn't include drm fixes.

Tom


Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation

2018-11-30 Thread Peter Zijlstra
On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote:
> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote:

> > I pushed them out to:
> > 
> >git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm
> > 
> > I hope that works; I'm out for a few hours, but should check on email
> > again tonight.
> > 
> 
> NAK I get a failure in TTM on init with your x86/mm branch (see attached 
> dmesg).

*sigh*, it's been one of those days. Ok, I'll go write some cpa
selftests or something so that I have code that uses this stuff.


Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation

2018-11-30 Thread Peter Zijlstra
On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote:
> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote:

> > I pushed them out to:
> > 
> >git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm
> > 
> > I hope that works; I'm out for a few hours, but should check on email
> > again tonight.
> > 
> 
> NAK I get a failure in TTM on init with your x86/mm branch (see attached 
> dmesg).

*sigh*, it's been one of those days. Ok, I'll go write some cpa
selftests or something so that I have code that uses this stuff.


Re: [PATCH 2/4] x86/mm/cpa: Fix cpa_flush_array()

2018-11-30 Thread Dave Hansen
> +void __cpa_flush_array(void *data)
>  {
> - unsigned int i, level;
> + struct cpa_data *cpa = data;
> + unsigned int i;
>  
> - if (__cpa_flush_range(baddr, numpages, cache))
> + for (i = 0; i < cpa->numpages; i++)
> + __flush_tlb_one_kernel(__cpa_addr(cpa, i));
> +}

While I guess it won't _hurt_ anything, we do have cases where
__cpa_addr() can return 0.  So, won't this be flushing virtual address
0x0 unnecessarily for those?


Re: [PATCH 2/4] x86/mm/cpa: Fix cpa_flush_array()

2018-11-30 Thread Dave Hansen
> +void __cpa_flush_array(void *data)
>  {
> - unsigned int i, level;
> + struct cpa_data *cpa = data;
> + unsigned int i;
>  
> - if (__cpa_flush_range(baddr, numpages, cache))
> + for (i = 0; i < cpa->numpages; i++)
> + __flush_tlb_one_kernel(__cpa_addr(cpa, i));
> +}

While I guess it won't _hurt_ anything, we do have cases where
__cpa_addr() can return 0.  So, won't this be flushing virtual address
0x0 unnecessarily for those?


Re: [PATCH 0/2] [GIT PULL] tracing: More fixes for 4.20

2018-11-30 Thread Linus Torvalds
On Thu, Nov 29, 2018 at 7:19 PM Steven Rostedt  wrote:
>
> Note, this is on top of a previous git pull that I have submitted:
>
>   http://lkml.kernel.org/r/20181127224031.76681...@vmware.local.home

Hmm.

I had dismissed that, because the patch descriptors for that series
had had "for-next" in them.

https://lore.kernel.org/lkml/20181122002801.501220...@goodmis.org/

so I dismissed that pull request entirely as being not for this
release entirely.

I went back and merged things, but in general, please try to avoid
confusing me. I'm easily confused when I get mixed messages about the
patches and the pull requests, and will then generally default to
"ignore, this is informational".

  Linus


Re: [PATCH 0/2] [GIT PULL] tracing: More fixes for 4.20

2018-11-30 Thread Linus Torvalds
On Thu, Nov 29, 2018 at 7:19 PM Steven Rostedt  wrote:
>
> Note, this is on top of a previous git pull that I have submitted:
>
>   http://lkml.kernel.org/r/20181127224031.76681...@vmware.local.home

Hmm.

I had dismissed that, because the patch descriptors for that series
had had "for-next" in them.

https://lore.kernel.org/lkml/20181122002801.501220...@goodmis.org/

so I dismissed that pull request entirely as being not for this
release entirely.

I went back and merged things, but in general, please try to avoid
confusing me. I'm easily confused when I get mixed messages about the
patches and the pull requests, and will then generally default to
"ignore, this is informational".

  Linus


[PATCH 1/1] selftests: watchdog: Add gettimeleft command line arg

2018-11-30 Thread Jerry Hoemann
Add command line argument to call and display the results
of ioctl WDIOC_GETTIMELEFT.

Signed-off-by: Jerry Hoemann 
---
 tools/testing/selftests/watchdog/watchdog-test.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/watchdog/watchdog-test.c 
b/tools/testing/selftests/watchdog/watchdog-test.c
index c6bd9a6..dac907a 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -19,7 +19,7 @@
 
 int fd;
 const char v = 'V';
-static const char sopts[] = "bdehp:t:Tn:N";
+static const char sopts[] = "bdehp:t:Tn:NL";
 static const struct option lopts[] = {
{"bootstatus",  no_argument, NULL, 'b'},
{"disable", no_argument, NULL, 'd'},
@@ -30,6 +30,7 @@
{"gettimeout",  no_argument, NULL, 'T'},
{"pretimeout",required_argument, NULL, 'n'},
{"getpretimeout",   no_argument, NULL, 'N'},
+   {"gettimeleft", no_argument, NULL, 'L'},
{NULL,  no_argument, NULL, 0x0}
 };
 
@@ -77,6 +78,7 @@ static void usage(char *progname)
printf(" -T, --gettimeoutGet the timeout\n");
printf(" -n, --pretimeout=T  Set the pretimeout to T seconds\n");
printf(" -N, --getpretimeout Get the pretimeout\n");
+   printf(" -L, --gettimeleft   Get the time left until timer experies\n");
printf("\n");
printf("Parameters are parsed left-to-right in real-time.\n");
printf("Example: %s -d -t 10 -p 5 -e\n", progname);
@@ -180,6 +182,15 @@ int main(int argc, char *argv[])
else
printf("WDIOC_GETPRETIMEOUT error '%s'\n", 
strerror(errno));
break;
+   case 'L':
+   oneshot = 1;
+   ret = ioctl(fd, WDIOC_GETTIMELEFT, &flags);
+   if (!ret)
+   printf("WDIOC_GETTIMELEFT returns %u 
seconds.\n", flags);
+   else
+   printf("WDIOC_GETTIMELEFT error '%s'\n", 
strerror(errno));
+   break;
+
default:
usage(argv[0]);
goto end;
-- 
1.8.3.1



[PATCH 1/1] selftests: watchdog: Add gettimeleft command line arg

2018-11-30 Thread Jerry Hoemann
Add command line argument to call and display the results
of ioctl WDIOC_GETTIMELEFT.

Signed-off-by: Jerry Hoemann 
---
 tools/testing/selftests/watchdog/watchdog-test.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/watchdog/watchdog-test.c 
b/tools/testing/selftests/watchdog/watchdog-test.c
index c6bd9a6..dac907a 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -19,7 +19,7 @@
 
 int fd;
 const char v = 'V';
-static const char sopts[] = "bdehp:t:Tn:N";
+static const char sopts[] = "bdehp:t:Tn:NL";
 static const struct option lopts[] = {
{"bootstatus",  no_argument, NULL, 'b'},
{"disable", no_argument, NULL, 'd'},
@@ -30,6 +30,7 @@
{"gettimeout",  no_argument, NULL, 'T'},
{"pretimeout",required_argument, NULL, 'n'},
{"getpretimeout",   no_argument, NULL, 'N'},
+   {"gettimeleft", no_argument, NULL, 'L'},
{NULL,  no_argument, NULL, 0x0}
 };
 
@@ -77,6 +78,7 @@ static void usage(char *progname)
printf(" -T, --gettimeoutGet the timeout\n");
printf(" -n, --pretimeout=T  Set the pretimeout to T seconds\n");
printf(" -N, --getpretimeout Get the pretimeout\n");
+   printf(" -L, --gettimeleft   Get the time left until timer experies\n");
printf("\n");
printf("Parameters are parsed left-to-right in real-time.\n");
printf("Example: %s -d -t 10 -p 5 -e\n", progname);
@@ -180,6 +182,15 @@ int main(int argc, char *argv[])
else
printf("WDIOC_GETPRETIMEOUT error '%s'\n", 
strerror(errno));
break;
+   case 'L':
+   oneshot = 1;
+   ret = ioctl(fd, WDIOC_GETTIMELEFT, &flags);
+   if (!ret)
+   printf("WDIOC_GETTIMELEFT returns %u 
seconds.\n", flags);
+   else
+   printf("WDIOC_GETTIMELEFT error '%s'\n", 
strerror(errno));
+   break;
+
default:
usage(argv[0]);
goto end;
-- 
1.8.3.1



Re: [PATCH v12 1/5] x86/boot: Add get_acpi_rsdp() to parse RSDP in cmdline from KEXEC

2018-11-30 Thread Masayoshi Mizuma
On Fri, Nov 30, 2018 at 10:43:47AM +0800, Chao Fan wrote:
...
> >]$ make arch/x86/boot/compressed/misc.o
> >  CALLscripts/checksyscalls.sh
> >  DESCEND  objtool
> >  CC  arch/x86/boot/compressed/misc.o
> >ld: -r and -pie may not be used together
> >make[1]: *** [scripts/Makefile.build:294: arch/x86/boot/compressed/misc.o] 
> >Error 1
> >make: *** [Makefile:1715: arch/x86/boot/compressed/misc.o] Error 2
> >]$
> 
> Hi Masa,
> 
> So many thanks for your test.
> 
> Could you give me more details about this error? More error message.
> Just on the first commit or the whole PATCHSET?
> Cause I didn't get error both on this commit and on the whole PATCHSET.

I built your whole patchset and got the error.
The error depends on CONFIG_MODVERSIONS.
If CONFIG_MODVERSIONS=y, you will get the build error.

Thanks,
Masa


Re: [PATCH v12 1/5] x86/boot: Add get_acpi_rsdp() to parse RSDP in cmdline from KEXEC

2018-11-30 Thread Masayoshi Mizuma
On Fri, Nov 30, 2018 at 10:43:47AM +0800, Chao Fan wrote:
...
> >]$ make arch/x86/boot/compressed/misc.o
> >  CALLscripts/checksyscalls.sh
> >  DESCEND  objtool
> >  CC  arch/x86/boot/compressed/misc.o
> >ld: -r and -pie may not be used together
> >make[1]: *** [scripts/Makefile.build:294: arch/x86/boot/compressed/misc.o] 
> >Error 1
> >make: *** [Makefile:1715: arch/x86/boot/compressed/misc.o] Error 2
> >]$
> 
> Hi Masa,
> 
> So many thanks for your test.
> 
> Could you give me more details about this error? More error message.
> Just on the first commit or the whole PATCHSET?
> Cause I didn't get error both on this commit and on the whole PATCHSET.

I built your whole patchset and got the error.
The error depends on CONFIG_MODVERSIONS.
If CONFIG_MODVERSIONS=y, you will get the build error.

Thanks,
Masa


[PATCH v2 0/2] arm64: Only call into preempt_schedule() if need_resched()

2018-11-30 Thread Will Deacon
Hi all,

This is version two of the patches I originally posted here:

  http://lkml.kernel.org/r/1543347902-21170-1-git-send-email-will.dea...@arm.com

The only change since v1 is that  __preempt_count_dec_and_test() now
reloads the need_resched flag if it initially saw that it was set. This
resolves the issue spotted by Peter, where an IRQ coming in during the
decrement can cause a reschedule to be missed.

Feedback welcome.

Will

--->8

Will Deacon (2):
  preempt: Move PREEMPT_NEED_RESCHED definition into arch code
  arm64: preempt: Provide our own implementation of asm/preempt.h

 arch/arm64/include/asm/Kbuild|  1 -
 arch/arm64/include/asm/preempt.h | 88 
 arch/arm64/include/asm/thread_info.h | 13 +-
 arch/s390/include/asm/preempt.h  |  2 +
 arch/x86/include/asm/preempt.h   |  3 ++
 include/linux/preempt.h  |  3 --
 6 files changed, 105 insertions(+), 5 deletions(-)
 create mode 100644 arch/arm64/include/asm/preempt.h

-- 
2.1.4



[PATCH v2 1/2] preempt: Move PREEMPT_NEED_RESCHED definition into arch code

2018-11-30 Thread Will Deacon
PREEMPT_NEED_RESCHED is never used directly, so move it into the arch
code where it can potentially be implemented using either a different
bit in the preempt count or as an entirely separate entity.

Cc: Robert Love 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Martin Schwidefsky 
Signed-off-by: Will Deacon 
---
 arch/s390/include/asm/preempt.h | 2 ++
 arch/x86/include/asm/preempt.h  | 3 +++
 include/linux/preempt.h | 3 ---
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index 23a14d187fb1..b5ea9e14c017 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -8,6 +8,8 @@
 
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
 
+/* We use the MSB mostly because its available */
+#define PREEMPT_NEED_RESCHED   0x80000000
 #define PREEMPT_ENABLED(0 + PREEMPT_NEED_RESCHED)
 
 static inline int preempt_count(void)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 90cb2f36c042..99a7fa9ab0a3 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -8,6 +8,9 @@
 
 DECLARE_PER_CPU(int, __preempt_count);
 
+/* We use the MSB mostly because its available */
+#define PREEMPT_NEED_RESCHED   0x80000000
+
 /*
  * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
  * that a decrement hitting 0 means we can and should reschedule.
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index c01813c3fbe9..dd92b1a93919 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -53,9 +53,6 @@
 
 #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
 
-/* We use the MSB mostly because its available */
-#define PREEMPT_NEED_RESCHED   0x80000000
-
 #define PREEMPT_DISABLED   (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
 
 /*
-- 
2.1.4



[PATCH v2 2/2] arm64: preempt: Provide our own implementation of asm/preempt.h

2018-11-30 Thread Will Deacon
The asm-generic/preempt.h implementation doesn't make use of the
PREEMPT_NEED_RESCHED flag, since this can interact badly with load/store
architectures which rely on the preempt_count word being unchanged across
an interrupt.

However, since we're a 64-bit architecture and the preempt count is
only 32 bits wide, we can simply pack it next to the resched flag and
load the whole thing in one go, so that a dec-and-test operation doesn't
need to load twice.

Signed-off-by: Will Deacon 
---
 arch/arm64/include/asm/Kbuild|  1 -
 arch/arm64/include/asm/preempt.h | 88 
 arch/arm64/include/asm/thread_info.h | 13 +-
 3 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/include/asm/preempt.h

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 6cd5d77b6b44..33498f900390 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += msi.h
-generic-y += preempt.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += rwsem.h
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
new file mode 100644
index 000000000000..f1c1398cf065
--- /dev/null
+++ b/arch/arm64/include/asm/preempt.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include 
+
+#define PREEMPT_NEED_RESCHED   BIT(32)
+#define PREEMPT_ENABLED(PREEMPT_NEED_RESCHED)
+
+static inline int preempt_count(void)
+{
+   return READ_ONCE(current_thread_info()->preempt.count);
+}
+
+static inline void preempt_count_set(u64 pc)
+{
+   /* Preserve existing value of PREEMPT_NEED_RESCHED */
+   WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+#define init_task_preempt_count(p) do { \
+   task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+   task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+   current_thread_info()->preempt.need_resched = 0;
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+   current_thread_info()->preempt.need_resched = 1;
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+   return !current_thread_info()->preempt.need_resched;
+}
+
+static inline void __preempt_count_add(int val)
+{
+   u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+   pc += val;
+   WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline void __preempt_count_sub(int val)
+{
+   u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+   pc -= val;
+   WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+   struct thread_info *ti = current_thread_info();
+   u64 pc = READ_ONCE(ti->preempt_count);
+
+   WRITE_ONCE(ti->preempt.count, --pc);
+
+   /*
+* If we wrote back all zeroes, then we're preemptible and in
+* need of a reschedule. Otherwise, we need to reload the
+* preempt_count in case the need_resched flag was cleared by an
+* interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
+* pair.
+*/
+   return !pc || !READ_ONCE(ti->preempt_count);
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+   u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+   return pc == preempt_offset;
+}
+
+#ifdef CONFIG_PREEMPT
+void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+void preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+#endif /* CONFIG_PREEMPT */
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/arch/arm64/include/asm/thread_info.h 
b/arch/arm64/include/asm/thread_info.h
index cb2c10a8f0a8..bbca68b54732 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -42,7 +42,18 @@ struct thread_info {
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0;  /* saved TTBR0_EL1 */
 #endif
-   int preempt_count;  /* 0 => preemptable, <0 => bug 
*/
+   union {
+   u64 preempt_count;  /* 0 => preemptible, <0 => bug 
*/
+   struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   u32 need_resched;
+   u32 count;
+#else
+   u32 count;
+   u32 need_resched;
+#endif
+   } preempt;
+   };
 };
 
 #define thread_saved_pc(tsk)   \
-- 
2.1.4



[PATCH v2 0/2] arm64: Only call into preempt_schedule() if need_resched()

2018-11-30 Thread Will Deacon
Hi all,

This is version two of the patches I originally posted here:

  http://lkml.kernel.org/r/1543347902-21170-1-git-send-email-will.dea...@arm.com

The only change since v1 is that  __preempt_count_dec_and_test() now
reloads the need_resched flag if it initially saw that it was set. This
resolves the issue spotted by Peter, where an IRQ coming in during the
decrement can cause a reschedule to be missed.

Feedback welcome.

Will

--->8

Will Deacon (2):
  preempt: Move PREEMPT_NEED_RESCHED definition into arch code
  arm64: preempt: Provide our own implementation of asm/preempt.h

 arch/arm64/include/asm/Kbuild|  1 -
 arch/arm64/include/asm/preempt.h | 88 
 arch/arm64/include/asm/thread_info.h | 13 +-
 arch/s390/include/asm/preempt.h  |  2 +
 arch/x86/include/asm/preempt.h   |  3 ++
 include/linux/preempt.h  |  3 --
 6 files changed, 105 insertions(+), 5 deletions(-)
 create mode 100644 arch/arm64/include/asm/preempt.h

-- 
2.1.4



[PATCH v2 1/2] preempt: Move PREEMPT_NEED_RESCHED definition into arch code

2018-11-30 Thread Will Deacon
PREEMPT_NEED_RESCHED is never used directly, so move it into the arch
code where it can potentially be implemented using either a different
bit in the preempt count or as an entirely separate entity.

Cc: Robert Love 
Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Martin Schwidefsky 
Signed-off-by: Will Deacon 
---
 arch/s390/include/asm/preempt.h | 2 ++
 arch/x86/include/asm/preempt.h  | 3 +++
 include/linux/preempt.h | 3 ---
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index 23a14d187fb1..b5ea9e14c017 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -8,6 +8,8 @@
 
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
 
+/* We use the MSB mostly because its available */
+#define PREEMPT_NEED_RESCHED   0x80000000
 #define PREEMPT_ENABLED(0 + PREEMPT_NEED_RESCHED)
 
 static inline int preempt_count(void)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 90cb2f36c042..99a7fa9ab0a3 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -8,6 +8,9 @@
 
 DECLARE_PER_CPU(int, __preempt_count);
 
+/* We use the MSB mostly because its available */
+#define PREEMPT_NEED_RESCHED   0x80000000
+
 /*
  * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
  * that a decrement hitting 0 means we can and should reschedule.
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index c01813c3fbe9..dd92b1a93919 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -53,9 +53,6 @@
 
 #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
 
-/* We use the MSB mostly because its available */
-#define PREEMPT_NEED_RESCHED   0x80000000
-
 #define PREEMPT_DISABLED   (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
 
 /*
-- 
2.1.4



[PATCH v2 2/2] arm64: preempt: Provide our own implementation of asm/preempt.h

2018-11-30 Thread Will Deacon
The asm-generic/preempt.h implementation doesn't make use of the
PREEMPT_NEED_RESCHED flag, since this can interact badly with load/store
architectures which rely on the preempt_count word being unchanged across
an interrupt.

However, since we're a 64-bit architecture and the preempt count is
only 32 bits wide, we can simply pack it next to the resched flag and
load the whole thing in one go, so that a dec-and-test operation doesn't
need to load twice.

Signed-off-by: Will Deacon 
---
 arch/arm64/include/asm/Kbuild|  1 -
 arch/arm64/include/asm/preempt.h | 88 
 arch/arm64/include/asm/thread_info.h | 13 +-
 3 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/include/asm/preempt.h

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 6cd5d77b6b44..33498f900390 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += msi.h
-generic-y += preempt.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += rwsem.h
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
new file mode 100644
index 000000000000..f1c1398cf065
--- /dev/null
+++ b/arch/arm64/include/asm/preempt.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include 
+
+#define PREEMPT_NEED_RESCHED   BIT(32)
+#define PREEMPT_ENABLED(PREEMPT_NEED_RESCHED)
+
+static inline int preempt_count(void)
+{
+   return READ_ONCE(current_thread_info()->preempt.count);
+}
+
+static inline void preempt_count_set(u64 pc)
+{
+   /* Preserve existing value of PREEMPT_NEED_RESCHED */
+   WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+#define init_task_preempt_count(p) do { \
+   task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+   task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+   current_thread_info()->preempt.need_resched = 0;
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+   current_thread_info()->preempt.need_resched = 1;
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+   return !current_thread_info()->preempt.need_resched;
+}
+
+static inline void __preempt_count_add(int val)
+{
+   u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+   pc += val;
+   WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline void __preempt_count_sub(int val)
+{
+   u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+   pc -= val;
+   WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+   struct thread_info *ti = current_thread_info();
+   u64 pc = READ_ONCE(ti->preempt_count);
+
+   WRITE_ONCE(ti->preempt.count, --pc);
+
+   /*
+* If we wrote back all zeroes, then we're preemptible and in
+* need of a reschedule. Otherwise, we need to reload the
+* preempt_count in case the need_resched flag was cleared by an
+* interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
+* pair.
+*/
+   return !pc || !READ_ONCE(ti->preempt_count);
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+   u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+   return pc == preempt_offset;
+}
+
+#ifdef CONFIG_PREEMPT
+void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+void preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+#endif /* CONFIG_PREEMPT */
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/arch/arm64/include/asm/thread_info.h 
b/arch/arm64/include/asm/thread_info.h
index cb2c10a8f0a8..bbca68b54732 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -42,7 +42,18 @@ struct thread_info {
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0;  /* saved TTBR0_EL1 */
 #endif
-   int preempt_count;  /* 0 => preemptable, <0 => bug 
*/
+   union {
+   u64 preempt_count;  /* 0 => preemptible, <0 => bug 
*/
+   struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   u32 need_resched;
+   u32 count;
+#else
+   u32 count;
+   u32 need_resched;
+#endif
+   } preempt;
+   };
 };
 
 #define thread_saved_pc(tsk)   \
-- 
2.1.4



<    1   2   3   4   5   6   7   8   9   10   >