[PATCH 14/22] perf bpf: Use ERR_CAST instead of ERR_PTR(PTR_ERR())
From: Wen Yang Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...)). This makes it more readable and also fixes this warning detected by err_cast.cocci: tools/perf/util/bpf-loader.c:1606:11-18: WARNING: ERR_CAST can be used with op Signed-off-by: Wen Yang Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Julia Lawall Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wen Yang Cc: zhong.weid...@zte.com.cn Link: http://lkml.kernel.org/r/20181127090610.28488-1-wen.yan...@zte.com.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index f9ae1a993806..9a280647d829 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1603,7 +1603,7 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha op = bpf_map__add_newop(map, NULL); if (IS_ERR(op)) - return ERR_PTR(PTR_ERR(op)); + return ERR_CAST(op); op->op_type = BPF_MAP_OP_SET_EVSEL; op->v.evsel = evsel; } -- 2.19.1
[PATCH 21/22] perf beauty mmap_flags: Check if the arch has a mmap.h file
From: Arnaldo Carvalho de Melo If not, then just use what is in asm-generic. This fixes the build for my sh4, m68k and riscv64 perf test build containers that were failing due to 80ee5668b8a7 ("perf beauty: Add a generator for MAP_ mmap's flag constants"), that were not covered in the cset introducing those tools/arch/*/include/uapi/asm/mman.h files. f3539c12d819 ("tools include: Add uapi mman.h for each architecture") Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 80ee5668b8a7 ("perf beauty: Add a generator for MAP_ mmap's flag constants") Link: https://lkml.kernel.org/n/tip-rpy9t2e0wxpnum1yvxhre...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- tools/perf/trace/beauty/mmap_flags.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 67e9adbe6ee8..bfdaefd500ab 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -474,7 +474,7 @@ $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_t mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh -$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl) +$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl) $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ mount_flags_array := $(beauty_outdir)/mount_flags_array.c diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh index 22c3fdca8975..cd41023107d7 100755 --- a/tools/perf/trace/beauty/mmap_flags.sh +++ b/tools/perf/trace/beauty/mmap_flags.sh @@ -20,12 +20,12 @@ egrep -q $regex ${arch_mman} && \ (egrep $regex ${arch_mman} | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") -egrep 
-q '#[[:space:]]*include[[:space:]]+.*' ${arch_mman} && +[ ! -f ${arch_mman} || egrep -q '#[[:space:]]*include[[:space:]]+.*' ${arch_mman} ] && (egrep $regex ${header_dir}/mman.h | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") -- 2.19.1
[PATCH 09/22] perf script: Use fallbacks for branch stacks
From: Adrian Hunter Branch stacks do not necessarily have the same cpumode as the 'ip'. Use the fallback functions in those cases. This patch depends on patch "perf tools: Add fallback functions for cases where cpumode is insufficient". Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: David S. Miller Cc: Jiri Olsa Cc: Leo Yan Cc: Mathieu Poirier Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/20181106210712.12098-4-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 12 ++-- .../util/scripting-engines/trace-event-python.c | 16 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 04913136bac9..3ea98fe72f7f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -724,8 +724,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, if (PRINT_FIELD(DSO)) { memset(, 0, sizeof(alf)); memset(, 0, sizeof(alt)); - thread__find_map(thread, sample->cpumode, from, ); - thread__find_map(thread, sample->cpumode, to, ); + thread__find_map_fb(thread, sample->cpumode, from, ); + thread__find_map_fb(thread, sample->cpumode, to, ); } printed += fprintf(fp, " 0x%"PRIx64, from); @@ -771,8 +771,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, from = br->entries[i].from; to = br->entries[i].to; - thread__find_symbol(thread, sample->cpumode, from, ); - thread__find_symbol(thread, sample->cpumode, to, ); + thread__find_symbol_fb(thread, sample->cpumode, from, ); + thread__find_symbol_fb(thread, sample->cpumode, to, ); printed += symbol__fprintf_symname_offs(alf.sym, , fp); if (PRINT_FIELD(DSO)) { @@ -816,11 +816,11 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, from = br->entries[i].from; to = br->entries[i].to; - if (thread__find_map(thread, sample->cpumode, from, ) && + if (thread__find_map_fb(thread, sample->cpumode, from, ) && !alf.map->dso->adjust_symbols) from = 
map__map_ip(alf.map, from); - if (thread__find_map(thread, sample->cpumode, to, ) && + if (thread__find_map_fb(thread, sample->cpumode, to, ) && !alt.map->dso->adjust_symbols) to = map__map_ip(alt.map, to); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 69aa93d4ee99..0c4b050f6fc2 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -494,14 +494,14 @@ static PyObject *python_process_brstack(struct perf_sample *sample, pydict_set_item_string_decref(pyelem, "cycles", PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); - thread__find_map(thread, sample->cpumode, -br->entries[i].from, ); + thread__find_map_fb(thread, sample->cpumode, + br->entries[i].from, ); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "from_dsoname", _PyUnicode_FromString(dsoname)); - thread__find_map(thread, sample->cpumode, -br->entries[i].to, ); + thread__find_map_fb(thread, sample->cpumode, + br->entries[i].to, ); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "to_dsoname", _PyUnicode_FromString(dsoname)); @@ -576,14 +576,14 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, if (!pyelem) Py_FatalError("couldn't create Python dictionary"); - thread__find_symbol(thread, sample->cpumode, - br->entries[i].from, ); + thread__find_symbol_fb(thread, sample->cpumode, + br->entries[i].from, ); get_symoff(al.sym, , true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "from", _PyUnicode_FromString(bf)); - thread__find_symbol(thread, sample->cpumode, - br->entries[i].to, ); + thread__find_symbol_fb(thread, sample->cpumode, +
[PATCH 13/22] tools include: Adopt ERR_CAST() from the kernel err.h header
From: Arnaldo Carvalho de Melo Add ERR_CAST(), so that tools can use it, just like the kernel. This addresses coccinelle checks that are being performed on tools/ in addition to kernel sources, so let's add this to cover that and to get tools code closer to kernel coding standards. This originally was introduced in the kernel headers in this cset: d1bc8e954452 ("Add an ERR_CAST() function to complement ERR_PTR and co.") Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: David Howells Cc: Jiri Olsa Cc: Julia Lawall Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: Wen Yang Cc: zhong.weid...@zte.com.cn Link: https://lkml.kernel.org/n/tip-tlt97p066zyhzqhl5jt86...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/err.h | 13 + 1 file changed, 13 insertions(+) diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h index 094649667bae..2f5a12b88a86 100644 --- a/tools/include/linux/err.h +++ b/tools/include/linux/err.h @@ -59,4 +59,17 @@ static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) else return 0; } + +/** + * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type + * @ptr: The pointer to cast. + * + * Explicitly cast an error-valued pointer to another pointer type in such a + * way as to make it clear that's what's going on. + */ +static inline void * __must_check ERR_CAST(__force const void *ptr) +{ + /* cast away the const */ + return (void *) ptr; +} #endif /* _LINUX_ERR_H */ -- 2.19.1
[PATCH 17/22] tools build feature: Check if libaio is available
From: Alexey Budankov This will be used by 'perf record' to speed up reading the perf ring buffer. Committer testing: $ make -C tools/perf O=/tmp/build/perf make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j8' parallel build Auto-detecting system features: ... dwarf: [ on ] ...dwarf_getlocations: [ on ] ... glibc: [ on ] ... gtk2: [ OFF ] ... libaudit: [ OFF ] ...libbfd: [ OFF ] ...libelf: [ on ] ... libnuma: [ OFF ] ...numa_num_possible_cpus: [ OFF ] ... libperl: [ OFF ] ... libpython: [ OFF ] ... libslang: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ...libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ...libaio: [ on ] $ ls -la /tmp/build/perf/feature/test-libaio.* -rwxrwxr-x. 1 acme acme 18296 Nov 26 08:49 /tmp/build/perf/feature/test-libaio.bin -rw-rw-r--. 1 acme acme 1165 Nov 26 08:49 /tmp/build/perf/feature/test-libaio.d -rw-rw-r--. 1 acme acme 0 Nov 26 08:49 /tmp/build/perf/feature/test-libaio.make.output $ $ grep -i aio /tmp/build/perf/FEATURE-DUMP feature-libaio=1 $ Signed-off-by: Alexey Budankov Tested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5fcda10c-6c63-68df-383a-c6d9e5d1f...@linux.intel.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 6 -- tools/build/feature/Makefile | 6 +- tools/build/feature/test-all.c| 5 + tools/build/feature/test-libaio.c | 16 tools/perf/Makefile.config| 6 ++ tools/perf/Makefile.perf | 7 ++- 6 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 tools/build/feature/test-libaio.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 8a123834a2a3..d47b8f73e2e7 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -70,7 +70,8 @@ FEATURE_TESTS_BASIC := \ sched_getcpu \ sdt\ setns \ -libopencsd +libopencsd \ 
+libaio # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests @@ -116,7 +117,8 @@ FEATURE_DISPLAY ?= \ zlib \ lzma \ get_cpuid \ - bpf + bpf \ + libaio # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 38c22e122cb0..2dbcc0d00f52 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -61,7 +61,8 @@ FILES= \ test-libopencsd.bin \ test-clang.bin\ test-llvm.bin \ - test-llvm-version.bin + test-llvm-version.bin \ + test-libaio.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -297,6 +298,9 @@ $(OUTPUT)test-clang.bin: -include $(OUTPUT)*.d +$(OUTPUT)test-libaio.bin: + $(BUILD) -lrt + ### clean: diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 58f01b950195..20cdaa4fc112 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -174,6 +174,10 @@ # include "test-libopencsd.c" #undef main +#define main main_test_libaio +# include "test-libaio.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -214,6 +218,7 @@ int main(int argc, char *argv[]) main_test_sdt(); main_test_setns(); main_test_libopencsd(); + main_test_libaio(); return 0; } diff --git a/tools/build/feature/test-libaio.c b/tools/build/feature/test-libaio.c new file mode 100644 index ..932133c9a265 --- /dev/null +++ b/tools/build/feature/test-libaio.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + struct aiocb aiocb; + + aiocb.aio_fildes = 0; + aiocb.aio_offset = 0; + aiocb.aio_buf = 0; + aiocb.aio_nbytes = 0; +
[PATCH 19/22] perf record: Enable asynchronous trace writing
From: Alexey Budankov The trace file offset is read once before mmaps iterating loop and written back after all performance data is enqueued for aio writing. The trace file offset is incremented linearly after every successful aio write operation. record__aio_sync() blocks till completion of the started AIO operation and then proceeds. record__aio_mmap_read_sync() implements a barrier for all incomplete aio write requests. Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/ce2d45e9-d236-871c-7c8f-1bed2d37e...@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 5 + tools/perf/builtin-record.c | 218 ++- tools/perf/perf.h| 1 + tools/perf/util/evlist.c | 6 +- tools/perf/util/evlist.h | 2 +- tools/perf/util/mmap.c | 77 +++- tools/perf/util/mmap.h | 14 ++ 7 files changed, 314 insertions(+), 9 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 246dee081efd..7efb4af88a68 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -435,6 +435,11 @@ Specify vmlinux path which has debuginfo. --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. +--aio:: +Enable asynchronous (Posix AIO) trace writing mode. +Asynchronous mode is supported only when linking Perf tool with libc library +providing implementation for Posix AIO API. + --all-kernel:: Configure all used events to run in kernel space. 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 488779bc4c8d..408d6477c960 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -124,6 +124,183 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse return 0; } +#ifdef HAVE_AIO_SUPPORT +static int record__aio_write(struct aiocb *cblock, int trace_fd, + void *buf, size_t size, off_t off) +{ + int rc; + + cblock->aio_fildes = trace_fd; + cblock->aio_buf= buf; + cblock->aio_nbytes = size; + cblock->aio_offset = off; + cblock->aio_sigevent.sigev_notify = SIGEV_NONE; + + do { + rc = aio_write(cblock); + if (rc == 0) { + break; + } else if (errno != EAGAIN) { + cblock->aio_fildes = -1; + pr_err("failed to queue perf data, error: %m\n"); + break; + } + } while (1); + + return rc; +} + +static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) +{ + void *rem_buf; + off_t rem_off; + size_t rem_size; + int rc, aio_errno; + ssize_t aio_ret, written; + + aio_errno = aio_error(cblock); + if (aio_errno == EINPROGRESS) + return 0; + + written = aio_ret = aio_return(cblock); + if (aio_ret < 0) { + if (aio_errno != EINTR) + pr_err("failed to write perf data, error: %m\n"); + written = 0; + } + + rem_size = cblock->aio_nbytes - written; + + if (rem_size == 0) { + cblock->aio_fildes = -1; + /* +* md->refcount is incremented in perf_mmap__push() for +* every enqueued aio write request so decrement it because +* the request is now complete. +*/ + perf_mmap__put(md); + rc = 1; + } else { + /* +* aio write request may require restart with the +* reminder if the kernel didn't write whole +* chunk at once. 
+*/ + rem_off = cblock->aio_offset + written; + rem_buf = (void *)(cblock->aio_buf + written); + record__aio_write(cblock, cblock->aio_fildes, + rem_buf, rem_size, rem_off); + rc = 0; + } + + return rc; +} + +static void record__aio_sync(struct perf_mmap *md) +{ + struct aiocb *cblock = >aio.cblock; + struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ + + do { + if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock)) + return; + + while (aio_suspend((const struct aiocb**), 1, )) { + if (!(errno == EAGAIN || errno == EINTR)) + pr_err("failed to sync perf data, error: %m\n"); + } + } while (1); +} + +static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off) +{ + struct record
[PATCH 17/22] tools build feature: Check if libaio is available
From: Alexey Budankov This will be used by 'perf record' to speed up reading the perf ring buffer. Committer testing: $ make -C tools/perf O=/tmp/build/perf make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j8' parallel build Auto-detecting system features: ... dwarf: [ on ] ...dwarf_getlocations: [ on ] ... glibc: [ on ] ... gtk2: [ OFF ] ... libaudit: [ OFF ] ...libbfd: [ OFF ] ...libelf: [ on ] ... libnuma: [ OFF ] ...numa_num_possible_cpus: [ OFF ] ... libperl: [ OFF ] ... libpython: [ OFF ] ... libslang: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ...libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ...libaio: [ on ] $ ls -la /tmp/build/perf/feature/test-libaio.* -rwxrwxr-x. 1 acme acme 18296 Nov 26 08:49 /tmp/build/perf/feature/test-libaio.bin -rw-rw-r--. 1 acme acme 1165 Nov 26 08:49 /tmp/build/perf/feature/test-libaio.d -rw-rw-r--. 1 acme acme 0 Nov 26 08:49 /tmp/build/perf/feature/test-libaio.make.output $ $ grep -i aio /tmp/build/perf/FEATURE-DUMP feature-libaio=1 $ Signed-off-by: Alexey Budankov Tested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5fcda10c-6c63-68df-383a-c6d9e5d1f...@linux.intel.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 6 -- tools/build/feature/Makefile | 6 +- tools/build/feature/test-all.c| 5 + tools/build/feature/test-libaio.c | 16 tools/perf/Makefile.config| 6 ++ tools/perf/Makefile.perf | 7 ++- 6 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 tools/build/feature/test-libaio.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 8a123834a2a3..d47b8f73e2e7 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -70,7 +70,8 @@ FEATURE_TESTS_BASIC := \ sched_getcpu \ sdt\ setns \ -libopencsd +libopencsd \ 
+libaio # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests @@ -116,7 +117,8 @@ FEATURE_DISPLAY ?= \ zlib \ lzma \ get_cpuid \ - bpf + bpf \ + libaio # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 38c22e122cb0..2dbcc0d00f52 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -61,7 +61,8 @@ FILES= \ test-libopencsd.bin \ test-clang.bin\ test-llvm.bin \ - test-llvm-version.bin + test-llvm-version.bin \ + test-libaio.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -297,6 +298,9 @@ $(OUTPUT)test-clang.bin: -include $(OUTPUT)*.d +$(OUTPUT)test-libaio.bin: + $(BUILD) -lrt + ### clean: diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 58f01b950195..20cdaa4fc112 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -174,6 +174,10 @@ # include "test-libopencsd.c" #undef main +#define main main_test_libaio +# include "test-libaio.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -214,6 +218,7 @@ int main(int argc, char *argv[]) main_test_sdt(); main_test_setns(); main_test_libopencsd(); + main_test_libaio(); return 0; } diff --git a/tools/build/feature/test-libaio.c b/tools/build/feature/test-libaio.c new file mode 100644 index ..932133c9a265 --- /dev/null +++ b/tools/build/feature/test-libaio.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + struct aiocb aiocb; + + aiocb.aio_fildes = 0; + aiocb.aio_offset = 0; + aiocb.aio_buf = 0; + aiocb.aio_nbytes = 0; +
[PATCH 19/22] perf record: Enable asynchronous trace writing
From: Alexey Budankov The trace file offset is read once before mmaps iterating loop and written back after all performance data is enqueued for aio writing. The trace file offset is incremented linearly after every successful aio write operation. record__aio_sync() blocks till completion of the started AIO operation and then proceeds. record__aio_mmap_read_sync() implements a barrier for all incomplete aio write requests. Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/ce2d45e9-d236-871c-7c8f-1bed2d37e...@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 5 + tools/perf/builtin-record.c | 218 ++- tools/perf/perf.h| 1 + tools/perf/util/evlist.c | 6 +- tools/perf/util/evlist.h | 2 +- tools/perf/util/mmap.c | 77 +++- tools/perf/util/mmap.h | 14 ++ 7 files changed, 314 insertions(+), 9 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 246dee081efd..7efb4af88a68 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -435,6 +435,11 @@ Specify vmlinux path which has debuginfo. --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. +--aio:: +Enable asynchronous (Posix AIO) trace writing mode. +Asynchronous mode is supported only when linking Perf tool with libc library +providing implementation for Posix AIO API. + --all-kernel:: Configure all used events to run in kernel space. 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 488779bc4c8d..408d6477c960 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -124,6 +124,183 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse return 0; } +#ifdef HAVE_AIO_SUPPORT +static int record__aio_write(struct aiocb *cblock, int trace_fd, + void *buf, size_t size, off_t off) +{ + int rc; + + cblock->aio_fildes = trace_fd; + cblock->aio_buf= buf; + cblock->aio_nbytes = size; + cblock->aio_offset = off; + cblock->aio_sigevent.sigev_notify = SIGEV_NONE; + + do { + rc = aio_write(cblock); + if (rc == 0) { + break; + } else if (errno != EAGAIN) { + cblock->aio_fildes = -1; + pr_err("failed to queue perf data, error: %m\n"); + break; + } + } while (1); + + return rc; +} + +static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) +{ + void *rem_buf; + off_t rem_off; + size_t rem_size; + int rc, aio_errno; + ssize_t aio_ret, written; + + aio_errno = aio_error(cblock); + if (aio_errno == EINPROGRESS) + return 0; + + written = aio_ret = aio_return(cblock); + if (aio_ret < 0) { + if (aio_errno != EINTR) + pr_err("failed to write perf data, error: %m\n"); + written = 0; + } + + rem_size = cblock->aio_nbytes - written; + + if (rem_size == 0) { + cblock->aio_fildes = -1; + /* +* md->refcount is incremented in perf_mmap__push() for +* every enqueued aio write request so decrement it because +* the request is now complete. +*/ + perf_mmap__put(md); + rc = 1; + } else { + /* +* aio write request may require restart with the +* reminder if the kernel didn't write whole +* chunk at once. 
+*/ + rem_off = cblock->aio_offset + written; + rem_buf = (void *)(cblock->aio_buf + written); + record__aio_write(cblock, cblock->aio_fildes, + rem_buf, rem_size, rem_off); + rc = 0; + } + + return rc; +} + +static void record__aio_sync(struct perf_mmap *md) +{ + struct aiocb *cblock = >aio.cblock; + struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ + + do { + if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock)) + return; + + while (aio_suspend((const struct aiocb**), 1, )) { + if (!(errno == EAGAIN || errno == EINTR)) + pr_err("failed to sync perf data, error: %m\n"); + } + } while (1); +} + +static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off) +{ + struct record
[PATCH 18/22] perf mmap: Map data buffer for preserving collected data
From: Alexey Budankov The map->data buffer is used to preserve map->base profiling data for writing to disk. AIO map->cblock is used to queue corresponding map->data buffer for asynchronous writing. Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5fcda10c-6c63-68df-383a-c6d9e5d1f...@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 49 +++- tools/perf/util/mmap.h | 11 - 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 36526d229315..6f010b9f0a81 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1028,7 +1028,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So should not be passed through const pointer. */ - struct mmap_params mp; + struct mmap_params mp = { .nr_cblocks = 0 }; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist, false); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index cdb95b3a1213..47cdc3ad6546 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -153,8 +153,55 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb { } +#ifdef HAVE_AIO_SUPPORT +static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) +{ + int delta_max; + + if (mp->nr_cblocks) { + map->aio.data = malloc(perf_mmap__mmap_len(map)); + if (!map->aio.data) { + pr_debug2("failed to allocate data buffer, error %m\n"); + return -1; + } + /* +* Use cblock.aio_fildes value different from -1 +* to denote started aio write operation on the +* cblock so it requires explicit record__aio_sync() +* call prior the cblock may be reused again. 
+*/ + map->aio.cblock.aio_fildes = -1; + /* +* Allocate cblock with max priority delta to +* have faster aio write system calls. +*/ + delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); + map->aio.cblock.aio_reqprio = delta_max; + } + + return 0; +} + +static void perf_mmap__aio_munmap(struct perf_mmap *map) +{ + if (map->aio.data) + zfree(>aio.data); +} +#else +static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused, + struct mmap_params *mp __maybe_unused) +{ + return 0; +} + +static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused) +{ +} +#endif + void perf_mmap__munmap(struct perf_mmap *map) { + perf_mmap__aio_munmap(map); if (map->base != NULL) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; @@ -197,7 +244,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c >auxtrace_mp, map->base, fd)) return -1; - return 0; + return perf_mmap__aio_mmap(map, mp); } static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index cc5e2d6d17a9..3f10ad030c5e 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -6,6 +6,9 @@ #include #include #include +#ifdef HAVE_AIO_SUPPORT +#include +#endif #include "auxtrace.h" #include "event.h" @@ -26,6 +29,12 @@ struct perf_mmap { bool overwrite; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); +#ifdef HAVE_AIO_SUPPORT + struct { + void *data; + struct aiocb cblock; + } aio; +#endif }; /* @@ -57,7 +66,7 @@ enum bkw_mmap_state { }; struct mmap_params { - int prot, mask; + int prot, mask, nr_cblocks; struct auxtrace_mmap_params auxtrace_mp; }; -- 2.19.1
[PATCH 20/22] perf record: Extend trace writing to multi AIO
From: Alexey Budankov Multi AIO trace writing allows caching more kernel data into userspace memory postponing trace writing for the sake of overall profiling data thruput increase. It could be seen as kernel data buffer extension into userspace memory. With an --aio option value different from 0 (default value is 1) the tool has capability to cache more and more data into user space along with delegating spill to AIO. That allows avoiding to suspend at record__aio_sync() between calls of record__mmap_read_evlist() and increases profiling data thruput at the cost of userspace memory. Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/050bb053-e7f3-aa83-fde7-f27ff90be...@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 +- tools/perf/builtin-record.c | 67 ++-- tools/perf/util/mmap.c | 64 ++ tools/perf/util/mmap.h | 9 ++-- 4 files changed, 102 insertions(+), 42 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 7efb4af88a68..d232b13ea713 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -435,8 +435,8 @@ Specify vmlinux path which has debuginfo. --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. ---aio:: -Enable asynchronous (Posix AIO) trace writing mode. +--aio[=n]:: +Use control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4). Asynchronous mode is supported only when linking Perf tool with libc library providing implementation for Posix AIO API. 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 408d6477c960..4736dc96c4ca 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) return rc; } -static void record__aio_sync(struct perf_mmap *md) +static int record__aio_sync(struct perf_mmap *md, bool sync_all) { - struct aiocb *cblock = >aio.cblock; + struct aiocb **aiocb = md->aio.aiocb; + struct aiocb *cblocks = md->aio.cblocks; struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ + int i, do_suspend; do { - if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock)) - return; + do_suspend = 0; + for (i = 0; i < md->aio.nr_cblocks; ++i) { + if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, [i])) { + if (sync_all) + aiocb[i] = NULL; + else + return i; + } else { + /* +* Started aio write is not complete yet +* so it has to be waited before the +* next allocation. +*/ + aiocb[i] = [i]; + do_suspend = 1; + } + } + if (!do_suspend) + return -1; - while (aio_suspend((const struct aiocb**), 1, )) { + while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, )) { if (!(errno == EAGAIN || errno == EINTR)) pr_err("failed to sync perf data, error: %m\n"); } @@ -252,28 +271,36 @@ static void record__aio_mmap_read_sync(struct record *rec) struct perf_mmap *map = [i]; if (map->base) - record__aio_sync(map); + record__aio_sync(map, true); } } static int nr_cblocks_default = 1; +static int nr_cblocks_max = 4; static int record__aio_parse(const struct option *opt, -const char *str __maybe_unused, +const char *str, int unset) { struct record_opts *opts = (struct record_opts *)opt->value; - if (unset) + if (unset) { opts->nr_cblocks = 0; - else - opts->nr_cblocks = nr_cblocks_default; + } else { + if (str) + opts->nr_cblocks = strtol(str, NULL, 0); + if (!opts->nr_cblocks) + opts->nr_cblocks = nr_cblocks_default; + } return 0; } #else /* 
HAVE_AIO_SUPPORT */ -static void record__aio_sync(struct perf_mmap *md __maybe_unused) +static int nr_cblocks_max = 0; + +static int
[PATCH 18/22] perf mmap: Map data buffer for preserving collected data
From: Alexey Budankov The map->data buffer is used to preserve map->base profiling data for writing to disk. AIO map->cblock is used to queue corresponding map->data buffer for asynchronous writing. Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5fcda10c-6c63-68df-383a-c6d9e5d1f...@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 49 +++- tools/perf/util/mmap.h | 11 - 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 36526d229315..6f010b9f0a81 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1028,7 +1028,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So should not be passed through const pointer. */ - struct mmap_params mp; + struct mmap_params mp = { .nr_cblocks = 0 }; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist, false); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index cdb95b3a1213..47cdc3ad6546 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -153,8 +153,55 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb { } +#ifdef HAVE_AIO_SUPPORT +static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) +{ + int delta_max; + + if (mp->nr_cblocks) { + map->aio.data = malloc(perf_mmap__mmap_len(map)); + if (!map->aio.data) { + pr_debug2("failed to allocate data buffer, error %m\n"); + return -1; + } + /* +* Use cblock.aio_fildes value different from -1 +* to denote started aio write operation on the +* cblock so it requires explicit record__aio_sync() +* call prior the cblock may be reused again. 
+*/ + map->aio.cblock.aio_fildes = -1; + /* +* Allocate cblock with max priority delta to +* have faster aio write system calls. +*/ + delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); + map->aio.cblock.aio_reqprio = delta_max; + } + + return 0; +} + +static void perf_mmap__aio_munmap(struct perf_mmap *map) +{ + if (map->aio.data) + zfree(>aio.data); +} +#else +static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused, + struct mmap_params *mp __maybe_unused) +{ + return 0; +} + +static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused) +{ +} +#endif + void perf_mmap__munmap(struct perf_mmap *map) { + perf_mmap__aio_munmap(map); if (map->base != NULL) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; @@ -197,7 +244,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c >auxtrace_mp, map->base, fd)) return -1; - return 0; + return perf_mmap__aio_mmap(map, mp); } static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index cc5e2d6d17a9..3f10ad030c5e 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -6,6 +6,9 @@ #include #include #include +#ifdef HAVE_AIO_SUPPORT +#include +#endif #include "auxtrace.h" #include "event.h" @@ -26,6 +29,12 @@ struct perf_mmap { bool overwrite; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); +#ifdef HAVE_AIO_SUPPORT + struct { + void *data; + struct aiocb cblock; + } aio; +#endif }; /* @@ -57,7 +66,7 @@ enum bkw_mmap_state { }; struct mmap_params { - int prot, mask; + int prot, mask, nr_cblocks; struct auxtrace_mmap_params auxtrace_mp; }; -- 2.19.1
[PATCH 20/22] perf record: Extend trace writing to multi AIO
From: Alexey Budankov Multi AIO trace writing allows caching more kernel data into userspace memory postponing trace writing for the sake of overall profiling data thruput increase. It could be seen as kernel data buffer extension into userspace memory. With an --aio option value different from 0 (default value is 1) the tool has capability to cache more and more data into user space along with delegating spill to AIO. That allows avoiding to suspend at record__aio_sync() between calls of record__mmap_read_evlist() and increases profiling data thruput at the cost of userspace memory. Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/050bb053-e7f3-aa83-fde7-f27ff90be...@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 +- tools/perf/builtin-record.c | 67 ++-- tools/perf/util/mmap.c | 64 ++ tools/perf/util/mmap.h | 9 ++-- 4 files changed, 102 insertions(+), 42 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 7efb4af88a68..d232b13ea713 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -435,8 +435,8 @@ Specify vmlinux path which has debuginfo. --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. ---aio:: -Enable asynchronous (Posix AIO) trace writing mode. +--aio[=n]:: +Use control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4). Asynchronous mode is supported only when linking Perf tool with libc library providing implementation for Posix AIO API. 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 408d6477c960..4736dc96c4ca 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) return rc; } -static void record__aio_sync(struct perf_mmap *md) +static int record__aio_sync(struct perf_mmap *md, bool sync_all) { - struct aiocb *cblock = >aio.cblock; + struct aiocb **aiocb = md->aio.aiocb; + struct aiocb *cblocks = md->aio.cblocks; struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ + int i, do_suspend; do { - if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock)) - return; + do_suspend = 0; + for (i = 0; i < md->aio.nr_cblocks; ++i) { + if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, [i])) { + if (sync_all) + aiocb[i] = NULL; + else + return i; + } else { + /* +* Started aio write is not complete yet +* so it has to be waited before the +* next allocation. +*/ + aiocb[i] = [i]; + do_suspend = 1; + } + } + if (!do_suspend) + return -1; - while (aio_suspend((const struct aiocb**), 1, )) { + while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, )) { if (!(errno == EAGAIN || errno == EINTR)) pr_err("failed to sync perf data, error: %m\n"); } @@ -252,28 +271,36 @@ static void record__aio_mmap_read_sync(struct record *rec) struct perf_mmap *map = [i]; if (map->base) - record__aio_sync(map); + record__aio_sync(map, true); } } static int nr_cblocks_default = 1; +static int nr_cblocks_max = 4; static int record__aio_parse(const struct option *opt, -const char *str __maybe_unused, +const char *str, int unset) { struct record_opts *opts = (struct record_opts *)opt->value; - if (unset) + if (unset) { opts->nr_cblocks = 0; - else - opts->nr_cblocks = nr_cblocks_default; + } else { + if (str) + opts->nr_cblocks = strtol(str, NULL, 0); + if (!opts->nr_cblocks) + opts->nr_cblocks = nr_cblocks_default; + } return 0; } #else /* 
HAVE_AIO_SUPPORT */ -static void record__aio_sync(struct perf_mmap *md __maybe_unused) +static int nr_cblocks_max = 0; + +static int
[PATCH 16/22] perf intel-pt: Fix error with config term "pt=0"
From: Adrian Hunter Users should never use 'pt=0', but if they do it may give a meaningless error: $ perf record -e intel_pt/pt=0/u uname Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (intel_pt/pt=0/u). Fix that by forcing 'pt=1'. Committer testing: # perf record -e intel_pt/pt=0/u uname Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (intel_pt/pt=0/u). /bin/dmesg | grep -i perf may provide additional information. # perf record -e intel_pt/pt=0/u uname pt=0 doesn't make sense, forcing pt=1 Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data ] # Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/b7c5b4e5-9497-10e5-fd43-5f3e4a0fe...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index db0ba8caf5a2..ba8ecaf52200 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -524,10 +524,21 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, struct perf_evsel *evsel) { int err; + char c; if (!evsel) return 0; + /* +* If supported, force pass-through config term (pt=1) even if user +* sets pt=0, which avoids senseless kernel errors. +*/ + if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && + !(evsel->attr.config & 1)) { + pr_warning("pt=0 doesn't make sense, forcing pt=1\n"); + evsel->attr.config |= 1; + } + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", "cyc_thresh", "caps/psb_cyc", evsel->attr.config); -- 2.19.1
[PATCH 06/22] perf machine: Record if an arch has a single user/kernel address space
From: Adrian Hunter Some architectures have a single address space for kernel and user addresses, which makes it possible to determine if an address is in kernel space or user space. Some don't, e.g.: sparc. Cache that info in perf_env so that, for instance, code needing to fallback failed symbol lookups at the kernel space in single address space arches can lookup at userspace. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: David S. Miller Cc: Jiri Olsa Cc: Leo Yan Cc: Mathieu Poirier Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/20181106210712.12098-2-adrian.hun...@intel.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/common.c | 10 ++ tools/perf/arch/common.h | 1 + tools/perf/util/machine.h | 1 + tools/perf/util/session.c | 4 4 files changed, 16 insertions(+) diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 82657c01a3b8..5f69fd0b745a 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -200,3 +200,13 @@ int perf_env__lookup_objdump(struct perf_env *env, const char **path) return perf_env__lookup_binutils_path(env, "objdump", path); } + +/* + * Some architectures have a single address space for kernel and user addresses, + * which makes it possible to determine if an address is in kernel space or user + * space. 
+ */ +bool perf_env__single_address_space(struct perf_env *env) +{ + return strcmp(perf_env__arch(env), "sparc"); +} diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index 2167001b18c5..c298a446d1f6 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -5,5 +5,6 @@ #include "../util/env.h" int perf_env__lookup_objdump(struct perf_env *env, const char **path); +bool perf_env__single_address_space(struct perf_env *env); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d856b85862e2..ca897a73014c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -42,6 +42,7 @@ struct machine { u16 id_hdr_size; bool comm_exec; bool kptr_restrict_warned; + bool single_address_space; char *root_dir; char *mmap_name; struct threadsthreads[THREADS__TABLE_SIZE]; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7d2c8ce6cfad..f8eab197f35c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -24,6 +24,7 @@ #include "thread.h" #include "thread-stack.h" #include "stat.h" +#include "arch/common.h" static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, @@ -150,6 +151,9 @@ struct perf_session *perf_session__new(struct perf_data *data, session->machines.host.env = _env; } + session->machines.host.single_address_space = + perf_env__single_address_space(session->machines.host.env); + if (!data || perf_data__is_write(data)) { /* * In O_RDONLY mode this will be performed when reading the -- 2.19.1
[PATCH 12/22] perf test: Fix perf_event_attr test failure
From: Adrian Hunter Fix inconsistent use of tabs and spaces error: # perf test 16 -v 16: Setup struct perf_event_attr : --- start --- test child forked, pid 20224 File "/usr/libexec/perf-core/tests/attr.py", line 119 log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) ^ TabError: inconsistent use of tabs and spaces in indentation test child finished with -1 end Setup struct perf_event_attr: FAILED! Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181122140456.16817-1-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index ff9b60b99f52..44090a9a19f3 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -116,7 +116,7 @@ class Event(dict): if not self.has_key(t) or not other.has_key(t): continue if not data_equal(self[t], other[t]): - log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) +log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) # Test file description needs to have following sections: # [config] -- 2.19.1
[GIT PULL 00/22] perf/core improvements and fixes
Hi Ingo, Please consider pulling, more to come, Regards, - Arnaldo Test results at the end of this message, as usual. The following changes since commit b1a9d7b0190119dad5b9b7841751b5a7586bbc8b: Merge tag 'perf-urgent-for-mingo-4.20-20181121' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent (2018-11-21 15:57:21 +0100) are available in the Git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-core-for-mingo-4.21-20181130 for you to fetch changes up to 09d3f015d1e1b4fee7e9bbdcf54201d239393391: uprobes: Fix handle_swbp() vs. unregister() + register() race once more (2018-11-23 08:31:19 +0100) perf/core improvements and fixes: - Introduce 'perf record --aio' to use asynchronous IO trace writing in 'perf record' disabled by default, i.e. one needs to explicitly use 'perf record --aio' to use it, in which case the number of AIO aiocb structs will be one, specify 'perf record --aio=N' to ask for more, according to your needs, related to the number of processors in your machine. Reports about the effectiveness of this option are welcome so that we can decide on making it the default mode of operation. Read the respective patches commit logs for further information (Alexey Budankov) - Add fallback routines to be used in places where we don't have the cpu mode (kernel/user space/hypervisor) and thus must first fallback lookups looking at all map trees when trying to resolve symbols (Adrian Hunter) - Introduce 'perf top --kallsyms file' to match 'perf report --kallsyms', useful when dealing with BPF, where symbol resolution happens via kallsyms, not via the default vmlinux ELF symtabs (Arnaldo Carvalho de Melo) - Fix CSV mode column output for non-cgroup events in 'perf stat' (Stephane Eranian) - Fix 'perf stat' shadow stats for clock events. 
(Ravi Bangoria) - Fix error with config term "pt=0", where we should just force "pt=1" and warn the user about the former being non-sensical (Adrian Hunter) - Fix 'perf test' entry where we expect 'sleep' to come in a PERF_RECORD_COMM but instead we get 'coreutils' when sleep is provided by some versions of the 'coreutils' package (Adrian Hunter) - Remove needless rb_tree extra indirection from map__find() (Eric Saint-Etienne) - Add sanity check to libtraceevent's is_timestamp_in_us() (Tzvetomir Stoyanov) - Use ERR_CAST instead of ERR_PTR(PTR_ERR()) (Wen Yang) Signed-off-by: Arnaldo Carvalho de Melo Andrea Parri (1): uprobes: Fix handle_swbp() vs. unregister() + register() race once more Jiri Olsa (3): perf/x86/intel: Move branch tracing setup to the Intel-specific source file perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts() perf/x86/intel: Disallow precise_ip on BTS events arch/x86/events/core.c | 20 arch/x86/events/intel/core.c | 56 ++-- arch/x86/events/perf_event.h | 13 ++ kernel/events/uprobes.c | 12 -- 4 files changed, 63 insertions(+), 38 deletions(-) Test results: XXX: Investigation on the watchpoint and breakpoint 'perf test' failures is underway, doesn't look like related to patches in this batch. The first ones are container (docker) based builds of tools/perf with and without libelf support. Where clang is available, it is also used to build perf with/without libelf, and building with LIBCLANGLLVM=1 (built-in clang) with gcc and clang when clang and its devel libraries are installed. The objtool and samples/bpf/ builds are disabled now that I'm switching from using the sources in a local volume to fetching them from a http server to build it inside the container, to make it easier to build in a container cluster. Those will come back later. 
Several are cross builds, the ones with -x-ARCH and the android one, and those may not have all the features built, due to lack of multi-arch devel packages, available and being used so far on just a few, like debian:experimental-x-{arm64,mipsel}. The 'perf test' one will perform a variety of tests exercising tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands with a variety of command line event specifications to then intercept the sys_perf_event syscall to check that the perf_event_attr fields are set up as expected, among a variety of other unit tests. Then there is the 'make -C tools/perf build-test' ones, that build tools/perf/ with a variety of feature sets, exercising the build with an incomplete set of features as well as with a complete one. It is planned to have it run on each of the containers mentioned above, using some container orchestration infrastructure. Get in contact if interested in helping having this in place. # dm 1 alpine:3.4
[GIT PULL 00/22] perf/core improvements and fixes
Hi Ingo, Please consider pulling, more to come, Regards, - Arnaldo Test results at the end of this message, as usual. The following changes since commit b1a9d7b0190119dad5b9b7841751b5a7586bbc8b: Merge tag 'perf-urgent-for-mingo-4.20-20181121' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent (2018-11-21 15:57:21 +0100) are available in the Git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git tags/perf-core-for-mingo-4.21-20181130 for you to fetch changes up to 09d3f015d1e1b4fee7e9bbdcf54201d239393391: uprobes: Fix handle_swbp() vs. unregister() + register() race once more (2018-11-23 08:31:19 +0100) perf/core improvements and fixes: - Introduce 'perf record --aio' to use asynchronous IO trace writing in 'perf record' disabled by default, i.e. one needs to explicitly use 'perf record --aio' to use it, in which case the number of AIO aiocb structs will be one, specify 'perf record --aio=N' to ask for more, according to your needs, related to the number of processors in your machine. Reports about the effectiveness of this option are welcome so that we can decide on making it the default mode of operation. Read the respective patches commit logs for further information (Alexey Budankov) - Add fallback routines to be used in places where we don't have the cpu mode (kernel/user space/hypervisor) and thus must first fallback lookups looking at all map trees when trying to resolve symbols (Adrian Hunter) - Introduce 'perf top --kallsyms file' to match 'perf report --kallsyms', useful when dealing with BPF, where symbol resolution happens via kallsyms, not via the default vmlinux ELF symtabs (Arnaldo Carvalho de Melo) - Fix CSV mode column output for non-cgroup events in 'perf stat' (Stephane Eranian) - Fix 'perf stat' shadow stats for clock events. 
(Ravi Bangoria) - Fix error with config term "pt=0", where we should just force "pt=1" and warn the user about the former being non-sensical (Adrian Hunter) - Fix 'perf test' entry where we expect 'sleep' to come in a PERF_RECORD_COMM but instead we get 'coreutils' when sleep is provided by some versions of the 'coreutils' package (Adrian Hunter) - Remove needless rb_tree extra indirection from map__find() (Eric Saint-Etienne) - Add sanity check to libtraceevent's is_timestamp_in_us() (Tzvetomir Stoyanov) - Use ERR_CAST instead of ERR_PTR(PTR_ERR()) (Wen Yang) Signed-off-by: Arnaldo Carvalho de Melo Andrea Parri (1): uprobes: Fix handle_swbp() vs. unregister() + register() race once more Jiri Olsa (3): perf/x86/intel: Move branch tracing setup to the Intel-specific source file perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts() perf/x86/intel: Disallow precise_ip on BTS events arch/x86/events/core.c | 20 arch/x86/events/intel/core.c | 56 ++-- arch/x86/events/perf_event.h | 13 ++ kernel/events/uprobes.c | 12 -- 4 files changed, 63 insertions(+), 38 deletions(-) Test results: XXX: Investigation on the watchpoint and breakpoint 'perf test' failures is underway, doesn't look like related to patches in this batch. The first ones are container (docker) based builds of tools/perf with and without libelf support. Where clang is available, it is also used to build perf with/without libelf, and building with LIBCLANGLLVM=1 (built-in clang) with gcc and clang when clang and its devel libraries are installed. The objtool and samples/bpf/ builds are disabled now that I'm switching from using the sources in a local volume to fetching them from a http server to build it inside the container, to make it easier to build in a container cluster. Those will come back later. 
Several are cross builds, the ones with -x-ARCH and the android one, and those may not have all the features built, due to lack of multi-arch devel packages, available and being used so far on just a few, like debian:experimental-x-{arm64,mipsel}. The 'perf test' one will perform a variety of tests exercising tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands with a variety of command line event specifications to then intercept the sys_perf_event syscall to check that the perf_event_attr fields are set up as expected, among a variety of other unit tests. Then there is the 'make -C tools/perf build-test' ones, that build tools/perf/ with a variety of feature sets, exercising the build with an incomplete set of features as well as with a complete one. It is planned to have it run on each of the containers mentioned above, using some container orchestration infrastructure. Get in contact if interested in helping having this in place. # dm 1 alpine:3.4
[PATCH 16/22] perf intel-pt: Fix error with config term "pt=0"
From: Adrian Hunter Users should never use 'pt=0', but if they do it may give a meaningless error: $ perf record -e intel_pt/pt=0/u uname Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (intel_pt/pt=0/u). Fix that by forcing 'pt=1'. Committer testing: # perf record -e intel_pt/pt=0/u uname Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (intel_pt/pt=0/u). /bin/dmesg | grep -i perf may provide additional information. # perf record -e intel_pt/pt=0/u uname pt=0 doesn't make sense, forcing pt=1 Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data ] # Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/b7c5b4e5-9497-10e5-fd43-5f3e4a0fe...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index db0ba8caf5a2..ba8ecaf52200 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -524,10 +524,21 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, struct perf_evsel *evsel) { int err; + char c; if (!evsel) return 0; + /* +* If supported, force pass-through config term (pt=1) even if user +* sets pt=0, which avoids senseless kernel errors. +*/ + if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && + !(evsel->attr.config & 1)) { + pr_warning("pt=0 doesn't make sense, forcing pt=1\n"); + evsel->attr.config |= 1; + } + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", "cyc_thresh", "caps/psb_cyc", evsel->attr.config); -- 2.19.1
[PATCH 06/22] perf machine: Record if an arch has a single user/kernel address space
From: Adrian Hunter Some architectures have a single address space for kernel and user addresses, which makes it possible to determine if an address is in kernel space or user space. Some don't, e.g.: sparc. Cache that info in perf_env so that, for instance, code needing to fallback failed symbol lookups at the kernel space in single address space arches can lookup at userspace. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: David S. Miller Cc: Jiri Olsa Cc: Leo Yan Cc: Mathieu Poirier Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/20181106210712.12098-2-adrian.hun...@intel.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/common.c | 10 ++ tools/perf/arch/common.h | 1 + tools/perf/util/machine.h | 1 + tools/perf/util/session.c | 4 4 files changed, 16 insertions(+) diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 82657c01a3b8..5f69fd0b745a 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -200,3 +200,13 @@ int perf_env__lookup_objdump(struct perf_env *env, const char **path) return perf_env__lookup_binutils_path(env, "objdump", path); } + +/* + * Some architectures have a single address space for kernel and user addresses, + * which makes it possible to determine if an address is in kernel space or user + * space. 
+ */ +bool perf_env__single_address_space(struct perf_env *env) +{ + return strcmp(perf_env__arch(env), "sparc"); +} diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index 2167001b18c5..c298a446d1f6 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -5,5 +5,6 @@ #include "../util/env.h" int perf_env__lookup_objdump(struct perf_env *env, const char **path); +bool perf_env__single_address_space(struct perf_env *env); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d856b85862e2..ca897a73014c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -42,6 +42,7 @@ struct machine { u16 id_hdr_size; bool comm_exec; bool kptr_restrict_warned; + bool single_address_space; char *root_dir; char *mmap_name; struct threadsthreads[THREADS__TABLE_SIZE]; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7d2c8ce6cfad..f8eab197f35c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -24,6 +24,7 @@ #include "thread.h" #include "thread-stack.h" #include "stat.h" +#include "arch/common.h" static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, @@ -150,6 +151,9 @@ struct perf_session *perf_session__new(struct perf_data *data, session->machines.host.env = _env; } + session->machines.host.single_address_space = + perf_env__single_address_space(session->machines.host.env); + if (!data || perf_data__is_write(data)) { /* * In O_RDONLY mode this will be performed when reading the -- 2.19.1
[PATCH 12/22] perf test: Fix perf_event_attr test failure
From: Adrian Hunter Fix inconsistent use of tabs and spaces error: # perf test 16 -v 16: Setup struct perf_event_attr : --- start --- test child forked, pid 20224 File "/usr/libexec/perf-core/tests/attr.py", line 119 log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) ^ TabError: inconsistent use of tabs and spaces in indentation test child finished with -1 end Setup struct perf_event_attr: FAILED! Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181122140456.16817-1-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index ff9b60b99f52..44090a9a19f3 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -116,7 +116,7 @@ class Event(dict): if not self.has_key(t) or not other.has_key(t): continue if not data_equal(self[t], other[t]): - log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) +log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) # Test file description needs to have following sections: # [config] -- 2.19.1
[PATCH 01/22] perf build: Give better hint about devel package for libssl
From: Arnaldo Carvalho de Melo In debian/ubuntu it's libssl-dev, but for fedora/RHEL/Centos/etc it's openssl-devel, fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: Wang Nan Fixes: 8ee4646038e4 ("perf build: Add libcrypto feature detection") Link: https://lkml.kernel.org/n/tip-lnxqszts6aq2c9jy4b7ml...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e110010e7faa..c643d5e0c26b 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -588,7 +588,7 @@ endif ifndef NO_LIBCRYPTO ifneq ($(feature-libcrypto), 1) -msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev); +msg := $(warning No libcrypto.h found, disables jitted code injection, please install openssl-devel or libssl-dev); NO_LIBCRYPTO := 1 else CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT -- 2.19.1
[PATCH 07/22] perf thread: Add fallback functions for cases where cpumode is insufficient
From: Adrian Hunter For branch stacks or branch samples, the sample cpumode might not be correct because it applies only to the sample 'ip' and not necessary to 'addr' or branch stack addresses. Add fallback functions that can be used to deal with those cases Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: David S. Miller Cc: Jiri Olsa Cc: Leo Yan Cc: Mathieu Poirier Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/20181106210712.12098-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 27 +++ tools/perf/util/machine.c | 27 +++ tools/perf/util/machine.h | 2 ++ tools/perf/util/thread.h | 4 4 files changed, 60 insertions(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e9c108a6b1c3..9431b20c1337 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1577,6 +1577,24 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return al->map; } +/* + * For branch stacks or branch samples, the sample cpumode might not be correct + * because it applies only to the sample 'ip' and not necessary to 'addr' or + * branch stack addresses. If possible, use a fallback to deal with those cases. 
+ */ +struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) +{ + struct map *map = thread__find_map(thread, cpumode, addr, al); + struct machine *machine = thread->mg->machine; + u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr); + + if (map || addr_cpumode == cpumode) + return map; + + return thread__find_map(thread, addr_cpumode, addr, al); +} + struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { @@ -1586,6 +1604,15 @@ struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, return al->sym; } +struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, + u64 addr, struct addr_location *al) +{ + al->sym = NULL; + if (thread__find_map_fb(thread, cpumode, addr, al)) + al->sym = map__find_symbol(al->map, al->addr); + return al->sym; +} + /* * Callers need to drop the reference to al->thread, obtained in * machine__findnew_thread() diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 8f36ce813bc5..9397e3f2444d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2592,6 +2592,33 @@ int machine__get_kernel_start(struct machine *machine) return err; } +u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr) +{ + u8 addr_cpumode = cpumode; + bool kernel_ip; + + if (!machine->single_address_space) + goto out; + + kernel_ip = machine__kernel_ip(machine, addr); + switch (cpumode) { + case PERF_RECORD_MISC_KERNEL: + case PERF_RECORD_MISC_USER: + addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; + break; + case PERF_RECORD_MISC_GUEST_KERNEL: + case PERF_RECORD_MISC_GUEST_USER: + addr_cpumode = kernel_ip ? 
PERF_RECORD_MISC_GUEST_KERNEL : + PERF_RECORD_MISC_GUEST_USER; + break; + default: + break; + } +out: + return addr_cpumode; +} + struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { return dsos__findnew(>dsos, filename); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index ca897a73014c..ebde3ea70225 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -100,6 +100,8 @@ static inline bool machine__kernel_ip(struct machine *machine, u64 ip) return ip >= kernel_start; } +u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr); + struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid); struct comm *machine__thread_exec_comm(struct machine *machine, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 30e2b4c165fe..5920c3bb8ffe 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -96,9 +96,13 @@ struct thread *thread__main_thread(struct machine *machine, struct thread *threa struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); +struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al); struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al);
[PATCH 15/22] perf top: Allow passing a kallsyms file
From: Arnaldo Carvalho de Melo This basically replicates what was done for 'perf report' in: b226a5a72901 ("perf report: Allow user to specify path to kallsyms file") This should help with resolving eBPF symbols, that are in kallsyms but, of course, not in vmlinux. Reported-by: Ivan Babrou Tested-by: Ivan Babrou Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: David Ahern Cc: David S. Miller Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-x52mx1ybq8128rtg9hjrj...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 3 +++ tools/perf/builtin-top.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 808b664343c9..44d89fb9c788 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -70,6 +70,9 @@ Default is to monitor all CPUS. --ignore-vmlinux:: Ignore vmlinux files. +--kallsyms=:: + kallsyms pathname + -m :: --mmap-pages=:: Number of mmap data pages (must be a power of two) or size diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index aa0c73e57924..1252d1759064 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1289,6 +1289,8 @@ int cmd_top(int argc, const char **argv) "file", "vmlinux pathname"), OPT_BOOLEAN(0, "ignore-vmlinux", _conf.ignore_vmlinux, "don't load vmlinux even if found"), + OPT_STRING(0, "kallsyms", _conf.kallsyms_name, + "file", "kallsyms pathname"), OPT_BOOLEAN('K', "hide_kernel_symbols", _kernel_symbols, "hide kernel symbols"), OPT_CALLBACK('m', "mmap-pages", >mmap_pages, "pages", -- 2.19.1
[PATCH 11/22] perf tests record: Allow for 'sleep' being 'coreutils'
From: Adrian Hunter If the 'sleep' command is provided by coreutils, then the "PERF_RECORD_* events & perf_sample fields" test will fail because the MMAP name is 'coreutils' not 'sleep', and there is an extra COMM event. Fix the test to detect that case. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181122135545.16295-1-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-record.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 34394cc05077..07f6bd8ed719 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -58,6 +58,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus char *bname, *mmap_filename; u64 prev_time = 0; bool found_cmd_mmap = false, +found_coreutils_mmap = false, found_libc_mmap = false, found_vdso_mmap = false, found_ld_mmap = false; @@ -254,6 +255,8 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus if (bname != NULL) { if (!found_cmd_mmap) found_cmd_mmap = !strcmp(bname + 1, cmd); + if (!found_coreutils_mmap) + found_coreutils_mmap = !strcmp(bname + 1, "coreutils"); if (!found_libc_mmap) found_libc_mmap = !strncmp(bname + 1, "libc", 4); if (!found_ld_mmap) @@ -292,7 +295,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus } found_exit: - if (nr_events[PERF_RECORD_COMM] > 1) { + if (nr_events[PERF_RECORD_COMM] > 1 + !!found_coreutils_mmap) { pr_debug("Excessive number of PERF_RECORD_COMM events!\n"); ++errs; } @@ -302,7 +305,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus ++errs; } - if (!found_cmd_mmap) { + if (!found_cmd_mmap && !found_coreutils_mmap) { pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd); ++errs; } -- 2.19.1
[PATCH 08/22] perf tools: Use fallback for sample_addr_correlates_sym() cases
From: Adrian Hunter thread__resolve() is used in the sample_addr_correlates_sym() cases where 'addr' is a destination of a branch which does not necessarily have the same cpumode as the 'ip'. Use the fallback function in that case. This patch depends on patch "perf tools: Add fallback functions for cases where cpumode is insufficient". Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: David S. Miller Cc: Jiri Olsa Cc: Leo Yan Cc: Mathieu Poirier Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/20181106210712.12098-3-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 9431b20c1337..24493200cf80 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1706,7 +1706,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) void thread__resolve(struct thread *thread, struct addr_location *al, struct perf_sample *sample) { - thread__find_map(thread, sample->cpumode, sample->addr, al); + thread__find_map_fb(thread, sample->cpumode, sample->addr, al); al->cpu = sample->cpu; al->sym = NULL; -- 2.19.1
[PATCH 11/22] perf tests record: Allow for 'sleep' being 'coreutils'
From: Adrian Hunter If the 'sleep' command is provided by coreutils, then the "PERF_RECORD_* events & perf_sample fields" test will fail because the MMAP name is 'coreutils' not 'sleep', and there is an extra COMM event. Fix the test to detect that case. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20181122135545.16295-1-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-record.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 34394cc05077..07f6bd8ed719 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -58,6 +58,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus char *bname, *mmap_filename; u64 prev_time = 0; bool found_cmd_mmap = false, +found_coreutils_mmap = false, found_libc_mmap = false, found_vdso_mmap = false, found_ld_mmap = false; @@ -254,6 +255,8 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus if (bname != NULL) { if (!found_cmd_mmap) found_cmd_mmap = !strcmp(bname + 1, cmd); + if (!found_coreutils_mmap) + found_coreutils_mmap = !strcmp(bname + 1, "coreutils"); if (!found_libc_mmap) found_libc_mmap = !strncmp(bname + 1, "libc", 4); if (!found_ld_mmap) @@ -292,7 +295,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus } found_exit: - if (nr_events[PERF_RECORD_COMM] > 1) { + if (nr_events[PERF_RECORD_COMM] > 1 + !!found_coreutils_mmap) { pr_debug("Excessive number of PERF_RECORD_COMM events!\n"); ++errs; } @@ -302,7 +305,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus ++errs; } - if (!found_cmd_mmap) { + if (!found_cmd_mmap && !found_coreutils_mmap) { pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd); ++errs; } -- 2.19.1
[PATCH 08/22] perf tools: Use fallback for sample_addr_correlates_sym() cases
From: Adrian Hunter thread__resolve() is used in the sample_addr_correlates_sym() cases where 'addr' is a destination of a branch which does not necessarily have the same cpumode as the 'ip'. Use the fallback function in that case. This patch depends on patch "perf tools: Add fallback functions for cases where cpumode is insufficient". Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: David S. Miller Cc: Jiri Olsa Cc: Leo Yan Cc: Mathieu Poirier Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/20181106210712.12098-3-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 9431b20c1337..24493200cf80 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1706,7 +1706,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) void thread__resolve(struct thread *thread, struct addr_location *al, struct perf_sample *sample) { - thread__find_map(thread, sample->cpumode, sample->addr, al); + thread__find_map_fb(thread, sample->cpumode, sample->addr, al); al->cpu = sample->cpu; al->sym = NULL; -- 2.19.1
[PATCH 01/22] perf build: Give better hint about devel package for libssl
From: Arnaldo Carvalho de Melo In debian/ubuntu it's libssl-dev, but for fedora/RHEL/Centos/etc it's openssl-devel, fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: Wang Nan Fixes: 8ee4646038e4 ("perf build: Add libcrypto feature detection") Link: https://lkml.kernel.org/n/tip-lnxqszts6aq2c9jy4b7ml...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e110010e7faa..c643d5e0c26b 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -588,7 +588,7 @@ endif ifndef NO_LIBCRYPTO ifneq ($(feature-libcrypto), 1) -msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev); +msg := $(warning No libcrypto.h found, disables jitted code injection, please install openssl-devel or libssl-dev); NO_LIBCRYPTO := 1 else CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT -- 2.19.1
[PATCH 07/22] perf thread: Add fallback functions for cases where cpumode is insufficient
From: Adrian Hunter For branch stacks or branch samples, the sample cpumode might not be correct because it applies only to the sample 'ip' and not necessarily to 'addr' or branch stack addresses. Add fallback functions that can be used to deal with those cases. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: David S. Miller Cc: Jiri Olsa Cc: Leo Yan Cc: Mathieu Poirier Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/20181106210712.12098-2-adrian.hun...@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 27 +++ tools/perf/util/machine.c | 27 +++ tools/perf/util/machine.h | 2 ++ tools/perf/util/thread.h | 4 4 files changed, 60 insertions(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e9c108a6b1c3..9431b20c1337 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1577,6 +1577,24 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return al->map; } +/* + * For branch stacks or branch samples, the sample cpumode might not be correct + * because it applies only to the sample 'ip' and not necessary to 'addr' or + * branch stack addresses. If possible, use a fallback to deal with those cases. 
+ */ +struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) +{ + struct map *map = thread__find_map(thread, cpumode, addr, al); + struct machine *machine = thread->mg->machine; + u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr); + + if (map || addr_cpumode == cpumode) + return map; + + return thread__find_map(thread, addr_cpumode, addr, al); +} + struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { @@ -1586,6 +1604,15 @@ struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, return al->sym; } +struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, + u64 addr, struct addr_location *al) +{ + al->sym = NULL; + if (thread__find_map_fb(thread, cpumode, addr, al)) + al->sym = map__find_symbol(al->map, al->addr); + return al->sym; +} + /* * Callers need to drop the reference to al->thread, obtained in * machine__findnew_thread() diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 8f36ce813bc5..9397e3f2444d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2592,6 +2592,33 @@ int machine__get_kernel_start(struct machine *machine) return err; } +u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr) +{ + u8 addr_cpumode = cpumode; + bool kernel_ip; + + if (!machine->single_address_space) + goto out; + + kernel_ip = machine__kernel_ip(machine, addr); + switch (cpumode) { + case PERF_RECORD_MISC_KERNEL: + case PERF_RECORD_MISC_USER: + addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; + break; + case PERF_RECORD_MISC_GUEST_KERNEL: + case PERF_RECORD_MISC_GUEST_USER: + addr_cpumode = kernel_ip ? 
PERF_RECORD_MISC_GUEST_KERNEL : + PERF_RECORD_MISC_GUEST_USER; + break; + default: + break; + } +out: + return addr_cpumode; +} + struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { return dsos__findnew(>dsos, filename); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index ca897a73014c..ebde3ea70225 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -100,6 +100,8 @@ static inline bool machine__kernel_ip(struct machine *machine, u64 ip) return ip >= kernel_start; } +u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr); + struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid); struct comm *machine__thread_exec_comm(struct machine *machine, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 30e2b4c165fe..5920c3bb8ffe 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -96,9 +96,13 @@ struct thread *thread__main_thread(struct machine *machine, struct thread *threa struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); +struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al); struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al);
[PATCH 15/22] perf top: Allow passing a kallsyms file
From: Arnaldo Carvalho de Melo This basically replicates what was done for 'perf report' in: b226a5a72901 ("perf report: Allow user to specify path to kallsyms file") This should help with resolving eBPF symbols, that are in kallsyms but, of course, not in vmlinux. Reported-by: Ivan Babrou Tested-by: Ivan Babrou Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: David Ahern Cc: David S. Miller Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-x52mx1ybq8128rtg9hjrj...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 3 +++ tools/perf/builtin-top.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 808b664343c9..44d89fb9c788 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -70,6 +70,9 @@ Default is to monitor all CPUS. --ignore-vmlinux:: Ignore vmlinux files. +--kallsyms=:: + kallsyms pathname + -m :: --mmap-pages=:: Number of mmap data pages (must be a power of two) or size diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index aa0c73e57924..1252d1759064 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1289,6 +1289,8 @@ int cmd_top(int argc, const char **argv) "file", "vmlinux pathname"), OPT_BOOLEAN(0, "ignore-vmlinux", _conf.ignore_vmlinux, "don't load vmlinux even if found"), + OPT_STRING(0, "kallsyms", _conf.kallsyms_name, + "file", "kallsyms pathname"), OPT_BOOLEAN('K', "hide_kernel_symbols", _kernel_symbols, "hide kernel symbols"), OPT_CALLBACK('m', "mmap-pages", >mmap_pages, "pages", -- 2.19.1
[PATCH 04/22] perf map: Remove extra indirection from map__find()
From: Eric Saint-Etienne A double pointer is used in map__find() where a single pointer is enough because the function doesn't affect the rbtree and the rbtree is locked. Signed-off-by: Eric Saint-Etienne Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1542969759-24346-1-git-send-email-eric.saint.etie...@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 781eed8e3265..a0d58b4d9c32 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -873,19 +873,18 @@ void maps__remove(struct maps *maps, struct map *map) struct map *maps__find(struct maps *maps, u64 ip) { - struct rb_node **p, *parent = NULL; + struct rb_node *p; struct map *m; down_read(&maps->lock); - p = &maps->entries.rb_node; - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct map, rb_node); + p = maps->entries.rb_node; + while (p != NULL) { + m = rb_entry(p, struct map, rb_node); if (ip < m->start) - p = &(*p)->rb_left; + p = p->rb_left; else if (ip >= m->end) - p = &(*p)->rb_right; + p = p->rb_right; else goto out; } -- 2.19.1
[PATCH 04/22] perf map: Remove extra indirection from map__find()
From: Eric Saint-Etienne A double pointer is used in map__find() where a single pointer is enough because the function doesn't affect the rbtree and the rbtree is locked. Signed-off-by: Eric Saint-Etienne Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1542969759-24346-1-git-send-email-eric.saint.etie...@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 781eed8e3265..a0d58b4d9c32 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -873,19 +873,18 @@ void maps__remove(struct maps *maps, struct map *map) struct map *maps__find(struct maps *maps, u64 ip) { - struct rb_node **p, *parent = NULL; + struct rb_node *p; struct map *m; down_read(&maps->lock); - p = &maps->entries.rb_node; - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct map, rb_node); + p = maps->entries.rb_node; + while (p != NULL) { + m = rb_entry(p, struct map, rb_node); if (ip < m->start) - p = &(*p)->rb_left; + p = p->rb_left; else if (ip >= m->end) - p = &(*p)->rb_right; + p = p->rb_right; else goto out; } -- 2.19.1
[PATCH 03/22] perf stat: Fix CSV mode column output for non-cgroup events
From: Stephane Eranian When using the -x option, perf stat prints CSV-style output with one event per line. For each event, it prints the count, the unit, the event name, the cgroup, and a bunch of other event specific fields (such as insn per cycles). When you use CSV-style mode, you expect a normalized output where each event is printed with the same number of fields regardless of what it is so it can easily be imported into a spreadsheet or parsed. For instance, if an event does not have a unit, then print an empty field for it. Although this approach was implemented for the unit, it was not for the cgroup. When mixing cgroup and non-cgroup events, non-cgroup events would not show an empty field; instead the next field was printed, making columns not line up correctly. This patch fixes the cgroup output issues by forcing an empty field for non-cgroup events as soon as one event has a cgroup. Before: @ @cycles @foo@ 0@100.00@@ 2531614 @ @cycles @6420922@100.00@@ foo cgroup lines up with time_running! After: @ @cycles @foo @0 @100.00@@ 2594834 @ @cycles @@5287372 @100.00@@ Fields line up. Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1541587845-9150-1-git-send-email-eran...@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 16 +++- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index e7b4c44ebb62..665ee374fc01 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -59,6 +59,15 @@ static void print_noise(struct perf_stat_config *config, print_noise_pct(config, stddev_stats(>res_stats[0]), avg); } +static void print_cgroup(struct perf_stat_config *config, struct perf_evsel *evsel) +{ + if (nr_cgroups) { + const char *cgrp_name = evsel->cgrp ? 
evsel->cgrp->name : ""; + fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); + } +} + + static void aggr_printout(struct perf_stat_config *config, struct perf_evsel *evsel, int id, int nr) { @@ -336,8 +345,7 @@ static void abs_printout(struct perf_stat_config *config, fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel)); - if (evsel->cgrp) - fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name); + print_cgroup(config, evsel); } static bool is_mixed_hw_group(struct perf_evsel *counter) @@ -431,9 +439,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, config->csv_output ? 0 : -25, perf_evsel__name(counter)); - if (counter->cgrp) - fprintf(config->output, "%s%s", - config->csv_sep, counter->cgrp->name); + print_cgroup(config, counter); if (!config->csv_output) pm(config, , NULL, NULL, "", 0); -- 2.19.1
[PATCH 05/22] perf env: Also consider env->arch == NULL as local operation
From: Arnaldo Carvalho de Melo We'll set a new machine field based on env->arch, which for live mode, like with 'perf top' means we need to use uname() to figure the name of the arch, fix perf_env__arch() to consider both (env == NULL) and (env->arch == NULL) as local operation. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: David S. Miller Cc: Jiri Olsa Cc: Leo Yan Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Wang Nan Cc: sta...@vger.kernel.org Link: https://lkml.kernel.org/n/tip-vcz4ufzdon7cwy8dm2ua5...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 59f38c7693f8..4c23779e271a 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -166,7 +166,7 @@ const char *perf_env__arch(struct perf_env *env) struct utsname uts; char *arch_name; - if (!env) { /* Assume local operation */ + if (!env || !env->arch) { /* Assume local operation */ if (uname(&uts) < 0) return NULL; arch_name = uts.machine; -- 2.19.1
[PATCH 02/22] perf stat: Fix shadow stats for clock events
From: Ravi Bangoria Commit 0aa802a79469 ("perf stat: Get rid of extra clock display function") introduced scale and unit for clock events. Thus, perf_stat__update_shadow_stats() now saves scaled values of clock events in msecs, instead of original nsecs. But while calculating values of shadow stats we still consider clock event values in nsecs. This results in wrong shadow stat values. For example: # ./perf stat -e task-clock,cycles ls 2.60 msec task-clock:u#0.877 CPUs utilized 2,430,564 cycles:u# 1215282.000 GHz Fix this by saving original nsec values for clock events in perf_stat__update_shadow_stats(). After patch: # ./perf stat -e task-clock,cycles ls 3.14 msec task-clock:u#0.839 CPUs utilized 3,094,528 cycles:u#0.985 GHz Suggested-by: Jiri Olsa Reported-by: Anton Blanchard Signed-off-by: Ravi Bangoria Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jin Yao Cc: Namhyung Kim Cc: Thomas Richter Cc: yuzhouj...@didichuxing.com Fixes: 0aa802a79469 ("perf stat: Get rid of extra clock display function") Link: http://lkml.kernel.org/r/20181116042843.24067-1-ravi.bango...@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index f0a8cec55c47..3c22c58b3e90 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -209,11 +209,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, int cpu, struct runtime_stat *st) { int ctx = evsel_context(counter); + u64 count_ns = count; count *= counter->scale; if (perf_evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, 0, cpu, count); + update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) -- 2.19.1
[PATCH 03/22] perf stat: Fix CSV mode column output for non-cgroup events
From: Stephane Eranian When using the -x option, perf stat prints CSV-style output with one event per line. For each event, it prints the count, the unit, the event name, the cgroup, and a bunch of other event specific fields (such as insn per cycles). When you use CSV-style mode, you expect a normalized output where each event is printed with the same number of fields regardless of what it is so it can easily be imported into a spreadsheet or parsed. For instance, if an event does not have a unit, then print an empty field for it. Although this approach was implemented for the unit, it was not for the cgroup. When mixing cgroup and non-cgroup events, non-cgroup events would not show an empty field; instead the next field was printed, making columns not line up correctly. This patch fixes the cgroup output issues by forcing an empty field for non-cgroup events as soon as one event has a cgroup. Before: @ @cycles @foo@ 0@100.00@@ 2531614 @ @cycles @6420922@100.00@@ foo cgroup lines up with time_running! After: @ @cycles @foo @0 @100.00@@ 2594834 @ @cycles @@5287372 @100.00@@ Fields line up. Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1541587845-9150-1-git-send-email-eran...@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 16 +++- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index e7b4c44ebb62..665ee374fc01 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -59,6 +59,15 @@ static void print_noise(struct perf_stat_config *config, print_noise_pct(config, stddev_stats(>res_stats[0]), avg); } +static void print_cgroup(struct perf_stat_config *config, struct perf_evsel *evsel) +{ + if (nr_cgroups) { + const char *cgrp_name = evsel->cgrp ? 
evsel->cgrp->name : ""; + fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); + } +} + + static void aggr_printout(struct perf_stat_config *config, struct perf_evsel *evsel, int id, int nr) { @@ -336,8 +345,7 @@ static void abs_printout(struct perf_stat_config *config, fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel)); - if (evsel->cgrp) - fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name); + print_cgroup(config, evsel); } static bool is_mixed_hw_group(struct perf_evsel *counter) @@ -431,9 +439,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, config->csv_output ? 0 : -25, perf_evsel__name(counter)); - if (counter->cgrp) - fprintf(config->output, "%s%s", - config->csv_sep, counter->cgrp->name); + print_cgroup(config, counter); if (!config->csv_output) pm(config, , NULL, NULL, "", 0); -- 2.19.1
[PATCH 05/22] perf env: Also consider env->arch == NULL as local operation
From: Arnaldo Carvalho de Melo We'll set a new machine field based on env->arch, which for live mode, like with 'perf top' means we need to use uname() to figure the name of the arch, fix perf_env__arch() to consider both (env == NULL) and (env->arch == NULL) as local operation. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: David S. Miller Cc: Jiri Olsa Cc: Leo Yan Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Wang Nan Cc: sta...@vger.kernel.org Link: https://lkml.kernel.org/n/tip-vcz4ufzdon7cwy8dm2ua5...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 59f38c7693f8..4c23779e271a 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -166,7 +166,7 @@ const char *perf_env__arch(struct perf_env *env) struct utsname uts; char *arch_name; - if (!env) { /* Assume local operation */ + if (!env || !env->arch) { /* Assume local operation */ if (uname(&uts) < 0) return NULL; arch_name = uts.machine; -- 2.19.1
[PATCH 02/22] perf stat: Fix shadow stats for clock events
From: Ravi Bangoria Commit 0aa802a79469 ("perf stat: Get rid of extra clock display function") introduced scale and unit for clock events. Thus, perf_stat__update_shadow_stats() now saves scaled values of clock events in msecs, instead of original nsecs. But while calculating values of shadow stats we still consider clock event values in nsecs. This results in wrong shadow stat values. For example: # ./perf stat -e task-clock,cycles ls 2.60 msec task-clock:u#0.877 CPUs utilized 2,430,564 cycles:u# 1215282.000 GHz Fix this by saving original nsec values for clock events in perf_stat__update_shadow_stats(). After patch: # ./perf stat -e task-clock,cycles ls 3.14 msec task-clock:u#0.839 CPUs utilized 3,094,528 cycles:u#0.985 GHz Suggested-by: Jiri Olsa Reported-by: Anton Blanchard Signed-off-by: Ravi Bangoria Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jin Yao Cc: Namhyung Kim Cc: Thomas Richter Cc: yuzhouj...@didichuxing.com Fixes: 0aa802a79469 ("perf stat: Get rid of extra clock display function") Link: http://lkml.kernel.org/r/20181116042843.24067-1-ravi.bango...@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index f0a8cec55c47..3c22c58b3e90 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -209,11 +209,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, int cpu, struct runtime_stat *st) { int ctx = evsel_context(counter); + u64 count_ns = count; count *= counter->scale; if (perf_evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, 0, cpu, count); + update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) -- 2.19.1
[PATCH v11 1/3] dt-bindings: clock: Update GCC bindings for protected-clocks
Add protected-clocks list which could be used to specify the clocks to be bypassed on certain devices. Reviewed-by: Rob Herring Signed-off-by: Taniya Das --- Documentation/devicetree/bindings/clock/qcom,gcc.txt | 14 ++ 1 file changed, 14 insertions(+) diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt index 52d9345..5e37de9 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt +++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt @@ -35,6 +35,8 @@ be part of GCC and hence the TSENS properties can also be part of the GCC/clock-controller node. For more details on the TSENS properties please refer Documentation/devicetree/bindings/thermal/qcom-tsens.txt +- protected-clocks : Protected clock specifier list as per common clock + binding. Example: clock-controller@90 { @@ -55,3 +57,15 @@ Example of GCC with TSENS properties: #reset-cells = <1>; #thermal-sensor-cells = <1>; }; + +Example of GCC with protected-clocks properties: + clock-controller@10 { + compatible = "qcom,gcc-sdm845"; + reg = <0x10 0x1f>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + protected-clocks = , + , + ; + }; -- Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, hosted by the Linux Foundation.
[PATCH v11 1/3] dt-bindings: clock: Update GCC bindings for protected-clocks
Add a protected-clocks list which can be used to specify the clocks to be bypassed on certain devices. Reviewed-by: Rob Herring Signed-off-by: Taniya Das --- Documentation/devicetree/bindings/clock/qcom,gcc.txt | 14 ++ 1 file changed, 14 insertions(+) diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt index 52d9345..5e37de9 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt +++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt @@ -35,6 +35,8 @@ be part of GCC and hence the TSENS properties can also be part of the GCC/clock-controller node. For more details on the TSENS properties please refer Documentation/devicetree/bindings/thermal/qcom-tsens.txt +- protected-clocks : Protected clock specifier list as per common clock + binding. Example: clock-controller@90 { @@ -55,3 +57,15 @@ Example of GCC with TSENS properties: #reset-cells = <1>; #thermal-sensor-cells = <1>; }; + +Example of GCC with protected-clocks properties: + clock-controller@10 { + compatible = "qcom,gcc-sdm845"; + reg = <0x10 0x1f>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + protected-clocks = , + , + ; + }; -- Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, hosted by the Linux Foundation.
[PATCH v11 2/3] dt-bindings: clock: Introduce QCOM LPASS clock bindings
Add device tree bindings for Low Power Audio subsystem clock controller for Qualcomm Technology Inc's SDM845 SoCs. Reviewed-by: Rob Herring Signed-off-by: Taniya Das --- .../devicetree/bindings/clock/qcom,gcc.txt | 4 +++- .../devicetree/bindings/clock/qcom,lpasscc.txt | 26 ++ include/dt-bindings/clock/qcom,gcc-sdm845.h| 2 ++ include/dt-bindings/clock/qcom,lpass-sdm845.h | 15 + 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/clock/qcom,lpasscc.txt create mode 100644 include/dt-bindings/clock/qcom,lpass-sdm845.h diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt index 5e37de9..8661c3c 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt +++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt @@ -67,5 +67,7 @@ Example of GCC with protected-clocks properties: #power-domain-cells = <1>; protected-clocks = , , - ; + , + , + ; }; diff --git a/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt b/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt new file mode 100644 index 000..b9e9787 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt @@ -0,0 +1,26 @@ +Qualcomm LPASS Clock Controller Binding +--- + +Required properties : +- compatible : shall contain "qcom,sdm845-lpasscc" +- #clock-cells : from common clock binding, shall contain 1. +- reg : shall contain base register address and size, + in the order + Index-0 maps to LPASS_CC register region + Index-1 maps to LPASS_QDSP6SS register region + +Optional properties : +- reg-names: register names of LPASS domain +"cc", "qdsp6ss". + +Example: + +The below node has to be defined in the cases where the LPASS peripheral loader +would bring the subsystem out of reset. 
+ + lpasscc: clock-controller@17014000 { + compatible = "qcom,sdm845-lpasscc"; + reg = <0x17014000 0x1f004>, <0x1730 0x200>; + reg-names = "cc", "qdsp6ss"; + #clock-cells = <1>; + }; diff --git a/include/dt-bindings/clock/qcom,gcc-sdm845.h b/include/dt-bindings/clock/qcom,gcc-sdm845.h index b8eae5a..968fa65 100644 --- a/include/dt-bindings/clock/qcom,gcc-sdm845.h +++ b/include/dt-bindings/clock/qcom,gcc-sdm845.h @@ -197,6 +197,8 @@ #define GCC_QSPI_CORE_CLK_SRC 187 #define GCC_QSPI_CORE_CLK 188 #define GCC_QSPI_CNOC_PERIPH_AHB_CLK 189 +#define GCC_LPASS_Q6_AXI_CLK 190 +#define GCC_LPASS_SWAY_CLK 191 /* GCC Resets */ #define GCC_MMSS_BCR 0 diff --git a/include/dt-bindings/clock/qcom,lpass-sdm845.h b/include/dt-bindings/clock/qcom,lpass-sdm845.h new file mode 100644 index 000..6590508 --- /dev/null +++ b/include/dt-bindings/clock/qcom,lpass-sdm845.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_SDM_LPASS_SDM845_H +#define _DT_BINDINGS_CLK_SDM_LPASS_SDM845_H + +#define LPASS_Q6SS_AHBM_AON_CLK0 +#define LPASS_Q6SS_AHBS_AON_CLK1 +#define LPASS_QDSP6SS_XO_CLK 2 +#define LPASS_QDSP6SS_SLEEP_CLK3 +#define LPASS_QDSP6SS_CORE_CLK 4 + +#endif -- Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc.is a member of the Code Aurora Forum, hosted by the Linux Foundation.
[PATCH v11 3/3] clk: qcom: Add lpass clock controller driver for SDM845
Add support for the lpass clock controller found on SDM845 based devices. This would allow lpass peripheral loader drivers to control the clocks to bring the subsystem out of reset. LPASS clocks present on the global clock controller would be registered with the clock framework based on the protected-clock flag. Also do not gate these clocks if they are left unused, as the lpass clocks require the global clock controller lpass clocks to be enabled before they are accessed. Mark the GCC lpass clocks as CRITICAL, for the LPASS clock access. Signed-off-by: Taniya Das --- drivers/clk/qcom/Kconfig | 9 ++ drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/gcc-sdm845.c | 32 +++ drivers/clk/qcom/lpasscc-sdm845.c | 179 ++ 4 files changed, 221 insertions(+) create mode 100644 drivers/clk/qcom/lpasscc-sdm845.c diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 6f3e466..d87a22e 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -302,6 +302,15 @@ config SDM_DISPCC_845 Say Y if you want to support display devices and functionality such as splash screen. +config SDM_LPASSCC_845 + tristate "SDM845 Low Power Audio Subsystem (LPAAS) Clock Controller" + depends on COMMON_CLK_QCOM + select SDM_GCC_845 + help + Support for the LPASS clock controller on SDM845 devices. + Say Y if you want to use the LPASS branch clocks of the LPASS clock + controller to reset the LPASS subsystem. 
+ config SPMI_PMIC_CLKDIV tristate "SPMI PMIC clkdiv Support" depends on (COMMON_CLK_QCOM && SPMI) || COMPILE_TEST diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile index 6ed2827..ee8d069 100644 --- a/drivers/clk/qcom/Makefile +++ b/drivers/clk/qcom/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_SDM_DISPCC_845) += dispcc-sdm845.o obj-$(CONFIG_SDM_GCC_660) += gcc-sdm660.o obj-$(CONFIG_SDM_GCC_845) += gcc-sdm845.o obj-$(CONFIG_SDM_GPUCC_845) += gpucc-sdm845.o +obj-$(CONFIG_SDM_LPASSCC_845) += lpasscc-sdm845.o obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o obj-$(CONFIG_KPSS_XCC) += kpss-xcc.o diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index f133b7f..db90f9b 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -3153,6 +3153,34 @@ enum { }, }; +static struct clk_branch gcc_lpass_q6_axi_clk = { + .halt_reg = 0x47000, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_q6_axi_clk", + .flags = CLK_IS_CRITICAL, + .ops = _branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_lpass_sway_clk = { + .halt_reg = 0x47008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_sway_clk", + .flags = CLK_IS_CRITICAL, + .ops = _branch2_ops, + }, + }, +}; + static struct gdsc pcie_0_gdsc = { .gdscr = 0x6b004, .pd = { @@ -3453,6 +3481,10 @@ enum { [GCC_QSPI_CORE_CLK_SRC] = _qspi_core_clk_src.clkr, [GCC_QSPI_CORE_CLK] = _qspi_core_clk.clkr, [GCC_QSPI_CNOC_PERIPH_AHB_CLK] = _qspi_cnoc_periph_ahb_clk.clkr, +#ifdef CONFIG_SDM_LPASSCC_845 + [GCC_LPASS_Q6_AXI_CLK] = _lpass_q6_axi_clk.clkr, + [GCC_LPASS_SWAY_CLK] = _lpass_sway_clk.clkr, +#endif }; static const struct qcom_reset_map gcc_sdm845_resets[] = { diff --git a/drivers/clk/qcom/lpasscc-sdm845.c 
b/drivers/clk/qcom/lpasscc-sdm845.c new file mode 100644 index 000..e246b99 --- /dev/null +++ b/drivers/clk/qcom/lpasscc-sdm845.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include + +#include "clk-regmap.h" +#include "clk-branch.h" +#include "common.h" + +static struct clk_branch lpass_q6ss_ahbm_aon_clk = { + .halt_reg = 0x12000, + .halt_check = BRANCH_VOTED, + .clkr = { + .enable_reg = 0x12000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_q6ss_ahbm_aon_clk", + .ops = _branch2_ops, + }, + }, +}; + +static struct clk_branch lpass_q6ss_ahbs_aon_clk = { + .halt_reg = 0x1f000, + .halt_check = BRANCH_VOTED, + .clkr = { + .enable_reg = 0x1f000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ +
Re: [RFC v3 06/19] arch: um: enable running kunit from User Mode Linux
On Fri, Nov 30, 2018 at 08:05:34AM -0600, Rob Herring wrote: > On Thu, Nov 29, 2018 at 9:37 PM Luis Chamberlain wrote: > > > > On Wed, Nov 28, 2018 at 03:26:03PM -0600, Rob Herring wrote: > > > On Wed, Nov 28, 2018 at 1:37 PM Brendan Higgins > > > wrote: > > > > > > > > Make minimum number of changes outside of the KUnit directories for > > > > KUnit to build and run using UML. > > > > > > There's nothing in this patch limiting this to UML. > > > > Not that one, but the abort thing segv thing is, eventually. > > To support other architectures we'd need to make a wrapper to that > > hack which Brendan added, and then allow each os to implement > > its own call, and add an asm-generic helper. > > I've not looked into why this is needed, but can't you make the abort > support optional and arches can select it when they support it. It's why I have asked for it to be properly documented. The patches in no way illustrate *why* such a thing is done. And if we are going to potentially have other archs do something similar, it is best to make it explicit. > At > least before, the DT unittests didn't need this to run and shouldn't > depend on it after converting to kunit. Luis
[PATCH v11 2/3] dt-bindings: clock: Introduce QCOM LPASS clock bindings
Add device tree bindings for Low Power Audio subsystem clock controller for Qualcomm Technology Inc's SDM845 SoCs. Reviewed-by: Rob Herring Signed-off-by: Taniya Das --- .../devicetree/bindings/clock/qcom,gcc.txt | 4 +++- .../devicetree/bindings/clock/qcom,lpasscc.txt | 26 ++ include/dt-bindings/clock/qcom,gcc-sdm845.h| 2 ++ include/dt-bindings/clock/qcom,lpass-sdm845.h | 15 + 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/clock/qcom,lpasscc.txt create mode 100644 include/dt-bindings/clock/qcom,lpass-sdm845.h diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt index 5e37de9..8661c3c 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt +++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt @@ -67,5 +67,7 @@ Example of GCC with protected-clocks properties: #power-domain-cells = <1>; protected-clocks = , , - ; + , + , + ; }; diff --git a/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt b/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt new file mode 100644 index 000..b9e9787 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,lpasscc.txt @@ -0,0 +1,26 @@ +Qualcomm LPASS Clock Controller Binding +--- + +Required properties : +- compatible : shall contain "qcom,sdm845-lpasscc" +- #clock-cells : from common clock binding, shall contain 1. +- reg : shall contain base register address and size, + in the order + Index-0 maps to LPASS_CC register region + Index-1 maps to LPASS_QDSP6SS register region + +Optional properties : +- reg-names: register names of LPASS domain +"cc", "qdsp6ss". + +Example: + +The below node has to be defined in the cases where the LPASS peripheral loader +would bring the subsystem out of reset. 
+ + lpasscc: clock-controller@17014000 { + compatible = "qcom,sdm845-lpasscc"; + reg = <0x17014000 0x1f004>, <0x1730 0x200>; + reg-names = "cc", "qdsp6ss"; + #clock-cells = <1>; + }; diff --git a/include/dt-bindings/clock/qcom,gcc-sdm845.h b/include/dt-bindings/clock/qcom,gcc-sdm845.h index b8eae5a..968fa65 100644 --- a/include/dt-bindings/clock/qcom,gcc-sdm845.h +++ b/include/dt-bindings/clock/qcom,gcc-sdm845.h @@ -197,6 +197,8 @@ #define GCC_QSPI_CORE_CLK_SRC 187 #define GCC_QSPI_CORE_CLK 188 #define GCC_QSPI_CNOC_PERIPH_AHB_CLK 189 +#define GCC_LPASS_Q6_AXI_CLK 190 +#define GCC_LPASS_SWAY_CLK 191 /* GCC Resets */ #define GCC_MMSS_BCR 0 diff --git a/include/dt-bindings/clock/qcom,lpass-sdm845.h b/include/dt-bindings/clock/qcom,lpass-sdm845.h new file mode 100644 index 000..6590508 --- /dev/null +++ b/include/dt-bindings/clock/qcom,lpass-sdm845.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_SDM_LPASS_SDM845_H +#define _DT_BINDINGS_CLK_SDM_LPASS_SDM845_H + +#define LPASS_Q6SS_AHBM_AON_CLK0 +#define LPASS_Q6SS_AHBS_AON_CLK1 +#define LPASS_QDSP6SS_XO_CLK 2 +#define LPASS_QDSP6SS_SLEEP_CLK3 +#define LPASS_QDSP6SS_CORE_CLK 4 + +#endif -- Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc.is a member of the Code Aurora Forum, hosted by the Linux Foundation.
[PATCH v11 3/3] clk: qcom: Add lpass clock controller driver for SDM845
Add support for the lpass clock controller found on SDM845 based devices. This would allow lpass peripheral loader drivers to control the clocks to bring the subsystem out of reset. LPASS clocks present on the global clock controller would be registered with the clock framework based on the protected-clock flag. Also do not gate these clocks if they are left unused, as the lpass clocks require the global clock controller lpass clocks to be enabled before they are accessed. Mark the GCC lpass clocks as CRITICAL, for the LPASS clock access. Signed-off-by: Taniya Das --- drivers/clk/qcom/Kconfig | 9 ++ drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/gcc-sdm845.c | 32 +++ drivers/clk/qcom/lpasscc-sdm845.c | 179 ++ 4 files changed, 221 insertions(+) create mode 100644 drivers/clk/qcom/lpasscc-sdm845.c diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 6f3e466..d87a22e 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -302,6 +302,15 @@ config SDM_DISPCC_845 Say Y if you want to support display devices and functionality such as splash screen. +config SDM_LPASSCC_845 + tristate "SDM845 Low Power Audio Subsystem (LPAAS) Clock Controller" + depends on COMMON_CLK_QCOM + select SDM_GCC_845 + help + Support for the LPASS clock controller on SDM845 devices. + Say Y if you want to use the LPASS branch clocks of the LPASS clock + controller to reset the LPASS subsystem. 
+ config SPMI_PMIC_CLKDIV tristate "SPMI PMIC clkdiv Support" depends on (COMMON_CLK_QCOM && SPMI) || COMPILE_TEST diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile index 6ed2827..ee8d069 100644 --- a/drivers/clk/qcom/Makefile +++ b/drivers/clk/qcom/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_SDM_DISPCC_845) += dispcc-sdm845.o obj-$(CONFIG_SDM_GCC_660) += gcc-sdm660.o obj-$(CONFIG_SDM_GCC_845) += gcc-sdm845.o obj-$(CONFIG_SDM_GPUCC_845) += gpucc-sdm845.o +obj-$(CONFIG_SDM_LPASSCC_845) += lpasscc-sdm845.o obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o obj-$(CONFIG_KPSS_XCC) += kpss-xcc.o diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index f133b7f..db90f9b 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -3153,6 +3153,34 @@ enum { }, }; +static struct clk_branch gcc_lpass_q6_axi_clk = { + .halt_reg = 0x47000, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_q6_axi_clk", + .flags = CLK_IS_CRITICAL, + .ops = _branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_lpass_sway_clk = { + .halt_reg = 0x47008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_sway_clk", + .flags = CLK_IS_CRITICAL, + .ops = _branch2_ops, + }, + }, +}; + static struct gdsc pcie_0_gdsc = { .gdscr = 0x6b004, .pd = { @@ -3453,6 +3481,10 @@ enum { [GCC_QSPI_CORE_CLK_SRC] = _qspi_core_clk_src.clkr, [GCC_QSPI_CORE_CLK] = _qspi_core_clk.clkr, [GCC_QSPI_CNOC_PERIPH_AHB_CLK] = _qspi_cnoc_periph_ahb_clk.clkr, +#ifdef CONFIG_SDM_LPASSCC_845 + [GCC_LPASS_Q6_AXI_CLK] = _lpass_q6_axi_clk.clkr, + [GCC_LPASS_SWAY_CLK] = _lpass_sway_clk.clkr, +#endif }; static const struct qcom_reset_map gcc_sdm845_resets[] = { diff --git a/drivers/clk/qcom/lpasscc-sdm845.c 
b/drivers/clk/qcom/lpasscc-sdm845.c new file mode 100644 index 000..e246b99 --- /dev/null +++ b/drivers/clk/qcom/lpasscc-sdm845.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + */ + +#include +#include +#include +#include + +#include + +#include "clk-regmap.h" +#include "clk-branch.h" +#include "common.h" + +static struct clk_branch lpass_q6ss_ahbm_aon_clk = { + .halt_reg = 0x12000, + .halt_check = BRANCH_VOTED, + .clkr = { + .enable_reg = 0x12000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_q6ss_ahbm_aon_clk", + .ops = _branch2_ops, + }, + }, +}; + +static struct clk_branch lpass_q6ss_ahbs_aon_clk = { + .halt_reg = 0x1f000, + .halt_check = BRANCH_VOTED, + .clkr = { + .enable_reg = 0x1f000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ +
Re: [RFC v3 06/19] arch: um: enable running kunit from User Mode Linux
On Fri, Nov 30, 2018 at 08:05:34AM -0600, Rob Herring wrote: > On Thu, Nov 29, 2018 at 9:37 PM Luis Chamberlain wrote: > > > > On Wed, Nov 28, 2018 at 03:26:03PM -0600, Rob Herring wrote: > > > On Wed, Nov 28, 2018 at 1:37 PM Brendan Higgins > > > wrote: > > > > > > > > Make minimum number of changes outside of the KUnit directories for > > > > KUnit to build and run using UML. > > > > > > There's nothing in this patch limiting this to UML. > > > > Not that one, but the abort thing segv thing is, eventually. > > To support other architectures we'd need to make a wrapper to that > > hack which Brendan added, and then allow each os to implement > > its own call, and add an asm-generic helper. > > I've not looked into why this is needed, but can't you make the abort > support optional and arches can select it when they support it. It's why I have asked for it to be properly documented. The patches in no way illustrate *why* such a thing is done. And if we are going to potentially have other archs do something similar, it is best to make it explicit. > At > least before, the DT unittests didn't need this to run and shouldn't > depend on it after converting to kunit. Luis
[PATCH v11 0/3] Add support for LPASS clock controller for SDM845
[v11] * Add the GCC LPASS clocks only if LPASSCC config is present. * Update the comment in lpasscc driver. [v10] * Separate change to add protected-clocks list in GCC binding. * Remove the clock support 'LPASS_AUDIO_WRAPPER_AON_CLK' as it is always ON clock. * Add few comments for module description and match table. [v9] * Update GCC documentation binding with the protected-clocks list. * Update the GCC code to add the GCC lpass clocks. * This depends on the acceptance of https://lore.kernel.org/lkml/20181105194011.43770-1-swb...@chromium.org/ [v8] * Add CLK_IS_CRITICAL for GCC lpass clocks for lpass clocks access to go through always. [v7] * Cleanup header file inclusions. * Move the comments along with the flags. * Update the commit with details for CLK_IGNORE_UNUSED. [v6] * Update the logic to register the lpass clocks when the device tree property is not present. * Add the CLK_IGNORE_UNUSED flag for the lpass clocks to not gate the clocks at late_init. [v5] * Address the comments in device tree binding to update the reg-names, update the unit address in lpass clock node example and also add reg property for the gcc clock node. * Update the lpass driver to take care of the reg-names. [v4] * Update the description in GCC Documentation binding for 'qcom,lpass-protected'. * Remove 'qcom,lpass-protected' from LPASS Documentation binding. * Update KConfig to use Low Power Audio Subsystem. * Add module_exit() and also update return value for devm_ioremap_resource failure. [v3] * Add a device tree property to identify lpass protected GCC clocks. * Update the GCC driver code to register the lpass clocks when the flag is defined. * Add comment for clocks using the BRANCH_HALT_SKIP flag. * Use platform APIs instead of of_address_to_resource. * Replace devm_ioremap with devm_ioremap_resource. * Use fixed index for 'lpass_cc' & 'lpass_qdsp6ss' in probe. [v2] * Make gcc_lpass_sway_clk static. 
* Remove using child nodes and use reg-names to differentiate various domains of LPASS CC. Add support for the lpass clock controller found on SDM845 based devices. This would allow lpass peripheral loader drivers to control the clocks to bring the subsystem out of reset. Taniya Das (3): dt-bindings: clock: Update GCC bindings for protected-clocks dt-bindings: clock: Introduce QCOM LPASS clock bindings clk: qcom: Add lpass clock controller driver for SDM845 .../devicetree/bindings/clock/qcom,gcc.txt | 16 ++ .../devicetree/bindings/clock/qcom,lpasscc.txt | 26 +++ drivers/clk/qcom/Kconfig | 9 ++ drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/gcc-sdm845.c | 32 drivers/clk/qcom/lpasscc-sdm845.c | 179 + include/dt-bindings/clock/qcom,gcc-sdm845.h| 2 + include/dt-bindings/clock/qcom,lpass-sdm845.h | 15 ++ 8 files changed, 280 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,lpasscc.txt create mode 100644 drivers/clk/qcom/lpasscc-sdm845.c create mode 100644 include/dt-bindings/clock/qcom,lpass-sdm845.h -- Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc.is a member of the Code Aurora Forum, hosted by the Linux Foundation.
[PATCH v11 0/3] Add support for LPASS clock controller for SDM845
[v11] * Add the GCC LPASS clocks only if LPASSCC config is present. * Update the comment in lpasscc driver. [v10] * Separate change to add protected-clocks list in GCC binding. * Remove the clock support 'LPASS_AUDIO_WRAPPER_AON_CLK' as it is always ON clock. * Add few comments for module description and match table. [v9] * Update GCC documentation binding with the protected-clocks list. * Update the GCC code to add the GCC lpass clocks. * This depends on the acceptance of https://lore.kernel.org/lkml/20181105194011.43770-1-swb...@chromium.org/ [v8] * Add CLK_IS_CRITICAL for GCC lpass clocks for lpass clocks access to go through always. [v7] * Cleanup header file inclusions. * Move the comments along with the flags. * Update the commit with details for CLK_IGNORE_UNUSED. [v6] * Update the logic to register the lpass clocks when the device tree property is not present. * Add the CLK_IGNORE_UNUSED flag for the lpass clocks to not gate the clocks at late_init. [v5] * Address the comments in device tree binding to update the reg-names, update the unit address in lpass clock node example and also add reg property for the gcc clock node. * Update the lpass driver to take care of the reg-names. [v4] * Update the description in GCC Documentation binding for 'qcom,lpass-protected'. * Remove 'qcom,lpass-protected' from LPASS Documentation binding. * Update KConfig to use Low Power Audio Subsystem. * Add module_exit() and also update return value for devm_ioremap_resource failure. [v3] * Add a device tree property to identify lpass protected GCC clocks. * Update the GCC driver code to register the lpass clocks when the flag is defined. * Add comment for clocks using the BRANCH_HALT_SKIP flag. * Use platform APIs instead of of_address_to_resource. * Replace devm_ioremap with devm_ioremap_resource. * Use fixed index for 'lpass_cc' & 'lpass_qdsp6ss' in probe. [v2] * Make gcc_lpass_sway_clk static. 
* Remove using child nodes and use reg-names to differentiate various domains of LPASS CC. Add support for the lpass clock controller found on SDM845 based devices. This would allow lpass peripheral loader drivers to control the clocks to bring the subsystem out of reset. Taniya Das (3): dt-bindings: clock: Update GCC bindings for protected-clocks dt-bindings: clock: Introduce QCOM LPASS clock bindings clk: qcom: Add lpass clock controller driver for SDM845 .../devicetree/bindings/clock/qcom,gcc.txt | 16 ++ .../devicetree/bindings/clock/qcom,lpasscc.txt | 26 +++ drivers/clk/qcom/Kconfig | 9 ++ drivers/clk/qcom/Makefile | 1 + drivers/clk/qcom/gcc-sdm845.c | 32 drivers/clk/qcom/lpasscc-sdm845.c | 179 + include/dt-bindings/clock/qcom,gcc-sdm845.h| 2 + include/dt-bindings/clock/qcom,lpass-sdm845.h | 15 ++ 8 files changed, 280 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,lpasscc.txt create mode 100644 drivers/clk/qcom/lpasscc-sdm845.c create mode 100644 include/dt-bindings/clock/qcom,lpass-sdm845.h -- Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc.is a member of the Code Aurora Forum, hosted by the Linux Foundation.
Re: [PATCH v2] kmemleak: Turn kmemleak_lock to raw spinlock on RT
On 2018-11-24 22:26:46 [+0800], He Zhe wrote: > On latest v4.19.1-rt3, both of the call traces can be reproduced with kmemleak > enabled. And none can be reproduced with kmemleak disabled. okay. So it needs attention. > On latest mainline tree, none can be reproduced no matter kmemleak is enabled > or disabled. > > I don't get why kfree from a preempt-disabled section should cause a warning > without kmemleak, since kfree can't sleep. it might. It will acquire a sleeping lock if it has to go down to the memory allocator to actually give memory back. > If I understand correctly, the call trace above is caused by trying to > schedule > after preemption is disabled, which cannot be reached in mainline kernel. So > we might need to turn to use raw lock to keep preemption disabled. The buddy-allocator runs with spin locks so it is okay on !RT. So you can use kfree() with disabled preemption or disabled interrupts. I don't think that we want to use raw-locks in the buddy-allocator. > From what I reached above, this is RT-only and happens on v4.18 and v4.19. > > The call trace above is caused by grabbing kmemleak_lock and then getting > scheduled and then re-grabbing kmemleak_lock. Using raw lock can also solve > this problem. But this is a reader / writer lock. And if I understand the other part of the thread then it needs multiple readers. Couldn't we just get rid of that kfree() or move it somewhere else? I mean if they free() memory on CPU-down and allocate it again on CPU-up then we could skip that, right? Just allocate it and don't free it because the CPU will likely get up again. > Thanks, > Zhe Sebastian
Re: [PATCH v2] kmemleak: Turn kmemleak_lock to raw spinlock on RT
On 2018-11-24 22:26:46 [+0800], He Zhe wrote: > On latest v4.19.1-rt3, both of the call traces can be reproduced with kmemleak > enabled. And none can be reproduced with kmemleak disabled. okay. So it needs attention. > On latest mainline tree, none can be reproduced no matter kmemleak is enabled > or disabled. > > I don't get why kfree from a preempt-disabled section should cause a warning > without kmemleak, since kfree can't sleep. it might. It will acquire a sleeping lock if it has to go down to the memory allocator to actually give memory back. > If I understand correctly, the call trace above is caused by trying to > schedule > after preemption is disabled, which cannot be reached in mainline kernel. So > we might need to turn to use raw lock to keep preemption disabled. The buddy-allocator runs with spin locks so it is okay on !RT. So you can use kfree() with disabled preemption or disabled interrupts. I don't think that we want to use raw-locks in the buddy-allocator. > From what I reached above, this is RT-only and happens on v4.18 and v4.19. > > The call trace above is caused by grabbing kmemleak_lock and then getting > scheduled and then re-grabbing kmemleak_lock. Using raw lock can also solve > this problem. But this is a reader / writer lock. And if I understand the other part of the thread then it needs multiple readers. Couldn't we just get rid of that kfree() or move it somewhere else? I mean if they free() memory on CPU-down and allocate it again on CPU-up then we could skip that, right? Just allocate it and don't free it because the CPU will likely get up again. > Thanks, > Zhe Sebastian
Re: [PATCH] fs: Make /proc/sys inodes be owned by global root.
On Fri, Nov 30, 2018 at 08:48:11AM -0600, Eric W. Biederman wrote: > Luis Chamberlain writes: > > > The logic seems sensible then, but are we implicating what a container > > does with its sysctl values onto the entire system? If so, sure, it > > seems you want this for networking purposes as there are a series of > > sysctl values a container may want to muck with, but are we sure we > > want the same for *all* sysctl entries? > > No. Please look at the patch again. It sets the default uid and gid > for sysctl entries to 0. AKA GLOBAL_ROOT_UID and GLOBAL_ROOT_GID > because there is a bug and they were not set to that value. > > Those are the uids and gids that are tested against. It just happens > you have to be in a weird configuration for this bug to become a problem. Thanks, then provided the commit log is modified: Acked-by: Luis Chamberlain Luis
Re: [PATCH] fs: Make /proc/sys inodes be owned by global root.
On Fri, Nov 30, 2018 at 08:48:11AM -0600, Eric W. Biederman wrote: > Luis Chamberlain writes: > > > The logic seems sensible then, but are we implicating what a container > > does with its sysctl values onto the entire system? If so, sure, it > > seems you want this for networking purposes as there are a series of > > sysctl values a container may want to muck with, but are we sure we > > want the same for *all* sysctl entries? > > No. Please look at the patch again. It sets the default uid and gid > for sysctl entries to 0. AKA GLOBAL_ROOT_UID and GLOBAL_ROOT_GID > because there is a bug and they were not set to that value. > > Those are the uids and gids that are tested against. It just happens > you have to be in a weird configuration for this bug to become a problem. Thanks, then provided the commit log is modified: Acked-by: Luis Chamberlain Luis
Re: [PATCH v10 3/3] clk: qcom: Add lpass clock controller driver for SDM845
Hello Stephen, On 11/29/2018 2:40 AM, Stephen Boyd wrote: Quoting Taniya Das (2018-11-21 23:53:41) diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index f133b7f..ba8ff99 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -3153,6 +3153,34 @@ enum { }, }; +static struct clk_branch gcc_lpass_q6_axi_clk = { + .halt_reg = 0x47000, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_q6_axi_clk", + .flags = CLK_IS_CRITICAL, + .ops = _branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_lpass_sway_clk = { + .halt_reg = 0x47008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_sway_clk", + .flags = CLK_IS_CRITICAL, + .ops = _branch2_ops, + }, + }, +}; + static struct gdsc pcie_0_gdsc = { .gdscr = 0x6b004, .pd = { @@ -3453,6 +3481,8 @@ enum { [GCC_QSPI_CORE_CLK_SRC] = _qspi_core_clk_src.clkr, [GCC_QSPI_CORE_CLK] = _qspi_core_clk.clkr, [GCC_QSPI_CNOC_PERIPH_AHB_CLK] = _qspi_cnoc_periph_ahb_clk.clkr, + [GCC_LPASS_Q6_AXI_CLK] = _lpass_q6_axi_clk.clkr, + [GCC_LPASS_SWAY_CLK] = _lpass_sway_clk.clkr, I have one single idea to avoid the integration nightmare with dts needing another update for this on platforms where these can't be touched. It's not perfect, but we can throw these clks and usage of the clks behind an #ifdef CONFIG_SDM_LPASSCC_845 and then let the dts parts match up with the clk driver parts in linux-next. After everything is merged together, someone can turn on the knobs for LPASS clk controller and make sure they have the right dts bits to mark them as protected. Sure, would keep it under the ifdefer and would clean it up later. -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation. --
Re: [PATCH v10 3/3] clk: qcom: Add lpass clock controller driver for SDM845
Hello Stephen, On 11/29/2018 2:40 AM, Stephen Boyd wrote: Quoting Taniya Das (2018-11-21 23:53:41) diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index f133b7f..ba8ff99 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -3153,6 +3153,34 @@ enum { }, }; +static struct clk_branch gcc_lpass_q6_axi_clk = { + .halt_reg = 0x47000, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_q6_axi_clk", + .flags = CLK_IS_CRITICAL, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gcc_lpass_sway_clk = { + .halt_reg = 0x47008, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x47008, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_sway_clk", + .flags = CLK_IS_CRITICAL, + .ops = &clk_branch2_ops, + }, + }, +}; + static struct gdsc pcie_0_gdsc = { .gdscr = 0x6b004, .pd = { @@ -3453,6 +3481,8 @@ enum { [GCC_QSPI_CORE_CLK_SRC] = &gcc_qspi_core_clk_src.clkr, [GCC_QSPI_CORE_CLK] = &gcc_qspi_core_clk.clkr, [GCC_QSPI_CNOC_PERIPH_AHB_CLK] = &gcc_qspi_cnoc_periph_ahb_clk.clkr, + [GCC_LPASS_Q6_AXI_CLK] = &gcc_lpass_q6_axi_clk.clkr, + [GCC_LPASS_SWAY_CLK] = &gcc_lpass_sway_clk.clkr, I have one single idea to avoid the integration nightmare with dts needing another update for this on platforms where these can't be touched. It's not perfect, but we can throw these clks and usage of the clks behind an #ifdef CONFIG_SDM_LPASSCC_845 and then let the dts parts match up with the clk driver parts in linux-next. After everything is merged together, someone can turn on the knobs for LPASS clk controller and make sure they have the right dts bits to mark them as protected. Sure, I will keep it under the #ifdef and clean it up later. -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation. --
Re: [PATCH v10 3/3] clk: qcom: Add lpass clock controller driver for SDM845
Hello Stephen, On 11/27/2018 2:44 PM, Stephen Boyd wrote: Quoting Taniya Das (2018-11-21 23:53:41) + +static struct clk_branch lpass_qdsp6ss_core_clk = { + .halt_reg = 0x20, + /* CLK_OFF would not toggle until LPASS is not out of reset */ Is this really "CLK_OFF won't toggle until LPASS is out of reset"? I will take care of it in the next series. -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation. --
Re: [PATCH v10 3/3] clk: qcom: Add lpass clock controller driver for SDM845
Hello Stephen, On 11/27/2018 2:44 PM, Stephen Boyd wrote: Quoting Taniya Das (2018-11-21 23:53:41) + +static struct clk_branch lpass_qdsp6ss_core_clk = { + .halt_reg = 0x20, + /* CLK_OFF would not toggle until LPASS is not out of reset */ Is this really "CLK_OFF won't toggle until LPASS is out of reset"? I will take care of it in the next series. -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation. --
Re: [PATCH 0/2] [GIT PULL] tracing: More fixes for 4.20
On Fri, 30 Nov 2018 09:41:00 -0800 Linus Torvalds wrote: > On Thu, Nov 29, 2018 at 7:19 PM Steven Rostedt wrote: > > > > Note, this is on top of a previous git pull that I have submitted: > > > > http://lkml.kernel.org/r/20181127224031.76681...@vmware.local.home > > Hmm. > > I had dismissed that, because the patch descriptors for that series > had had "for-next" in them. > > https://lore.kernel.org/lkml/20181122002801.501220...@goodmis.org/ > > so I dismissed that pull request entirely as being not for this > release entirely. > > I went back and merged things, but in general, please try to avoid > confusing me. I'm easily confused when I get mixed messages about the > patches and the pull requests, and will then generally default to > "ignore, this is informational". > My apologies. I used my scripts to push them into my linux-next repo, and it added the [for-next] when doing so in the series. I wanted it to sit in next for a week (because I modified a bunch of architecture code that I could only compile test, but not run). I'll be more careful next time. Thanks! -- Steve
Re: [PATCH 0/2] [GIT PULL] tracing: More fixes for 4.20
On Fri, 30 Nov 2018 09:41:00 -0800 Linus Torvalds wrote: > On Thu, Nov 29, 2018 at 7:19 PM Steven Rostedt wrote: > > > > Note, this is on top of a previous git pull that I have submitted: > > > > http://lkml.kernel.org/r/20181127224031.76681...@vmware.local.home > > Hmm. > > I had dismissed that, because the patch descriptors for that series > had had "for-next" in them. > > https://lore.kernel.org/lkml/20181122002801.501220...@goodmis.org/ > > so I dismissed that pull request entirely as being not for this > release entirely. > > I went back and merged things, but in general, please try to avoid > confusing me. I'm easily confused when I get mixed messages about the > patches and the pull requests, and will then generally default to > "ignore, this is informational". > My apologies. I used my scripts to push them into my linux-next repo, and it added the [for-next] when doing so in the series. I wanted it to sit in next for a week (because I modified a bunch of architecture code that I could only compile test, but not run). I'll be more careful next time. Thanks! -- Steve
RE: [PATCH] [repost] Drivers: hv: vmbus: Offload the handling of channels to two workqueues
> From: KY Srinivasan > Sent: Friday, November 30, 2018 9:31 AM > > From: Dexuan Cui > > Sent: Thursday, November 29, 2018 12:17 AM > > To: gre...@linuxfoundation.org > > Cc: KY Srinivasan ; Haiyang Zhang > > ; Stephen Hemminger > > ; linux-kernel@vger.kernel.org; > > de...@linuxdriverproject.org; a...@canonical.com; vkuznets > > ; o...@aepfle.de; jasow...@redhat.com; Michael > > Kelley > > Subject: RE: [PATCH] [repost] Drivers: hv: vmbus: Offload the handling of > > channels to two workqueues > > > > > From: gre...@linuxfoundation.org > > > Sent: Wednesday, November 28, 2018 11:45 PM > > > > > > > > There is no change in this repost. I just rebased this patch to today's > > > > char-misc's char-misc-next branch. Previously KY posted the patch with > > his > > > > Signed-off-by (which is kept in this repost), but there was a conflict > > > > issue. > > > > > > > > Note: the patch can't be cleanly applied to char-misc's char-misc-linus > > branch > > > -- > > > > to do that, we need to cherry-pick the supporting patch first: > > > > 4d3c5c69191f ("Drivers: hv: vmbus: Remove the useless API > > > vmbus_get_outgoing_channel()") > > > > > > That is not going to work for the obvious reason that this dependant > > > patch is not going to be merged into 4.20-final. > > > > It looks the dependent patch (4d3c5c69191f) is going to miss the v4.20 > > release. > > This is not a big issue, as the dependent patch isn't really important. > > > > > So, what do you expect us to do here? The only way this can be accepted > > > is to have it go into my -next branch, which means it will show up in > > > 4.21-rc1, is that ok? > > > > Is there any chance for this patch ("Drivers: hv: vmbus: Offload the > > handling > > ...") to > > go into v4.20? > > > > If yes, I can quickly do a rebase to char-misc's char-misc-linus branch, > > because actually the conflict can be very easily fixed. And I can help to > > fix any > > conflict when the dependent patch is backported to v4.20.1. 
> > This patch fixes an important bug while the patch this depends on is not > critical. > I suggest we revert the patch that this patch depends on > and we can submit a new version of this patch that can go in now - into 4.20 > release. > > K. Y I agree. Hi Greg, Please let us know what we can do to try to push this important fix into v4.20. Actually it's straightforward, though it looks big. And we have done full testing with the patch. Thanks, --Dexuan
RE: [PATCH] [repost] Drivers: hv: vmbus: Offload the handling of channels to two workqueues
> From: KY Srinivasan > Sent: Friday, November 30, 2018 9:31 AM > > From: Dexuan Cui > > Sent: Thursday, November 29, 2018 12:17 AM > > To: gre...@linuxfoundation.org > > Cc: KY Srinivasan ; Haiyang Zhang > > ; Stephen Hemminger > > ; linux-kernel@vger.kernel.org; > > de...@linuxdriverproject.org; a...@canonical.com; vkuznets > > ; o...@aepfle.de; jasow...@redhat.com; Michael > > Kelley > > Subject: RE: [PATCH] [repost] Drivers: hv: vmbus: Offload the handling of > > channels to two workqueues > > > > > From: gre...@linuxfoundation.org > > > Sent: Wednesday, November 28, 2018 11:45 PM > > > > > > > > There is no change in this repost. I just rebased this patch to today's > > > > char-misc's char-misc-next branch. Previously KY posted the patch with > > his > > > > Signed-off-by (which is kept in this repost), but there was a conflict > > > > issue. > > > > > > > > Note: the patch can't be cleanly applied to char-misc's char-misc-linus > > branch > > > -- > > > > to do that, we need to cherry-pick the supporting patch first: > > > > 4d3c5c69191f ("Drivers: hv: vmbus: Remove the useless API > > > vmbus_get_outgoing_channel()") > > > > > > That is not going to work for the obvious reason that this dependant > > > patch is not going to be merged into 4.20-final. > > > > It looks the dependent patch (4d3c5c69191f) is going to miss the v4.20 > > release. > > This is not a big issue, as the dependent patch isn't really important. > > > > > So, what do you expect us to do here? The only way this can be accepted > > > is to have it go into my -next branch, which means it will show up in > > > 4.21-rc1, is that ok? > > > > Is there any chance for this patch ("Drivers: hv: vmbus: Offload the > > handling > > ...") to > > go into v4.20? > > > > If yes, I can quickly do a rebase to char-misc's char-misc-linus branch, > > because actually the conflict can be very easily fixed. And I can help to > > fix any > > conflict when the dependent patch is backported to v4.20.1. 
> > This patch fixes an important bug while the patch this depends on is not > critical. > I suggest we revert the patch that this patch depends on > and we can submit a new version of this patch that can go in now - into 4.20 > release. > > K. Y I agree. Hi Greg, Please let us know what we can do to try to push this important fix into v4.20. Actually it's straightforward, though it looks big. And we have done full testing with the patch. Thanks, --Dexuan
Re: [PATCH v6 04/24] arm/arm64: gic-v3: Add PMR and RPR accessors
On Mon, Nov 12, 2018 at 11:56:55AM +0000, Julien Thierry wrote: > Add helper functions to access system registers related to interrupt > priorities: PMR and RPR. > > Signed-off-by: Julien Thierry > Cc: Russell King > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Marc Zyngier Acked-by: Catalin Marinas IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Re: [PATCH v6 04/24] arm/arm64: gic-v3: Add PMR and RPR accessors
On Mon, Nov 12, 2018 at 11:56:55AM +0000, Julien Thierry wrote: > Add helper functions to access system registers related to interrupt > priorities: PMR and RPR. > > Signed-off-by: Julien Thierry > Cc: Russell King > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Marc Zyngier Acked-by: Catalin Marinas IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Re: BUG: corrupted list in freeary
On Fri, Nov 30, 2018 at 5:58 PM, Dmitry Vyukov wrote: > On Thu, Nov 29, 2018 at 9:13 AM, Manfred Spraul > wrote: >> Hello together, >> >> On 11/27/18 4:52 PM, syzbot wrote: >> >> Hello, >> >> syzbot found the following crash on: >> >> HEAD commit:e195ca6cb6f2 Merge branch 'for-linus' of git://git.kernel... >> git tree: upstream >> console output: https://syzkaller.appspot.com/x/log.txt?x=10d3e6a340 >> >> From the console output: >> >> 20:36:14 executing program 4: >> semget$private(0x1200, 0x39d0, 0x0) >> >> >> I don't understand the 0x1200. >> >> What does that mean? What is the actual syscall? > > Hi Manfred, > > The syscall is semget with the first argument 0x1200. > > >> >> Is 0x39d0 the number of semaphores in the array, i.e. create ~13.000 >> semaphores? > > If the second argument of 0x39d0 relates to creation of 0x39d0 > semaphores, then yes. > > > >> kernel config: https://syzkaller.appspot.com/x/.config?x=73e2bc0cb6463446 >> dashboard link: https://syzkaller.appspot.com/bug?extid=c92d3646e35bc5d1a909 >> compiler: gcc (GCC) 8.0.1 20180413 (experimental) >> >> Unfortunately, I don't have any reproducer for this crash yet. >> >> IMPORTANT: if you fix the bug, please add the following tag to the commit: >> Reported-by: syzbot+c92d3646e35bc5d1a...@syzkaller.appspotmail.com >> >> input: syz1 as /devices/virtual/input/input670 >> input: syz1 as /devices/virtual/input/input671 >> list_del corruption. prev->next should be 8881dae2cdb8, but was >> 0010 >> [ cut here ] >> kernel BUG at lib/list_debug.c:53! 
>> invalid opcode: [#1] PREEMPT SMP KASAN >> CPU: 0 PID: 6194 Comm: syz-executor5 Not tainted 4.20.0-rc3+ #348 >> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS >> Google 01/01/2011 >> RIP: 0010:__list_del_entry_valid.cold.1+0x48/0x4a lib/list_debug.c:51 >> Code: d0 60 88 e8 b2 31 d2 fd 0f 0b 48 89 de 48 c7 c7 00 d2 60 88 e8 a1 31 >> d2 fd 0f 0b 48 89 de 48 c7 c7 a0 d1 60 88 e8 90 31 d2 fd <0f> 0b 48 89 d9 48 >> c7 c7 60 d2 60 88 e8 7f 31 d2 fd 0f 0b 48 89 f1 >> RSP: 0018:8881848fee80 EFLAGS: 00010286 >> RAX: 0054 RBX: 8881dae2cdb8 RCX: >> RDX: RSI: 8165eaf5 RDI: 0005 >> RBP: 8881848fee98 R08: 8881848f26c0 R09: 0006 >> R10: R11: 8881848f26c0 R12: 8881c3173a00 >> R13: 8881be118118 R14: 8881848ff280 R15: dc00 >> FS: 020b2940() GS:8881dae0() knlGS: >> CS: 0010 DS: ES: CR0: 80050033 >> CR2: 00625208 CR3: 0001c10d3000 CR4: 001406f0 >> DR0: DR1: DR2: >> DR3: DR6: fffe0ff0 DR7: 0400 >> Call Trace: >> __list_del_entry include/linux/list.h:117 [inline] >> list_del include/linux/list.h:125 [inline] >> unlink_queue ipc/sem.c:786 [inline] >> >> Unlink_queue means transfer all waiting threads to the wake-q. >> >> There are 2*(1+) linked lists in an array. >> >> And this fails, because one linked list contains 0x10 instead of a real >> pointer. >> >> I could not find any semop() in the log --> all lists must be empty. >> >> Actually, the lists were initialized in newary(), and then never touched. 
>> >> freeary+0xbd1/0x1a40 ipc/sem.c:1160 >> >> Free a semaphore array >> >> free_ipcs+0x9f/0x1c0 ipc/namespace.c:112 >> sem_exit_ns+0x20/0x40 ipc/sem.c:237 >> free_ipc_ns ipc/namespace.c:120 [inline] >> >> Free all ipc ids in the name space >> >> put_ipc_ns+0x66/0x180 ipc/namespace.c:152 >> free_nsproxy+0xcf/0x220 kernel/nsproxy.c:180 >> >> Free the name space >> >> switch_task_namespaces+0xb3/0xd0 kernel/nsproxy.c:229 >> exit_task_namespaces+0x17/0x20 kernel/nsproxy.c:234 >> do_exit+0x1ad1/0x26d0 kernel/exit.c:866 >> do_group_exit+0x177/0x440 kernel/exit.c:970 >> get_signal+0x8b0/0x1980 kernel/signal.c:2517 >> do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816 >> exit_to_usermode_loop+0x2e5/0x380 arch/x86/entry/common.c:162 >> prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] >> syscall_return_slowpath arch/x86/entry/common.c:268 [inline] >> do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293 >> entry_SYSCALL_64_after_hwframe+0x49/0xbe >> RIP: 0033:0x410fa0 >> >> This is time code 604.599748 in the console output: >> >> [ 604.599748] RIP: 0033:0x410fa0 >> >> >> Questions: >> >> 1) What is this? >> >> [ 600.924691] entry_SYSCALL_64_after_hwframe+0x49/0xbe^M >> [ 600.929872] RIP: 0033:0x7f3e597d0120^M >> [ 600.933576] Code: Bad RIP value.^M >> [ 600.936920] RSP: 002b:7ffc2d83e008 EFLAGS: 0246 ORIG_RAX: >> 0002^M >> [ 600.944608] RAX: ffda RBX: 55ca2995b436 RCX: >> 7f3e597d0120^M >> [ 600.951856] RDX: 7ffc2d83e244 RSI: 0008 RDI: >> 7ffc2d83e220^M >> [ 600.959107] RBP: 55ca2995b1e0 R08: R09: >>
Re: BUG: corrupted list in freeary
On Fri, Nov 30, 2018 at 5:58 PM, Dmitry Vyukov wrote: > On Thu, Nov 29, 2018 at 9:13 AM, Manfred Spraul > wrote: >> Hello together, >> >> On 11/27/18 4:52 PM, syzbot wrote: >> >> Hello, >> >> syzbot found the following crash on: >> >> HEAD commit:e195ca6cb6f2 Merge branch 'for-linus' of git://git.kernel... >> git tree: upstream >> console output: https://syzkaller.appspot.com/x/log.txt?x=10d3e6a340 >> >> From the console output: >> >> 20:36:14 executing program 4: >> semget$private(0x1200, 0x39d0, 0x0) >> >> >> I don't understand the 0x1200. >> >> What does that mean? What is the actual syscall? > > Hi Manfred, > > The syscall is semget with the first argument 0x1200. > > >> >> Is 0x39d0 the number of semaphores in the array, i.e. create ~13.000 >> semaphores? > > If the second argument of 0x39d0 relates to creation of 0x39d0 > semaphores, then yes. > > > >> kernel config: https://syzkaller.appspot.com/x/.config?x=73e2bc0cb6463446 >> dashboard link: https://syzkaller.appspot.com/bug?extid=c92d3646e35bc5d1a909 >> compiler: gcc (GCC) 8.0.1 20180413 (experimental) >> >> Unfortunately, I don't have any reproducer for this crash yet. >> >> IMPORTANT: if you fix the bug, please add the following tag to the commit: >> Reported-by: syzbot+c92d3646e35bc5d1a...@syzkaller.appspotmail.com >> >> input: syz1 as /devices/virtual/input/input670 >> input: syz1 as /devices/virtual/input/input671 >> list_del corruption. prev->next should be 8881dae2cdb8, but was >> 0010 >> [ cut here ] >> kernel BUG at lib/list_debug.c:53! 
>> invalid opcode: [#1] PREEMPT SMP KASAN >> CPU: 0 PID: 6194 Comm: syz-executor5 Not tainted 4.20.0-rc3+ #348 >> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS >> Google 01/01/2011 >> RIP: 0010:__list_del_entry_valid.cold.1+0x48/0x4a lib/list_debug.c:51 >> Code: d0 60 88 e8 b2 31 d2 fd 0f 0b 48 89 de 48 c7 c7 00 d2 60 88 e8 a1 31 >> d2 fd 0f 0b 48 89 de 48 c7 c7 a0 d1 60 88 e8 90 31 d2 fd <0f> 0b 48 89 d9 48 >> c7 c7 60 d2 60 88 e8 7f 31 d2 fd 0f 0b 48 89 f1 >> RSP: 0018:8881848fee80 EFLAGS: 00010286 >> RAX: 0054 RBX: 8881dae2cdb8 RCX: >> RDX: RSI: 8165eaf5 RDI: 0005 >> RBP: 8881848fee98 R08: 8881848f26c0 R09: 0006 >> R10: R11: 8881848f26c0 R12: 8881c3173a00 >> R13: 8881be118118 R14: 8881848ff280 R15: dc00 >> FS: 020b2940() GS:8881dae0() knlGS: >> CS: 0010 DS: ES: CR0: 80050033 >> CR2: 00625208 CR3: 0001c10d3000 CR4: 001406f0 >> DR0: DR1: DR2: >> DR3: DR6: fffe0ff0 DR7: 0400 >> Call Trace: >> __list_del_entry include/linux/list.h:117 [inline] >> list_del include/linux/list.h:125 [inline] >> unlink_queue ipc/sem.c:786 [inline] >> >> Unlink_queue means transfer all waiting threads to the wake-q. >> >> There are 2*(1+) linked lists in an array. >> >> And this fails, because one linked list contains 0x10 instead of a real >> pointer. >> >> I could not find any semop() in the log --> all lists must be empty. >> >> Actually, the lists were initialized in newary(), and then never touched. 
>> >> freeary+0xbd1/0x1a40 ipc/sem.c:1160 >> >> Free a semaphore array >> >> free_ipcs+0x9f/0x1c0 ipc/namespace.c:112 >> sem_exit_ns+0x20/0x40 ipc/sem.c:237 >> free_ipc_ns ipc/namespace.c:120 [inline] >> >> Free all ipc ids in the name space >> >> put_ipc_ns+0x66/0x180 ipc/namespace.c:152 >> free_nsproxy+0xcf/0x220 kernel/nsproxy.c:180 >> >> Free the name space >> >> switch_task_namespaces+0xb3/0xd0 kernel/nsproxy.c:229 >> exit_task_namespaces+0x17/0x20 kernel/nsproxy.c:234 >> do_exit+0x1ad1/0x26d0 kernel/exit.c:866 >> do_group_exit+0x177/0x440 kernel/exit.c:970 >> get_signal+0x8b0/0x1980 kernel/signal.c:2517 >> do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816 >> exit_to_usermode_loop+0x2e5/0x380 arch/x86/entry/common.c:162 >> prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] >> syscall_return_slowpath arch/x86/entry/common.c:268 [inline] >> do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293 >> entry_SYSCALL_64_after_hwframe+0x49/0xbe >> RIP: 0033:0x410fa0 >> >> This is time code 604.599748 in the console output: >> >> [ 604.599748] RIP: 0033:0x410fa0 >> >> >> Questions: >> >> 1) What is this? >> >> [ 600.924691] entry_SYSCALL_64_after_hwframe+0x49/0xbe^M >> [ 600.929872] RIP: 0033:0x7f3e597d0120^M >> [ 600.933576] Code: Bad RIP value.^M >> [ 600.936920] RSP: 002b:7ffc2d83e008 EFLAGS: 0246 ORIG_RAX: >> 0002^M >> [ 600.944608] RAX: ffda RBX: 55ca2995b436 RCX: >> 7f3e597d0120^M >> [ 600.951856] RDX: 7ffc2d83e244 RSI: 0008 RDI: >> 7ffc2d83e220^M >> [ 600.959107] RBP: 55ca2995b1e0 R08: R09: >>
Re: [PATCH v6 03/24] arm64: cpufeature: Add cpufeature for IRQ priority masking
On Mon, Nov 12, 2018 at 11:56:54AM +0000, Julien Thierry wrote: > Add a cpufeature indicating whether a cpu supports masking interrupts > by priority. > > The feature will be properly enabled in a later patch. > > Signed-off-by: Julien Thierry > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Marc Zyngier > Cc: Suzuki K Poulose Acked-by: Catalin Marinas IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Re: [PATCH v6 03/24] arm64: cpufeature: Add cpufeature for IRQ priority masking
On Mon, Nov 12, 2018 at 11:56:54AM +0000, Julien Thierry wrote: > Add a cpufeature indicating whether a cpu supports masking interrupts > by priority. > > The feature will be properly enabled in a later patch. > > Signed-off-by: Julien Thierry > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Marc Zyngier > Cc: Suzuki K Poulose Acked-by: Catalin Marinas IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Re: [PATCH v6 02/24] arm64: cpufeature: Set SYSREG_GIC_CPUIF as a boot system feature
On Mon, Nov 12, 2018 at 11:56:53AM +0000, Julien Thierry wrote: > It is not supported to have some CPUs using GICv3 sysreg CPU interface > while some others do not. > > Once ICC_SRE_EL1.SRE is set on a CPU, the bit cannot be cleared. Since > matching this feature requires setting ICC_SRE_EL1.SRE, it cannot be > turned off if found on a CPU. > > Set the feature as STRICT_BOOT, if boot CPU has it, all other CPUs are > required to have it. > > Signed-off-by: Julien Thierry > Suggested-by: Daniel Thompson > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Suzuki K Poulose > Cc: Marc Zyngier Acked-by: Catalin Marinas IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Re: [PATCH v6 02/24] arm64: cpufeature: Set SYSREG_GIC_CPUIF as a boot system feature
On Mon, Nov 12, 2018 at 11:56:53AM +0000, Julien Thierry wrote: > It is not supported to have some CPUs using GICv3 sysreg CPU interface > while some others do not. > > Once ICC_SRE_EL1.SRE is set on a CPU, the bit cannot be cleared. Since > matching this feature requires setting ICC_SRE_EL1.SRE, it cannot be > turned off if found on a CPU. > > Set the feature as STRICT_BOOT, if boot CPU has it, all other CPUs are > required to have it. > > Signed-off-by: Julien Thierry > Suggested-by: Daniel Thompson > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Suzuki K Poulose > Cc: Marc Zyngier Acked-by: Catalin Marinas IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation
On Fri, Nov 30, 2018 at 05:49:34PM +0000, StDenis, Tom wrote: > On 2018-11-30 12:48 p.m., Peter Zijlstra wrote: > > On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote: > >> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote: > > > >>> I pushed them out to: > >>> > >>> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm > >>> > >>> I hope that works; I'm out for a few hours, but should check on email > >>> again tonight. > >>> > >> > >> NAK I get a failure in TTM on init with your x86/mm branch (see attached > >> dmesg). > > > > *sigh*, it's been one of those days. Ok, I'll go write some cpa > > selftests or something so that I have code that uses this stuff. > > > > Well, the ttm crash could be completely unrelated; the problem is your > x86/mm branch is not up to date with master and doesn't include drm fixes. Well, it crashes right in the middle of the CPA code, and I'm having a horrible day, so I'm thinking I screwed up rather than anything else. Also, some level of selftests would be good to have in any case I figure.
Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation
On Fri, Nov 30, 2018 at 05:49:34PM +0000, StDenis, Tom wrote: > On 2018-11-30 12:48 p.m., Peter Zijlstra wrote: > > On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote: > >> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote: > > > >>> I pushed them out to: > >>> > >>> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm > >>> > >>> I hope that works; I'm out for a few hours, but should check on email > >>> again tonight. > >>> > >> > >> NAK I get a failure in TTM on init with your x86/mm branch (see attached > >> dmesg). > > > > *sigh*, it's been one of those days. Ok, I'll go write some cpa > > selftests or something so that I have code that uses this stuff. > > > > Well, the ttm crash could be completely unrelated; the problem is your > x86/mm branch is not up to date with master and doesn't include drm fixes. Well, it crashes right in the middle of the CPA code, and I'm having a horrible day, so I'm thinking I screwed up rather than anything else. Also, some level of selftests would be good to have in any case I figure.
Re: [PATCH v4 4/6] coresight: Use PMU driver configuration for sink selection
On Fri, 30 Nov 2018 at 00:42, Greg KH wrote: > > On Thu, Nov 29, 2018 at 04:09:15PM -0700, Mathieu Poirier wrote: > > Hi Greg, > > > > On Thu, Nov 29, 2018 at 08:49:36AM +0100, Greg KH wrote: > > > On Wed, Nov 28, 2018 at 03:01:16PM -0700, Mathieu Poirier wrote: > > > > This patch uses the PMU driver configuration held in > > > > event::hw::drv_config > > > > to select a sink for each event that is created (the old sysFS way of > > > > working is kept around for backward compatibility). > > > > > > It is "sysfs", no InterCaps please, I've never called it that in the > > > past. > > > > > > And just use sysfs, if that does not work properly, then fix that, don't > > > create yet-another-way-to-configure-this-thing to just confuse people. > > > > Thanks for the review, you've provided useful comments. > > > > Regarding the "char *" argument for the ioctl, I followed an example that > > currently exists but I can proceed differently. > > What driver currently uses a char * on an ioctl to parse arbitrary > userspace information to set its configuration? That should be fixed... > Perf filters [1] are communicated to the kernel as a char *. Given the dynamic nature of event creation I really don't know how else it could have been done. [1]. https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/perf_event.h#L459 > > My goal with this patchset was specifically to fix what is wrong with sysfs > > and > > completely take it out of the equation. The only reason to keep the kernel > > interface alive was to prevent breaking older user space perf tools currently > > using it. > > That's fine, just don't create a new syscall that takes arbitrary data > and parses it in the kernel, that's not ok. You got it. > > > I chose to use an ioctl() because it is flexible and well suited for the > > dynamic > > nature of perf events. 
It is also currently used to set various event > > specific > > configuration so doing the same adds to the established pattern and avoids > > creating a new way of doing things, something the perf crew would have been > > quick to point out. > > > > Was my approach wrong? > > I don't know how the perf interface works, so perhaps work with those > developers to sync up and match what they use today? I have already done a fair amount of work with them. > > But step back, what exactly are you trying to do here? You have an > implementation of a solution but I don't see the problem stated anywhere > here. In the coresight world there can be more than one sink to aggregate trace data generated by CPUs, hence the need for users to select which one to use from the perf command line. Up until now sysfs was used to communicate sink information to the kernel but that was never the right way to proceed because it breaks when more than one perf session is created at the same time. The situation was manageable when working with per-thread scenarios where a single HW trace event is created but in CPU-wide mode a HW trace event is created for each CPU that is specified on the perf command line, taking us back to the concurrency problem we have when dealing with multiple per-thread sessions. Since my goal is to add coresight support for CPU-wide trace scenarios, the issue with sysfs concurrency needs to be addressed first, which this set is aiming at. Sysfs is a problem so I'm removing it in favour of an ioctl() where a specific sink can be assigned to each event. The above should probably go in the cover letter. Let me know if you want more information. Thanks, Mathieu
Re: [PATCH v4 4/6] coresight: Use PMU driver configuration for sink selection
On Fri, 30 Nov 2018 at 00:42, Greg KH wrote: > > On Thu, Nov 29, 2018 at 04:09:15PM -0700, Mathieu Poirier wrote: > > Hi Greg, > > > > On Thu, Nov 29, 2018 at 08:49:36AM +0100, Greg KH wrote: > > > On Wed, Nov 28, 2018 at 03:01:16PM -0700, Mathieu Poirier wrote: > > > > This patch uses the PMU driver configuration held in > > > > event::hw::drv_config > > > > to select a sink for each event that is created (the old sysFS way of > > > > working is kept around for backward compatibility). > > > > > > It is "sysfs", no InterCaps please, I've never called it that in the > > > past. > > > > > > And just use sysfs, if that does not work properly, then fix that, don't > > > create yet-another-way-to-configure-this-thing to just confuse people. > > > > Thanks for the review, you've provided useful comments. > > > > Regarding the "char *" argument for the ioctl, I followed an example that > > currently exists but I can proceed differently. > > What driver currently uses a char * on an ioctl to parse arbitrary > userspace information to set its configuration? That should be fixed... > Perf filters [1] are communicated to the kernel as a char *. Given the dynamic nature of event creation I really don't know how else it could have been done. [1]. https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/perf_event.h#L459 > > My goal with this patchset was specifically to fix what is wrong with sysfs > > and > > completely take it out of the equation. The only reason to keep the kernel > > interface alive was to prevent breaking older user space perf tools currently > > using it. > > That's fine, just don't create a new syscall that takes arbitrary data > and parses it in the kernel, that's not ok. You got it. > > > I chose to use an ioctl() because it is flexible and well suited for the > > dynamic > > nature of perf events. 
It is also currently used to set various event > > specific > > configuration so doing the same adds to the established pattern and avoids > > creating a new way of doing things, something the perf crew would have been > > quick to point out. > > > > Was my approach wrong? > > I don't know how the perf interface works, so perhaps work with those > developers to sync up and match what they use today? I have already done a fair amount of work with them. > > But step back, what exactly are you trying to do here? You have an > implementation of a solution but I don't see the problem stated anywhere > here. In the coresight world there can be more than one sink to aggregate trace data generated by CPUs, hence the need for users to select which one to use from the perf command line. Up until now sysfs was used to communicate sink information to the kernel, but that was never the right way to proceed because it breaks when more than one perf session is created at the same time. The situation was manageable when working with per-thread scenarios where a single HW trace event is created, but in CPU-wide mode a HW trace event is created for each CPU that is specified on the perf command line, taking us back to the concurrency problem we have when dealing with multiple per-thread sessions. Since my goal is to add coresight support for CPU-wide trace scenarios, the issue with sysfs concurrency needs to be addressed first, which is what this set aims to do. Sysfs is a problem so I'm removing it in favour of an ioctl() where a specific sink can be assigned to each event. The above should probably go in the cover letter. Let me know if you want more information. Thanks, Mathieu
[PATCH] ARM: dts: s5pv210: Add s5p-jpeg codec node.
This commit adds node for s5p-jpeg codec, which is present in S5PV210 SOC. Signed-off-by: Paweł Chmiel --- arch/arm/boot/dts/s5pv210.dtsi | 9 + 1 file changed, 9 insertions(+) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 75f454a210d6..12eac8930eac 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -627,6 +627,15 @@ samsung,lcd-wb; }; }; + + jpeg_codec: jpeg-codec@fb60 { + compatible = "samsung,s5pv210-jpeg"; + reg = <0xfb60 0x1000>; + interrupt-parent = <>; + interrupts = <8>; + clocks = < CLK_JPEG>; + clock-names = "jpeg"; + }; }; }; -- 2.17.1
[PATCH] ARM: dts: s5pv210: Add s5p-jpeg codec node.
This commit adds node for s5p-jpeg codec, which is present in S5PV210 SOC. Signed-off-by: Paweł Chmiel --- arch/arm/boot/dts/s5pv210.dtsi | 9 + 1 file changed, 9 insertions(+) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 75f454a210d6..12eac8930eac 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -627,6 +627,15 @@ samsung,lcd-wb; }; }; + + jpeg_codec: jpeg-codec@fb60 { + compatible = "samsung,s5pv210-jpeg"; + reg = <0xfb60 0x1000>; + interrupt-parent = <>; + interrupts = <8>; + clocks = < CLK_JPEG>; + clock-names = "jpeg"; + }; }; }; -- 2.17.1
Re: [PATCH v6 01/24] arm64: Remove unused daif related functions/macros
On Mon, Nov 12, 2018 at 11:56:52AM +0000, Julien Thierry wrote: > There are some helpers to modify PSR.[DAIF] bits that are not referenced > anywhere. The less these bits are available outside of local_irq_* > functions the better. > > Get rid of those unused helpers. > > Signed-off-by: Julien Thierry > Cc: Catalin Marinas > Cc: Will Deacon > Cc: James Morse Acked-by: Catalin Marinas IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Re: [PATCH v6 01/24] arm64: Remove unused daif related functions/macros
On Mon, Nov 12, 2018 at 11:56:52AM +0000, Julien Thierry wrote: > There are some helpers to modify PSR.[DAIF] bits that are not referenced > anywhere. The less these bits are available outside of local_irq_* > functions the better. > > Get rid of those unused helpers. > > Signed-off-by: Julien Thierry > Cc: Catalin Marinas > Cc: Will Deacon > Cc: James Morse Acked-by: Catalin Marinas IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Re: BUG: corrupted list in freeary
On Thu, Nov 29, 2018 at 9:13 AM, Manfred Spraul wrote: > Hello together, > > On 11/27/18 4:52 PM, syzbot wrote: > > Hello, > > syzbot found the following crash on: > > HEAD commit:e195ca6cb6f2 Merge branch 'for-linus' of git://git.kernel... > git tree: upstream > console output: https://syzkaller.appspot.com/x/log.txt?x=10d3e6a340 > > From the console output: > > 20:36:14 executing program 4: > semget$private(0x1200, 0x39d0, 0x0) > > > I don't understand the 0x1200. > > What does that mean? What is the actual syscall? Hi Manfred, The syscall is semget with the first argument 0x1200. > > Is 0x39d0 the number of semaphores in the array, i.e. create ~13.000 > semaphores? If the second argument of 0x39d0 relates to creation of 0x39d0 semaphores, then yes. > kernel config: https://syzkaller.appspot.com/x/.config?x=73e2bc0cb6463446 > dashboard link: https://syzkaller.appspot.com/bug?extid=c92d3646e35bc5d1a909 > compiler: gcc (GCC) 8.0.1 20180413 (experimental) > > Unfortunately, I don't have any reproducer for this crash yet. > > IMPORTANT: if you fix the bug, please add the following tag to the commit: > Reported-by: syzbot+c92d3646e35bc5d1a...@syzkaller.appspotmail.com > > input: syz1 as /devices/virtual/input/input670 > input: syz1 as /devices/virtual/input/input671 > list_del corruption. prev->next should be 8881dae2cdb8, but was > 0010 > [ cut here ] > kernel BUG at lib/list_debug.c:53! 
> invalid opcode: [#1] PREEMPT SMP KASAN > CPU: 0 PID: 6194 Comm: syz-executor5 Not tainted 4.20.0-rc3+ #348 > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS > Google 01/01/2011 > RIP: 0010:__list_del_entry_valid.cold.1+0x48/0x4a lib/list_debug.c:51 > Code: d0 60 88 e8 b2 31 d2 fd 0f 0b 48 89 de 48 c7 c7 00 d2 60 88 e8 a1 31 > d2 fd 0f 0b 48 89 de 48 c7 c7 a0 d1 60 88 e8 90 31 d2 fd <0f> 0b 48 89 d9 48 > c7 c7 60 d2 60 88 e8 7f 31 d2 fd 0f 0b 48 89 f1 > RSP: 0018:8881848fee80 EFLAGS: 00010286 > RAX: 0054 RBX: 8881dae2cdb8 RCX: > RDX: RSI: 8165eaf5 RDI: 0005 > RBP: 8881848fee98 R08: 8881848f26c0 R09: 0006 > R10: R11: 8881848f26c0 R12: 8881c3173a00 > R13: 8881be118118 R14: 8881848ff280 R15: dc00 > FS: 020b2940() GS:8881dae0() knlGS: > CS: 0010 DS: ES: CR0: 80050033 > CR2: 00625208 CR3: 0001c10d3000 CR4: 001406f0 > DR0: DR1: DR2: > DR3: DR6: fffe0ff0 DR7: 0400 > Call Trace: > __list_del_entry include/linux/list.h:117 [inline] > list_del include/linux/list.h:125 [inline] > unlink_queue ipc/sem.c:786 [inline] > > Unlink_queue means transfer all waiting threads to the wake-q. > > There are 2*(1+) linked lists in an array. > > And this fails, because one linked list contains 0x10 instead of a real > pointer. > > I could not find any semop() in the log --> all lists must be empty. > > Actually, the lists were initialized in newary(), and then never touched. 
> > freeary+0xbd1/0x1a40 ipc/sem.c:1160 > > Free a semaphore array > > free_ipcs+0x9f/0x1c0 ipc/namespace.c:112 > sem_exit_ns+0x20/0x40 ipc/sem.c:237 > free_ipc_ns ipc/namespace.c:120 [inline] > > Free all ipc ids in the name space > > put_ipc_ns+0x66/0x180 ipc/namespace.c:152 > free_nsproxy+0xcf/0x220 kernel/nsproxy.c:180 > > Free the name space > > switch_task_namespaces+0xb3/0xd0 kernel/nsproxy.c:229 > exit_task_namespaces+0x17/0x20 kernel/nsproxy.c:234 > do_exit+0x1ad1/0x26d0 kernel/exit.c:866 > do_group_exit+0x177/0x440 kernel/exit.c:970 > get_signal+0x8b0/0x1980 kernel/signal.c:2517 > do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816 > exit_to_usermode_loop+0x2e5/0x380 arch/x86/entry/common.c:162 > prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] > syscall_return_slowpath arch/x86/entry/common.c:268 [inline] > do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > RIP: 0033:0x410fa0 > > This is time code 604.599748 in the console output: > > [ 604.599748] RIP: 0033:0x410fa0 > > > Questions: > > 1) What is this? > > [ 600.924691] entry_SYSCALL_64_after_hwframe+0x49/0xbe^M > [ 600.929872] RIP: 0033:0x7f3e597d0120^M > [ 600.933576] Code: Bad RIP value.^M > [ 600.936920] RSP: 002b:7ffc2d83e008 EFLAGS: 0246 ORIG_RAX: > 0002^M > [ 600.944608] RAX: ffda RBX: 55ca2995b436 RCX: > 7f3e597d0120^M > [ 600.951856] RDX: 7ffc2d83e244 RSI: 0008 RDI: > 7ffc2d83e220^M > [ 600.959107] RBP: 55ca2995b1e0 R08: R09: > 55ca2995b099^M > [ 600.966355] R10: R11: 0246 R12: > 0001^M > [ 600.973628] R13: 55ca2995b090 R14: 55ca2995b190 R15: > 7ffc2d83e220^M > >
Re: BUG: corrupted list in freeary
On Thu, Nov 29, 2018 at 9:13 AM, Manfred Spraul wrote: > Hello together, > > On 11/27/18 4:52 PM, syzbot wrote: > > Hello, > > syzbot found the following crash on: > > HEAD commit:e195ca6cb6f2 Merge branch 'for-linus' of git://git.kernel... > git tree: upstream > console output: https://syzkaller.appspot.com/x/log.txt?x=10d3e6a340 > > From the console output: > > 20:36:14 executing program 4: > semget$private(0x1200, 0x39d0, 0x0) > > > I don't understand the 0x1200. > > What does that mean? What is the actual syscall? Hi Manfred, The syscall is semget with the first argument 0x1200. > > Is 0x39d0 the number of semaphores in the array, i.e. create ~13.000 > semaphores? If the second argument of 0x39d0 relates to creation of 0x39d0 semaphores, then yes. > kernel config: https://syzkaller.appspot.com/x/.config?x=73e2bc0cb6463446 > dashboard link: https://syzkaller.appspot.com/bug?extid=c92d3646e35bc5d1a909 > compiler: gcc (GCC) 8.0.1 20180413 (experimental) > > Unfortunately, I don't have any reproducer for this crash yet. > > IMPORTANT: if you fix the bug, please add the following tag to the commit: > Reported-by: syzbot+c92d3646e35bc5d1a...@syzkaller.appspotmail.com > > input: syz1 as /devices/virtual/input/input670 > input: syz1 as /devices/virtual/input/input671 > list_del corruption. prev->next should be 8881dae2cdb8, but was > 0010 > [ cut here ] > kernel BUG at lib/list_debug.c:53! 
> invalid opcode: [#1] PREEMPT SMP KASAN > CPU: 0 PID: 6194 Comm: syz-executor5 Not tainted 4.20.0-rc3+ #348 > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS > Google 01/01/2011 > RIP: 0010:__list_del_entry_valid.cold.1+0x48/0x4a lib/list_debug.c:51 > Code: d0 60 88 e8 b2 31 d2 fd 0f 0b 48 89 de 48 c7 c7 00 d2 60 88 e8 a1 31 > d2 fd 0f 0b 48 89 de 48 c7 c7 a0 d1 60 88 e8 90 31 d2 fd <0f> 0b 48 89 d9 48 > c7 c7 60 d2 60 88 e8 7f 31 d2 fd 0f 0b 48 89 f1 > RSP: 0018:8881848fee80 EFLAGS: 00010286 > RAX: 0054 RBX: 8881dae2cdb8 RCX: > RDX: RSI: 8165eaf5 RDI: 0005 > RBP: 8881848fee98 R08: 8881848f26c0 R09: 0006 > R10: R11: 8881848f26c0 R12: 8881c3173a00 > R13: 8881be118118 R14: 8881848ff280 R15: dc00 > FS: 020b2940() GS:8881dae0() knlGS: > CS: 0010 DS: ES: CR0: 80050033 > CR2: 00625208 CR3: 0001c10d3000 CR4: 001406f0 > DR0: DR1: DR2: > DR3: DR6: fffe0ff0 DR7: 0400 > Call Trace: > __list_del_entry include/linux/list.h:117 [inline] > list_del include/linux/list.h:125 [inline] > unlink_queue ipc/sem.c:786 [inline] > > Unlink_queue means transfer all waiting threads to the wake-q. > > There are 2*(1+) linked lists in an array. > > And this fails, because one linked list contains 0x10 instead of a real > pointer. > > I could not find any semop() in the log --> all lists must be empty. > > Actually, the lists were initialized in newary(), and then never touched. 
> > freeary+0xbd1/0x1a40 ipc/sem.c:1160 > > Free a semaphore array > > free_ipcs+0x9f/0x1c0 ipc/namespace.c:112 > sem_exit_ns+0x20/0x40 ipc/sem.c:237 > free_ipc_ns ipc/namespace.c:120 [inline] > > Free all ipc ids in the name space > > put_ipc_ns+0x66/0x180 ipc/namespace.c:152 > free_nsproxy+0xcf/0x220 kernel/nsproxy.c:180 > > Free the name space > > switch_task_namespaces+0xb3/0xd0 kernel/nsproxy.c:229 > exit_task_namespaces+0x17/0x20 kernel/nsproxy.c:234 > do_exit+0x1ad1/0x26d0 kernel/exit.c:866 > do_group_exit+0x177/0x440 kernel/exit.c:970 > get_signal+0x8b0/0x1980 kernel/signal.c:2517 > do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816 > exit_to_usermode_loop+0x2e5/0x380 arch/x86/entry/common.c:162 > prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] > syscall_return_slowpath arch/x86/entry/common.c:268 [inline] > do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > RIP: 0033:0x410fa0 > > This is time code 604.599748 in the console output: > > [ 604.599748] RIP: 0033:0x410fa0 > > > Questions: > > 1) What is this? > > [ 600.924691] entry_SYSCALL_64_after_hwframe+0x49/0xbe^M > [ 600.929872] RIP: 0033:0x7f3e597d0120^M > [ 600.933576] Code: Bad RIP value.^M > [ 600.936920] RSP: 002b:7ffc2d83e008 EFLAGS: 0246 ORIG_RAX: > 0002^M > [ 600.944608] RAX: ffda RBX: 55ca2995b436 RCX: > 7f3e597d0120^M > [ 600.951856] RDX: 7ffc2d83e244 RSI: 0008 RDI: > 7ffc2d83e220^M > [ 600.959107] RBP: 55ca2995b1e0 R08: R09: > 55ca2995b099^M > [ 600.966355] R10: R11: 0246 R12: > 0001^M > [ 600.973628] R13: 55ca2995b090 R14: 55ca2995b190 R15: > 7ffc2d83e220^M > >
Re: [PATCH 2/2] arm64: dts: marvell: armada-37xx: Enable emmc on espressobin
Hi Ding, On ven., oct. 26 2018, Ding Tao wrote: > The ESPRESSObin board has an eMMC interface available on U11, let's > enable it. Applied on mvebu/dt64 for now, but what happens if U11 is not populated? Thanks, Gregory > > Signed-off-by: Ding Tao > --- > .../dts/marvell/armada-3720-espressobin.dts| 18 ++ > 1 file changed, 18 insertions(+) > > diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts > b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts > index 3ab25ad402b9..ee05aabbba88 100644 > --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts > +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts > @@ -60,6 +60,24 @@ > cd-gpios = < 3 GPIO_ACTIVE_LOW>; > marvell,pad-type = "sd"; > vqmmc-supply = <_sd_reg1>; > + > + pinctrl-names = "default"; > + pinctrl-0 = <_pins>; > + status = "okay"; > +}; > + > +/* U11 */ > + { > + non-removable; > + bus-width = <8>; > + mmc-ddr-1_8v; > + mmc-hs400-1_8v; > + marvell,xenon-emmc; > + marvell,xenon-tun-count = <9>; > + marvell,pad-type = "fixed-1-8v"; > + > + pinctrl-names = "default"; > + pinctrl-0 = <_pins>; > status = "okay"; > }; > > -- > 2.17.1 > > > -- Gregory Clement, Bootlin Embedded Linux and Kernel engineering http://bootlin.com
Re: [PATCH 2/2] arm64: dts: marvell: armada-37xx: Enable emmc on espressobin
Hi Ding, On ven., oct. 26 2018, Ding Tao wrote: > The ESPRESSObin board has an eMMC interface available on U11, let's > enable it. Applied on mvebu/dt64 for now, but what happens if U11 is not populated? Thanks, Gregory > > Signed-off-by: Ding Tao > --- > .../dts/marvell/armada-3720-espressobin.dts| 18 ++ > 1 file changed, 18 insertions(+) > > diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts > b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts > index 3ab25ad402b9..ee05aabbba88 100644 > --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts > +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts > @@ -60,6 +60,24 @@ > cd-gpios = < 3 GPIO_ACTIVE_LOW>; > marvell,pad-type = "sd"; > vqmmc-supply = <_sd_reg1>; > + > + pinctrl-names = "default"; > + pinctrl-0 = <_pins>; > + status = "okay"; > +}; > + > +/* U11 */ > + { > + non-removable; > + bus-width = <8>; > + mmc-ddr-1_8v; > + mmc-hs400-1_8v; > + marvell,xenon-emmc; > + marvell,xenon-tun-count = <9>; > + marvell,pad-type = "fixed-1-8v"; > + > + pinctrl-names = "default"; > + pinctrl-0 = <_pins>; > status = "okay"; > }; > > -- > 2.17.1 > > > -- Gregory Clement, Bootlin Embedded Linux and Kernel engineering http://bootlin.com
Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation
On Fri, Nov 30, 2018 at 03:27:02PM +0000, StDenis, Tom wrote: > I can apply the patch you attached but the inline patches just don't > apply. Could be my imap client (thunderbird) mangled them but I've > applied patches this way before. Could you attach them instead please? That's arguably a bug in Thunderbird; but there are already upstream quilt changes (that I used to have before Debian helpfully updated my quilt package) that should remedy this as well. It seems some MUAs get horribly confused about the "Content-Disposition: inline; filename=$patch" header quilt-mail adds. I've once again removed that from my local copy; hopefully the next time Debian updates that package it will actually be with a new enough version to also include those changes :/
Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation
On Fri, Nov 30, 2018 at 03:27:02PM +0000, StDenis, Tom wrote: > I can apply the patch you attached but the inline patches just don't > apply. Could be my imap client (thunderbird) mangled them but I've > applied patches this way before. Could you attach them instead please? That's arguably a bug in Thunderbird; but there are already upstream quilt changes (that I used to have before Debian helpfully updated my quilt package) that should remedy this as well. It seems some MUAs get horribly confused about the "Content-Disposition: inline; filename=$patch" header quilt-mail adds. I've once again removed that from my local copy; hopefully the next time Debian updates that package it will actually be with a new enough version to also include those changes :/
Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation
On 2018-11-30 12:48 p.m., Peter Zijlstra wrote: > On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote: >> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote: > >>> I pushed them out to: >>> >>> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm >>> >>> I hope that works; I'm out for a few hours, but should check on email >>> again tonight. >>> >> >> NAK I get a failure in TTM on init with your x86/mm branch (see attached >> dmesg). > > *sigh*, it's been one of those days. Ok, I'll go write some cpa > selftests or something so that I have code that uses this stuff. > Well, the TTM crash could be completely unrelated; the problem is that your x86/mm branch is not up to date with master and doesn't include the drm fixes. Tom
Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation
On 2018-11-30 12:48 p.m., Peter Zijlstra wrote: > On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote: >> On 2018-11-30 10:31 a.m., Peter Zijlstra wrote: > >>> I pushed them out to: >>> >>> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm >>> >>> I hope that works; I'm out for a few hours, but should check on email >>> again tonight. >>> >> >> NAK I get a failure in TTM on init with your x86/mm branch (see attached >> dmesg). > > *sigh*, it's been one of those days. Ok, I'll go write some cpa > selftests or something so that I have code that uses this stuff. > Well, the TTM crash could be completely unrelated; the problem is that your x86/mm branch is not up to date with master and doesn't include the drm fixes. Tom
Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation
On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote: > On 2018-11-30 10:31 a.m., Peter Zijlstra wrote: > > I pushed them out to: > > > >git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm > > > > I hope that works; I'm out for a few hours, but should check on email > > again tonight. > > > > NAK I get a failure in TTM on init with your x86/mm branch (see attached > dmesg). *sigh*, it's been one of those days. Ok, I'll go write some cpa selftests or something so that I have code that uses this stuff.
Re: [PATCH 0/4] x86/mm/cpa: Fix cpa-array TLB invalidation
On Fri, Nov 30, 2018 at 04:19:46PM +0000, StDenis, Tom wrote: > On 2018-11-30 10:31 a.m., Peter Zijlstra wrote: > > I pushed them out to: > > > >git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/mm > > > > I hope that works; I'm out for a few hours, but should check on email > > again tonight. > > > > NAK I get a failure in TTM on init with your x86/mm branch (see attached > dmesg). *sigh*, it's been one of those days. Ok, I'll go write some cpa selftests or something so that I have code that uses this stuff.
Re: [PATCH 2/4] x86/mm/cpa: Fix cpa_flush_array()
> +void __cpa_flush_array(void *data) > { > - unsigned int i, level; > + struct cpa_data *cpa = data; > + unsigned int i; > > - if (__cpa_flush_range(baddr, numpages, cache)) > + for (i = 0; i < cpa->numpages; i++) > + __flush_tlb_one_kernel(__cpa_addr(cpa, i)); > +} While I guess it won't _hurt_ anything, we do have cases where __cpa_addr() can return 0. So, won't this be flushing virtual address 0x0 unnecessarily for those?
Re: [PATCH 2/4] x86/mm/cpa: Fix cpa_flush_array()
> +void __cpa_flush_array(void *data) > { > - unsigned int i, level; > + struct cpa_data *cpa = data; > + unsigned int i; > > - if (__cpa_flush_range(baddr, numpages, cache)) > + for (i = 0; i < cpa->numpages; i++) > + __flush_tlb_one_kernel(__cpa_addr(cpa, i)); > +} While I guess it won't _hurt_ anything, we do have cases where __cpa_addr() can return 0. So, won't this be flushing virtual address 0x0 unnecessarily for those?
Re: [PATCH 0/2] [GIT PULL] tracing: More fixes for 4.20
On Thu, Nov 29, 2018 at 7:19 PM Steven Rostedt wrote: > > Note, this is on top of a previous git pull that I have submitted: > > http://lkml.kernel.org/r/20181127224031.76681...@vmware.local.home Hmm. I had dismissed that, because the patch descriptors for that series had had "for-next" in them. https://lore.kernel.org/lkml/20181122002801.501220...@goodmis.org/ so I dismissed that pull request entirely as being not for this release entirely. I went back and merged things, but in general, please try to avoid confusing me. I'm easily confused when I get mixed messages about the patches and the pull requests, and will then generally default to "ignore, this is informational". Linus
Re: [PATCH 0/2] [GIT PULL] tracing: More fixes for 4.20
On Thu, Nov 29, 2018 at 7:19 PM Steven Rostedt wrote: > > Note, this is on top of a previous git pull that I have submitted: > > http://lkml.kernel.org/r/20181127224031.76681...@vmware.local.home Hmm. I had dismissed that, because the patch descriptors for that series had had "for-next" in them. https://lore.kernel.org/lkml/20181122002801.501220...@goodmis.org/ so I dismissed that pull request entirely as being not for this release entirely. I went back and merged things, but in general, please try to avoid confusing me. I'm easily confused when I get mixed messages about the patches and the pull requests, and will then generally default to "ignore, this is informational". Linus
[PATCH 1/1] selftests: watchdog: Add gettimeleft command line arg
Add command line argument to call and display the results of ioctl WDIOC_GETTIMELEFT. Signed-off-by: Jerry Hoemann --- tools/testing/selftests/watchdog/watchdog-test.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index c6bd9a6..dac907a 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -19,7 +19,7 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:Tn:N"; +static const char sopts[] = "bdehp:t:Tn:NL"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, @@ -30,6 +30,7 @@ {"gettimeout", no_argument, NULL, 'T'}, {"pretimeout",required_argument, NULL, 'n'}, {"getpretimeout", no_argument, NULL, 'N'}, + {"gettimeleft", no_argument, NULL, 'L'}, {NULL, no_argument, NULL, 0x0} }; @@ -77,6 +78,7 @@ static void usage(char *progname) printf(" -T, --gettimeoutGet the timeout\n"); printf(" -n, --pretimeout=T Set the pretimeout to T seconds\n"); printf(" -N, --getpretimeout Get the pretimeout\n"); + printf(" -L, --gettimeleft Get the time left until timer expires\n"); printf("\n"); printf("Parameters are parsed left-to-right in real-time.\n"); printf("Example: %s -d -t 10 -p 5 -e\n", progname); @@ -180,6 +182,15 @@ int main(int argc, char *argv[]) else printf("WDIOC_GETPRETIMEOUT error '%s'\n", strerror(errno)); break; + case 'L': + oneshot = 1; + ret = ioctl(fd, WDIOC_GETTIMELEFT, &flags); + if (!ret) + printf("WDIOC_GETTIMELEFT returns %u seconds.\n", flags); + else + printf("WDIOC_GETTIMELEFT error '%s'\n", strerror(errno)); + break; + default: usage(argv[0]); goto end; -- 1.8.3.1
[PATCH 1/1] selftests: watchdog: Add gettimeleft command line arg
Add command line argument to call and display the results of ioctl WDIOC_GETTIMELEFT. Signed-off-by: Jerry Hoemann --- tools/testing/selftests/watchdog/watchdog-test.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index c6bd9a6..dac907a 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -19,7 +19,7 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:Tn:N"; +static const char sopts[] = "bdehp:t:Tn:NL"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, @@ -30,6 +30,7 @@ {"gettimeout", no_argument, NULL, 'T'}, {"pretimeout",required_argument, NULL, 'n'}, {"getpretimeout", no_argument, NULL, 'N'}, + {"gettimeleft", no_argument, NULL, 'L'}, {NULL, no_argument, NULL, 0x0} }; @@ -77,6 +78,7 @@ static void usage(char *progname) printf(" -T, --gettimeoutGet the timeout\n"); printf(" -n, --pretimeout=T Set the pretimeout to T seconds\n"); printf(" -N, --getpretimeout Get the pretimeout\n"); + printf(" -L, --gettimeleft Get the time left until timer expires\n"); printf("\n"); printf("Parameters are parsed left-to-right in real-time.\n"); printf("Example: %s -d -t 10 -p 5 -e\n", progname); @@ -180,6 +182,15 @@ int main(int argc, char *argv[]) else printf("WDIOC_GETPRETIMEOUT error '%s'\n", strerror(errno)); break; + case 'L': + oneshot = 1; + ret = ioctl(fd, WDIOC_GETTIMELEFT, &flags); + if (!ret) + printf("WDIOC_GETTIMELEFT returns %u seconds.\n", flags); + else + printf("WDIOC_GETTIMELEFT error '%s'\n", strerror(errno)); + break; + default: usage(argv[0]); goto end; -- 1.8.3.1
Re: [PATCH v12 1/5] x86/boot: Add get_acpi_rsdp() to parse RSDP in cmdline from KEXEC
On Fri, Nov 30, 2018 at 10:43:47AM +0800, Chao Fan wrote: ... > >]$ make arch/x86/boot/compressed/misc.o > > CALLscripts/checksyscalls.sh > > DESCEND objtool > > CC arch/x86/boot/compressed/misc.o > >ld: -r and -pie may not be used together > >make[1]: *** [scripts/Makefile.build:294: arch/x86/boot/compressed/misc.o] > >Error 1 > >make: *** [Makefile:1715: arch/x86/boot/compressed/misc.o] Error 2 > >]$ > > Hi Masa, > > So many thanks for your test. > > Could you give me more details about this error? More error message. > Just on the first commit or the whole PATCHSET? > Cause I didn't get error both on this commit and on the whole PATCHSET. I built your whole patchset and got the error. The error depends on CONFIG_MODVERSIONS. If CONFIG_MODVERSIONS=y, you will get the build error. Thanks, Masa
Re: [PATCH v12 1/5] x86/boot: Add get_acpi_rsdp() to parse RSDP in cmdline from KEXEC
On Fri, Nov 30, 2018 at 10:43:47AM +0800, Chao Fan wrote: ... > >]$ make arch/x86/boot/compressed/misc.o > > CALLscripts/checksyscalls.sh > > DESCEND objtool > > CC arch/x86/boot/compressed/misc.o > >ld: -r and -pie may not be used together > >make[1]: *** [scripts/Makefile.build:294: arch/x86/boot/compressed/misc.o] > >Error 1 > >make: *** [Makefile:1715: arch/x86/boot/compressed/misc.o] Error 2 > >]$ > > Hi Masa, > > So many thanks for your test. > > Could you give me more details about this error? More error message. > Just on the first commit or the whole PATCHSET? > Cause I didn't get error both on this commit and on the whole PATCHSET. I built your whole patchset and got the error. The error depends on CONFIG_MODVERSIONS. If CONFIG_MODVERSIONS=y, you will get the build error. Thanks, Masa
[PATCH v2 0/2] arm64: Only call into preempt_schedule() if need_resched()
Hi all, This is version two of the patches I originally posted here: http://lkml.kernel.org/r/1543347902-21170-1-git-send-email-will.dea...@arm.com The only change since v1 is that __preempt_count_dec_and_test() now reloads the need_resched flag if it initially saw that it was set. This resolves the issue spotted by Peter, where an IRQ coming in during the decrement can cause a reschedule to be missed. Feedback welcome. Will --->8 Will Deacon (2): preempt: Move PREEMPT_NEED_RESCHED definition into arch code arm64: preempt: Provide our own implementation of asm/preempt.h arch/arm64/include/asm/Kbuild| 1 - arch/arm64/include/asm/preempt.h | 88 arch/arm64/include/asm/thread_info.h | 13 +- arch/s390/include/asm/preempt.h | 2 + arch/x86/include/asm/preempt.h | 3 ++ include/linux/preempt.h | 3 -- 6 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 arch/arm64/include/asm/preempt.h -- 2.1.4
[PATCH v2 1/2] preempt: Move PREEMPT_NEED_RESCHED definition into arch code
PREEMPT_NEED_RESCHED is never used directly, so move it into the arch code where it can potentially be implemented using either a different bit in the preempt count or as an entirely separate entity. Cc: Robert Love Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Martin Schwidefsky Signed-off-by: Will Deacon --- arch/s390/include/asm/preempt.h | 2 ++ arch/x86/include/asm/preempt.h | 3 +++ include/linux/preempt.h | 3 --- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 23a14d187fb1..b5ea9e14c017 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -8,6 +8,8 @@ #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES +/* We use the MSB mostly because its available */ +#define PREEMPT_NEED_RESCHED 0x8000 #define PREEMPT_ENABLED(0 + PREEMPT_NEED_RESCHED) static inline int preempt_count(void) diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 90cb2f36c042..99a7fa9ab0a3 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -8,6 +8,9 @@ DECLARE_PER_CPU(int, __preempt_count); +/* We use the MSB mostly because its available */ +#define PREEMPT_NEED_RESCHED 0x8000 + /* * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such * that a decrement hitting 0 means we can and should reschedule. diff --git a/include/linux/preempt.h b/include/linux/preempt.h index c01813c3fbe9..dd92b1a93919 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -53,9 +53,6 @@ #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) -/* We use the MSB mostly because its available */ -#define PREEMPT_NEED_RESCHED 0x8000 - #define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) /* -- 2.1.4
[PATCH v2 2/2] arm64: preempt: Provide our own implementation of asm/preempt.h
The asm-generic/preempt.h implementation doesn't make use of the PREEMPT_NEED_RESCHED flag, since this can interact badly with load/store architectures which rely on the preempt_count word being unchanged across an interrupt. However, since we're a 64-bit architecture and the preempt count is only 32 bits wide, we can simply pack it next to the resched flag and load the whole thing in one go, so that a dec-and-test operation doesn't need to load twice. Signed-off-by: Will Deacon --- arch/arm64/include/asm/Kbuild| 1 - arch/arm64/include/asm/preempt.h | 88 arch/arm64/include/asm/thread_info.h | 13 +- 3 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/preempt.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 6cd5d77b6b44..33498f900390 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -14,7 +14,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msi.h -generic-y += preempt.h generic-y += qrwlock.h generic-y += qspinlock.h generic-y += rwsem.h diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h new file mode 100644 index ..f1c1398cf065 --- /dev/null +++ b/arch/arm64/include/asm/preempt.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include + +#define PREEMPT_NEED_RESCHED BIT(32) +#define PREEMPT_ENABLED(PREEMPT_NEED_RESCHED) + +static inline int preempt_count(void) +{ + return READ_ONCE(current_thread_info()->preempt.count); +} + +static inline void preempt_count_set(u64 pc) +{ + /* Preserve existing value of PREEMPT_NEED_RESCHED */ + WRITE_ONCE(current_thread_info()->preempt.count, pc); +} + +#define init_task_preempt_count(p) do { \ + task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \ +} while (0) + +#define init_idle_preempt_count(p, cpu) do { \ + task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ +} while (0) 
+ +static inline void set_preempt_need_resched(void) +{ + current_thread_info()->preempt.need_resched = 0; +} + +static inline void clear_preempt_need_resched(void) +{ + current_thread_info()->preempt.need_resched = 1; +} + +static inline bool test_preempt_need_resched(void) +{ + return !current_thread_info()->preempt.need_resched; +} + +static inline void __preempt_count_add(int val) +{ + u32 pc = READ_ONCE(current_thread_info()->preempt.count); + pc += val; + WRITE_ONCE(current_thread_info()->preempt.count, pc); +} + +static inline void __preempt_count_sub(int val) +{ + u32 pc = READ_ONCE(current_thread_info()->preempt.count); + pc -= val; + WRITE_ONCE(current_thread_info()->preempt.count, pc); +} + +static inline bool __preempt_count_dec_and_test(void) +{ + struct thread_info *ti = current_thread_info(); + u64 pc = READ_ONCE(ti->preempt_count); + + WRITE_ONCE(ti->preempt.count, --pc); + + /* +* If we wrote back all zeroes, then we're preemptible and in +* need of a reschedule. Otherwise, we need to reload the +* preempt_count in case the need_resched flag was cleared by an +* interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE +* pair. 
+*/ + return !pc || !READ_ONCE(ti->preempt_count); +} + +static inline bool should_resched(int preempt_offset) +{ + u64 pc = READ_ONCE(current_thread_info()->preempt_count); + return pc == preempt_offset; +} + +#ifdef CONFIG_PREEMPT +void preempt_schedule(void); +#define __preempt_schedule() preempt_schedule() +void preempt_schedule_notrace(void); +#define __preempt_schedule_notrace() preempt_schedule_notrace() +#endif /* CONFIG_PREEMPT */ + +#endif /* __ASM_PREEMPT_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index cb2c10a8f0a8..bbca68b54732 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -42,7 +42,18 @@ struct thread_info { #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif - int preempt_count; /* 0 => preemptable, <0 => bug */ + union { + u64 preempt_count; /* 0 => preemptible, <0 => bug */ + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + u32 need_resched; + u32 count; +#else + u32 count; + u32 need_resched; +#endif + } preempt; + }; }; #define thread_saved_pc(tsk) \ -- 2.1.4
[PATCH v2 0/2] arm64: Only call into preempt_schedule() if need_resched()
Hi all, This is version two of the patches I originally posted here: http://lkml.kernel.org/r/1543347902-21170-1-git-send-email-will.deacon@arm.com The only change since v1 is that __preempt_count_dec_and_test() now reloads the need_resched flag if it initially saw that it was set. This resolves the issue spotted by Peter, where an IRQ coming in during the decrement can cause a reschedule to be missed. Feedback welcome. Will --->8 Will Deacon (2): preempt: Move PREEMPT_NEED_RESCHED definition into arch code arm64: preempt: Provide our own implementation of asm/preempt.h arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/preempt.h | 88 arch/arm64/include/asm/thread_info.h | 13 +- arch/s390/include/asm/preempt.h | 2 + arch/x86/include/asm/preempt.h | 3 ++ include/linux/preempt.h | 3 -- 6 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 arch/arm64/include/asm/preempt.h -- 2.1.4
[PATCH v2 1/2] preempt: Move PREEMPT_NEED_RESCHED definition into arch code
PREEMPT_NEED_RESCHED is never used directly, so move it into the arch code where it can potentially be implemented using either a different bit in the preempt count or as an entirely separate entity. Cc: Robert Love Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Martin Schwidefsky Signed-off-by: Will Deacon --- arch/s390/include/asm/preempt.h | 2 ++ arch/x86/include/asm/preempt.h | 3 +++ include/linux/preempt.h | 3 --- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 23a14d187fb1..b5ea9e14c017 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -8,6 +8,8 @@ #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES +/* We use the MSB mostly because its available */ +#define PREEMPT_NEED_RESCHED 0x80000000 #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) static inline int preempt_count(void) diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 90cb2f36c042..99a7fa9ab0a3 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -8,6 +8,9 @@ DECLARE_PER_CPU(int, __preempt_count); +/* We use the MSB mostly because its available */ +#define PREEMPT_NEED_RESCHED 0x80000000 + /* * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such * that a decrement hitting 0 means we can and should reschedule. diff --git a/include/linux/preempt.h b/include/linux/preempt.h index c01813c3fbe9..dd92b1a93919 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -53,9 +53,6 @@ #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) -/* We use the MSB mostly because its available */ -#define PREEMPT_NEED_RESCHED 0x80000000 - #define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) /* -- 2.1.4
[PATCH v2 2/2] arm64: preempt: Provide our own implementation of asm/preempt.h
The asm-generic/preempt.h implementation doesn't make use of the PREEMPT_NEED_RESCHED flag, since this can interact badly with load/store architectures which rely on the preempt_count word being unchanged across an interrupt. However, since we're a 64-bit architecture and the preempt count is only 32 bits wide, we can simply pack it next to the resched flag and load the whole thing in one go, so that a dec-and-test operation doesn't need to load twice. Signed-off-by: Will Deacon --- arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/preempt.h | 88 arch/arm64/include/asm/thread_info.h | 13 +- 3 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/preempt.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 6cd5d77b6b44..33498f900390 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -14,7 +14,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msi.h -generic-y += preempt.h generic-y += qrwlock.h generic-y += qspinlock.h generic-y += rwsem.h diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h new file mode 100644 index 000000000000..f1c1398cf065 --- /dev/null +++ b/arch/arm64/include/asm/preempt.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include <linux/thread_info.h> + +#define PREEMPT_NEED_RESCHED BIT(32) +#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED) + +static inline int preempt_count(void) +{ + return READ_ONCE(current_thread_info()->preempt.count); +} + +static inline void preempt_count_set(u64 pc) +{ + /* Preserve existing value of PREEMPT_NEED_RESCHED */ + WRITE_ONCE(current_thread_info()->preempt.count, pc); +} + +#define init_task_preempt_count(p) do { \ + task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \ +} while (0) + +#define init_idle_preempt_count(p, cpu) do { \ + task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ +} while (0)
+ +static inline void set_preempt_need_resched(void) +{ + current_thread_info()->preempt.need_resched = 0; +} + +static inline void clear_preempt_need_resched(void) +{ + current_thread_info()->preempt.need_resched = 1; +} + +static inline bool test_preempt_need_resched(void) +{ + return !current_thread_info()->preempt.need_resched; +} + +static inline void __preempt_count_add(int val) +{ + u32 pc = READ_ONCE(current_thread_info()->preempt.count); + pc += val; + WRITE_ONCE(current_thread_info()->preempt.count, pc); +} + +static inline void __preempt_count_sub(int val) +{ + u32 pc = READ_ONCE(current_thread_info()->preempt.count); + pc -= val; + WRITE_ONCE(current_thread_info()->preempt.count, pc); +} + +static inline bool __preempt_count_dec_and_test(void) +{ + struct thread_info *ti = current_thread_info(); + u64 pc = READ_ONCE(ti->preempt_count); + + WRITE_ONCE(ti->preempt.count, --pc); + + /* +* If we wrote back all zeroes, then we're preemptible and in +* need of a reschedule. Otherwise, we need to reload the +* preempt_count in case the need_resched flag was cleared by an +* interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE +* pair. 
+*/ + return !pc || !READ_ONCE(ti->preempt_count); +} + +static inline bool should_resched(int preempt_offset) +{ + u64 pc = READ_ONCE(current_thread_info()->preempt_count); + return pc == preempt_offset; +} + +#ifdef CONFIG_PREEMPT +void preempt_schedule(void); +#define __preempt_schedule() preempt_schedule() +void preempt_schedule_notrace(void); +#define __preempt_schedule_notrace() preempt_schedule_notrace() +#endif /* CONFIG_PREEMPT */ + +#endif /* __ASM_PREEMPT_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index cb2c10a8f0a8..bbca68b54732 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -42,7 +42,18 @@ struct thread_info { #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif - int preempt_count; /* 0 => preemptable, <0 => bug */ + union { + u64 preempt_count; /* 0 => preemptible, <0 => bug */ + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + u32 need_resched; + u32 count; +#else + u32 count; + u32 need_resched; +#endif + } preempt; + }; }; #define thread_saved_pc(tsk) \ -- 2.1.4