Re: [PATCH] x86/microcode/intel: Check microcode revision before updating sibling threads
* Ashok Rajwrote: > After updating microcode on one of the threads in the core, the > thread sibling automatically gets the update since the microcode > resources are shared. Check the ucode revision on the cpu before > performing a ucode update. s/cpu/CPU > > Signed-off-by: Ashok Raj > Cc: X86 ML > Cc: LKML > --- > arch/x86/kernel/cpu/microcode/intel.c | 16 +--- > 1 file changed, 13 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kernel/cpu/microcode/intel.c > b/arch/x86/kernel/cpu/microcode/intel.c > index 09b95a7..036d1db 100644 > --- a/arch/x86/kernel/cpu/microcode/intel.c > +++ b/arch/x86/kernel/cpu/microcode/intel.c > @@ -776,7 +776,7 @@ static enum ucode_state apply_microcode_intel(int cpu) > { > struct microcode_intel *mc; > struct ucode_cpu_info *uci; > - struct cpuinfo_x86 *c; > + struct cpuinfo_x86 *c = _data(cpu); > static int prev_rev; > u32 rev; > > @@ -793,6 +793,18 @@ static enum ucode_state apply_microcode_intel(int cpu) > return UCODE_NFOUND; > } > > + rev = intel_get_microcode_revision(); > + /* > + * Its possible the microcode got udpated > + * because its sibling update was done earlier. > + * Skip the udpate in that case. > + */ > + if (rev >= mc->hdr.rev) { > + uci->cpu_sig.rev = rev; > + c->microcode = rev; > + return UCODE_OK; > + } s/udpate /update Also, more fundamentally, during microcode early testing, isn't it possible for internal iterations of the microcode to have the same revision, but be different? This patch would prevent re-loading it - for a seemingly minimal benefit. Thanks, Ingo
Re: [PATCH] x86/microcode/intel: Check microcode revision before updating sibling threads
* Ashok Raj wrote: > After updating microcode on one of the threads in the core, the > thread sibling automatically gets the update since the microcode > resources are shared. Check the ucode revision on the cpu before > performing a ucode update. s/cpu/CPU > > Signed-off-by: Ashok Raj > Cc: X86 ML > Cc: LKML > --- > arch/x86/kernel/cpu/microcode/intel.c | 16 +--- > 1 file changed, 13 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kernel/cpu/microcode/intel.c > b/arch/x86/kernel/cpu/microcode/intel.c > index 09b95a7..036d1db 100644 > --- a/arch/x86/kernel/cpu/microcode/intel.c > +++ b/arch/x86/kernel/cpu/microcode/intel.c > @@ -776,7 +776,7 @@ static enum ucode_state apply_microcode_intel(int cpu) > { > struct microcode_intel *mc; > struct ucode_cpu_info *uci; > - struct cpuinfo_x86 *c; > + struct cpuinfo_x86 *c = _data(cpu); > static int prev_rev; > u32 rev; > > @@ -793,6 +793,18 @@ static enum ucode_state apply_microcode_intel(int cpu) > return UCODE_NFOUND; > } > > + rev = intel_get_microcode_revision(); > + /* > + * Its possible the microcode got udpated > + * because its sibling update was done earlier. > + * Skip the udpate in that case. > + */ > + if (rev >= mc->hdr.rev) { > + uci->cpu_sig.rev = rev; > + c->microcode = rev; > + return UCODE_OK; > + } s/udpate /update Also, more fundamentally, during microcode early testing, isn't it possible for internal iterations of the microcode to have the same revision, but be different? This patch would prevent re-loading it - for a seemingly minimal benefit. Thanks, Ingo
[PATCH 2/2] proc: use set_puts() at /proc/*/wchan
Signed-off-by: Alexey Dobriyan--- fs/proc/base.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -396,7 +396,7 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, wchan = get_wchan(task); if (wchan && !lookup_symbol_name(wchan, symname)) { - seq_printf(m, "%s", symname); + seq_puts(m, symname); return 0; }
[PATCH 2/2] proc: use set_puts() at /proc/*/wchan
Signed-off-by: Alexey Dobriyan --- fs/proc/base.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -396,7 +396,7 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, wchan = get_wchan(task); if (wchan && !lookup_symbol_name(wchan, symname)) { - seq_printf(m, "%s", symname); + seq_puts(m, symname); return 0; }
[PATCH 1/2] proc: check permissions earlier for /proc/*/wchan
get_wchan() accesses stack page before permissions are checked, let's not play this game. Signed-off-by: Alexey Dobriyan--- fs/proc/base.c | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -391,14 +391,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, unsigned long wchan; char symname[KSYM_NAME_LEN]; - wchan = get_wchan(task); + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) + goto print0; - if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) - && !lookup_symbol_name(wchan, symname)) + wchan = get_wchan(task); + if (wchan && !lookup_symbol_name(wchan, symname)) { seq_printf(m, "%s", symname); - else - seq_putc(m, '0'); + return 0; + } +print0: + seq_putc(m, '0'); return 0; } #endif /* CONFIG_KALLSYMS */
[PATCH 1/2] proc: check permissions earlier for /proc/*/wchan
get_wchan() accesses stack page before permissions are checked, let's not play this game. Signed-off-by: Alexey Dobriyan --- fs/proc/base.c | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -391,14 +391,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, unsigned long wchan; char symname[KSYM_NAME_LEN]; - wchan = get_wchan(task); + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) + goto print0; - if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) - && !lookup_symbol_name(wchan, symname)) + wchan = get_wchan(task); + if (wchan && !lookup_symbol_name(wchan, symname)) { seq_printf(m, "%s", symname); - else - seq_putc(m, '0'); + return 0; + } +print0: + seq_putc(m, '0'); return 0; } #endif /* CONFIG_KALLSYMS */
Re: [PATCH] USB: chaoskey: Use kasprintf() over strcpy()/strcat()
Kees Cookwrites: > Instead of kmalloc() with manually calculated values followed by > multiple strcpy()/strcat() calls, just fold it all into a single > kasprintf() call. > > Signed-off-by: Kees Cook Reviewed-by: Keith Packard -- -keith signature.asc Description: PGP signature
Re: [PATCH] USB: chaoskey: Use kasprintf() over strcpy()/strcat()
Kees Cook writes: > Instead of kmalloc() with manually calculated values followed by > multiple strcpy()/strcat() calls, just fold it all into a single > kasprintf() call. > > Signed-off-by: Kees Cook Reviewed-by: Keith Packard -- -keith signature.asc Description: PGP signature
Re: [PATCH 1/3] tools include powerpc: Grab a copy of arch/powerpc/include/uapi/asm/unistd.h
Oops.. Really sorry about that. I've tested acme/perf/core on ubuntu ppc32 with and without libaudit-dev and it's working fine. Thank you very much for fixing it, Ravi On 02/16/2018 11:20 PM, Arnaldo Carvalho de Melo wrote: > Em Fri, Feb 16, 2018 at 02:29:01PM -0300, Arnaldo Carvalho de Melo escreveu: >> Humm, we need to create two tables, one for 32-bit and another for 64, >> even with ppc not having (AFAIK) clashes in syscall numbers for 32/64... >> >> Trying to do it now. > Now seems to work, take a look at my perf/core branch, should be one of the > first few csets. > > perfbuilder@cc1a85517216:/git/perf$ grep 192 > /tmp/build/perf/arch/powerpc/include/generated/asm/syscalls_32.c > [192] = "mmap2", > perfbuilder@cc1a85517216:/git/perf$ powerpc-linux-gnu-gcc --version > powerpc-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609 > perfbuilder@cc1a85517216:/git/perf$ > > perfbuilder@9d7fc9dcfb73:/git/perf$ grep 192 > /tmp/build/perf/arch/powerpc/include/generated/asm/syscalls_32.c > [192] = "mmap2", > perfbuilder@9d7fc9dcfb73:/git/perf$ grep 192 > /tmp/build/perf/arch/powerpc/include/generated/asm/syscalls_64.c > perfbuilder@9d7fc9dcfb73:/git/perf$ powerpc64-linux-gnu-gcc --version > powerpc64-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.1) 5.4.0 20160609 > perfbuilder@9d7fc9dcfb73:/git/perf$ >
Re: [PATCH 1/3] tools include powerpc: Grab a copy of arch/powerpc/include/uapi/asm/unistd.h
Oops.. Really sorry about that. I've tested acme/perf/core on ubuntu ppc32 with and without libaudit-dev and it's working fine. Thank you very much for fixing it, Ravi On 02/16/2018 11:20 PM, Arnaldo Carvalho de Melo wrote: > Em Fri, Feb 16, 2018 at 02:29:01PM -0300, Arnaldo Carvalho de Melo escreveu: >> Humm, we need to create two tables, one for 32-bit and another for 64, >> even with ppc not having (AFAIK) clashes in syscall numbers for 32/64... >> >> Trying to do it now. > Now seems to work, take a look at my perf/core branch, should be one of the > first few csets. > > perfbuilder@cc1a85517216:/git/perf$ grep 192 > /tmp/build/perf/arch/powerpc/include/generated/asm/syscalls_32.c > [192] = "mmap2", > perfbuilder@cc1a85517216:/git/perf$ powerpc-linux-gnu-gcc --version > powerpc-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609 > perfbuilder@cc1a85517216:/git/perf$ > > perfbuilder@9d7fc9dcfb73:/git/perf$ grep 192 > /tmp/build/perf/arch/powerpc/include/generated/asm/syscalls_32.c > [192] = "mmap2", > perfbuilder@9d7fc9dcfb73:/git/perf$ grep 192 > /tmp/build/perf/arch/powerpc/include/generated/asm/syscalls_64.c > perfbuilder@9d7fc9dcfb73:/git/perf$ powerpc64-linux-gnu-gcc --version > powerpc64-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.1) 5.4.0 20160609 > perfbuilder@9d7fc9dcfb73:/git/perf$ >
[PATCH v2] block/loop: add documentation for sysfs interface
Documentation has been compiled from git logs and by reading through code. Signed-off-by: Aishwarya Pant--- For drivers/block/loop.c, I don't see any maintainers or mailing lists except for LKML. I am guessing linux-block mailing list should be okay. Changes in v2: - Add linux-bl...@vger.kernel.org to the recipient list. Documentation/ABI/testing/sysfs-block-loop | 50 ++ 1 file changed, 50 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-block-loop diff --git a/Documentation/ABI/testing/sysfs-block-loop b/Documentation/ABI/testing/sysfs-block-loop new file mode 100644 index ..627f4eb87286 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block-loop @@ -0,0 +1,50 @@ +What: /sys/block/loopX/loop/autoclear +Date: Aug, 2010 +KernelVersion: v2.6.37 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) Shows if the device is in autoclear mode or not ( "1" or + "0"). Autoclear (if set) indicates that the loopback device will + self-distruct after last close. + +What: /sys/block/loopX/loop/backing_file +Date: Aug, 2010 +KernelVersion: v2.6.37 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) The path of the backing file that the loop device maps its + data blocks to. + +What: /sys/block/loopX/loop/offset +Date: Aug, 2010 +KernelVersion: v2.6.37 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) Start offset (in bytes). + +What: /sys/block/loopX/loop/sizelimit +Date: Aug, 2010 +KernelVersion: v2.6.37 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) The size (in bytes) that the block device maps, starting + from the offset. + +What: /sys/block/loopX/loop/partscan +Date: Aug, 2011 +KernelVersion: v3.10 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) Shows if automatic partition scanning is enabled for the + device or not ("1" or "0"). This can be requested individually + per loop device during its setup by setting LO_FLAGS_PARTSCAN in + in the ioctl request. By default, no partition tables are + scanned. + +What: /sys/block/loopX/loop/dio +Date: Aug, 2015 +KernelVersion: v4.10 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) Shows if direct IO is being used to access backing file or + not ("1 or "0"). -- 2.16.1
[PATCH v2] block/loop: add documentation for sysfs interface
Documentation has been compiled from git logs and by reading through code. Signed-off-by: Aishwarya Pant --- For drivers/block/loop.c, I don't see any maintainers or mailing lists except for LKML. I am guessing linux-block mailing list should be okay. Changes in v2: - Add linux-bl...@vger.kernel.org to the recipient list. Documentation/ABI/testing/sysfs-block-loop | 50 ++ 1 file changed, 50 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-block-loop diff --git a/Documentation/ABI/testing/sysfs-block-loop b/Documentation/ABI/testing/sysfs-block-loop new file mode 100644 index ..627f4eb87286 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block-loop @@ -0,0 +1,50 @@ +What: /sys/block/loopX/loop/autoclear +Date: Aug, 2010 +KernelVersion: v2.6.37 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) Shows if the device is in autoclear mode or not ( "1" or + "0"). Autoclear (if set) indicates that the loopback device will + self-distruct after last close. + +What: /sys/block/loopX/loop/backing_file +Date: Aug, 2010 +KernelVersion: v2.6.37 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) The path of the backing file that the loop device maps its + data blocks to. + +What: /sys/block/loopX/loop/offset +Date: Aug, 2010 +KernelVersion: v2.6.37 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) Start offset (in bytes). + +What: /sys/block/loopX/loop/sizelimit +Date: Aug, 2010 +KernelVersion: v2.6.37 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) The size (in bytes) that the block device maps, starting + from the offset. + +What: /sys/block/loopX/loop/partscan +Date: Aug, 2011 +KernelVersion: v3.10 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) Shows if automatic partition scanning is enabled for the + device or not ("1" or "0"). This can be requested individually + per loop device during its setup by setting LO_FLAGS_PARTSCAN in + in the ioctl request. By default, no partition tables are + scanned. + +What: /sys/block/loopX/loop/dio +Date: Aug, 2015 +KernelVersion: v4.10 +Contact: linux-bl...@vger.kernel.org +Description: + (RO) Shows if direct IO is being used to access backing file or + not ("1 or "0"). -- 2.16.1
Re: [PATCH 08/23] kconfig: add 'macro' keyword to support user-defined function
On Fri, Feb 16, 2018 at 11:44:25PM -0500, Nicolas Pitre wrote: > On Sat, 17 Feb 2018, Ulf Magnusson wrote: > > > On Sat, Feb 17, 2018 at 3:30 AM, Nicolas Pitrewrote: > > > On Sat, 17 Feb 2018, Ulf Magnusson wrote: > > > > > >> On Fri, Feb 16, 2018 at 02:49:31PM -0500, Nicolas Pitre wrote: > > >> > On Sat, 17 Feb 2018, Masahiro Yamada wrote: > > >> > > > >> > > Now, we got a basic ability to test compiler capability in Kconfig. > > >> > > > > >> > > config CC_HAS_STACKPROTECTOR > > >> > > bool > > >> > > default $(shell $CC -Werror -fstack-protector -c -x c > > >> > > /dev/null -o /dev/null) > > >> > > > > >> > > This works, but it is ugly to repeat this long boilerplate. > > >> > > > > >> > > We want to describe like this: > > >> > > > > >> > > config CC_HAS_STACKPROTECTOR > > >> > > bool > > >> > > default $(cc-option -fstack-protector) > > >> > > > > >> > > It is straight-forward to implement a new function, but I do not like > > >> > > to hard-code specialized functions like this. Hence, here is another > > >> > > feature to add functions from Kconfig files. > > >> > > > > >> > > A user-defined function can be defined as a string type symbol with > > >> > > a special keyword 'macro'. It can be referenced in the same way as > > >> > > built-in functions. This feature was also inspired by Makefile where > > >> > > user-defined functions are referenced by $(call func-name, args...), > > >> > > but I omitted the 'call' to makes it shorter. > > >> > > > > >> > > The macro definition can contain $(1), $(2), ... which will be > > >> > > replaced > > >> > > with arguments from the caller. > > >> > > > > >> > > Example code: > > >> > > > > >> > > config cc-option > > >> > > string > > >> > > macro $(shell $CC -Werror $(1) -c -x c /dev/null -o > > >> > > /dev/null) > > >> > > > >> > I think this syntax for defining a macro shouldn't start with the > > >> > "config" keyword, unless you want it to be part of the config symbol > > >> > space and land it in .config. And typing it as a "string" while it > > >> > actually returns y/n (hence a bool) is also strange. > > >> > > > >> > What about this instead: > > >> > > > >> > macro cc-option > > >> > bool $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) > > >> > > > >> > This makes it easier to extend as well if need be. > > >> > > > >> > > > >> > Nicolas > > >> > > >> I haven't gone over the patchset in detail yet and might be missing > > >> something here, but if this is just meant to be a textual shorthand, > > >> then why give it a type at all? > > > > > > It is meant to be like a user-defined function. > > > > > >> Do you think a simpler syntax like this would make sense? > > >> > > >> macro cc-option "$(shell $CC -Werror $(1) -c -x c /dev/null -o > > >> /dev/null)" > > >> > > >> That's the most general version, where you could use it for other stuff > > >> besides $(shell ...) as well, just to keep parity. > > > > > > This is not extendable. Let's imagine that you might want to implement > > > some kind of conditionals some day e.g.: > > > > > > macro complex_test > > > bool $(shell foo) if LOCKDEP_SUPPORT > > > bool y if DEBUG_DRIVER > > > bool n > > > > I still don't quite get the semantics here. How would the behavior > > change if the type was changed to say string or int in some or all of > > the lines? > > I admit this wouldn't make sense to have multiple different types. In > this example, the bool keyword acts as syntactic sugar more than > anything else. > > > Since the current model is to evaluate $() while the Kconfig files are > > being parsed, would this require evaluating Kconfig expressions during > > parsing? There is a relatively clean and (somewhat) easy to understand > > parsing/evaluation separation at the moment, which I like. > > Agreed. Let's forget about the conditionals then. > > > Nicolas This is also related to why it feels off to me to (at least for its own sake) make macro definitions mimic symbol definitions. To me, parsing being a different domain makes it "okay" to use a different syntax for macros compared to symbol definitions, especially if it happens to be handier. It even makes things less confusing, because there's less risk of mixing up the two domains (it's rare to mix up the preprocessor with C "proper", since the syntax is so different). More practically, I'm not sure that macro foo "definition" would be that hard to extend in practice, if you'd ever need to. You could always add a new keyword: fancy-macro/function/whatever foo ... I admit it'd be a bit ugly if you'd ever end up with something like macro foo "definition" bit_ugly It's still not the end of the world though, IMO, and I suspect there'd be better-looking options if you'd need to extend things on the macro side. That macro syntax seems like the simplest possible thing to me, with
Re: [PATCH 08/23] kconfig: add 'macro' keyword to support user-defined function
On Fri, Feb 16, 2018 at 11:44:25PM -0500, Nicolas Pitre wrote: > On Sat, 17 Feb 2018, Ulf Magnusson wrote: > > > On Sat, Feb 17, 2018 at 3:30 AM, Nicolas Pitre wrote: > > > On Sat, 17 Feb 2018, Ulf Magnusson wrote: > > > > > >> On Fri, Feb 16, 2018 at 02:49:31PM -0500, Nicolas Pitre wrote: > > >> > On Sat, 17 Feb 2018, Masahiro Yamada wrote: > > >> > > > >> > > Now, we got a basic ability to test compiler capability in Kconfig. > > >> > > > > >> > > config CC_HAS_STACKPROTECTOR > > >> > > bool > > >> > > default $(shell $CC -Werror -fstack-protector -c -x c > > >> > > /dev/null -o /dev/null) > > >> > > > > >> > > This works, but it is ugly to repeat this long boilerplate. > > >> > > > > >> > > We want to describe like this: > > >> > > > > >> > > config CC_HAS_STACKPROTECTOR > > >> > > bool > > >> > > default $(cc-option -fstack-protector) > > >> > > > > >> > > It is straight-forward to implement a new function, but I do not like > > >> > > to hard-code specialized functions like this. Hence, here is another > > >> > > feature to add functions from Kconfig files. > > >> > > > > >> > > A user-defined function can be defined as a string type symbol with > > >> > > a special keyword 'macro'. It can be referenced in the same way as > > >> > > built-in functions. This feature was also inspired by Makefile where > > >> > > user-defined functions are referenced by $(call func-name, args...), > > >> > > but I omitted the 'call' to makes it shorter. > > >> > > > > >> > > The macro definition can contain $(1), $(2), ... which will be > > >> > > replaced > > >> > > with arguments from the caller. > > >> > > > > >> > > Example code: > > >> > > > > >> > > config cc-option > > >> > > string > > >> > > macro $(shell $CC -Werror $(1) -c -x c /dev/null -o > > >> > > /dev/null) > > >> > > > >> > I think this syntax for defining a macro shouldn't start with the > > >> > "config" keyword, unless you want it to be part of the config symbol > > >> > space and land it in .config. And typing it as a "string" while it > > >> > actually returns y/n (hence a bool) is also strange. > > >> > > > >> > What about this instead: > > >> > > > >> > macro cc-option > > >> > bool $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) > > >> > > > >> > This makes it easier to extend as well if need be. > > >> > > > >> > > > >> > Nicolas > > >> > > >> I haven't gone over the patchset in detail yet and might be missing > > >> something here, but if this is just meant to be a textual shorthand, > > >> then why give it a type at all? > > > > > > It is meant to be like a user-defined function. > > > > > >> Do you think a simpler syntax like this would make sense? > > >> > > >> macro cc-option "$(shell $CC -Werror $(1) -c -x c /dev/null -o > > >> /dev/null)" > > >> > > >> That's the most general version, where you could use it for other stuff > > >> besides $(shell ...) as well, just to keep parity. > > > > > > This is not extendable. Let's imagine that you might want to implement > > > some kind of conditionals some day e.g.: > > > > > > macro complex_test > > > bool $(shell foo) if LOCKDEP_SUPPORT > > > bool y if DEBUG_DRIVER > > > bool n > > > > I still don't quite get the semantics here. How would the behavior > > change if the type was changed to say string or int in some or all of > > the lines? > > I admit this wouldn't make sense to have multiple different types. In > this example, the bool keyword acts as syntactic sugar more than > anything else. > > > Since the current model is to evaluate $() while the Kconfig files are > > being parsed, would this require evaluating Kconfig expressions during > > parsing? There is a relatively clean and (somewhat) easy to understand > > parsing/evaluation separation at the moment, which I like. > > Agreed. Let's forget about the conditionals then. > > > Nicolas This is also related to why it feels off to me to (at least for its own sake) make macro definitions mimic symbol definitions. To me, parsing being a different domain makes it "okay" to use a different syntax for macros compared to symbol definitions, especially if it happens to be handier. It even makes things less confusing, because there's less risk of mixing up the two domains (it's rare to mix up the preprocessor with C "proper", since the syntax is so different). More practically, I'm not sure that macro foo "definition" would be that hard to extend in practice, if you'd ever need to. You could always add a new keyword: fancy-macro/function/whatever foo ... I admit it'd be a bit ugly if you'd ever end up with something like macro foo "definition" bit_ugly It's still not the end of the world though, IMO, and I suspect there'd be better-looking options if you'd need to extend things on the macro side. That macro syntax seems like the simplest possible thing to me, with no obvious major
[PATCH v2] aoe: document sysfs interface
Documentation has been compiled from git commit logs and descriptions in Documentation/aoe/aoe.txt. This should be useful for scripting and tracking changes in the ABI. Signed-off-by: Aishwarya Pant--- Changes in v2: - interface -> interfaces in description of netif Documentation/ABI/testing/sysfs-block-aoe | 44 +++ 1 file changed, 44 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-block-aoe diff --git a/Documentation/ABI/testing/sysfs-block-aoe b/Documentation/ABI/testing/sysfs-block-aoe new file mode 100644 index ..6f0795f7f10c --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block-aoe @@ -0,0 +1,44 @@ +What: /sys/block/etherd*/mac +Date: Apr, 2005 +KernelVersion: v2.6.12 +Contact: Ed L. Cashin +Description: + (RO) The ethernet address of the remote Ata over Ethernet (AoE) + device. + +What: /sys/block/etherd*/netif +Date: Apr, 2005 +KernelVersion: v2.6.12 +Contact: Ed L. Cashin +Description: + (RO) The name of the network interfaces on the localhost through + which we are communicating with the remote AoE device. + +What: /sys/block/etherd*/state +Date: Apr, 2005 +KernelVersion: v2.6.12 +Contact: Ed L. Cashin +Description: + (RO) Device status. The state attribute is "up" when the device + is ready for I/O and "down" if detected but unusable. The + "down,closewait" state shows that the device is still open and + cannot come up again until it has been closed. The "up,kickme" + state means that the driver wants to send more commands to the + target but found out there were already the max number of + commands waiting for a response. It will retry again after being + kicked by the periodic timer handler routine. + +What: /sys/block/etherd*/firmware-version +Date: Apr, 2005 +KernelVersion: v2.6.12 +Contact: Ed L. Cashin +Description: + (RO) Version of the firmware in the target. + +What: /sys/block/etherd*/payload +Date: Dec, 2012 +KernelVersion: v3.10 +Contact: Ed L. Cashin +Description: + (RO) The amount of user data transferred (in bytes) inside each AoE + command on the network, network headers excluded. -- 2.16.1
[PATCH v2] aoe: document sysfs interface
Documentation has been compiled from git commit logs and descriptions in Documentation/aoe/aoe.txt. This should be useful for scripting and tracking changes in the ABI. Signed-off-by: Aishwarya Pant --- Changes in v2: - interface -> interfaces in description of netif Documentation/ABI/testing/sysfs-block-aoe | 44 +++ 1 file changed, 44 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-block-aoe diff --git a/Documentation/ABI/testing/sysfs-block-aoe b/Documentation/ABI/testing/sysfs-block-aoe new file mode 100644 index ..6f0795f7f10c --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block-aoe @@ -0,0 +1,44 @@ +What: /sys/block/etherd*/mac +Date: Apr, 2005 +KernelVersion: v2.6.12 +Contact: Ed L. Cashin +Description: + (RO) The ethernet address of the remote Ata over Ethernet (AoE) + device. + +What: /sys/block/etherd*/netif +Date: Apr, 2005 +KernelVersion: v2.6.12 +Contact: Ed L. Cashin +Description: + (RO) The name of the network interfaces on the localhost through + which we are communicating with the remote AoE device. + +What: /sys/block/etherd*/state +Date: Apr, 2005 +KernelVersion: v2.6.12 +Contact: Ed L. Cashin +Description: + (RO) Device status. The state attribute is "up" when the device + is ready for I/O and "down" if detected but unusable. The + "down,closewait" state shows that the device is still open and + cannot come up again until it has been closed. The "up,kickme" + state means that the driver wants to send more commands to the + target but found out there were already the max number of + commands waiting for a response. It will retry again after being + kicked by the periodic timer handler routine. + +What: /sys/block/etherd*/firmware-version +Date: Apr, 2005 +KernelVersion: v2.6.12 +Contact: Ed L. Cashin +Description: + (RO) Version of the firmware in the target. + +What: /sys/block/etherd*/payload +Date: Dec, 2012 +KernelVersion: v3.10 +Contact: Ed L. Cashin +Description: + (RO) The amount of user data transferred (in bytes) inside each AoE + command on the network, network headers excluded. -- 2.16.1
[PATCH 00/17] Include linux trace docs to Sphinx TOC tree
From: Changbin DuHi All, The linux tracers are so useful that I want to make the docs better. The kernel now uses Sphinx to generate intelligent and beautiful documentation from reStructuredText files. I converted most of the Linux trace docs to rst format in this serias. For you to preview, please visit below url: http://docservice.askxiong.com/linux-kernel/trace/index.html Thank you! Changbin Du (17): Documentation: add Linux tracing to Sphinx TOC tree trace doc: convert trace/ftrace-design.txt to rst format trace doc: add ftrace-uses.rst to doc tree trace doc: convert trace/tracepoint-analysis.txt to rst format trace doc: convert trace/ftrace.txt to rst format trace doc: convert trace/kprobetrace.txt to rst format trace doc: convert trace/uprobetracer.txt to rst format trace doc: convert trace/tracepoints.txt to rst format trace doc: convert trace/events.txt to rst format trace doc: convert trace/events-kmem.txt to rst format trace doc: convert trace/events-power.txt to rst format trace doc: convert trace/events-nmi.txt to rst format trace doc: convert trace/events-msr.txt to rst format trace doc: convert trace/mmiotrace.txt to rst format trace doc: convert trace/hwlat_detector.txt to rst fromat trace doc: convert trace/intel_th.txt to rst format trace doc: convert trace/stm.txt to rst format Documentation/index.rst|1 + .../trace/{events-kmem.txt => events-kmem.rst} | 50 +- Documentation/trace/events-msr.rst | 40 + Documentation/trace/events-msr.txt | 37 - Documentation/trace/events-nmi.rst | 45 + Documentation/trace/events-nmi.txt | 43 - .../trace/{events-power.txt => events-power.rst} | 52 +- Documentation/trace/{events.txt => events.rst} | 669 ++-- .../trace/{ftrace-design.txt => ftrace-design.rst} | 252 +- Documentation/trace/ftrace-uses.rst| 23 +- Documentation/trace/ftrace.rst | 3332 Documentation/trace/ftrace.txt | 3220 --- .../{hwlat_detector.txt => hwlat_detector.rst} | 26 +- Documentation/trace/index.rst | 23 + Documentation/trace/{intel_th.txt => intel_th.rst} | 43 +- .../trace/{kprobetrace.txt => kprobetrace.rst} | 100 +- .../trace/{mmiotrace.txt => mmiotrace.rst} | 86 +- Documentation/trace/{stm.txt => stm.rst} | 23 +- ...epoint-analysis.txt => tracepoint-analysis.rst} | 41 +- .../trace/{tracepoints.txt => tracepoints.rst} | 77 +- .../trace/{uprobetracer.txt => uprobetracer.rst} | 44 +- 21 files changed, 4237 insertions(+), 3990 deletions(-) rename Documentation/trace/{events-kmem.txt => events-kmem.rst} (76%) create mode 100644 Documentation/trace/events-msr.rst delete mode 100644 Documentation/trace/events-msr.txt create mode 100644 Documentation/trace/events-nmi.rst delete mode 100644 Documentation/trace/events-nmi.txt rename Documentation/trace/{events-power.txt => events-power.rst} (65%) rename Documentation/trace/{events.txt => events.rst} (82%) rename Documentation/trace/{ftrace-design.txt => ftrace-design.rst} (74%) create mode 100644 Documentation/trace/ftrace.rst delete mode 100644 Documentation/trace/ftrace.txt rename Documentation/trace/{hwlat_detector.txt => hwlat_detector.rst} (83%) create mode 100644 Documentation/trace/index.rst rename Documentation/trace/{intel_th.txt => intel_th.rst} (82%) rename Documentation/trace/{kprobetrace.txt => kprobetrace.rst} (63%) rename Documentation/trace/{mmiotrace.txt => mmiotrace.rst} (78%) rename Documentation/trace/{stm.txt => stm.rst} (91%) rename Documentation/trace/{tracepoint-analysis.txt => tracepoint-analysis.rst} (93%) rename Documentation/trace/{tracepoints.txt => tracepoints.rst} (74%) rename Documentation/trace/{uprobetracer.txt => uprobetracer.rst} (86%) -- 2.7.4
[PATCH 00/17] Include linux trace docs to Sphinx TOC tree
From: Changbin Du Hi All, The linux tracers are so useful that I want to make the docs better. The kernel now uses Sphinx to generate intelligent and beautiful documentation from reStructuredText files. I converted most of the Linux trace docs to rst format in this serias. For you to preview, please visit below url: http://docservice.askxiong.com/linux-kernel/trace/index.html Thank you! Changbin Du (17): Documentation: add Linux tracing to Sphinx TOC tree trace doc: convert trace/ftrace-design.txt to rst format trace doc: add ftrace-uses.rst to doc tree trace doc: convert trace/tracepoint-analysis.txt to rst format trace doc: convert trace/ftrace.txt to rst format trace doc: convert trace/kprobetrace.txt to rst format trace doc: convert trace/uprobetracer.txt to rst format trace doc: convert trace/tracepoints.txt to rst format trace doc: convert trace/events.txt to rst format trace doc: convert trace/events-kmem.txt to rst format trace doc: convert trace/events-power.txt to rst format trace doc: convert trace/events-nmi.txt to rst format trace doc: convert trace/events-msr.txt to rst format trace doc: convert trace/mmiotrace.txt to rst format trace doc: convert trace/hwlat_detector.txt to rst fromat trace doc: convert trace/intel_th.txt to rst format trace doc: convert trace/stm.txt to rst format Documentation/index.rst|1 + .../trace/{events-kmem.txt => events-kmem.rst} | 50 +- Documentation/trace/events-msr.rst | 40 + Documentation/trace/events-msr.txt | 37 - Documentation/trace/events-nmi.rst | 45 + Documentation/trace/events-nmi.txt | 43 - .../trace/{events-power.txt => events-power.rst} | 52 +- Documentation/trace/{events.txt => events.rst} | 669 ++-- .../trace/{ftrace-design.txt => ftrace-design.rst} | 252 +- Documentation/trace/ftrace-uses.rst| 23 +- Documentation/trace/ftrace.rst | 3332 Documentation/trace/ftrace.txt | 3220 --- .../{hwlat_detector.txt => hwlat_detector.rst} | 26 +- Documentation/trace/index.rst | 23 + Documentation/trace/{intel_th.txt => intel_th.rst} | 43 +- .../trace/{kprobetrace.txt => kprobetrace.rst} | 100 +- .../trace/{mmiotrace.txt => mmiotrace.rst} | 86 +- Documentation/trace/{stm.txt => stm.rst} | 23 +- ...epoint-analysis.txt => tracepoint-analysis.rst} | 41 +- .../trace/{tracepoints.txt => tracepoints.rst} | 77 +- .../trace/{uprobetracer.txt => uprobetracer.rst} | 44 +- 21 files changed, 4237 insertions(+), 3990 deletions(-) rename Documentation/trace/{events-kmem.txt => events-kmem.rst} (76%) create mode 100644 Documentation/trace/events-msr.rst delete mode 100644 Documentation/trace/events-msr.txt create mode 100644 Documentation/trace/events-nmi.rst delete mode 100644 Documentation/trace/events-nmi.txt rename Documentation/trace/{events-power.txt => events-power.rst} (65%) rename Documentation/trace/{events.txt => events.rst} (82%) rename Documentation/trace/{ftrace-design.txt => ftrace-design.rst} (74%) create mode 100644 Documentation/trace/ftrace.rst delete mode 100644 Documentation/trace/ftrace.txt rename Documentation/trace/{hwlat_detector.txt => hwlat_detector.rst} (83%) create mode 100644 Documentation/trace/index.rst rename Documentation/trace/{intel_th.txt => intel_th.rst} (82%) rename Documentation/trace/{kprobetrace.txt => kprobetrace.rst} (63%) rename Documentation/trace/{mmiotrace.txt => mmiotrace.rst} (78%) rename Documentation/trace/{stm.txt => stm.rst} (91%) rename Documentation/trace/{tracepoint-analysis.txt => tracepoint-analysis.rst} (93%) rename Documentation/trace/{tracepoints.txt => tracepoints.rst} (74%) rename Documentation/trace/{uprobetracer.txt => uprobetracer.rst} (86%) -- 2.7.4
[PATCH 03/17] trace doc: add ftrace-uses.rst to doc tree
From: Changbin DuAdd ftrace-uses.rst into Sphinx TOC tree. Few format issues are fixed. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/ftrace-uses.rst | 23 --- Documentation/trace/index.rst | 1 + 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst index 3aed560..998a60a 100644 --- a/Documentation/trace/ftrace-uses.rst +++ b/Documentation/trace/ftrace-uses.rst @@ -21,13 +21,14 @@ how to use ftrace to implement your own function callbacks. The ftrace context == +.. warning:: -WARNING: The ability to add a callback to almost any function within the -kernel comes with risks. A callback can be called from any context -(normal, softirq, irq, and NMI). Callbacks can also be called just before -going to idle, during CPU bring up and takedown, or going to user space. -This requires extra care to what can be done inside a callback. A callback -can be called outside the protective scope of RCU. + The ability to add a callback to almost any function within the + kernel comes with risks. A callback can be called from any context + (normal, softirq, irq, and NMI). Callbacks can also be called just before + going to idle, during CPU bring up and takedown, or going to user space. + This requires extra care to what can be done inside a callback. A callback + can be called outside the protective scope of RCU. The ftrace infrastructure has some protections agains recursions and RCU but one must still be very careful how they use the callbacks. @@ -54,15 +55,15 @@ an ftrace_ops with ftrace: Both .flags and .private are optional. Only .func is required. -To enable tracing call:: +To enable tracing call: .. c:function:: register_ftrace_function(); -To disable tracing call:: +To disable tracing call: .. c:function:: unregister_ftrace_function(); -The above is defined by including the header:: +The above is defined by including the header: .. c:function:: #include @@ -200,7 +201,7 @@ match a specific pattern. See Filter Commands in :file:`Documentation/trace/ftrace.txt`. -To just trace the schedule function:: +To just trace the schedule function: .. code-block:: c @@ -210,7 +211,7 @@ To add more functions, call the ftrace_set_filter() more than once with the @reset parameter set to zero. To remove the current filter set and replace it with new functions defined by @buf, have @reset be non-zero. -To remove all the filtered functions and trace all functions:: +To remove all the filtered functions and trace all functions: .. code-block:: c diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index c8000ba..aa2baad 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -6,3 +6,4 @@ Linux Tracing Technologies :maxdepth: 2 ftrace-design + ftrace-uses -- 2.7.4
[PATCH] virtio_balloon: export huge page allocation statistics
Export statistics for successful and failed huge page allocations from the virtio balloon driver. These 2 stats come directly from the vm_events HTLB_BUDDY_PGALLOC and HTLB_BUDDY_PGALLOC_FAIL. Signed-off-by: Jonathan Helman--- drivers/virtio/virtio_balloon.c | 6 ++ include/uapi/linux/virtio_balloon.h | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index dfe5684..6b237e3 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -272,6 +272,12 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb) pages_to_bytes(events[PSWPOUT])); update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); +#ifdef CONFIG_HUGETLB_PAGE + update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC, + events[HTLB_BUDDY_PGALLOC]); + update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL, + events[HTLB_BUDDY_PGALLOC_FAIL]); +#endif #endif update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, pages_to_bytes(i.freeram)); diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 4e8b830..e3e8071 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -53,7 +53,9 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ #define VIRTIO_BALLOON_S_AVAIL6 /* Available memory as in /proc */ #define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */ -#define VIRTIO_BALLOON_S_NR 8 +#define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Number of htlb pgalloc successes */ +#define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Number of htlb pgalloc failures */ +#define VIRTIO_BALLOON_S_NR 10 /* * Memory statistics structure. -- 1.8.3.1
[PATCH 03/17] trace doc: add ftrace-uses.rst to doc tree
From: Changbin Du Add ftrace-uses.rst into Sphinx TOC tree. Few format issues are fixed. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/ftrace-uses.rst | 23 --- Documentation/trace/index.rst | 1 + 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst index 3aed560..998a60a 100644 --- a/Documentation/trace/ftrace-uses.rst +++ b/Documentation/trace/ftrace-uses.rst @@ -21,13 +21,14 @@ how to use ftrace to implement your own function callbacks. The ftrace context == +.. warning:: -WARNING: The ability to add a callback to almost any function within the -kernel comes with risks. A callback can be called from any context -(normal, softirq, irq, and NMI). Callbacks can also be called just before -going to idle, during CPU bring up and takedown, or going to user space. -This requires extra care to what can be done inside a callback. A callback -can be called outside the protective scope of RCU. + The ability to add a callback to almost any function within the + kernel comes with risks. A callback can be called from any context + (normal, softirq, irq, and NMI). Callbacks can also be called just before + going to idle, during CPU bring up and takedown, or going to user space. + This requires extra care to what can be done inside a callback. A callback + can be called outside the protective scope of RCU. The ftrace infrastructure has some protections agains recursions and RCU but one must still be very careful how they use the callbacks. @@ -54,15 +55,15 @@ an ftrace_ops with ftrace: Both .flags and .private are optional. Only .func is required. -To enable tracing call:: +To enable tracing call: .. c:function:: register_ftrace_function(); -To disable tracing call:: +To disable tracing call: .. c:function:: unregister_ftrace_function(); -The above is defined by including the header:: +The above is defined by including the header: .. c:function:: #include @@ -200,7 +201,7 @@ match a specific pattern. See Filter Commands in :file:`Documentation/trace/ftrace.txt`. -To just trace the schedule function:: +To just trace the schedule function: .. code-block:: c @@ -210,7 +211,7 @@ To add more functions, call the ftrace_set_filter() more than once with the @reset parameter set to zero. To remove the current filter set and replace it with new functions defined by @buf, have @reset be non-zero. -To remove all the filtered functions and trace all functions:: +To remove all the filtered functions and trace all functions: .. code-block:: c diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index c8000ba..aa2baad 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -6,3 +6,4 @@ Linux Tracing Technologies :maxdepth: 2 ftrace-design + ftrace-uses -- 2.7.4
[PATCH] virtio_balloon: export huge page allocation statistics
Export statistics for successful and failed huge page allocations from the virtio balloon driver. These 2 stats come directly from the vm_events HTLB_BUDDY_PGALLOC and HTLB_BUDDY_PGALLOC_FAIL. Signed-off-by: Jonathan Helman --- drivers/virtio/virtio_balloon.c | 6 ++ include/uapi/linux/virtio_balloon.h | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index dfe5684..6b237e3 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -272,6 +272,12 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb) pages_to_bytes(events[PSWPOUT])); update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); +#ifdef CONFIG_HUGETLB_PAGE + update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC, + events[HTLB_BUDDY_PGALLOC]); + update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL, + events[HTLB_BUDDY_PGALLOC_FAIL]); +#endif #endif update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, pages_to_bytes(i.freeram)); diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 4e8b830..e3e8071 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -53,7 +53,9 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ #define VIRTIO_BALLOON_S_AVAIL6 /* Available memory as in /proc */ #define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */ -#define VIRTIO_BALLOON_S_NR 8 +#define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Number of htlb pgalloc successes */ +#define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Number of htlb pgalloc failures */ +#define VIRTIO_BALLOON_S_NR 10 /* * Memory statistics structure. -- 1.8.3.1
[PATCH 02/17] trace doc: convert trace/ftrace-design.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it to Sphinx TOC tree. This documentation is not synced with current code, so mark it as out of date. Cc: Steven Rostedt Signed-off-by: Changbin Du --- .../trace/{ftrace-design.txt => ftrace-design.rst} | 252 - Documentation/trace/index.rst | 2 + 2 files changed, 141 insertions(+), 113 deletions(-) rename Documentation/trace/{ftrace-design.txt => ftrace-design.rst} (74%) diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.rst similarity index 74% rename from Documentation/trace/ftrace-design.txt rename to Documentation/trace/ftrace-design.rst index a273dd0..a8e22e0 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.rst @@ -1,6 +1,12 @@ - function tracer guts - - By Mike Frysinger +== +Function Tracer Design +== + +:Author: Mike Frysinger + +.. caution:: + This document is out of date. Some of the description below doesn't + match current implementation now. Introduction @@ -21,8 +27,8 @@ Prerequisites - Ftrace relies on these features being implemented: - STACKTRACE_SUPPORT - implement save_stack_trace() - TRACE_IRQFLAGS_SUPPORT - implement include/asm/irqflags.h + - STACKTRACE_SUPPORT - implement save_stack_trace() + - TRACE_IRQFLAGS_SUPPORT - implement include/asm/irqflags.h HAVE_FUNCTION_TRACER @@ -32,9 +38,11 @@ You will need to implement the mcount and the ftrace_stub functions. The exact mcount symbol name will depend on your toolchain. Some call it "mcount", "_mcount", or even "__mcount". You can probably figure it out by -running something like: +running something like:: + $ echo 'main(){}' | gcc -x c -S -o - - -pg | grep mcount callmcount + We'll make the assumption below that the symbol is "mcount" just to keep things nice and simple in the examples. @@ -56,8 +64,9 @@ size of the mcount call that is embedded in the function). For example, if the function foo() calls bar(), when the bar() function calls mcount(), the arguments mcount() will pass to the tracer are: - "frompc" - the address bar() will use to return to foo() - "selfpc" - the address bar() (with mcount() size adjustment) + + - "frompc" - the address bar() will use to return to foo() + - "selfpc" - the address bar() (with mcount() size adjustment) Also keep in mind that this mcount function will be called *a lot*, so optimizing for the default case of no tracer will help the smooth running of @@ -67,39 +76,41 @@ means the code flow should usually be kept linear (i.e. no branching in the nop case). This is of course an optimization and not a hard requirement. Here is some pseudo code that should help (these functions should actually be -implemented in assembly): +implemented in assembly):: -void ftrace_stub(void) -{ - return; -} + void ftrace_stub(void) + { + return; + } -void mcount(void) -{ - /* save any bare state needed in order to do initial checking */ + void mcount(void) + { + /* save any bare state needed in order to do initial checking */ - extern void (*ftrace_trace_function)(unsigned long, unsigned long); - if (ftrace_trace_function != ftrace_stub) - goto do_trace; + extern void (*ftrace_trace_function)(unsigned long, unsigned long); + if (ftrace_trace_function != ftrace_stub) + goto do_trace; - /* restore any bare state */ + /* restore any bare state */ - return; + return; -do_trace: + do_trace: - /* save all state needed by the ABI (see paragraph above) */ + /* save all state needed by the ABI (see paragraph above) */ - unsigned long frompc = ...; - unsigned long selfpc = - MCOUNT_INSN_SIZE; - ftrace_trace_function(frompc, selfpc); + unsigned long frompc = ...; + unsigned long selfpc = - MCOUNT_INSN_SIZE; + ftrace_trace_function(frompc, selfpc); - /* restore all state needed by the ABI */ -} + /* restore all state needed by the ABI */ + } Don't forget to export mcount for modules ! -extern void mcount(void); -EXPORT_SYMBOL(mcount); +:: + + extern void mcount(void); + EXPORT_SYMBOL(mcount); HAVE_FUNCTION_GRAPH_TRACER @@ -127,38 +138,40 @@ That function will simply call the common ftrace_return_to_handler function and that will return the original return address with which you can return to the original call site. -Here is the updated mcount pseudo code: -void mcount(void) -{ -... -
[PATCH 02/17] trace doc: convert trace/ftrace-design.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it to Sphinx TOC tree. This documentation is not synced with current code, so mark it as out of date. Cc: Steven Rostedt Signed-off-by: Changbin Du --- .../trace/{ftrace-design.txt => ftrace-design.rst} | 252 - Documentation/trace/index.rst | 2 + 2 files changed, 141 insertions(+), 113 deletions(-) rename Documentation/trace/{ftrace-design.txt => ftrace-design.rst} (74%) diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.rst similarity index 74% rename from Documentation/trace/ftrace-design.txt rename to Documentation/trace/ftrace-design.rst index a273dd0..a8e22e0 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.rst @@ -1,6 +1,12 @@ - function tracer guts - - By Mike Frysinger +== +Function Tracer Design +== + +:Author: Mike Frysinger + +.. caution:: + This document is out of date. Some of the description below doesn't + match current implementation now. Introduction @@ -21,8 +27,8 @@ Prerequisites - Ftrace relies on these features being implemented: - STACKTRACE_SUPPORT - implement save_stack_trace() - TRACE_IRQFLAGS_SUPPORT - implement include/asm/irqflags.h + - STACKTRACE_SUPPORT - implement save_stack_trace() + - TRACE_IRQFLAGS_SUPPORT - implement include/asm/irqflags.h HAVE_FUNCTION_TRACER @@ -32,9 +38,11 @@ You will need to implement the mcount and the ftrace_stub functions. The exact mcount symbol name will depend on your toolchain. Some call it "mcount", "_mcount", or even "__mcount". You can probably figure it out by -running something like: +running something like:: + $ echo 'main(){}' | gcc -x c -S -o - - -pg | grep mcount callmcount + We'll make the assumption below that the symbol is "mcount" just to keep things nice and simple in the examples. @@ -56,8 +64,9 @@ size of the mcount call that is embedded in the function). For example, if the function foo() calls bar(), when the bar() function calls mcount(), the arguments mcount() will pass to the tracer are: - "frompc" - the address bar() will use to return to foo() - "selfpc" - the address bar() (with mcount() size adjustment) + + - "frompc" - the address bar() will use to return to foo() + - "selfpc" - the address bar() (with mcount() size adjustment) Also keep in mind that this mcount function will be called *a lot*, so optimizing for the default case of no tracer will help the smooth running of @@ -67,39 +76,41 @@ means the code flow should usually be kept linear (i.e. no branching in the nop case). This is of course an optimization and not a hard requirement. Here is some pseudo code that should help (these functions should actually be -implemented in assembly): +implemented in assembly):: -void ftrace_stub(void) -{ - return; -} + void ftrace_stub(void) + { + return; + } -void mcount(void) -{ - /* save any bare state needed in order to do initial checking */ + void mcount(void) + { + /* save any bare state needed in order to do initial checking */ - extern void (*ftrace_trace_function)(unsigned long, unsigned long); - if (ftrace_trace_function != ftrace_stub) - goto do_trace; + extern void (*ftrace_trace_function)(unsigned long, unsigned long); + if (ftrace_trace_function != ftrace_stub) + goto do_trace; - /* restore any bare state */ + /* restore any bare state */ - return; + return; -do_trace: + do_trace: - /* save all state needed by the ABI (see paragraph above) */ + /* save all state needed by the ABI (see paragraph above) */ - unsigned long frompc = ...; - unsigned long selfpc = - MCOUNT_INSN_SIZE; - ftrace_trace_function(frompc, selfpc); + unsigned long frompc = ...; + unsigned long selfpc = - MCOUNT_INSN_SIZE; + ftrace_trace_function(frompc, selfpc); - /* restore all state needed by the ABI */ -} + /* restore all state needed by the ABI */ + } Don't forget to export mcount for modules ! -extern void mcount(void); -EXPORT_SYMBOL(mcount); +:: + + extern void mcount(void); + EXPORT_SYMBOL(mcount); HAVE_FUNCTION_GRAPH_TRACER @@ -127,38 +138,40 @@ That function will simply call the common ftrace_return_to_handler function and that will return the original return address with which you can return to the original call site. -Here is the updated mcount pseudo code: -void mcount(void) -{ -... - if (ftrace_trace_function != ftrace_stub) -
[PATCH 11/17] trace doc: convert trace/events-power.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- .../trace/{events-power.txt => events-power.rst} | 52 +- Documentation/trace/index.rst | 1 + 2 files changed, 31 insertions(+), 22 deletions(-) rename Documentation/trace/{events-power.txt => events-power.rst} (65%) diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.rst similarity index 65% rename from Documentation/trace/events-power.txt rename to Documentation/trace/events-power.rst index 21d514c..a77daca 100644 --- a/Documentation/trace/events-power.txt +++ b/Documentation/trace/events-power.rst @@ -1,13 +1,14 @@ - - Subsystem Trace Points: power += +Subsystem Trace Points: power += The power tracing system captures events related to power transitions within the kernel. Broadly speaking there are three major subheadings: - o Power state switch which reports events related to suspend (S-states), - cpuidle (C-states) and cpufreq (P-states) - o System clock related changes - o Power domains related changes and transitions + - Power state switch which reports events related to suspend (S-states), +cpuidle (C-states) and cpufreq (P-states) + - System clock related changes + - Power domains related changes and transitions This document describes what each of the tracepoints is and why they might be useful. @@ -22,14 +23,16 @@ Cf. include/trace/events/power.h for the events definitions. A 'cpu' event class gathers the CPU-related events: cpuidle and cpufreq. +:: -cpu_idle "state=%lu cpu_id=%lu" -cpu_frequency "state=%lu cpu_id=%lu" + cpu_idle "state=%lu cpu_id=%lu" + cpu_frequency"state=%lu cpu_id=%lu" A suspend event is used to indicate the system going in and out of the suspend mode: +:: -machine_suspend"state=%lu" + machine_suspend "state=%lu" Note: the value of '-1' or '4294967295' for state means an exit from the current state, @@ -45,10 +48,11 @@ correctly draw the states diagrams and to calculate accurate statistics etc. The clock events are used for clock enable/disable and for clock rate change. +:: -clock_enable "%s state=%lu cpu_id=%lu" -clock_disable "%s state=%lu cpu_id=%lu" -clock_set_rate "%s state=%lu cpu_id=%lu" + clock_enable "%s state=%lu cpu_id=%lu" + clock_disable"%s state=%lu cpu_id=%lu" + clock_set_rate "%s state=%lu cpu_id=%lu" The first parameter gives the clock name (e.g. "gpio1_iclk"). The second parameter is '1' for enable, '0' for disable, the target @@ -57,8 +61,9 @@ clock rate for set_rate. 3. Power domains events === The power domain events are used for power domains transitions +:: -power_domain_target"%s state=%lu cpu_id=%lu" + power_domain_target "%s state=%lu cpu_id=%lu" The first parameter gives the power domain name (e.g. "mpu_pwrdm"). The second parameter is the power domain target state. @@ -67,28 +72,31 @@ The second parameter is the power domain target state. The PM QoS events are used for QoS add/update/remove request and for target/flags update. +:: -pm_qos_add_request "pm_qos_class=%s value=%d" -pm_qos_update_request "pm_qos_class=%s value=%d" -pm_qos_remove_request "pm_qos_class=%s value=%d" -pm_qos_update_request_timeout "pm_qos_class=%s value=%d, timeout_us=%ld" + pm_qos_add_request "pm_qos_class=%s value=%d" + pm_qos_update_request "pm_qos_class=%s value=%d" + pm_qos_remove_request "pm_qos_class=%s value=%d" + pm_qos_update_request_timeout "pm_qos_class=%s value=%d, timeout_us=%ld" The first parameter gives the QoS class name (e.g. "CPU_DMA_LATENCY"). The second parameter is value to be added/updated/removed. The third parameter is timeout value in usec. +:: -pm_qos_update_target "action=%s prev_value=%d curr_value=%d" -pm_qos_update_flags"action=%s prev_value=0x%x curr_value=0x%x" + pm_qos_update_target "action=%s prev_value=%d curr_value=%d" + pm_qos_update_flags"action=%s prev_value=0x%x curr_value=0x%x" The first parameter gives the QoS action name (e.g. "ADD_REQ"). The second parameter is the previous QoS value. The third parameter is the current QoS value to update. And, there are also events used for device PM QoS add/update/remove request. +:: -dev_pm_qos_add_request "device=%s type=%s new_value=%d" -dev_pm_qos_update_request "device=%s type=%s new_value=%d"
[PATCH 12/17] trace doc: convert trace/events-nmi.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/events-nmi.rst | 45 ++ Documentation/trace/events-nmi.txt | 43 Documentation/trace/index.rst | 1 + 3 files changed, 46 insertions(+), 43 deletions(-) create mode 100644 Documentation/trace/events-nmi.rst delete mode 100644 Documentation/trace/events-nmi.txt diff --git a/Documentation/trace/events-nmi.rst b/Documentation/trace/events-nmi.rst new file mode 100644 index 000..9e0a728 --- /dev/null +++ b/Documentation/trace/events-nmi.rst @@ -0,0 +1,45 @@ + +NMI Trace Events + + +These events normally show up here: + + /sys/kernel/debug/tracing/events/nmi + + +nmi_handler +--- + +You might want to use this tracepoint if you suspect that your +NMI handlers are hogging large amounts of CPU time. The kernel +will warn if it sees long-running handlers:: + + INFO: NMI handler took too long to run: 9.207 msecs + +and this tracepoint will allow you to drill down and get some +more details. + +Let's say you suspect that perf_event_nmi_handler() is causing +you some problems and you only want to trace that handler +specifically. You need to find its address:: + + $ grep perf_event_nmi_handler /proc/kallsyms + 81625600 t perf_event_nmi_handler + +Let's also say you are only interested in when that function is +really hogging a lot of CPU time, like a millisecond at a time. +Note that the kernel's output is in milliseconds, but the input +to the filter is in nanoseconds! You can filter on 'delta_ns':: + + cd /sys/kernel/debug/tracing/events/nmi/nmi_handler + echo 'handler==0x81625600 && delta_ns>100' > filter + echo 1 > enable + +Your output would then look like:: + + $ cat /sys/kernel/debug/tracing/trace_pipe + -0 [000] d.h3 505.397558: nmi_handler: perf_event_nmi_handler() delta_ns: 3236765 handled: 1 + -0 [000] d.h3 505.805893: nmi_handler: perf_event_nmi_handler() delta_ns: 3174234 handled: 1 + -0 [000] d.h3 506.158206: nmi_handler: perf_event_nmi_handler() delta_ns: 3084642 handled: 1 + -0 [000] d.h3 506.334346: nmi_handler: perf_event_nmi_handler() delta_ns: 3080351 handled: 1 + diff --git a/Documentation/trace/events-nmi.txt b/Documentation/trace/events-nmi.txt deleted file mode 100644 index c03c8c8..000 --- a/Documentation/trace/events-nmi.txt +++ /dev/null @@ -1,43 +0,0 @@ -NMI Trace Events - -These events normally show up here: - - /sys/kernel/debug/tracing/events/nmi - --- - -nmi_handler: - -You might want to use this tracepoint if you suspect that your -NMI handlers are hogging large amounts of CPU time. The kernel -will warn if it sees long-running handlers: - - INFO: NMI handler took too long to run: 9.207 msecs - -and this tracepoint will allow you to drill down and get some -more details. - -Let's say you suspect that perf_event_nmi_handler() is causing -you some problems and you only want to trace that handler -specifically. You need to find its address: - - $ grep perf_event_nmi_handler /proc/kallsyms - 81625600 t perf_event_nmi_handler - -Let's also say you are only interested in when that function is -really hogging a lot of CPU time, like a millisecond at a time. -Note that the kernel's output is in milliseconds, but the input -to the filter is in nanoseconds! You can filter on 'delta_ns': - -cd /sys/kernel/debug/tracing/events/nmi/nmi_handler -echo 'handler==0x81625600 && delta_ns>100' > filter -echo 1 > enable - -Your output would then look like: - -$ cat /sys/kernel/debug/tracing/trace_pipe --0 [000] d.h3 505.397558: nmi_handler: perf_event_nmi_handler() delta_ns: 3236765 handled: 1 --0 [000] d.h3 505.805893: nmi_handler: perf_event_nmi_handler() delta_ns: 3174234 handled: 1 --0 [000] d.h3 506.158206: nmi_handler: perf_event_nmi_handler() delta_ns: 3084642 handled: 1 --0 [000] d.h3 506.334346: nmi_handler: perf_event_nmi_handler() delta_ns: 3080351 handled: 1 - diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 309c9c5..f4a8fbc 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -15,3 +15,4 @@ Linux Tracing Technologies events events-kmem events-power + events-nmi -- 2.7.4
[PATCH 06/17] trace doc: convert trace/kprobetrace.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + .../trace/{kprobetrace.txt => kprobetrace.rst} | 100 +++-- 2 files changed, 55 insertions(+), 46 deletions(-) rename Documentation/trace/{kprobetrace.txt => kprobetrace.rst} (63%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 947c6db..c8e2130 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -9,3 +9,4 @@ Linux Tracing Technologies tracepoint-analysis ftrace ftrace-uses + kprobetrace diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.rst similarity index 63% rename from Documentation/trace/kprobetrace.txt rename to Documentation/trace/kprobetrace.rst index 1a3a3d6..3e0f971 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.rst @@ -1,8 +1,8 @@ -Kprobe-based Event Tracing -== - - Documentation is written by Masami Hiramatsu +== +Kprobe-based Event Tracing +== +:Author: Masami Hiramatsu Overview @@ -23,6 +23,8 @@ current_tracer. Instead of that, add probe points via Synopsis of kprobe_events - +:: + p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe -:[GRP/]EVENT: Clear a probe @@ -66,7 +68,7 @@ String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. Bitfield is another special type, which takes 3 parameters, bit-width, bit- -offset, and container-size (usually 32). The syntax is; +offset, and container-size (usually 32). The syntax is:: b@/ @@ -75,7 +77,7 @@ For $comm, the default type is "string"; any other type is invalid. Per-Probe Event Filtering - - Per-probe event filtering feature allows you to set different filter on each +Per-probe event filtering feature allows you to set different filter on each probe and gives you what arguments will be shown in trace buffer. If an event name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event under tracing/events/kprobes/, at the directory you can see 'id', @@ -96,87 +98,93 @@ id: Event Profiling --- - You can check the total number of probe hits and probe miss-hits via +You can check the total number of probe hits and probe miss-hits via /sys/kernel/debug/tracing/kprobe_profile. - The first column is event name, the second is the number of probe hits, +The first column is event name, the second is the number of probe hits, the third is the number of probe miss-hits. Usage examples -- To add a probe as a new event, write a new definition to kprobe_events -as below. +as below:: echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events - This sets a kprobe on the top of do_sys_open() function with recording +This sets a kprobe on the top of do_sys_open() function with recording 1st to 4th arguments as "myprobe" event. Note, which register/stack entry is assigned to each function argument depends on arch-specific ABI. If you unsure the ABI, please try to use probe subcommand of perf-tools (you can find it under tools/perf/). As this example shows, users can choose more familiar names for each arguments. +:: echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events - This sets a kretprobe on the return point of do_sys_open() function with +This sets a kretprobe on the return point of do_sys_open() function with recording return value as "myretprobe" event. - You can see the format of these events via +You can see the format of these events via /sys/kernel/debug/tracing/events/kprobes//format. +:: cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format -name: myprobe -ID: 780 -format: -field:unsigned short common_type; offset:0; size:2; signed:0; -field:unsigned char common_flags; offset:2; size:1; signed:0; -field:unsigned char common_preempt_count; offset:3; size:1;signed:0; -field:int common_pid; offset:4; size:4; signed:1; + name: myprobe + ID: 780 + format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; +
[PATCH 11/17] trace doc: convert trace/events-power.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- .../trace/{events-power.txt => events-power.rst} | 52 +- Documentation/trace/index.rst | 1 + 2 files changed, 31 insertions(+), 22 deletions(-) rename Documentation/trace/{events-power.txt => events-power.rst} (65%) diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.rst similarity index 65% rename from Documentation/trace/events-power.txt rename to Documentation/trace/events-power.rst index 21d514c..a77daca 100644 --- a/Documentation/trace/events-power.txt +++ b/Documentation/trace/events-power.rst @@ -1,13 +1,14 @@ - - Subsystem Trace Points: power += +Subsystem Trace Points: power += The power tracing system captures events related to power transitions within the kernel. Broadly speaking there are three major subheadings: - o Power state switch which reports events related to suspend (S-states), - cpuidle (C-states) and cpufreq (P-states) - o System clock related changes - o Power domains related changes and transitions + - Power state switch which reports events related to suspend (S-states), +cpuidle (C-states) and cpufreq (P-states) + - System clock related changes + - Power domains related changes and transitions This document describes what each of the tracepoints is and why they might be useful. @@ -22,14 +23,16 @@ Cf. include/trace/events/power.h for the events definitions. A 'cpu' event class gathers the CPU-related events: cpuidle and cpufreq. +:: -cpu_idle "state=%lu cpu_id=%lu" -cpu_frequency "state=%lu cpu_id=%lu" + cpu_idle "state=%lu cpu_id=%lu" + cpu_frequency"state=%lu cpu_id=%lu" A suspend event is used to indicate the system going in and out of the suspend mode: +:: -machine_suspend"state=%lu" + machine_suspend "state=%lu" Note: the value of '-1' or '4294967295' for state means an exit from the current state, @@ -45,10 +48,11 @@ correctly draw the states diagrams and to calculate accurate statistics etc. The clock events are used for clock enable/disable and for clock rate change. +:: -clock_enable "%s state=%lu cpu_id=%lu" -clock_disable "%s state=%lu cpu_id=%lu" -clock_set_rate "%s state=%lu cpu_id=%lu" + clock_enable "%s state=%lu cpu_id=%lu" + clock_disable"%s state=%lu cpu_id=%lu" + clock_set_rate "%s state=%lu cpu_id=%lu" The first parameter gives the clock name (e.g. "gpio1_iclk"). The second parameter is '1' for enable, '0' for disable, the target @@ -57,8 +61,9 @@ clock rate for set_rate. 3. Power domains events === The power domain events are used for power domains transitions +:: -power_domain_target"%s state=%lu cpu_id=%lu" + power_domain_target "%s state=%lu cpu_id=%lu" The first parameter gives the power domain name (e.g. "mpu_pwrdm"). The second parameter is the power domain target state. @@ -67,28 +72,31 @@ The second parameter is the power domain target state. The PM QoS events are used for QoS add/update/remove request and for target/flags update. +:: -pm_qos_add_request "pm_qos_class=%s value=%d" -pm_qos_update_request "pm_qos_class=%s value=%d" -pm_qos_remove_request "pm_qos_class=%s value=%d" -pm_qos_update_request_timeout "pm_qos_class=%s value=%d, timeout_us=%ld" + pm_qos_add_request "pm_qos_class=%s value=%d" + pm_qos_update_request "pm_qos_class=%s value=%d" + pm_qos_remove_request "pm_qos_class=%s value=%d" + pm_qos_update_request_timeout "pm_qos_class=%s value=%d, timeout_us=%ld" The first parameter gives the QoS class name (e.g. "CPU_DMA_LATENCY"). The second parameter is value to be added/updated/removed. The third parameter is timeout value in usec. +:: -pm_qos_update_target "action=%s prev_value=%d curr_value=%d" -pm_qos_update_flags"action=%s prev_value=0x%x curr_value=0x%x" + pm_qos_update_target "action=%s prev_value=%d curr_value=%d" + pm_qos_update_flags"action=%s prev_value=0x%x curr_value=0x%x" The first parameter gives the QoS action name (e.g. "ADD_REQ"). The second parameter is the previous QoS value. The third parameter is the current QoS value to update. And, there are also events used for device PM QoS add/update/remove request. +:: -dev_pm_qos_add_request "device=%s type=%s new_value=%d" -dev_pm_qos_update_request "device=%s type=%s new_value=%d" -dev_pm_qos_remove_request "device=%s type=%s new_value=%d"
[PATCH 12/17] trace doc: convert trace/events-nmi.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/events-nmi.rst | 45 ++ Documentation/trace/events-nmi.txt | 43 Documentation/trace/index.rst | 1 + 3 files changed, 46 insertions(+), 43 deletions(-) create mode 100644 Documentation/trace/events-nmi.rst delete mode 100644 Documentation/trace/events-nmi.txt diff --git a/Documentation/trace/events-nmi.rst b/Documentation/trace/events-nmi.rst new file mode 100644 index 000..9e0a728 --- /dev/null +++ b/Documentation/trace/events-nmi.rst @@ -0,0 +1,45 @@ + +NMI Trace Events + + +These events normally show up here: + + /sys/kernel/debug/tracing/events/nmi + + +nmi_handler +--- + +You might want to use this tracepoint if you suspect that your +NMI handlers are hogging large amounts of CPU time. The kernel +will warn if it sees long-running handlers:: + + INFO: NMI handler took too long to run: 9.207 msecs + +and this tracepoint will allow you to drill down and get some +more details. + +Let's say you suspect that perf_event_nmi_handler() is causing +you some problems and you only want to trace that handler +specifically. You need to find its address:: + + $ grep perf_event_nmi_handler /proc/kallsyms + 81625600 t perf_event_nmi_handler + +Let's also say you are only interested in when that function is +really hogging a lot of CPU time, like a millisecond at a time. +Note that the kernel's output is in milliseconds, but the input +to the filter is in nanoseconds! You can filter on 'delta_ns':: + + cd /sys/kernel/debug/tracing/events/nmi/nmi_handler + echo 'handler==0x81625600 && delta_ns>100' > filter + echo 1 > enable + +Your output would then look like:: + + $ cat /sys/kernel/debug/tracing/trace_pipe + -0 [000] d.h3 505.397558: nmi_handler: perf_event_nmi_handler() delta_ns: 3236765 handled: 1 + -0 [000] d.h3 505.805893: nmi_handler: perf_event_nmi_handler() delta_ns: 3174234 handled: 1 + -0 [000] d.h3 506.158206: nmi_handler: perf_event_nmi_handler() delta_ns: 3084642 handled: 1 + -0 [000] d.h3 506.334346: nmi_handler: perf_event_nmi_handler() delta_ns: 3080351 handled: 1 + diff --git a/Documentation/trace/events-nmi.txt b/Documentation/trace/events-nmi.txt deleted file mode 100644 index c03c8c8..000 --- a/Documentation/trace/events-nmi.txt +++ /dev/null @@ -1,43 +0,0 @@ -NMI Trace Events - -These events normally show up here: - - /sys/kernel/debug/tracing/events/nmi - --- - -nmi_handler: - -You might want to use this tracepoint if you suspect that your -NMI handlers are hogging large amounts of CPU time. The kernel -will warn if it sees long-running handlers: - - INFO: NMI handler took too long to run: 9.207 msecs - -and this tracepoint will allow you to drill down and get some -more details. - -Let's say you suspect that perf_event_nmi_handler() is causing -you some problems and you only want to trace that handler -specifically. You need to find its address: - - $ grep perf_event_nmi_handler /proc/kallsyms - 81625600 t perf_event_nmi_handler - -Let's also say you are only interested in when that function is -really hogging a lot of CPU time, like a millisecond at a time. -Note that the kernel's output is in milliseconds, but the input -to the filter is in nanoseconds! You can filter on 'delta_ns': - -cd /sys/kernel/debug/tracing/events/nmi/nmi_handler -echo 'handler==0x81625600 && delta_ns>100' > filter -echo 1 > enable - -Your output would then look like: - -$ cat /sys/kernel/debug/tracing/trace_pipe --0 [000] d.h3 505.397558: nmi_handler: perf_event_nmi_handler() delta_ns: 3236765 handled: 1 --0 [000] d.h3 505.805893: nmi_handler: perf_event_nmi_handler() delta_ns: 3174234 handled: 1 --0 [000] d.h3 506.158206: nmi_handler: perf_event_nmi_handler() delta_ns: 3084642 handled: 1 --0 [000] d.h3 506.334346: nmi_handler: perf_event_nmi_handler() delta_ns: 3080351 handled: 1 - diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 309c9c5..f4a8fbc 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -15,3 +15,4 @@ Linux Tracing Technologies events events-kmem events-power + events-nmi -- 2.7.4
[PATCH 06/17] trace doc: convert trace/kprobetrace.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + .../trace/{kprobetrace.txt => kprobetrace.rst} | 100 +++-- 2 files changed, 55 insertions(+), 46 deletions(-) rename Documentation/trace/{kprobetrace.txt => kprobetrace.rst} (63%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 947c6db..c8e2130 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -9,3 +9,4 @@ Linux Tracing Technologies tracepoint-analysis ftrace ftrace-uses + kprobetrace diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.rst similarity index 63% rename from Documentation/trace/kprobetrace.txt rename to Documentation/trace/kprobetrace.rst index 1a3a3d6..3e0f971 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.rst @@ -1,8 +1,8 @@ -Kprobe-based Event Tracing -== - - Documentation is written by Masami Hiramatsu +== +Kprobe-based Event Tracing +== +:Author: Masami Hiramatsu Overview @@ -23,6 +23,8 @@ current_tracer. Instead of that, add probe points via Synopsis of kprobe_events - +:: + p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe -:[GRP/]EVENT: Clear a probe @@ -66,7 +68,7 @@ String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. Bitfield is another special type, which takes 3 parameters, bit-width, bit- -offset, and container-size (usually 32). The syntax is; +offset, and container-size (usually 32). The syntax is:: b@/ @@ -75,7 +77,7 @@ For $comm, the default type is "string"; any other type is invalid. Per-Probe Event Filtering - - Per-probe event filtering feature allows you to set different filter on each +Per-probe event filtering feature allows you to set different filter on each probe and gives you what arguments will be shown in trace buffer. If an event name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event under tracing/events/kprobes/, at the directory you can see 'id', @@ -96,87 +98,93 @@ id: Event Profiling --- - You can check the total number of probe hits and probe miss-hits via +You can check the total number of probe hits and probe miss-hits via /sys/kernel/debug/tracing/kprobe_profile. - The first column is event name, the second is the number of probe hits, +The first column is event name, the second is the number of probe hits, the third is the number of probe miss-hits. Usage examples -- To add a probe as a new event, write a new definition to kprobe_events -as below. +as below:: echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events - This sets a kprobe on the top of do_sys_open() function with recording +This sets a kprobe on the top of do_sys_open() function with recording 1st to 4th arguments as "myprobe" event. Note, which register/stack entry is assigned to each function argument depends on arch-specific ABI. If you unsure the ABI, please try to use probe subcommand of perf-tools (you can find it under tools/perf/). As this example shows, users can choose more familiar names for each arguments. +:: echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events - This sets a kretprobe on the return point of do_sys_open() function with +This sets a kretprobe on the return point of do_sys_open() function with recording return value as "myretprobe" event. - You can see the format of these events via +You can see the format of these events via /sys/kernel/debug/tracing/events/kprobes//format. +:: cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format -name: myprobe -ID: 780 -format: -field:unsigned short common_type; offset:0; size:2; signed:0; -field:unsigned char common_flags; offset:2; size:1; signed:0; -field:unsigned char common_preempt_count; offset:3; size:1;signed:0; -field:int common_pid; offset:4; size:4; signed:1; + name: myprobe + ID: 780 + format: + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3;
[PATCH 10/17] trace doc: convert trace/events-kmem.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- .../trace/{events-kmem.txt => events-kmem.rst} | 50 ++ Documentation/trace/index.rst | 1 + 2 files changed, 32 insertions(+), 19 deletions(-) rename Documentation/trace/{events-kmem.txt => events-kmem.rst} (76%) diff --git a/Documentation/trace/events-kmem.txt b/Documentation/trace/events-kmem.rst similarity index 76% rename from Documentation/trace/events-kmem.txt rename to Documentation/trace/events-kmem.rst index 1948004..5554841 100644 --- a/Documentation/trace/events-kmem.txt +++ b/Documentation/trace/events-kmem.rst @@ -1,22 +1,26 @@ - Subsystem Trace Points: kmem + +Subsystem Trace Points: kmem + The kmem tracing system captures events related to object and page allocation within the kernel. Broadly speaking there are five major subheadings. - o Slab allocation of small objects of unknown type (kmalloc) - o Slab allocation of small objects of known type - o Page allocation - o Per-CPU Allocator Activity - o External Fragmentation + - Slab allocation of small objects of unknown type (kmalloc) + - Slab allocation of small objects of known type + - Page allocation + - Per-CPU Allocator Activity + - External Fragmentation This document describes what each of the tracepoints is and why they might be useful. 1. Slab allocation of small objects of unknown type === -kmalloccall_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s -kmalloc_node call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d -kfree call_site=%lx ptr=%p +:: + + kmalloc call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s + kmalloc_node call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d + kfreecall_site=%lx ptr=%p Heavy activity for these events may indicate that a specific cache is justified, particularly if kmalloc slab pages are getting significantly @@ -27,9 +31,11 @@ the allocation sites were. 2. Slab allocation of small objects of known type = -kmem_cache_alloc call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s -kmem_cache_alloc_node call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d -kmem_cache_freecall_site=%lx ptr=%p +:: + + kmem_cache_alloc call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s + kmem_cache_alloc_nodecall_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d + kmem_cache_free call_site=%lx ptr=%p These events are similar in usage to the kmalloc-related events except that it is likely easier to pin the event down to a specific cache. At the time @@ -38,10 +44,12 @@ but the call_site can usually be used to extrapolate that information. 3. Page allocation == -mm_page_allocpage=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s -mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d -mm_page_free page=%p pfn=%lu order=%d -mm_page_free_batched page=%p pfn=%lu order=%d cold=%d +:: + + mm_page_alloc page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s + mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d + mm_page_free page=%p pfn=%lu order=%d + mm_page_free_batched page=%p pfn=%lu order=%d cold=%d These four events deal with page allocation and freeing. mm_page_alloc is a simple indicator of page allocator activity. Pages may be allocated from @@ -65,8 +73,10 @@ contention on the zone->lru_lock. 4. Per-CPU Allocator Activity = -mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d -mm_page_pcpu_drain page=%p pfn=%lu order=%d cpu=%d migratetype=%d +:: + + mm_page_alloc_zone_lockedpage=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d + mm_page_pcpu_drain page=%p pfn=%lu order=%d cpu=%d migratetype=%d In front of the page allocator is a per-cpu page allocator. It exists only for order-0 pages, reduces contention on the zone->lock and reduces the @@ -92,7 +102,9 @@ can be allocated and freed on the same CPU through some algorithm change. 5. External Fragmentation = -mm_page_alloc_extfrag page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d +:: + +
[PATCH 10/17] trace doc: convert trace/events-kmem.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- .../trace/{events-kmem.txt => events-kmem.rst} | 50 ++ Documentation/trace/index.rst | 1 + 2 files changed, 32 insertions(+), 19 deletions(-) rename Documentation/trace/{events-kmem.txt => events-kmem.rst} (76%) diff --git a/Documentation/trace/events-kmem.txt b/Documentation/trace/events-kmem.rst similarity index 76% rename from Documentation/trace/events-kmem.txt rename to Documentation/trace/events-kmem.rst index 1948004..5554841 100644 --- a/Documentation/trace/events-kmem.txt +++ b/Documentation/trace/events-kmem.rst @@ -1,22 +1,26 @@ - Subsystem Trace Points: kmem + +Subsystem Trace Points: kmem + The kmem tracing system captures events related to object and page allocation within the kernel. Broadly speaking there are five major subheadings. - o Slab allocation of small objects of unknown type (kmalloc) - o Slab allocation of small objects of known type - o Page allocation - o Per-CPU Allocator Activity - o External Fragmentation + - Slab allocation of small objects of unknown type (kmalloc) + - Slab allocation of small objects of known type + - Page allocation + - Per-CPU Allocator Activity + - External Fragmentation This document describes what each of the tracepoints is and why they might be useful. 1. Slab allocation of small objects of unknown type === -kmalloccall_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s -kmalloc_node call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d -kfree call_site=%lx ptr=%p +:: + + kmalloc call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s + kmalloc_node call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d + kfreecall_site=%lx ptr=%p Heavy activity for these events may indicate that a specific cache is justified, particularly if kmalloc slab pages are getting significantly @@ -27,9 +31,11 @@ the allocation sites were. 2. Slab allocation of small objects of known type = -kmem_cache_alloc call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s -kmem_cache_alloc_node call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d -kmem_cache_freecall_site=%lx ptr=%p +:: + + kmem_cache_alloc call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s + kmem_cache_alloc_nodecall_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d + kmem_cache_free call_site=%lx ptr=%p These events are similar in usage to the kmalloc-related events except that it is likely easier to pin the event down to a specific cache. At the time @@ -38,10 +44,12 @@ but the call_site can usually be used to extrapolate that information. 3. Page allocation == -mm_page_allocpage=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s -mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d -mm_page_free page=%p pfn=%lu order=%d -mm_page_free_batched page=%p pfn=%lu order=%d cold=%d +:: + + mm_page_alloc page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s + mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d + mm_page_free page=%p pfn=%lu order=%d + mm_page_free_batched page=%p pfn=%lu order=%d cold=%d These four events deal with page allocation and freeing. mm_page_alloc is a simple indicator of page allocator activity. Pages may be allocated from @@ -65,8 +73,10 @@ contention on the zone->lru_lock. 4. Per-CPU Allocator Activity = -mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d -mm_page_pcpu_drain page=%p pfn=%lu order=%d cpu=%d migratetype=%d +:: + + mm_page_alloc_zone_lockedpage=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d + mm_page_pcpu_drain page=%p pfn=%lu order=%d cpu=%d migratetype=%d In front of the page allocator is a per-cpu page allocator. It exists only for order-0 pages, reduces contention on the zone->lock and reduces the @@ -92,7 +102,9 @@ can be allocated and freed on the same CPU through some algorithm change. 5. External Fragmentation = -mm_page_alloc_extfrag page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d +:: + + mm_page_alloc_extfragpage=%p pfn=%lu alloc_order=%d fallback_order=%d
[PATCH 13/17] trace doc: convert trace/events-msr.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/events-msr.rst | 40 ++ Documentation/trace/events-msr.txt | 37 --- Documentation/trace/index.rst | 1 + 3 files changed, 41 insertions(+), 37 deletions(-) create mode 100644 Documentation/trace/events-msr.rst delete mode 100644 Documentation/trace/events-msr.txt diff --git a/Documentation/trace/events-msr.rst b/Documentation/trace/events-msr.rst new file mode 100644 index 000..e938aa0 --- /dev/null +++ b/Documentation/trace/events-msr.rst @@ -0,0 +1,40 @@ + +MSR Trace Events + + +The x86 kernel supports tracing most MSR (Model Specific Register) accesses. +To see the definition of the MSRs on Intel systems please see the SDM +at http://www.intel.com/sdm (Volume 3) + +Available trace points: + +/sys/kernel/debug/tracing/events/msr/ + +Trace MSR reads: + +read_msr + + - msr: MSR number + - val: Value written + - failed: 1 if the access failed, otherwise 0 + + +Trace MSR writes: + +write_msr + + - msr: MSR number + - val: Value written + - failed: 1 if the access failed, otherwise 0 + + +Trace RDPMC in kernel: + +rdpmc + +The trace data can be post processed with the postprocess/decode_msr.py script:: + + cat /sys/kernel/debug/tracing/trace | decode_msr.py /usr/src/linux/include/asm/msr-index.h + +to add symbolic MSR names. + diff --git a/Documentation/trace/events-msr.txt b/Documentation/trace/events-msr.txt deleted file mode 100644 index 78c383b..000 --- a/Documentation/trace/events-msr.txt +++ /dev/null @@ -1,37 +0,0 @@ - -The x86 kernel supports tracing most MSR (Model Specific Register) accesses. -To see the definition of the MSRs on Intel systems please see the SDM -at http://www.intel.com/sdm (Volume 3) - -Available trace points: - -/sys/kernel/debug/tracing/events/msr/ - -Trace MSR reads - -read_msr - -msr: MSR number -val: Value written -failed: 1 if the access failed, otherwise 0 - - -Trace MSR writes - -write_msr - -msr: MSR number -val: Value written -failed: 1 if the access failed, otherwise 0 - - -Trace RDPMC in kernel - -rdpmc - -The trace data can be post processed with the postprocess/decode_msr.py script - -cat /sys/kernel/debug/tracing/trace | decode_msr.py /usr/src/linux/include/asm/msr-index.h - -to add symbolic MSR names. - diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index f4a8fbc..307468d 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -16,3 +16,4 @@ Linux Tracing Technologies events-kmem events-power events-nmi + events-msr -- 2.7.4
[PATCH 13/17] trace doc: convert trace/events-msr.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/events-msr.rst | 40 ++ Documentation/trace/events-msr.txt | 37 --- Documentation/trace/index.rst | 1 + 3 files changed, 41 insertions(+), 37 deletions(-) create mode 100644 Documentation/trace/events-msr.rst delete mode 100644 Documentation/trace/events-msr.txt diff --git a/Documentation/trace/events-msr.rst b/Documentation/trace/events-msr.rst new file mode 100644 index 000..e938aa0 --- /dev/null +++ b/Documentation/trace/events-msr.rst @@ -0,0 +1,40 @@ + +MSR Trace Events + + +The x86 kernel supports tracing most MSR (Model Specific Register) accesses. +To see the definition of the MSRs on Intel systems please see the SDM +at http://www.intel.com/sdm (Volume 3) + +Available trace points: + +/sys/kernel/debug/tracing/events/msr/ + +Trace MSR reads: + +read_msr + + - msr: MSR number + - val: Value written + - failed: 1 if the access failed, otherwise 0 + + +Trace MSR writes: + +write_msr + + - msr: MSR number + - val: Value written + - failed: 1 if the access failed, otherwise 0 + + +Trace RDPMC in kernel: + +rdpmc + +The trace data can be post processed with the postprocess/decode_msr.py script:: + + cat /sys/kernel/debug/tracing/trace | decode_msr.py /usr/src/linux/include/asm/msr-index.h + +to add symbolic MSR names. + diff --git a/Documentation/trace/events-msr.txt b/Documentation/trace/events-msr.txt deleted file mode 100644 index 78c383b..000 --- a/Documentation/trace/events-msr.txt +++ /dev/null @@ -1,37 +0,0 @@ - -The x86 kernel supports tracing most MSR (Model Specific Register) accesses. -To see the definition of the MSRs on Intel systems please see the SDM -at http://www.intel.com/sdm (Volume 3) - -Available trace points: - -/sys/kernel/debug/tracing/events/msr/ - -Trace MSR reads - -read_msr - -msr: MSR number -val: Value written -failed: 1 if the access failed, otherwise 0 - - -Trace MSR writes - -write_msr - -msr: MSR number -val: Value written -failed: 1 if the access failed, otherwise 0 - - -Trace RDPMC in kernel - -rdpmc - -The trace data can be post processed with the postprocess/decode_msr.py script - -cat /sys/kernel/debug/tracing/trace | decode_msr.py /usr/src/linux/include/asm/msr-index.h - -to add symbolic MSR names. - diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index f4a8fbc..307468d 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -16,3 +16,4 @@ Linux Tracing Technologies events-kmem events-power events-nmi + events-msr -- 2.7.4
[PATCH 09/17] trace doc: convert trace/events.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/{events.txt => events.rst} | 669 + Documentation/trace/index.rst | 1 + 2 files changed, 337 insertions(+), 333 deletions(-) rename Documentation/trace/{events.txt => events.rst} (82%) diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.rst similarity index 82% rename from Documentation/trace/events.txt rename to Documentation/trace/events.rst index 2cc08d4..3d6fdea 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.rst @@ -1,7 +1,9 @@ -Event Tracing += +Event Tracing += - Documentation written by Theodore Ts'o - Updated by Li Zefan and Tom Zanussi +:Author: Theodore Ts'o +:Updated: Li Zefan and Tom Zanussi 1. Introduction === @@ -25,23 +27,22 @@ The events which are available for tracing can be found in the file /sys/kernel/debug/tracing/available_events. To enable a particular event, such as 'sched_wakeup', simply echo it -to /sys/kernel/debug/tracing/set_event. For example: +to /sys/kernel/debug/tracing/set_event. For example:: # echo sched_wakeup >> /sys/kernel/debug/tracing/set_event -[ Note: '>>' is necessary, otherwise it will firstly disable - all the events. ] +.. Note:: '>>' is necessary, otherwise it will firstly disable all the events. To disable an event, echo the event name to the set_event file prefixed -with an exclamation point: +with an exclamation point:: # echo '!sched_wakeup' >> /sys/kernel/debug/tracing/set_event -To disable all events, echo an empty line to the set_event file: +To disable all events, echo an empty line to the set_event file:: # echo > /sys/kernel/debug/tracing/set_event -To enable all events, echo '*:*' or '*:' to the set_event file: +To enable all events, echo '*:*' or '*:' to the set_event file:: # echo *:* > /sys/kernel/debug/tracing/set_event @@ -50,7 +51,7 @@ etc., and a full event name looks like this: :. The subsystem name is optional, but it is displayed in the available_events file. All of the events in a subsystem can be specified via the syntax ":*"; for example, to enable all irq events, you can use the -command: +command:: # echo 'irq:*' > /sys/kernel/debug/tracing/set_event @@ -60,33 +61,33 @@ command: The events available are also listed in /sys/kernel/debug/tracing/events/ hierarchy of directories. -To enable event 'sched_wakeup': +To enable event 'sched_wakeup':: # echo 1 > /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable -To disable it: +To disable it:: # echo 0 > /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable -To enable all events in sched subsystem: +To enable all events in sched subsystem:: # echo 1 > /sys/kernel/debug/tracing/events/sched/enable -To enable all events: +To enable all events:: # echo 1 > /sys/kernel/debug/tracing/events/enable When reading one of these enable files, there are four results: - 0 - all events this file affects are disabled - 1 - all events this file affects are enabled - X - there is a mixture of events enabled and disabled - ? - this file does not affect any event + - 0 - all events this file affects are disabled + - 1 - all events this file affects are enabled + - X - there is a mixture of events enabled and disabled + - ? - this file does not affect any event 2.3 Boot option --- -In order to facilitate early boot debugging, use boot option: +In order to facilitate early boot debugging, use boot option:: trace_event=[event-list] @@ -115,7 +116,7 @@ the fields prefixed with 'common_'. The other fields vary between events and correspond to the fields defined in the TRACE_EVENT definition for that event. -Each field in the format has the form: +Each field in the format has the form:: field:field-type field-name; offset:N; size:N; @@ -123,27 +124,27 @@ where offset is the offset of the field in the trace record and size is the size of the data item, in bytes. For example, here's the information displayed for the 'sched_wakeup' -event: +event:: -# cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/format + # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/format -name: sched_wakeup -ID: 60 -format: - field:unsigned short common_type; offset:0; size:2; - field:unsigned char common_flags; offset:2; size:1; - field:unsigned char common_preempt_count; offset:3; size:1; - field:int common_pid; offset:4; size:4; - field:int common_tgid; offset:8;
[PATCH 07/17] trace doc: convert trace/uprobetracer.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + .../trace/{uprobetracer.txt => uprobetracer.rst} | 44 +- 2 files changed, 27 insertions(+), 18 deletions(-) rename Documentation/trace/{uprobetracer.txt => uprobetracer.rst} (86%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index c8e2130..353fb8a 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -10,3 +10,4 @@ Linux Tracing Technologies ftrace ftrace-uses kprobetrace + uprobetracer diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.rst similarity index 86% rename from Documentation/trace/uprobetracer.txt rename to Documentation/trace/uprobetracer.rst index bf526a7c..98d3f69 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.rst @@ -1,7 +1,8 @@ -Uprobe-tracer: Uprobe-based Event Tracing -= += +Uprobe-tracer: Uprobe-based Event Tracing += - Documentation written by Srikar Dronamraju +:Author: Srikar Dronamraju Overview @@ -19,6 +20,8 @@ user to calculate the offset of the probepoint in the object. Synopsis of uprobe_tracer - +:: + p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a uprobe r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe) -:[GRP/]EVENT : Clear uprobe or uretprobe event @@ -57,7 +60,7 @@ x86-64 uses x64). String type is a special type, which fetches a "null-terminated" string from user space. Bitfield is another special type, which takes 3 parameters, bit-width, bit- -offset, and container-size (usually 32). The syntax is; +offset, and container-size (usually 32). The syntax is:: b@/ @@ -74,28 +77,28 @@ the third is the number of probe miss-hits. Usage examples -- * Add a probe as a new uprobe event, write a new definition to uprobe_events -as below: (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash) + as below (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash):: echo 'p /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events - * Add a probe as a new uretprobe event: + * Add a probe as a new uretprobe event:: echo 'r /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events - * Unset registered event: + * Unset registered event:: echo '-:p_bash_0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events - * Print out the events that are registered: + * Print out the events that are registered:: cat /sys/kernel/debug/tracing/uprobe_events - * Clear all events: + * Clear all events:: echo > /sys/kernel/debug/tracing/uprobe_events Following example shows how to dump the instruction pointer and %ax register -at the probed text address. Probe zfree function in /bin/zsh: +at the probed text address. Probe zfree function in /bin/zsh:: # cd /sys/kernel/debug/tracing/ # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp @@ -103,24 +106,27 @@ at the probed text address. Probe zfree function in /bin/zsh: # objdump -T /bin/zsh | grep -w zfree 00446420 gDF .text 0012 Basezfree - 0x46420 is the offset of zfree in object /bin/zsh that is loaded at - 0x0040. Hence the command to uprobe would be: +0x46420 is the offset of zfree in object /bin/zsh that is loaded at +0x0040. Hence the command to uprobe would be:: # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events - And the same for the uretprobe would be: +And the same for the uretprobe would be:: # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events -Please note: User has to explicitly calculate the offset of the probe-point -in the object. We can see the events that are registered by looking at the -uprobe_events file. +.. note:: User has to explicitly calculate the offset of the probe-point + in the object. + +We can see the events that are registered by looking at the uprobe_events file. +:: # cat uprobe_events p:uprobes/zfree_entry /bin/zsh:0x00046420 arg1=%ip arg2=%ax r:uprobes/zfree_exit /bin/zsh:0x00046420 arg1=%ip arg2=%ax -Format of events can be seen by viewing the file events/uprobes/zfree_entry/format +Format of events can be seen by viewing the file events/uprobes/zfree_entry/format. +:: # cat events/uprobes/zfree_entry/format name: zfree_entry @@ -139,16 +145,18 @@ Format of events can be seen by viewing the file events/uprobes/zfree_entry/form
[PATCH 09/17] trace doc: convert trace/events.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/{events.txt => events.rst} | 669 + Documentation/trace/index.rst | 1 + 2 files changed, 337 insertions(+), 333 deletions(-) rename Documentation/trace/{events.txt => events.rst} (82%) diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.rst similarity index 82% rename from Documentation/trace/events.txt rename to Documentation/trace/events.rst index 2cc08d4..3d6fdea 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.rst @@ -1,7 +1,9 @@ -Event Tracing += +Event Tracing += - Documentation written by Theodore Ts'o - Updated by Li Zefan and Tom Zanussi +:Author: Theodore Ts'o +:Updated: Li Zefan and Tom Zanussi 1. Introduction === @@ -25,23 +27,22 @@ The events which are available for tracing can be found in the file /sys/kernel/debug/tracing/available_events. To enable a particular event, such as 'sched_wakeup', simply echo it -to /sys/kernel/debug/tracing/set_event. For example: +to /sys/kernel/debug/tracing/set_event. For example:: # echo sched_wakeup >> /sys/kernel/debug/tracing/set_event -[ Note: '>>' is necessary, otherwise it will firstly disable - all the events. ] +.. Note:: '>>' is necessary, otherwise it will firstly disable all the events. To disable an event, echo the event name to the set_event file prefixed -with an exclamation point: +with an exclamation point:: # echo '!sched_wakeup' >> /sys/kernel/debug/tracing/set_event -To disable all events, echo an empty line to the set_event file: +To disable all events, echo an empty line to the set_event file:: # echo > /sys/kernel/debug/tracing/set_event -To enable all events, echo '*:*' or '*:' to the set_event file: +To enable all events, echo '*:*' or '*:' to the set_event file:: # echo *:* > /sys/kernel/debug/tracing/set_event @@ -50,7 +51,7 @@ etc., and a full event name looks like this: :. The subsystem name is optional, but it is displayed in the available_events file. All of the events in a subsystem can be specified via the syntax ":*"; for example, to enable all irq events, you can use the -command: +command:: # echo 'irq:*' > /sys/kernel/debug/tracing/set_event @@ -60,33 +61,33 @@ command: The events available are also listed in /sys/kernel/debug/tracing/events/ hierarchy of directories. -To enable event 'sched_wakeup': +To enable event 'sched_wakeup':: # echo 1 > /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable -To disable it: +To disable it:: # echo 0 > /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable -To enable all events in sched subsystem: +To enable all events in sched subsystem:: # echo 1 > /sys/kernel/debug/tracing/events/sched/enable -To enable all events: +To enable all events:: # echo 1 > /sys/kernel/debug/tracing/events/enable When reading one of these enable files, there are four results: - 0 - all events this file affects are disabled - 1 - all events this file affects are enabled - X - there is a mixture of events enabled and disabled - ? - this file does not affect any event + - 0 - all events this file affects are disabled + - 1 - all events this file affects are enabled + - X - there is a mixture of events enabled and disabled + - ? - this file does not affect any event 2.3 Boot option --- -In order to facilitate early boot debugging, use boot option: +In order to facilitate early boot debugging, use boot option:: trace_event=[event-list] @@ -115,7 +116,7 @@ the fields prefixed with 'common_'. The other fields vary between events and correspond to the fields defined in the TRACE_EVENT definition for that event. -Each field in the format has the form: +Each field in the format has the form:: field:field-type field-name; offset:N; size:N; @@ -123,27 +124,27 @@ where offset is the offset of the field in the trace record and size is the size of the data item, in bytes. For example, here's the information displayed for the 'sched_wakeup' -event: +event:: -# cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/format + # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/format -name: sched_wakeup -ID: 60 -format: - field:unsigned short common_type; offset:0; size:2; - field:unsigned char common_flags; offset:2; size:1; - field:unsigned char common_preempt_count; offset:3; size:1; - field:int common_pid; offset:4; size:4; - field:int common_tgid; offset:8; size:4; + name: sched_wakeup + ID: 60 + format: +
[PATCH 07/17] trace doc: convert trace/uprobetracer.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + .../trace/{uprobetracer.txt => uprobetracer.rst} | 44 +- 2 files changed, 27 insertions(+), 18 deletions(-) rename Documentation/trace/{uprobetracer.txt => uprobetracer.rst} (86%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index c8e2130..353fb8a 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -10,3 +10,4 @@ Linux Tracing Technologies ftrace ftrace-uses kprobetrace + uprobetracer diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.rst similarity index 86% rename from Documentation/trace/uprobetracer.txt rename to Documentation/trace/uprobetracer.rst index bf526a7c..98d3f69 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.rst @@ -1,7 +1,8 @@ -Uprobe-tracer: Uprobe-based Event Tracing -= += +Uprobe-tracer: Uprobe-based Event Tracing += - Documentation written by Srikar Dronamraju +:Author: Srikar Dronamraju Overview @@ -19,6 +20,8 @@ user to calculate the offset of the probepoint in the object. Synopsis of uprobe_tracer - +:: + p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a uprobe r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe) -:[GRP/]EVENT : Clear uprobe or uretprobe event @@ -57,7 +60,7 @@ x86-64 uses x64). String type is a special type, which fetches a "null-terminated" string from user space. Bitfield is another special type, which takes 3 parameters, bit-width, bit- -offset, and container-size (usually 32). The syntax is; +offset, and container-size (usually 32). The syntax is:: b@/ @@ -74,28 +77,28 @@ the third is the number of probe miss-hits. Usage examples -- * Add a probe as a new uprobe event, write a new definition to uprobe_events -as below: (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash) + as below (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash):: echo 'p /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events - * Add a probe as a new uretprobe event: + * Add a probe as a new uretprobe event:: echo 'r /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events - * Unset registered event: + * Unset registered event:: echo '-:p_bash_0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events - * Print out the events that are registered: + * Print out the events that are registered:: cat /sys/kernel/debug/tracing/uprobe_events - * Clear all events: + * Clear all events:: echo > /sys/kernel/debug/tracing/uprobe_events Following example shows how to dump the instruction pointer and %ax register -at the probed text address. Probe zfree function in /bin/zsh: +at the probed text address. Probe zfree function in /bin/zsh:: # cd /sys/kernel/debug/tracing/ # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp @@ -103,24 +106,27 @@ at the probed text address. Probe zfree function in /bin/zsh: # objdump -T /bin/zsh | grep -w zfree 00446420 gDF .text 0012 Basezfree - 0x46420 is the offset of zfree in object /bin/zsh that is loaded at - 0x0040. Hence the command to uprobe would be: +0x46420 is the offset of zfree in object /bin/zsh that is loaded at +0x0040. Hence the command to uprobe would be:: # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events - And the same for the uretprobe would be: +And the same for the uretprobe would be:: # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events -Please note: User has to explicitly calculate the offset of the probe-point -in the object. We can see the events that are registered by looking at the -uprobe_events file. +.. note:: User has to explicitly calculate the offset of the probe-point + in the object. + +We can see the events that are registered by looking at the uprobe_events file. +:: # cat uprobe_events p:uprobes/zfree_entry /bin/zsh:0x00046420 arg1=%ip arg2=%ax r:uprobes/zfree_exit /bin/zsh:0x00046420 arg1=%ip arg2=%ax -Format of events can be seen by viewing the file events/uprobes/zfree_entry/format +Format of events can be seen by viewing the file events/uprobes/zfree_entry/format. +:: # cat events/uprobes/zfree_entry/format name: zfree_entry @@ -139,16 +145,18 @@ Format of events can be seen by viewing the file events/uprobes/zfree_entry/form print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip,
[PATCH 16/17] trace doc: convert trace/intel_th.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + Documentation/trace/{intel_th.txt => intel_th.rst} | 43 +++--- 2 files changed, 23 insertions(+), 21 deletions(-) rename Documentation/trace/{intel_th.txt => intel_th.rst} (82%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index eabbbaf..02cc56c 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -19,3 +19,4 @@ Linux Tracing Technologies events-msr mmiotrace hwlat_detector + intel_th diff --git a/Documentation/trace/intel_th.txt b/Documentation/trace/intel_th.rst similarity index 82% rename from Documentation/trace/intel_th.txt rename to Documentation/trace/intel_th.rst index 7a57165..990f132 100644 --- a/Documentation/trace/intel_th.txt +++ b/Documentation/trace/intel_th.rst @@ -1,3 +1,4 @@ +=== Intel(R) Trace Hub (TH) === @@ -18,13 +19,13 @@ via sysfs attributes. Currently, the following Intel TH subdevices (blocks) are supported: - Software Trace Hub (STH), trace source, which is a System Trace - Module (STM) device, +Module (STM) device, - Memory Storage Unit (MSU), trace output, which allows storing - trace hub output in system memory, +trace hub output in system memory, - Parallel Trace Interface output (PTI), trace output to an external - debug host via a PTI port, +debug host via a PTI port, - Global Trace Hub (GTH), which is a switch and a central component - of Intel(R) Trace Hub architecture. +of Intel(R) Trace Hub architecture. Common attributes for output devices are described in Documentation/ABI/testing/sysfs-bus-intel_th-output-devices, the most @@ -65,41 +66,41 @@ allocated, are accessible via /dev/intel_th0/msc{0,1}. Quick example - -# figure out which GTH port is the first memory controller: +# figure out which GTH port is the first memory controller:: -$ cat /sys/bus/intel_th/devices/0-msc0/port -0 + $ cat /sys/bus/intel_th/devices/0-msc0/port + 0 -# looks like it's port 0, configure master 33 to send data to port 0: +# looks like it's port 0, configure master 33 to send data to port 0:: -$ echo 0 > /sys/bus/intel_th/devices/0-gth/masters/33 + $ echo 0 > /sys/bus/intel_th/devices/0-gth/masters/33 # allocate a 2-windowed multiblock buffer on the first memory -# controller, each with 64 pages: +# controller, each with 64 pages:: -$ echo multi > /sys/bus/intel_th/devices/0-msc0/mode -$ echo 64,64 > /sys/bus/intel_th/devices/0-msc0/nr_pages + $ echo multi > /sys/bus/intel_th/devices/0-msc0/mode + $ echo 64,64 > /sys/bus/intel_th/devices/0-msc0/nr_pages -# enable wrapping for this controller, too: +# enable wrapping for this controller, too:: -$ echo 1 > /sys/bus/intel_th/devices/0-msc0/wrap + $ echo 1 > /sys/bus/intel_th/devices/0-msc0/wrap -# and enable tracing into this port: +# and enable tracing into this port:: -$ echo 1 > /sys/bus/intel_th/devices/0-msc0/active + $ echo 1 > /sys/bus/intel_th/devices/0-msc0/active # .. send data to master 33, see stm.txt for more details .. # .. wait for traces to pile up .. -# .. and stop the trace: +# .. and stop the trace:: -$ echo 0 > /sys/bus/intel_th/devices/0-msc0/active + $ echo 0 > /sys/bus/intel_th/devices/0-msc0/active -# and now you can collect the trace from the device node: +# and now you can collect the trace from the device node:: -$ cat /dev/intel_th0/msc0 > my_stp_trace + $ cat /dev/intel_th0/msc0 > my_stp_trace Host Debugger Mode -== +-- It is possible to configure the Trace Hub and control its trace capture from a remote debug host, which should be connected via one of -- 2.7.4
[PATCH 17/17] trace doc: convert trace/stm.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst| 1 + Documentation/trace/{stm.txt => stm.rst} | 23 --- 2 files changed, 13 insertions(+), 11 deletions(-) rename Documentation/trace/{stm.txt => stm.rst} (91%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 02cc56c..b58c10b 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -20,3 +20,4 @@ Linux Tracing Technologies mmiotrace hwlat_detector intel_th + stm diff --git a/Documentation/trace/stm.txt b/Documentation/trace/stm.rst similarity index 91% rename from Documentation/trace/stm.txt rename to Documentation/trace/stm.rst index 0376575..2c22ddb 100644 --- a/Documentation/trace/stm.txt +++ b/Documentation/trace/stm.rst @@ -1,3 +1,4 @@ +=== System Trace Module === @@ -32,14 +33,14 @@ associated with it, located in "stp-policy" subsystem directory in configfs. The topmost directory's name (the policy) is formatted as the STM device name to which this policy applies and and arbitrary string identifier separated by a stop. From the examle above, a rule -may look like this: +may look like this:: -$ ls /config/stp-policy/dummy_stm.my-policy/user -channels masters -$ cat /config/stp-policy/dummy_stm.my-policy/user/masters -48 63 -$ cat /config/stp-policy/dummy_stm.my-policy/user/channels -0 127 + $ ls /config/stp-policy/dummy_stm.my-policy/user + channels masters + $ cat /config/stp-policy/dummy_stm.my-policy/user/masters + 48 63 + $ cat /config/stp-policy/dummy_stm.my-policy/user/channels + 0 127 which means that the master allocation pool for this rule consists of masters 48 through 63 and channel allocation pool has channels 0 @@ -78,9 +79,9 @@ stm_source For kernel-based trace sources, there is "stm_source" device class. Devices of this class can be connected and disconnected to/from stm devices at runtime via a sysfs attribute called "stm_source_link" -by writing the name of the desired stm device there, for example: +by writing the name of the desired stm device there, for example:: -$ echo dummy_stm.0 > /sys/class/stm_source/console/stm_source_link + $ echo dummy_stm.0 > /sys/class/stm_source/console/stm_source_link For examples on how to use stm_source interface in the kernel, refer to stm_console, stm_heartbeat or stm_ftrace drivers. @@ -118,5 +119,5 @@ the same time. Currently only Ftrace "function" tracer is supported. -[1] https://software.intel.com/sites/default/files/managed/d3/3c/intel-th-developer-manual.pdf -[2] http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0444b/index.html +* [1] https://software.intel.com/sites/default/files/managed/d3/3c/intel-th-developer-manual.pdf +* [2] http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0444b/index.html -- 2.7.4
[PATCH 16/17] trace doc: convert trace/intel_th.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + Documentation/trace/{intel_th.txt => intel_th.rst} | 43 +++--- 2 files changed, 23 insertions(+), 21 deletions(-) rename Documentation/trace/{intel_th.txt => intel_th.rst} (82%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index eabbbaf..02cc56c 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -19,3 +19,4 @@ Linux Tracing Technologies events-msr mmiotrace hwlat_detector + intel_th diff --git a/Documentation/trace/intel_th.txt b/Documentation/trace/intel_th.rst similarity index 82% rename from Documentation/trace/intel_th.txt rename to Documentation/trace/intel_th.rst index 7a57165..990f132 100644 --- a/Documentation/trace/intel_th.txt +++ b/Documentation/trace/intel_th.rst @@ -1,3 +1,4 @@ +=== Intel(R) Trace Hub (TH) === @@ -18,13 +19,13 @@ via sysfs attributes. Currently, the following Intel TH subdevices (blocks) are supported: - Software Trace Hub (STH), trace source, which is a System Trace - Module (STM) device, +Module (STM) device, - Memory Storage Unit (MSU), trace output, which allows storing - trace hub output in system memory, +trace hub output in system memory, - Parallel Trace Interface output (PTI), trace output to an external - debug host via a PTI port, +debug host via a PTI port, - Global Trace Hub (GTH), which is a switch and a central component - of Intel(R) Trace Hub architecture. +of Intel(R) Trace Hub architecture. Common attributes for output devices are described in Documentation/ABI/testing/sysfs-bus-intel_th-output-devices, the most @@ -65,41 +66,41 @@ allocated, are accessible via /dev/intel_th0/msc{0,1}. Quick example - -# figure out which GTH port is the first memory controller: +# figure out which GTH port is the first memory controller:: -$ cat /sys/bus/intel_th/devices/0-msc0/port -0 + $ cat /sys/bus/intel_th/devices/0-msc0/port + 0 -# looks like it's port 0, configure master 33 to send data to port 0: +# looks like it's port 0, configure master 33 to send data to port 0:: -$ echo 0 > /sys/bus/intel_th/devices/0-gth/masters/33 + $ echo 0 > /sys/bus/intel_th/devices/0-gth/masters/33 # allocate a 2-windowed multiblock buffer on the first memory -# controller, each with 64 pages: +# controller, each with 64 pages:: -$ echo multi > /sys/bus/intel_th/devices/0-msc0/mode -$ echo 64,64 > /sys/bus/intel_th/devices/0-msc0/nr_pages + $ echo multi > /sys/bus/intel_th/devices/0-msc0/mode + $ echo 64,64 > /sys/bus/intel_th/devices/0-msc0/nr_pages -# enable wrapping for this controller, too: +# enable wrapping for this controller, too:: -$ echo 1 > /sys/bus/intel_th/devices/0-msc0/wrap + $ echo 1 > /sys/bus/intel_th/devices/0-msc0/wrap -# and enable tracing into this port: +# and enable tracing into this port:: -$ echo 1 > /sys/bus/intel_th/devices/0-msc0/active + $ echo 1 > /sys/bus/intel_th/devices/0-msc0/active # .. send data to master 33, see stm.txt for more details .. # .. wait for traces to pile up .. -# .. and stop the trace: +# .. and stop the trace:: -$ echo 0 > /sys/bus/intel_th/devices/0-msc0/active + $ echo 0 > /sys/bus/intel_th/devices/0-msc0/active -# and now you can collect the trace from the device node: +# and now you can collect the trace from the device node:: -$ cat /dev/intel_th0/msc0 > my_stp_trace + $ cat /dev/intel_th0/msc0 > my_stp_trace Host Debugger Mode -== +-- It is possible to configure the Trace Hub and control its trace capture from a remote debug host, which should be connected via one of -- 2.7.4
[PATCH 17/17] trace doc: convert trace/stm.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst| 1 + Documentation/trace/{stm.txt => stm.rst} | 23 --- 2 files changed, 13 insertions(+), 11 deletions(-) rename Documentation/trace/{stm.txt => stm.rst} (91%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 02cc56c..b58c10b 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -20,3 +20,4 @@ Linux Tracing Technologies mmiotrace hwlat_detector intel_th + stm diff --git a/Documentation/trace/stm.txt b/Documentation/trace/stm.rst similarity index 91% rename from Documentation/trace/stm.txt rename to Documentation/trace/stm.rst index 0376575..2c22ddb 100644 --- a/Documentation/trace/stm.txt +++ b/Documentation/trace/stm.rst @@ -1,3 +1,4 @@ +=== System Trace Module === @@ -32,14 +33,14 @@ associated with it, located in "stp-policy" subsystem directory in configfs. The topmost directory's name (the policy) is formatted as the STM device name to which this policy applies and and arbitrary string identifier separated by a stop. From the examle above, a rule -may look like this: +may look like this:: -$ ls /config/stp-policy/dummy_stm.my-policy/user -channels masters -$ cat /config/stp-policy/dummy_stm.my-policy/user/masters -48 63 -$ cat /config/stp-policy/dummy_stm.my-policy/user/channels -0 127 + $ ls /config/stp-policy/dummy_stm.my-policy/user + channels masters + $ cat /config/stp-policy/dummy_stm.my-policy/user/masters + 48 63 + $ cat /config/stp-policy/dummy_stm.my-policy/user/channels + 0 127 which means that the master allocation pool for this rule consists of masters 48 through 63 and channel allocation pool has channels 0 @@ -78,9 +79,9 @@ stm_source For kernel-based trace sources, there is "stm_source" device class. Devices of this class can be connected and disconnected to/from stm devices at runtime via a sysfs attribute called "stm_source_link" -by writing the name of the desired stm device there, for example: +by writing the name of the desired stm device there, for example:: -$ echo dummy_stm.0 > /sys/class/stm_source/console/stm_source_link + $ echo dummy_stm.0 > /sys/class/stm_source/console/stm_source_link For examples on how to use stm_source interface in the kernel, refer to stm_console, stm_heartbeat or stm_ftrace drivers. @@ -118,5 +119,5 @@ the same time. Currently only Ftrace "function" tracer is supported. -[1] https://software.intel.com/sites/default/files/managed/d3/3c/intel-th-developer-manual.pdf -[2] http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0444b/index.html +* [1] https://software.intel.com/sites/default/files/managed/d3/3c/intel-th-developer-manual.pdf +* [2] http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0444b/index.html -- 2.7.4
[PATCH 15/17] trace doc: convert trace/hwlat_detector.txt to rst fromat
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- .../{hwlat_detector.txt => hwlat_detector.rst} | 26 +- Documentation/trace/index.rst | 1 + 2 files changed, 16 insertions(+), 11 deletions(-) rename Documentation/trace/{hwlat_detector.txt => hwlat_detector.rst} (83%) diff --git a/Documentation/trace/hwlat_detector.txt b/Documentation/trace/hwlat_detector.rst similarity index 83% rename from Documentation/trace/hwlat_detector.txt rename to Documentation/trace/hwlat_detector.rst index 3207717..5739349 100644 --- a/Documentation/trace/hwlat_detector.txt +++ b/Documentation/trace/hwlat_detector.rst @@ -1,4 +1,8 @@ -Introduction: += +Hardware Latency Detector += + +Introduction - The tracer hwlat_detector is a special purpose tracer that is used to @@ -28,7 +32,7 @@ Note that the hwlat detector should *NEVER* be used in a production environment. It is intended to be run manually to determine if the hardware platform has a problem with long system firmware service routines. -Usage: +Usage -- Write the ASCII text "hwlat" into the current_tracer file of the tracing system @@ -36,16 +40,16 @@ Write the ASCII text "hwlat" into the current_tracer file of the tracing system redefine the threshold in microseconds (us) above which latency spikes will be taken into account. -Example: +Example:: # echo hwlat > /sys/kernel/tracing/current_tracer # echo 100 > /sys/kernel/tracing/tracing_thresh The /sys/kernel/tracing/hwlat_detector interface contains the following files: -width - time period to sample with CPUs held (usecs) - must be less than the total window size (enforced) -window - total period of sampling, width being inside (usecs) + - width - time period to sample with CPUs held (usecs) +must be less than the total window size (enforced) + - window - total period of sampling, width being inside (usecs) By default the width is set to 500,000 and window to 1,000,000, meaning that for every 1,000,000 usecs (1s) the hwlat detector will spin for 500,000 usecs @@ -67,11 +71,11 @@ The following tracing directory files are used by the hwlat_detector: in /sys/kernel/tracing: - tracing_threshold - minimum latency value to be considered (usecs) - tracing_max_latency - maximum hardware latency actually observed (usecs) - tracing_cpumask - the CPUs to move the hwlat thread across - hwlat_detector/width - specified amount of time to spin within window (usecs) - hwlat_detector/window - amount of time between (width) runs (usecs) + - tracing_threshold - minimum latency value to be considered (usecs) + - tracing_max_latency - maximum hardware latency actually observed (usecs) + - tracing_cpumask - the CPUs to move the hwlat thread across + - hwlat_detector/width- specified amount of time to spin within window (usecs) + - hwlat_detector/window - amount of time between (width) runs (usecs) The hwlat detector's kernel thread will migrate across each CPU specified in tracing_cpumask between each window. To limit the migration, either modify diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 4b3d690..eabbbaf 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -18,3 +18,4 @@ Linux Tracing Technologies events-nmi events-msr mmiotrace + hwlat_detector -- 2.7.4
[PATCH 14/17] trace doc: convert trace/mmiotrace.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + .../trace/{mmiotrace.txt => mmiotrace.rst} | 86 +- 2 files changed, 54 insertions(+), 33 deletions(-) rename Documentation/trace/{mmiotrace.txt => mmiotrace.rst} (78%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 307468d..4b3d690 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -17,3 +17,4 @@ Linux Tracing Technologies events-power events-nmi events-msr + mmiotrace diff --git a/Documentation/trace/mmiotrace.txt b/Documentation/trace/mmiotrace.rst similarity index 78% rename from Documentation/trace/mmiotrace.txt rename to Documentation/trace/mmiotrace.rst index 664e738..5116e8c 100644 --- a/Documentation/trace/mmiotrace.txt +++ b/Documentation/trace/mmiotrace.rst @@ -1,4 +1,6 @@ - In-kernel memory-mapped I/O tracing +=== +In-kernel memory-mapped I/O tracing +=== Home page and links to optional user space tools: @@ -31,30 +33,35 @@ is no way to automatically detect if you are losing events due to CPUs racing. Usage Quick Reference - +:: -$ mount -t debugfs debugfs /sys/kernel/debug -$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer -$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & -Start X or whatever. -$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker -$ echo nop > /sys/kernel/debug/tracing/current_tracer -Check for lost events. + $ mount -t debugfs debugfs /sys/kernel/debug + $ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer + $ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & + Start X or whatever. + $ echo "X is up" > /sys/kernel/debug/tracing/trace_marker + $ echo nop > /sys/kernel/debug/tracing/current_tracer + Check for lost events. Usage - Make sure debugfs is mounted to /sys/kernel/debug. -If not (requires root privileges): -$ mount -t debugfs debugfs /sys/kernel/debug +If not (requires root privileges):: + + $ mount -t debugfs debugfs /sys/kernel/debug Check that the driver you are about to trace is not loaded. -Activate mmiotrace (requires root privileges): -$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer +Activate mmiotrace (requires root privileges):: + + $ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer + +Start storing the trace:: + + $ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & -Start storing the trace: -$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & The 'cat' process should stay running (sleeping) in the background. Load the driver you want to trace and use it. Mmiotrace will only catch MMIO @@ -66,30 +73,42 @@ This makes it easier to see which part of the (huge) trace corresponds to which action. It is recommended to place descriptive markers about what you do. -Shut down mmiotrace (requires root privileges): -$ echo nop > /sys/kernel/debug/tracing/current_tracer +Shut down mmiotrace (requires root privileges):: + + $ echo nop > /sys/kernel/debug/tracing/current_tracer + The 'cat' process exits. If it does not, kill it by issuing 'fg' command and pressing ctrl+c. -Check that mmiotrace did not lose events due to a buffer filling up. Either -$ grep -i lost mydump.txt -which tells you exactly how many events were lost, or use -$ dmesg +Check that mmiotrace did not lose events due to a buffer filling up. Either:: + + $ grep -i lost mydump.txt + +which tells you exactly how many events were lost, or use:: + + $ dmesg + to view your kernel log and look for "mmiotrace has lost events" warning. If events were lost, the trace is incomplete. You should enlarge the buffers and try again. Buffers are enlarged by first seeing how large the current buffers -are: -$ cat /sys/kernel/debug/tracing/buffer_size_kb +are:: + + $ cat /sys/kernel/debug/tracing/buffer_size_kb + gives you a number. Approximately double this number and write it back, for -instance: -$ echo 128000 > /sys/kernel/debug/tracing/buffer_size_kb +instance:: + + $ echo 128000 > /sys/kernel/debug/tracing/buffer_size_kb + Then start again from the top. If you are doing a trace for a driver project, e.g. Nouveau, you should also -do the following before sending your results: -$ lspci -vvv > lspci.txt -$ dmesg > dmesg.txt -$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt +do the following before sending your results:: + + $ lspci -vvv > lspci.txt + $ dmesg > dmesg.txt + $ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt +
[PATCH 08/17] trace doc: convert trace/tracepoints.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + .../trace/{tracepoints.txt => tracepoints.rst} | 77 +++--- 2 files changed, 41 insertions(+), 37 deletions(-) rename Documentation/trace/{tracepoints.txt => tracepoints.rst} (74%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 353fb8a..c8bbdfc 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -11,3 +11,4 @@ Linux Tracing Technologies ftrace-uses kprobetrace uprobetracer + tracepoints diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.rst similarity index 74% rename from Documentation/trace/tracepoints.txt rename to Documentation/trace/tracepoints.rst index a3efac6..6e3ce3b 100644 --- a/Documentation/trace/tracepoints.txt +++ b/Documentation/trace/tracepoints.rst @@ -1,6 +1,8 @@ -Using the Linux Kernel Tracepoints +== +Using the Linux Kernel Tracepoints +== - Mathieu Desnoyers +:Author: Mathieu Desnoyers This document introduces Linux Kernel Tracepoints and their use. It @@ -9,8 +11,8 @@ connect probe functions to them and provides some examples of probe functions. -* Purpose of tracepoints - +Purpose of tracepoints +-- A tracepoint placed in code provides a hook to call a function (probe) that you can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or "off" (no probe is attached). When a tracepoint is @@ -31,8 +33,8 @@ header file. They can be used for tracing and performance accounting. -* Usage - +Usage +- Two elements are required for tracepoints : - A tracepoint definition, placed in a header file. @@ -40,52 +42,53 @@ Two elements are required for tracepoints : In order to use tracepoints, you should include linux/tracepoint.h. -In include/trace/events/subsys.h : +In include/trace/events/subsys.h:: -#undef TRACE_SYSTEM -#define TRACE_SYSTEM subsys + #undef TRACE_SYSTEM + #define TRACE_SYSTEM subsys -#if !defined(_TRACE_SUBSYS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SUBSYS_H + #if !defined(_TRACE_SUBSYS_H) || defined(TRACE_HEADER_MULTI_READ) + #define _TRACE_SUBSYS_H -#include + #include -DECLARE_TRACE(subsys_eventname, - TP_PROTO(int firstarg, struct task_struct *p), - TP_ARGS(firstarg, p)); + DECLARE_TRACE(subsys_eventname, + TP_PROTO(int firstarg, struct task_struct *p), + TP_ARGS(firstarg, p)); -#endif /* _TRACE_SUBSYS_H */ + #endif /* _TRACE_SUBSYS_H */ -/* This part must be outside protection */ -#include + /* This part must be outside protection */ + #include -In subsys/file.c (where the tracing statement must be added) : +In subsys/file.c (where the tracing statement must be added):: -#include + #include -#define CREATE_TRACE_POINTS -DEFINE_TRACE(subsys_eventname); + #define CREATE_TRACE_POINTS + DEFINE_TRACE(subsys_eventname); -void somefct(void) -{ - ... - trace_subsys_eventname(arg, task); - ... -} + void somefct(void) + { + ... + trace_subsys_eventname(arg, task); + ... + } Where : -- subsys_eventname is an identifier unique to your event + - subsys_eventname is an identifier unique to your event + - subsys is the name of your subsystem. - eventname is the name of the event to trace. -- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the - function called by this tracepoint. + - `TP_PROTO(int firstarg, struct task_struct *p)` is the prototype of the +function called by this tracepoint. -- TP_ARGS(firstarg, p) are the parameters names, same as found in the - prototype. + - `TP_ARGS(firstarg, p)` are the parameters names, same as found in the +prototype. -- if you use the header in multiple source files, #define CREATE_TRACE_POINTS - should appear only in one source file. + - if you use the header in multiple source files, `#define CREATE_TRACE_POINTS` +should appear only in one source file. Connecting a function (probe) to a tracepoint is done by providing a probe (function to call) for the specific tracepoint through @@ -117,7 +120,7 @@ used to export the defined tracepoints. If you need to do a bit of work for a tracepoint parameter, and that work is only used for the tracepoint, that work can be encapsulated -within an if statement with the following: +within an if statement with the following:: if (trace_foo_bar_enabled()) {
[PATCH 14/17] trace doc: convert trace/mmiotrace.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + .../trace/{mmiotrace.txt => mmiotrace.rst} | 86 +- 2 files changed, 54 insertions(+), 33 deletions(-) rename Documentation/trace/{mmiotrace.txt => mmiotrace.rst} (78%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 307468d..4b3d690 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -17,3 +17,4 @@ Linux Tracing Technologies events-power events-nmi events-msr + mmiotrace diff --git a/Documentation/trace/mmiotrace.txt b/Documentation/trace/mmiotrace.rst similarity index 78% rename from Documentation/trace/mmiotrace.txt rename to Documentation/trace/mmiotrace.rst index 664e738..5116e8c 100644 --- a/Documentation/trace/mmiotrace.txt +++ b/Documentation/trace/mmiotrace.rst @@ -1,4 +1,6 @@ - In-kernel memory-mapped I/O tracing +=== +In-kernel memory-mapped I/O tracing +=== Home page and links to optional user space tools: @@ -31,30 +33,35 @@ is no way to automatically detect if you are losing events due to CPUs racing. Usage Quick Reference - +:: -$ mount -t debugfs debugfs /sys/kernel/debug -$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer -$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & -Start X or whatever. -$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker -$ echo nop > /sys/kernel/debug/tracing/current_tracer -Check for lost events. + $ mount -t debugfs debugfs /sys/kernel/debug + $ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer + $ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & + Start X or whatever. + $ echo "X is up" > /sys/kernel/debug/tracing/trace_marker + $ echo nop > /sys/kernel/debug/tracing/current_tracer + Check for lost events. Usage - Make sure debugfs is mounted to /sys/kernel/debug. -If not (requires root privileges): -$ mount -t debugfs debugfs /sys/kernel/debug +If not (requires root privileges):: + + $ mount -t debugfs debugfs /sys/kernel/debug Check that the driver you are about to trace is not loaded. -Activate mmiotrace (requires root privileges): -$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer +Activate mmiotrace (requires root privileges):: + + $ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer + +Start storing the trace:: + + $ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & -Start storing the trace: -$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & The 'cat' process should stay running (sleeping) in the background. Load the driver you want to trace and use it. Mmiotrace will only catch MMIO @@ -66,30 +73,42 @@ This makes it easier to see which part of the (huge) trace corresponds to which action. It is recommended to place descriptive markers about what you do. -Shut down mmiotrace (requires root privileges): -$ echo nop > /sys/kernel/debug/tracing/current_tracer +Shut down mmiotrace (requires root privileges):: + + $ echo nop > /sys/kernel/debug/tracing/current_tracer + The 'cat' process exits. If it does not, kill it by issuing 'fg' command and pressing ctrl+c. -Check that mmiotrace did not lose events due to a buffer filling up. Either -$ grep -i lost mydump.txt -which tells you exactly how many events were lost, or use -$ dmesg +Check that mmiotrace did not lose events due to a buffer filling up. Either:: + + $ grep -i lost mydump.txt + +which tells you exactly how many events were lost, or use:: + + $ dmesg + to view your kernel log and look for "mmiotrace has lost events" warning. If events were lost, the trace is incomplete. You should enlarge the buffers and try again. Buffers are enlarged by first seeing how large the current buffers -are: -$ cat /sys/kernel/debug/tracing/buffer_size_kb +are:: + + $ cat /sys/kernel/debug/tracing/buffer_size_kb + gives you a number. Approximately double this number and write it back, for -instance: -$ echo 128000 > /sys/kernel/debug/tracing/buffer_size_kb +instance:: + + $ echo 128000 > /sys/kernel/debug/tracing/buffer_size_kb + Then start again from the top. If you are doing a trace for a driver project, e.g. Nouveau, you should also -do the following before sending your results: -$ lspci -vvv > lspci.txt -$ dmesg > dmesg.txt -$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt +do the following before sending your results:: + + $ lspci -vvv > lspci.txt + $ dmesg > dmesg.txt + $ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt + and then send the .tar.gz file. The trace compresses considerably.
[PATCH 08/17] trace doc: convert trace/tracepoints.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + .../trace/{tracepoints.txt => tracepoints.rst} | 77 +++--- 2 files changed, 41 insertions(+), 37 deletions(-) rename Documentation/trace/{tracepoints.txt => tracepoints.rst} (74%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 353fb8a..c8bbdfc 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -11,3 +11,4 @@ Linux Tracing Technologies ftrace-uses kprobetrace uprobetracer + tracepoints diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.rst similarity index 74% rename from Documentation/trace/tracepoints.txt rename to Documentation/trace/tracepoints.rst index a3efac6..6e3ce3b 100644 --- a/Documentation/trace/tracepoints.txt +++ b/Documentation/trace/tracepoints.rst @@ -1,6 +1,8 @@ -Using the Linux Kernel Tracepoints +== +Using the Linux Kernel Tracepoints +== - Mathieu Desnoyers +:Author: Mathieu Desnoyers This document introduces Linux Kernel Tracepoints and their use. It @@ -9,8 +11,8 @@ connect probe functions to them and provides some examples of probe functions. -* Purpose of tracepoints - +Purpose of tracepoints +-- A tracepoint placed in code provides a hook to call a function (probe) that you can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or "off" (no probe is attached). When a tracepoint is @@ -31,8 +33,8 @@ header file. They can be used for tracing and performance accounting. -* Usage - +Usage +- Two elements are required for tracepoints : - A tracepoint definition, placed in a header file. @@ -40,52 +42,53 @@ Two elements are required for tracepoints : In order to use tracepoints, you should include linux/tracepoint.h. -In include/trace/events/subsys.h : +In include/trace/events/subsys.h:: -#undef TRACE_SYSTEM -#define TRACE_SYSTEM subsys + #undef TRACE_SYSTEM + #define TRACE_SYSTEM subsys -#if !defined(_TRACE_SUBSYS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SUBSYS_H + #if !defined(_TRACE_SUBSYS_H) || defined(TRACE_HEADER_MULTI_READ) + #define _TRACE_SUBSYS_H -#include + #include -DECLARE_TRACE(subsys_eventname, - TP_PROTO(int firstarg, struct task_struct *p), - TP_ARGS(firstarg, p)); + DECLARE_TRACE(subsys_eventname, + TP_PROTO(int firstarg, struct task_struct *p), + TP_ARGS(firstarg, p)); -#endif /* _TRACE_SUBSYS_H */ + #endif /* _TRACE_SUBSYS_H */ -/* This part must be outside protection */ -#include + /* This part must be outside protection */ + #include -In subsys/file.c (where the tracing statement must be added) : +In subsys/file.c (where the tracing statement must be added):: -#include + #include -#define CREATE_TRACE_POINTS -DEFINE_TRACE(subsys_eventname); + #define CREATE_TRACE_POINTS + DEFINE_TRACE(subsys_eventname); -void somefct(void) -{ - ... - trace_subsys_eventname(arg, task); - ... -} + void somefct(void) + { + ... + trace_subsys_eventname(arg, task); + ... + } Where : -- subsys_eventname is an identifier unique to your event + - subsys_eventname is an identifier unique to your event + - subsys is the name of your subsystem. - eventname is the name of the event to trace. -- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the - function called by this tracepoint. + - `TP_PROTO(int firstarg, struct task_struct *p)` is the prototype of the +function called by this tracepoint. -- TP_ARGS(firstarg, p) are the parameters names, same as found in the - prototype. + - `TP_ARGS(firstarg, p)` are the parameters names, same as found in the +prototype. -- if you use the header in multiple source files, #define CREATE_TRACE_POINTS - should appear only in one source file. + - if you use the header in multiple source files, `#define CREATE_TRACE_POINTS` +should appear only in one source file. Connecting a function (probe) to a tracepoint is done by providing a probe (function to call) for the specific tracepoint through @@ -117,7 +120,7 @@ used to export the defined tracepoints. If you need to do a bit of work for a tracepoint parameter, and that work is only used for the tracepoint, that work can be encapsulated -within an if statement with the following: +within an if statement with the following:: if (trace_foo_bar_enabled()) { int i; @@ -139,7 +142,7 @@ The advantage of using the
[PATCH 15/17] trace doc: convert trace/hwlat_detector.txt to rst fromat
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- .../{hwlat_detector.txt => hwlat_detector.rst} | 26 +- Documentation/trace/index.rst | 1 + 2 files changed, 16 insertions(+), 11 deletions(-) rename Documentation/trace/{hwlat_detector.txt => hwlat_detector.rst} (83%) diff --git a/Documentation/trace/hwlat_detector.txt b/Documentation/trace/hwlat_detector.rst similarity index 83% rename from Documentation/trace/hwlat_detector.txt rename to Documentation/trace/hwlat_detector.rst index 3207717..5739349 100644 --- a/Documentation/trace/hwlat_detector.txt +++ b/Documentation/trace/hwlat_detector.rst @@ -1,4 +1,8 @@ -Introduction: += +Hardware Latency Detector += + +Introduction - The tracer hwlat_detector is a special purpose tracer that is used to @@ -28,7 +32,7 @@ Note that the hwlat detector should *NEVER* be used in a production environment. It is intended to be run manually to determine if the hardware platform has a problem with long system firmware service routines. -Usage: +Usage -- Write the ASCII text "hwlat" into the current_tracer file of the tracing system @@ -36,16 +40,16 @@ Write the ASCII text "hwlat" into the current_tracer file of the tracing system redefine the threshold in microseconds (us) above which latency spikes will be taken into account. -Example: +Example:: # echo hwlat > /sys/kernel/tracing/current_tracer # echo 100 > /sys/kernel/tracing/tracing_thresh The /sys/kernel/tracing/hwlat_detector interface contains the following files: -width - time period to sample with CPUs held (usecs) - must be less than the total window size (enforced) -window - total period of sampling, width being inside (usecs) + - width - time period to sample with CPUs held (usecs) +must be less than the total window size (enforced) + - window - total period of sampling, width being inside (usecs) By default the width is set to 500,000 and window to 1,000,000, meaning that for every 1,000,000 usecs (1s) the hwlat detector will spin for 500,000 usecs @@ -67,11 +71,11 @@ The following tracing directory files are used by the hwlat_detector: in /sys/kernel/tracing: - tracing_threshold - minimum latency value to be considered (usecs) - tracing_max_latency - maximum hardware latency actually observed (usecs) - tracing_cpumask - the CPUs to move the hwlat thread across - hwlat_detector/width - specified amount of time to spin within window (usecs) - hwlat_detector/window - amount of time between (width) runs (usecs) + - tracing_threshold - minimum latency value to be considered (usecs) + - tracing_max_latency - maximum hardware latency actually observed (usecs) + - tracing_cpumask - the CPUs to move the hwlat thread across + - hwlat_detector/width- specified amount of time to spin within window (usecs) + - hwlat_detector/window - amount of time between (width) runs (usecs) The hwlat detector's kernel thread will migrate across each CPU specified in tracing_cpumask between each window. To limit the migration, either modify diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 4b3d690..eabbbaf 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -18,3 +18,4 @@ Linux Tracing Technologies events-nmi events-msr mmiotrace + hwlat_detector -- 2.7.4
[PATCH 01/17] Documentation: add Linux tracing to Sphinx TOC tree
From: Changbin DuThis just add a index.rst for trace subsystem. More docs will be added later. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/index.rst | 1 + Documentation/trace/index.rst | 6 ++ 2 files changed, 7 insertions(+) create mode 100644 Documentation/trace/index.rst diff --git a/Documentation/index.rst b/Documentation/index.rst index ef5080c..3b99ab9 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -64,6 +64,7 @@ merged much easier. dev-tools/index doc-guide/index kernel-hacking/index + trace/index maintainer/index Kernel API documentation diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst new file mode 100644 index 000..d986ead --- /dev/null +++ b/Documentation/trace/index.rst @@ -0,0 +1,6 @@ +== +Linux Tracing Technologies +== + +.. toctree:: + :maxdepth: 2 -- 2.7.4
[PATCH 01/17] Documentation: add Linux tracing to Sphinx TOC tree
From: Changbin Du This just add a index.rst for trace subsystem. More docs will be added later. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/index.rst | 1 + Documentation/trace/index.rst | 6 ++ 2 files changed, 7 insertions(+) create mode 100644 Documentation/trace/index.rst diff --git a/Documentation/index.rst b/Documentation/index.rst index ef5080c..3b99ab9 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -64,6 +64,7 @@ merged much easier. dev-tools/index doc-guide/index kernel-hacking/index + trace/index maintainer/index Kernel API documentation diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst new file mode 100644 index 000..d986ead --- /dev/null +++ b/Documentation/trace/index.rst @@ -0,0 +1,6 @@ +== +Linux Tracing Technologies +== + +.. toctree:: + :maxdepth: 2 -- 2.7.4
[PATCH 04/17] trace doc: convert trace/tracepoint-analysis.txt to rst format
From: Changbin DuThis converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + ...epoint-analysis.txt => tracepoint-analysis.rst} | 41 ++ 2 files changed, 27 insertions(+), 15 deletions(-) rename Documentation/trace/{tracepoint-analysis.txt => tracepoint-analysis.rst} (93%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index aa2baad..61b5551 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -6,4 +6,5 @@ Linux Tracing Technologies :maxdepth: 2 ftrace-design + tracepoint-analysis ftrace-uses diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.rst similarity index 93% rename from Documentation/trace/tracepoint-analysis.txt rename to Documentation/trace/tracepoint-analysis.rst index 058cc6c..a4d3ff2 100644 --- a/Documentation/trace/tracepoint-analysis.txt +++ b/Documentation/trace/tracepoint-analysis.rst @@ -1,7 +1,7 @@ - Notes on Analysing Behaviour Using Events and Tracepoints - - Documentation written by Mel Gorman - PCL information heavily based on email from Ingo Molnar += +Notes on Analysing Behaviour Using Events and Tracepoints += +:Author: Mel Gorman (PCL information heavily based on email from Ingo Molnar) 1. Introduction === @@ -27,18 +27,18 @@ assumed that the PCL tool tools/perf has been installed and is in your path. -- All possible events are visible from /sys/kernel/debug/tracing/events. Simply -calling +calling:: $ find /sys/kernel/debug/tracing/events -type d will give a fair indication of the number of events available. 2.2 PCL (Performance Counters for Linux) + Discovery and enumeration of all counters and events, including tracepoints, are available with the perf tool. Getting a list of available events is a -simple case of: +simple case of:: $ perf list 2>&1 | grep Tracepoint ext4:ext4_free_inode [Tracepoint event] @@ -57,7 +57,7 @@ simple case of: See Documentation/trace/events.txt for a proper description on how events can be enabled system-wide. A short example of enabling all events related -to page allocation would look something like: +to page allocation would look something like:: $ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done @@ -67,6 +67,7 @@ to page allocation would look something like: In SystemTap, tracepoints are accessible using the kernel.trace() function call. The following is an example that reports every 5 seconds what processes were allocating the pages. +:: global page_allocs @@ -91,6 +92,7 @@ were allocating the pages. By specifying the -a switch and analysing sleep, the system-wide events for a duration of time can be examined. +:: $ perf stat -a \ -e kmem:mm_page_alloc -e kmem:mm_page_free \ @@ -118,6 +120,7 @@ basis using set_ftrace_pid. Events can be activated and tracked for the duration of a process on a local basis using PCL such as follows. +:: $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \ -e kmem:mm_page_free_batched ./hackbench 10 @@ -145,6 +148,7 @@ Any workload can exhibit variances between runs and it can be important to know what the standard deviation is. By and large, this is left to the performance analyst to do it by hand. In the event that the discrete event occurrences are useful to the performance analyst, then perf can be used. +:: $ perf stat --repeat 5 -e kmem:mm_page_alloc -e kmem:mm_page_free -e kmem:mm_page_free_batched ./hackbench 10 @@ -167,6 +171,7 @@ aggregation of discrete events, then a script would need to be developed. Using --repeat, it is also possible to view how events are fluctuating over time on a system-wide basis using -a and sleep. +:: $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \ -e kmem:mm_page_free_batched \ @@ -188,9 +193,9 @@ When events are enabled the events that are triggering can be read from options exist as well. By post-processing the output, further information can be gathered on-line as appropriate. Examples of post-processing might include - o Reading information from /proc for the PID that triggered the event - o Deriving a higher-level event from a series of lower-level events. - o Calculating latencies between two events + - Reading information from /proc for the PID that triggered the event + - Deriving a higher-level
[PATCH 04/17] trace doc: convert trace/tracepoint-analysis.txt to rst format
From: Changbin Du This converts the plain text documentation to reStructuredText format and add it into Sphinx TOC tree. No essential content change. Cc: Steven Rostedt Signed-off-by: Changbin Du --- Documentation/trace/index.rst | 1 + ...epoint-analysis.txt => tracepoint-analysis.rst} | 41 ++ 2 files changed, 27 insertions(+), 15 deletions(-) rename Documentation/trace/{tracepoint-analysis.txt => tracepoint-analysis.rst} (93%) diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index aa2baad..61b5551 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -6,4 +6,5 @@ Linux Tracing Technologies :maxdepth: 2 ftrace-design + tracepoint-analysis ftrace-uses diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.rst similarity index 93% rename from Documentation/trace/tracepoint-analysis.txt rename to Documentation/trace/tracepoint-analysis.rst index 058cc6c..a4d3ff2 100644 --- a/Documentation/trace/tracepoint-analysis.txt +++ b/Documentation/trace/tracepoint-analysis.rst @@ -1,7 +1,7 @@ - Notes on Analysing Behaviour Using Events and Tracepoints - - Documentation written by Mel Gorman - PCL information heavily based on email from Ingo Molnar += +Notes on Analysing Behaviour Using Events and Tracepoints += +:Author: Mel Gorman (PCL information heavily based on email from Ingo Molnar) 1. Introduction === @@ -27,18 +27,18 @@ assumed that the PCL tool tools/perf has been installed and is in your path. -- All possible events are visible from /sys/kernel/debug/tracing/events. Simply -calling +calling:: $ find /sys/kernel/debug/tracing/events -type d will give a fair indication of the number of events available. 2.2 PCL (Performance Counters for Linux) + Discovery and enumeration of all counters and events, including tracepoints, are available with the perf tool. Getting a list of available events is a -simple case of: +simple case of:: $ perf list 2>&1 | grep Tracepoint ext4:ext4_free_inode [Tracepoint event] @@ -57,7 +57,7 @@ simple case of: See Documentation/trace/events.txt for a proper description on how events can be enabled system-wide. A short example of enabling all events related -to page allocation would look something like: +to page allocation would look something like:: $ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done @@ -67,6 +67,7 @@ to page allocation would look something like: In SystemTap, tracepoints are accessible using the kernel.trace() function call. The following is an example that reports every 5 seconds what processes were allocating the pages. +:: global page_allocs @@ -91,6 +92,7 @@ were allocating the pages. By specifying the -a switch and analysing sleep, the system-wide events for a duration of time can be examined. +:: $ perf stat -a \ -e kmem:mm_page_alloc -e kmem:mm_page_free \ @@ -118,6 +120,7 @@ basis using set_ftrace_pid. Events can be activated and tracked for the duration of a process on a local basis using PCL such as follows. +:: $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \ -e kmem:mm_page_free_batched ./hackbench 10 @@ -145,6 +148,7 @@ Any workload can exhibit variances between runs and it can be important to know what the standard deviation is. By and large, this is left to the performance analyst to do it by hand. In the event that the discrete event occurrences are useful to the performance analyst, then perf can be used. +:: $ perf stat --repeat 5 -e kmem:mm_page_alloc -e kmem:mm_page_free -e kmem:mm_page_free_batched ./hackbench 10 @@ -167,6 +171,7 @@ aggregation of discrete events, then a script would need to be developed. Using --repeat, it is also possible to view how events are fluctuating over time on a system-wide basis using -a and sleep. +:: $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \ -e kmem:mm_page_free_batched \ @@ -188,9 +193,9 @@ When events are enabled the events that are triggering can be read from options exist as well. By post-processing the output, further information can be gathered on-line as appropriate. Examples of post-processing might include - o Reading information from /proc for the PID that triggered the event - o Deriving a higher-level event from a series of lower-level events. - o Calculating latencies between two events + - Reading information from /proc for the PID that triggered the event + - Deriving a higher-level event from a series of lower-level events. + - Calculating
RE: [PATCH v1 0/4] platform/x86: mlx-platform: Add bus differed and auto detection functionalities
> -Original Message- > From: Darren Hart [mailto:dvh...@infradead.org] > Sent: Friday, February 16, 2018 3:33 AM > To: Vadim Pasternak> Cc: andy.shevche...@gmail.com; gre...@linuxfoundation.org; platform- > driver-...@vger.kernel.org; linux-kernel@vger.kernel.org; j...@resnulli.us; > Michael Shych > Subject: Re: [PATCH v1 0/4] platform/x86: mlx-platform: Add bus differed and > auto detection functionalities > > On Tue, Feb 13, 2018 at 10:09:32PM +, Vadim Pasternak wrote: > > This patchset: > > - Adds define for the channels number for mux device. > > - Adds differed bus functionality. > > - Changes input for device create routine in mlxreg-hotplug driver. > > - Adds physical bus number auto detection. > > Hi Vadim, Hi Darren, > > We are now in the RC cycle for 4.16. Do you consider these changes to be > "fixes" that need to be included in 4.16? It was difficult to tell from the > commit > messages how severe some of the possible issues were. I actually considered these patches for the next, having in mind that 4.16 is already closed. > > I'm concerned about the level of testing seen by the previous patch series if > we > are getting these changes so soon after they were merged. > Can you comment on why these are being discovered now - and how confident > are we in the drivers with these changes applied? These patches has been tested on the all relevant platforms. It has been discovered some time ago and it was in my plans to submit these patches after the previous series. Do you think it's better to consider this series as a bug fix, and target it to 4.16? Thanks, Vadim. > > > > > Vadim Pasternak (4): > > platform/x86: mlx-platform: Use define for the channel numbers > > platform/x86: mlx-platform: Add differed bus functionality > > platform/mellanox: mlxreg-hotplug: Change input for device create > > routine > > platform/x86: mlx-platform: Add physical bus number auto detection > > > > drivers/platform/mellanox/mlxreg-hotplug.c | 31 +- > > drivers/platform/x86/mlx-platform.c| 68 > -- > > include/linux/platform_data/mlxreg.h | 4 ++ > > 3 files changed, 90 insertions(+), 13 deletions(-) > > > > -- > > 2.1.4 > > > > > > -- > Darren Hart > VMware Open Source Technology Center
RE: [PATCH v1 0/4] platform/x86: mlx-platform: Add bus differed and auto detection functionalities
> -Original Message- > From: Darren Hart [mailto:dvh...@infradead.org] > Sent: Friday, February 16, 2018 3:33 AM > To: Vadim Pasternak > Cc: andy.shevche...@gmail.com; gre...@linuxfoundation.org; platform- > driver-...@vger.kernel.org; linux-kernel@vger.kernel.org; j...@resnulli.us; > Michael Shych > Subject: Re: [PATCH v1 0/4] platform/x86: mlx-platform: Add bus differed and > auto detection functionalities > > On Tue, Feb 13, 2018 at 10:09:32PM +, Vadim Pasternak wrote: > > This patchset: > > - Adds define for the channels number for mux device. > > - Adds differed bus functionality. > > - Changes input for device create routine in mlxreg-hotplug driver. > > - Adds physical bus number auto detection. > > Hi Vadim, Hi Darren, > > We are now in the RC cycle for 4.16. Do you consider these changes to be > "fixes" that need to be included in 4.16? It was difficult to tell from the > commit > messages how severe some of the possible issues were. I actually considered these patches for the next, having in mind that 4.16 is already closed. > > I'm concerned about the level of testing seen by the previous patch series if > we > are getting these changes so soon after they were merged. > Can you comment on why these are being discovered now - and how confident > are we in the drivers with these changes applied? These patches has been tested on the all relevant platforms. It has been discovered some time ago and it was in my plans to submit these patches after the previous series. Do you think it's better to consider this series as a bug fix, and target it to 4.16? Thanks, Vadim. > > > > > Vadim Pasternak (4): > > platform/x86: mlx-platform: Use define for the channel numbers > > platform/x86: mlx-platform: Add differed bus functionality > > platform/mellanox: mlxreg-hotplug: Change input for device create > > routine > > platform/x86: mlx-platform: Add physical bus number auto detection > > > > drivers/platform/mellanox/mlxreg-hotplug.c | 31 +- > > drivers/platform/x86/mlx-platform.c| 68 > -- > > include/linux/platform_data/mlxreg.h | 4 ++ > > 3 files changed, 90 insertions(+), 13 deletions(-) > > > > -- > > 2.1.4 > > > > > > -- > Darren Hart > VMware Open Source Technology Center
[PATCH] ARM: sunxi: Fix multi-cluster SMP support compilation in multi v6/v7 configs
Various parts of the assembly code used in the multi-cluster SMP support requires ARMv7-A. If the kernel config also has multi v6 support enabled, Kbuild defaults to building for armv6k, which does not support some of the instructions we use. Configure the Makefile such that the multi-cluster SMP code is always built for ARMv7-A. This is also what mach-exynos does for their MC-SMP code. Signed-off-by: Chen-Yu Tsai--- This addresses "[sunxi:sunxi/core-for-4.17 1/4] /tmp/ccSQM2rD.s:438: Error: selected processor does not support `isb' in ARM mode" reported by the kbuild test robot for arm-allmodconfig. Should we apply it, or squash it in the original patch? --- arch/arm/mach-sunxi/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-sunxi/Makefile b/arch/arm/mach-sunxi/Makefile index 3e741e959c7c..3c2c4384357a 100644 --- a/arch/arm/mach-sunxi/Makefile +++ b/arch/arm/mach-sunxi/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_ARCH_SUNXI) += sunxi.o obj-$(CONFIG_ARCH_SUNXI_MC_SMP) += mc_smp.o +CFLAGS_mc_smp.o+= -march=armv7-a obj-$(CONFIG_SMP) += platsmp.o -- 2.16.1
[PATCH] ARM: sunxi: Fix multi-cluster SMP support compilation in multi v6/v7 configs
Various parts of the assembly code used in the multi-cluster SMP support requires ARMv7-A. If the kernel config also has multi v6 support enabled, Kbuild defaults to building for armv6k, which does not support some of the instructions we use. Configure the Makefile such that the multi-cluster SMP code is always built for ARMv7-A. This is also what mach-exynos does for their MC-SMP code. Signed-off-by: Chen-Yu Tsai --- This addresses "[sunxi:sunxi/core-for-4.17 1/4] /tmp/ccSQM2rD.s:438: Error: selected processor does not support `isb' in ARM mode" reported by the kbuild test robot for arm-allmodconfig. Should we apply it, or squash it in the original patch? --- arch/arm/mach-sunxi/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-sunxi/Makefile b/arch/arm/mach-sunxi/Makefile index 3e741e959c7c..3c2c4384357a 100644 --- a/arch/arm/mach-sunxi/Makefile +++ b/arch/arm/mach-sunxi/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_ARCH_SUNXI) += sunxi.o obj-$(CONFIG_ARCH_SUNXI_MC_SMP) += mc_smp.o +CFLAGS_mc_smp.o+= -march=armv7-a obj-$(CONFIG_SMP) += platsmp.o -- 2.16.1
Plan for DTC re-sync?
Hi Rob, Do you have a plan to sync scripts/dtc/ with upstream? I want the following commit in the upstream DTC project. commit b260c4f610c004c6e9e36c5f7bbb58d23e605bf1 Author: Grant LikelyDate: Mon Nov 20 17:12:18 2017 + Fix ambiguous grammar for devicetree rule -- Best Regards Masahiro Yamada
Plan for DTC re-sync?
Hi Rob, Do you have a plan to sync scripts/dtc/ with upstream? I want the following commit in the upstream DTC project. commit b260c4f610c004c6e9e36c5f7bbb58d23e605bf1 Author: Grant Likely Date: Mon Nov 20 17:12:18 2017 + Fix ambiguous grammar for devicetree rule -- Best Regards Masahiro Yamada
[PATCH] USB: chaoskey: Use kasprintf() over strcpy()/strcat()
Instead of kmalloc() with manually calculated values followed by multiple strcpy()/strcat() calls, just fold it all into a single kasprintf() call. Signed-off-by: Kees Cook--- drivers/usb/misc/chaoskey.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index 716cb515523e..cf5828ce927a 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -168,14 +168,10 @@ static int chaoskey_probe(struct usb_interface *interface, */ if (udev->product && udev->serial) { - dev->name = kmalloc(strlen(udev->product) + 1 + - strlen(udev->serial) + 1, GFP_KERNEL); + dev->name = kasprintf(GFP_KERNEL, "%s-%s", udev->product, + udev->serial); if (dev->name == NULL) goto out; - - strcpy(dev->name, udev->product); - strcat(dev->name, "-"); - strcat(dev->name, udev->serial); } dev->interface = interface; -- 2.7.4 -- Kees Cook Pixel Security
[PATCH] USB: chaoskey: Use kasprintf() over strcpy()/strcat()
Instead of kmalloc() with manually calculated values followed by multiple strcpy()/strcat() calls, just fold it all into a single kasprintf() call. Signed-off-by: Kees Cook --- drivers/usb/misc/chaoskey.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index 716cb515523e..cf5828ce927a 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -168,14 +168,10 @@ static int chaoskey_probe(struct usb_interface *interface, */ if (udev->product && udev->serial) { - dev->name = kmalloc(strlen(udev->product) + 1 + - strlen(udev->serial) + 1, GFP_KERNEL); + dev->name = kasprintf(GFP_KERNEL, "%s-%s", udev->product, + udev->serial); if (dev->name == NULL) goto out; - - strcpy(dev->name, udev->product); - strcat(dev->name, "-"); - strcat(dev->name, udev->serial); } dev->interface = interface; -- 2.7.4 -- Kees Cook Pixel Security
Re: [PATCH 08/23] kconfig: add 'macro' keyword to support user-defined function
On Sat, 17 Feb 2018, Ulf Magnusson wrote: > On Sat, Feb 17, 2018 at 3:30 AM, Nicolas Pitrewrote: > > On Sat, 17 Feb 2018, Ulf Magnusson wrote: > > > >> On Fri, Feb 16, 2018 at 02:49:31PM -0500, Nicolas Pitre wrote: > >> > On Sat, 17 Feb 2018, Masahiro Yamada wrote: > >> > > >> > > Now, we got a basic ability to test compiler capability in Kconfig. > >> > > > >> > > config CC_HAS_STACKPROTECTOR > >> > > bool > >> > > default $(shell $CC -Werror -fstack-protector -c -x c > >> > > /dev/null -o /dev/null) > >> > > > >> > > This works, but it is ugly to repeat this long boilerplate. > >> > > > >> > > We want to describe like this: > >> > > > >> > > config CC_HAS_STACKPROTECTOR > >> > > bool > >> > > default $(cc-option -fstack-protector) > >> > > > >> > > It is straight-forward to implement a new function, but I do not like > >> > > to hard-code specialized functions like this. Hence, here is another > >> > > feature to add functions from Kconfig files. > >> > > > >> > > A user-defined function can be defined as a string type symbol with > >> > > a special keyword 'macro'. It can be referenced in the same way as > >> > > built-in functions. This feature was also inspired by Makefile where > >> > > user-defined functions are referenced by $(call func-name, args...), > >> > > but I omitted the 'call' to makes it shorter. > >> > > > >> > > The macro definition can contain $(1), $(2), ... which will be replaced > >> > > with arguments from the caller. > >> > > > >> > > Example code: > >> > > > >> > > config cc-option > >> > > string > >> > > macro $(shell $CC -Werror $(1) -c -x c /dev/null -o > >> > > /dev/null) > >> > > >> > I think this syntax for defining a macro shouldn't start with the > >> > "config" keyword, unless you want it to be part of the config symbol > >> > space and land it in .config. And typing it as a "string" while it > >> > actually returns y/n (hence a bool) is also strange. > >> > > >> > What about this instead: > >> > > >> > macro cc-option > >> > bool $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) > >> > > >> > This makes it easier to extend as well if need be. > >> > > >> > > >> > Nicolas > >> > >> I haven't gone over the patchset in detail yet and might be missing > >> something here, but if this is just meant to be a textual shorthand, > >> then why give it a type at all? > > > > It is meant to be like a user-defined function. > > > >> Do you think a simpler syntax like this would make sense? > >> > >> macro cc-option "$(shell $CC -Werror $(1) -c -x c /dev/null -o > >> /dev/null)" > >> > >> That's the most general version, where you could use it for other stuff > >> besides $(shell ...) as well, just to keep parity. > > > > This is not extendable. Let's imagine that you might want to implement > > some kind of conditionals some day e.g.: > > > > macro complex_test > > bool $(shell foo) if LOCKDEP_SUPPORT > > bool y if DEBUG_DRIVER > > bool n > > I still don't quite get the semantics here. How would the behavior > change if the type was changed to say string or int in some or all of > the lines? I admit this wouldn't make sense to have multiple different types. In this example, the bool keyword acts as syntactic sugar more than anything else. > Since the current model is to evaluate $() while the Kconfig files are > being parsed, would this require evaluating Kconfig expressions during > parsing? There is a relatively clean and (somewhat) easy to understand > parsing/evaluation separation at the moment, which I like. Agreed. Let's forget about the conditionals then. Nicolas
Re: [PATCH 08/23] kconfig: add 'macro' keyword to support user-defined function
On Sat, 17 Feb 2018, Ulf Magnusson wrote: > On Sat, Feb 17, 2018 at 3:30 AM, Nicolas Pitre wrote: > > On Sat, 17 Feb 2018, Ulf Magnusson wrote: > > > >> On Fri, Feb 16, 2018 at 02:49:31PM -0500, Nicolas Pitre wrote: > >> > On Sat, 17 Feb 2018, Masahiro Yamada wrote: > >> > > >> > > Now, we got a basic ability to test compiler capability in Kconfig. > >> > > > >> > > config CC_HAS_STACKPROTECTOR > >> > > bool > >> > > default $(shell $CC -Werror -fstack-protector -c -x c > >> > > /dev/null -o /dev/null) > >> > > > >> > > This works, but it is ugly to repeat this long boilerplate. > >> > > > >> > > We want to describe like this: > >> > > > >> > > config CC_HAS_STACKPROTECTOR > >> > > bool > >> > > default $(cc-option -fstack-protector) > >> > > > >> > > It is straight-forward to implement a new function, but I do not like > >> > > to hard-code specialized functions like this. Hence, here is another > >> > > feature to add functions from Kconfig files. > >> > > > >> > > A user-defined function can be defined as a string type symbol with > >> > > a special keyword 'macro'. It can be referenced in the same way as > >> > > built-in functions. This feature was also inspired by Makefile where > >> > > user-defined functions are referenced by $(call func-name, args...), > >> > > but I omitted the 'call' to makes it shorter. > >> > > > >> > > The macro definition can contain $(1), $(2), ... which will be replaced > >> > > with arguments from the caller. > >> > > > >> > > Example code: > >> > > > >> > > config cc-option > >> > > string > >> > > macro $(shell $CC -Werror $(1) -c -x c /dev/null -o > >> > > /dev/null) > >> > > >> > I think this syntax for defining a macro shouldn't start with the > >> > "config" keyword, unless you want it to be part of the config symbol > >> > space and land it in .config. And typing it as a "string" while it > >> > actually returns y/n (hence a bool) is also strange. > >> > > >> > What about this instead: > >> > > >> > macro cc-option > >> > bool $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) > >> > > >> > This makes it easier to extend as well if need be. > >> > > >> > > >> > Nicolas > >> > >> I haven't gone over the patchset in detail yet and might be missing > >> something here, but if this is just meant to be a textual shorthand, > >> then why give it a type at all? > > > > It is meant to be like a user-defined function. > > > >> Do you think a simpler syntax like this would make sense? > >> > >> macro cc-option "$(shell $CC -Werror $(1) -c -x c /dev/null -o > >> /dev/null)" > >> > >> That's the most general version, where you could use it for other stuff > >> besides $(shell ...) as well, just to keep parity. > > > > This is not extendable. Let's imagine that you might want to implement > > some kind of conditionals some day e.g.: > > > > macro complex_test > > bool $(shell foo) if LOCKDEP_SUPPORT > > bool y if DEBUG_DRIVER > > bool n > > I still don't quite get the semantics here. How would the behavior > change if the type was changed to say string or int in some or all of > the lines? I admit this wouldn't make sense to have multiple different types. In this example, the bool keyword acts as syntactic sugar more than anything else. > Since the current model is to evaluate $() while the Kconfig files are > being parsed, would this require evaluating Kconfig expressions during > parsing? There is a relatively clean and (somewhat) easy to understand > parsing/evaluation separation at the moment, which I like. Agreed. Let's forget about the conditionals then. Nicolas
Re: [PATCH v2 0/6] crypto: engine - Permit to enqueue all async requests
On Fri, Feb 16, 2018 at 04:36:56PM +0100, Corentin Labbe wrote: > > As mentionned in the cover letter, all patchs (except documentation one) > should be squashed. > A kbuild robot reported a build error on cryptodev due to this. It's too late now. In future if you want the patches to be squashed then please send them in one email. Thanks, -- Email: Herbert XuHome Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH v2 0/6] crypto: engine - Permit to enqueue all async requests
On Fri, Feb 16, 2018 at 04:36:56PM +0100, Corentin Labbe wrote: > > As mentionned in the cover letter, all patchs (except documentation one) > should be squashed. > A kbuild robot reported a build error on cryptodev due to this. It's too late now. In future if you want the patches to be squashed then please send them in one email. Thanks, -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH 08/23] kconfig: add 'macro' keyword to support user-defined function
On Sat, Feb 17, 2018 at 3:30 AM, Nicolas Pitrewrote: > On Sat, 17 Feb 2018, Ulf Magnusson wrote: > >> On Fri, Feb 16, 2018 at 02:49:31PM -0500, Nicolas Pitre wrote: >> > On Sat, 17 Feb 2018, Masahiro Yamada wrote: >> > >> > > Now, we got a basic ability to test compiler capability in Kconfig. >> > > >> > > config CC_HAS_STACKPROTECTOR >> > > bool >> > > default $(shell $CC -Werror -fstack-protector -c -x c /dev/null >> > > -o /dev/null) >> > > >> > > This works, but it is ugly to repeat this long boilerplate. >> > > >> > > We want to describe like this: >> > > >> > > config CC_HAS_STACKPROTECTOR >> > > bool >> > > default $(cc-option -fstack-protector) >> > > >> > > It is straight-forward to implement a new function, but I do not like >> > > to hard-code specialized functions like this. Hence, here is another >> > > feature to add functions from Kconfig files. >> > > >> > > A user-defined function can be defined as a string type symbol with >> > > a special keyword 'macro'. It can be referenced in the same way as >> > > built-in functions. This feature was also inspired by Makefile where >> > > user-defined functions are referenced by $(call func-name, args...), >> > > but I omitted the 'call' to makes it shorter. >> > > >> > > The macro definition can contain $(1), $(2), ... which will be replaced >> > > with arguments from the caller. >> > > >> > > Example code: >> > > >> > > config cc-option >> > > string >> > > macro $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) >> > >> > I think this syntax for defining a macro shouldn't start with the >> > "config" keyword, unless you want it to be part of the config symbol >> > space and land it in .config. And typing it as a "string" while it >> > actually returns y/n (hence a bool) is also strange. >> > >> > What about this instead: >> > >> > macro cc-option >> > bool $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) >> > >> > This makes it easier to extend as well if need be. >> > >> > >> > Nicolas >> >> I haven't gone over the patchset in detail yet and might be missing >> something here, but if this is just meant to be a textual shorthand, >> then why give it a type at all? > > It is meant to be like a user-defined function. > >> Do you think a simpler syntax like this would make sense? >> >> macro cc-option "$(shell $CC -Werror $(1) -c -x c /dev/null -o >> /dev/null)" >> >> That's the most general version, where you could use it for other stuff >> besides $(shell ...) as well, just to keep parity. > > This is not extendable. Let's imagine that you might want to implement > some kind of conditionals some day e.g.: > > macro complex_test > bool $(shell foo) if LOCKDEP_SUPPORT > bool y if DEBUG_DRIVER > bool n I still don't quite get the semantics here. How would the behavior change if the type was changed to say string or int in some or all of the lines? Since the current model is to evaluate $() while the Kconfig files are being parsed, would this require evaluating Kconfig expressions during parsing? There is a relatively clean and (somewhat) easy to understand parsing/evaluation separation at the moment, which I like. Do you have anything in mind that would be cleaner and simpler to implement in this way compared to using plain symbols? > > There is no real advantage to simplify the macro definition to its > simplest expression, unlike its actual usage. Maybe I'm being grumpy, but this feels like it's adding complexity rather than reducing it. I like the rest of this patchset, because the behavior is easy to understand and fits well with Kconfig's evaluation model: $() is just a kind of preprocessor that runs during parsing and does value substitution based on shell commands, possibly along with some helper macros to avoid repetition. I think we should think hard about whether we actually need anything more than that before complicating Kconfig even further "just in case." If the goal is simplification, then it's bad if we eventually end up with a bigger mess than the Makefiles. > >> Are there any cases where something more advanced than that might be >> warranted (e.g., macros that expand to complete expressions)? > > Maybe not now, but there is no need to close the door on the possibility > either. > > > Nicolas Kconfig has no notion of types for expressions by the way. The simplest way to look at it is that all symbols have a tristate value (which is n for non-bool/tristate symbols) and a string value. Which one gets used depends on the context. In A && B, the tristate values are used, and in A = B the string values are compared. In something like 'default "foo bar"', "foo bar" is actually a constant symbol. If we were to drop the straightforward preprocessor model, then constant symbols would no longer necessarily be constant. I have a feeling that that might turn Kconfig's internals even messier. Constant (and
Re: [PATCH 08/23] kconfig: add 'macro' keyword to support user-defined function
On Sat, Feb 17, 2018 at 3:30 AM, Nicolas Pitre wrote: > On Sat, 17 Feb 2018, Ulf Magnusson wrote: > >> On Fri, Feb 16, 2018 at 02:49:31PM -0500, Nicolas Pitre wrote: >> > On Sat, 17 Feb 2018, Masahiro Yamada wrote: >> > >> > > Now, we got a basic ability to test compiler capability in Kconfig. >> > > >> > > config CC_HAS_STACKPROTECTOR >> > > bool >> > > default $(shell $CC -Werror -fstack-protector -c -x c /dev/null >> > > -o /dev/null) >> > > >> > > This works, but it is ugly to repeat this long boilerplate. >> > > >> > > We want to describe like this: >> > > >> > > config CC_HAS_STACKPROTECTOR >> > > bool >> > > default $(cc-option -fstack-protector) >> > > >> > > It is straight-forward to implement a new function, but I do not like >> > > to hard-code specialized functions like this. Hence, here is another >> > > feature to add functions from Kconfig files. >> > > >> > > A user-defined function can be defined as a string type symbol with >> > > a special keyword 'macro'. It can be referenced in the same way as >> > > built-in functions. This feature was also inspired by Makefile where >> > > user-defined functions are referenced by $(call func-name, args...), >> > > but I omitted the 'call' to makes it shorter. >> > > >> > > The macro definition can contain $(1), $(2), ... which will be replaced >> > > with arguments from the caller. >> > > >> > > Example code: >> > > >> > > config cc-option >> > > string >> > > macro $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) >> > >> > I think this syntax for defining a macro shouldn't start with the >> > "config" keyword, unless you want it to be part of the config symbol >> > space and land it in .config. And typing it as a "string" while it >> > actually returns y/n (hence a bool) is also strange. >> > >> > What about this instead: >> > >> > macro cc-option >> > bool $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) >> > >> > This makes it easier to extend as well if need be. >> > >> > >> > Nicolas >> >> I haven't gone over the patchset in detail yet and might be missing >> something here, but if this is just meant to be a textual shorthand, >> then why give it a type at all? > > It is meant to be like a user-defined function. > >> Do you think a simpler syntax like this would make sense? >> >> macro cc-option "$(shell $CC -Werror $(1) -c -x c /dev/null -o >> /dev/null)" >> >> That's the most general version, where you could use it for other stuff >> besides $(shell ...) as well, just to keep parity. > > This is not extendable. Let's imagine that you might want to implement > some kind of conditionals some day e.g.: > > macro complex_test > bool $(shell foo) if LOCKDEP_SUPPORT > bool y if DEBUG_DRIVER > bool n I still don't quite get the semantics here. How would the behavior change if the type was changed to say string or int in some or all of the lines? Since the current model is to evaluate $() while the Kconfig files are being parsed, would this require evaluating Kconfig expressions during parsing? There is a relatively clean and (somewhat) easy to understand parsing/evaluation separation at the moment, which I like. Do you have anything in mind that would be cleaner and simpler to implement in this way compared to using plain symbols? > > There is no real advantage to simplify the macro definition to its > simplest expression, unlike its actual usage. Maybe I'm being grumpy, but this feels like it's adding complexity rather than reducing it. I like the rest of this patchset, because the behavior is easy to understand and fits well with Kconfig's evaluation model: $() is just a kind of preprocessor that runs during parsing and does value substitution based on shell commands, possibly along with some helper macros to avoid repetition. I think we should think hard about whether we actually need anything more than that before complicating Kconfig even further "just in case." If the goal is simplification, then it's bad if we eventually end up with a bigger mess than the Makefiles. > >> Are there any cases where something more advanced than that might be >> warranted (e.g., macros that expand to complete expressions)? > > Maybe not now, but there is no need to close the door on the possibility > either. > > > Nicolas Kconfig has no notion of types for expressions by the way. The simplest way to look at it is that all symbols have a tristate value (which is n for non-bool/tristate symbols) and a string value. Which one gets used depends on the context. In A && B, the tristate values are used, and in A = B the string values are compared. In something like 'default "foo bar"', "foo bar" is actually a constant symbol. If we were to drop the straightforward preprocessor model, then constant symbols would no longer necessarily be constant. I have a feeling that that might turn Kconfig's internals even messier. Constant (and undefined) symbols
Re: [PATCH 2/3] Documentation: convert trace/ftrace-design.txt to rst format
On Fri, Feb 16, 2018 at 12:36:29PM -0500, Steven Rostedt wrote: > On Fri, 16 Feb 2018 05:49:52 -0700 > Jonathan Corbetwrote: > > > On Thu, 15 Feb 2018 22:57:05 -0500 > > Steven Rostedt wrote: > > > > > This document is out of date, and I rather have it updated before we > > > make it more "available" elsewhere. > > > > Imagine that, an out-of-date doc in the kernel :) > > > > Seriously, though, I'd argue that (1) it's already highly available, and > > (2) it's useful now. And (3) who knows when that update will happen? > > Unless we have reason to believe that a new version is waiting on the > > wings, I don't really see why we would want to delay this work. > > Actually, some of these documents I was thinking of labeling as > "obsolete" or simply removing them. The ftrace-design one is about > how to port ftrace to other architectures, and I already had to correct > people that based their work on it. > > Yeah, I really need to get some time to update them, but like everyone > else, that's just the 90th thing I have to do. > > -- Steve Reading this doc, I think most of information are still useful for undertading the implemeation. So how abount just put a caution at the begining of doc as below defore get updated? http://docservice.askxiong.com/linux-kernel/trace/ftrace-design.html Anyway, I just converted them all. I will send them out. Please comemnt if some of them should be removed. -- Thanks, Changbin Du
Re: [PATCH 2/3] Documentation: convert trace/ftrace-design.txt to rst format
On Fri, Feb 16, 2018 at 12:36:29PM -0500, Steven Rostedt wrote: > On Fri, 16 Feb 2018 05:49:52 -0700 > Jonathan Corbet wrote: > > > On Thu, 15 Feb 2018 22:57:05 -0500 > > Steven Rostedt wrote: > > > > > This document is out of date, and I rather have it updated before we > > > make it more "available" elsewhere. > > > > Imagine that, an out-of-date doc in the kernel :) > > > > Seriously, though, I'd argue that (1) it's already highly available, and > > (2) it's useful now. And (3) who knows when that update will happen? > > Unless we have reason to believe that a new version is waiting on the > > wings, I don't really see why we would want to delay this work. > > Actually, some of these documents I was thinking of labeling as > "obsolete" or simply removing them. The ftrace-design one is about > how to port ftrace to other architectures, and I already had to correct > people that based their work on it. > > Yeah, I really need to get some time to update them, but like everyone > else, that's just the 90th thing I have to do. > > -- Steve Reading this doc, I think most of information are still useful for undertading the implemeation. So how abount just put a caution at the begining of doc as below defore get updated? http://docservice.askxiong.com/linux-kernel/trace/ftrace-design.html Anyway, I just converted them all. I will send them out. Please comemnt if some of them should be removed. -- Thanks, Changbin Du
Re: [PATCH] tools/memory-model: remove rb-dep, smp_read_barrier_depends, and lockless_dereference
On Fri, Feb 16, 2018 at 05:22:55PM -0500, Alan Stern wrote: > Since commit 76ebbe78f739 ("locking/barriers: Add implicit > smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15 > kernel, it has not been necessary to use smp_read_barrier_depends(). > Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill > lockless_dereference()") removed lockless_dereference() from the > kernel. > > Since these primitives are no longer part of the kernel, they do not > belong in the Linux Kernel Memory Consistency Model. This patch > removes them, along with the internal rb-dep relation, and updates the > revelant documentation. > > Signed-off-by: Alan Stern> > --- [...] > Index: usb-4.x/tools/memory-model/linux-kernel.def > === > --- usb-4.x/tools/memory-model.orig/linux-kernel.def > +++ usb-4.x/tools/memory-model/linux-kernel.def > @@ -13,14 +13,12 @@ WRITE_ONCE(X,V) { __store{once}(X,V); } > smp_store_release(X,V) { __store{release}(*X,V); } > smp_load_acquire(X) __load{acquire}(*X) > rcu_assign_pointer(X,V) { __store{release}(X,V); } > -lockless_dereference(X) __load{lderef}(X) > rcu_dereference(X) __load{deref}(X) ^^^ __load{once} > > // Fences > smp_mb() { __fence{mb} ; } > smp_rmb() { __fence{rmb} ; } > smp_wmb() { __fence{wmb} ; } > -smp_read_barrier_depends() { __fence{rb_dep}; } > smp_mb__before_atomic() { __fence{before-atomic} ; } > smp_mb__after_atomic() { __fence{after-atomic} ; } > smp_mb__after_spinlock() { __fence{after-spinlock} ; } > Index: usb-4.x/tools/memory-model/Documentation/cheatsheet.txt > === > --- usb-4.x/tools/memory-model.orig/Documentation/cheatsheet.txt > +++ usb-4.x/tools/memory-model/Documentation/cheatsheet.txt > @@ -6,8 +6,7 @@ > Store, e.g., WRITE_ONCE()Y > Y > Load, e.g., READ_ONCE() Y Y > Y > Unsuccessful RMW operation Y Y > Y > -smp_read_barrier_depends() Y Y Y > -*_dereference() Y Y Y > Y > +rcu_dereference()Y Y Y > Y > Successful *_acquire() R Y Y Y YY > Y > Successful *_release() CY YY W > Y > smp_rmb() Y RY YR Akira's observation about READ_ONCE extends to all (annotated) loads. In fact, it also applies to loads corresponding to unsuccessful RMW operations; consider, for example, the following variation of MP+onceassign+derefonce: C T { y=z; z=0; } P0(int *x, int **y) { WRITE_ONCE(*x, 1); smp_store_release(y, x); } P1(int **y, int *z) { int *r0; int r1; r0 = cmpxchg_relaxed(y, z, z); r1 = READ_ONCE(*r0); } exists (1:r0=x /\ 1:r1=0) The final state is allowed w/o the patch, and forbidden w/ the patch. This also reminds me of 5a8897cc7631fa544d079c443800f4420d1b173f ("locking/atomics/alpha: Add smp_read_barrier_depends() to _release()/_relaxed() atomics") (that we probably want to mention in the commit message). Andrea > >
Re: [PATCH] tools/memory-model: remove rb-dep, smp_read_barrier_depends, and lockless_dereference
On Fri, Feb 16, 2018 at 05:22:55PM -0500, Alan Stern wrote: > Since commit 76ebbe78f739 ("locking/barriers: Add implicit > smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15 > kernel, it has not been necessary to use smp_read_barrier_depends(). > Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill > lockless_dereference()") removed lockless_dereference() from the > kernel. > > Since these primitives are no longer part of the kernel, they do not > belong in the Linux Kernel Memory Consistency Model. This patch > removes them, along with the internal rb-dep relation, and updates the > revelant documentation. > > Signed-off-by: Alan Stern > > --- [...] > Index: usb-4.x/tools/memory-model/linux-kernel.def > === > --- usb-4.x/tools/memory-model.orig/linux-kernel.def > +++ usb-4.x/tools/memory-model/linux-kernel.def > @@ -13,14 +13,12 @@ WRITE_ONCE(X,V) { __store{once}(X,V); } > smp_store_release(X,V) { __store{release}(*X,V); } > smp_load_acquire(X) __load{acquire}(*X) > rcu_assign_pointer(X,V) { __store{release}(X,V); } > -lockless_dereference(X) __load{lderef}(X) > rcu_dereference(X) __load{deref}(X) ^^^ __load{once} > > // Fences > smp_mb() { __fence{mb} ; } > smp_rmb() { __fence{rmb} ; } > smp_wmb() { __fence{wmb} ; } > -smp_read_barrier_depends() { __fence{rb_dep}; } > smp_mb__before_atomic() { __fence{before-atomic} ; } > smp_mb__after_atomic() { __fence{after-atomic} ; } > smp_mb__after_spinlock() { __fence{after-spinlock} ; } > Index: usb-4.x/tools/memory-model/Documentation/cheatsheet.txt > === > --- usb-4.x/tools/memory-model.orig/Documentation/cheatsheet.txt > +++ usb-4.x/tools/memory-model/Documentation/cheatsheet.txt > @@ -6,8 +6,7 @@ > Store, e.g., WRITE_ONCE()Y > Y > Load, e.g., READ_ONCE() Y Y > Y > Unsuccessful RMW operation Y Y > Y > -smp_read_barrier_depends() Y Y Y > -*_dereference() Y Y Y > Y > +rcu_dereference()Y Y Y > Y > Successful *_acquire() R Y Y Y YY > Y > Successful *_release() CY YY W > Y > smp_rmb() Y RY YR Akira's observation about READ_ONCE extends to all (annotated) loads. In fact, it also applies to loads corresponding to unsuccessful RMW operations; consider, for example, the following variation of MP+onceassign+derefonce: C T { y=z; z=0; } P0(int *x, int **y) { WRITE_ONCE(*x, 1); smp_store_release(y, x); } P1(int **y, int *z) { int *r0; int r1; r0 = cmpxchg_relaxed(y, z, z); r1 = READ_ONCE(*r0); } exists (1:r0=x /\ 1:r1=0) The final state is allowed w/o the patch, and forbidden w/ the patch. This also reminds me of 5a8897cc7631fa544d079c443800f4420d1b173f ("locking/atomics/alpha: Add smp_read_barrier_depends() to _release()/_relaxed() atomics") (that we probably want to mention in the commit message). Andrea > >
Re: [PATCH] aoe: document sysfs interface
On 02/16/2018 10:39 AM, Aishwarya Pant wrote: Documentation has been compiled from git commit logs and descriptions in Documentation/aoe/aoe.txt. This should be useful for scripting and tracking changes in the ABI. ... +What: /sys/block/etherd*/netif ... +Description: + (RO) The name of the network interface on the localhost through + which we are communicating with the remote AoE device. I'd recommend changing that to, "network interfaces". Thanks! -- Ed
Re: [PATCH] aoe: document sysfs interface
On 02/16/2018 10:39 AM, Aishwarya Pant wrote: Documentation has been compiled from git commit logs and descriptions in Documentation/aoe/aoe.txt. This should be useful for scripting and tracking changes in the ABI. ... +What: /sys/block/etherd*/netif ... +Description: + (RO) The name of the network interface on the localhost through + which we are communicating with the remote AoE device. I'd recommend changing that to, "network interfaces". Thanks! -- Ed
Re: [PATCH 08/23] kconfig: add 'macro' keyword to support user-defined function
On Sat, 17 Feb 2018, Ulf Magnusson wrote: > On Fri, Feb 16, 2018 at 02:49:31PM -0500, Nicolas Pitre wrote: > > On Sat, 17 Feb 2018, Masahiro Yamada wrote: > > > > > Now, we got a basic ability to test compiler capability in Kconfig. > > > > > > config CC_HAS_STACKPROTECTOR > > > bool > > > default $(shell $CC -Werror -fstack-protector -c -x c /dev/null > > > -o /dev/null) > > > > > > This works, but it is ugly to repeat this long boilerplate. > > > > > > We want to describe like this: > > > > > > config CC_HAS_STACKPROTECTOR > > > bool > > > default $(cc-option -fstack-protector) > > > > > > It is straight-forward to implement a new function, but I do not like > > > to hard-code specialized functions like this. Hence, here is another > > > feature to add functions from Kconfig files. > > > > > > A user-defined function can be defined as a string type symbol with > > > a special keyword 'macro'. It can be referenced in the same way as > > > built-in functions. This feature was also inspired by Makefile where > > > user-defined functions are referenced by $(call func-name, args...), > > > but I omitted the 'call' to makes it shorter. > > > > > > The macro definition can contain $(1), $(2), ... which will be replaced > > > with arguments from the caller. > > > > > > Example code: > > > > > > config cc-option > > > string > > > macro $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) > > > > I think this syntax for defining a macro shouldn't start with the > > "config" keyword, unless you want it to be part of the config symbol > > space and land it in .config. And typing it as a "string" while it > > actually returns y/n (hence a bool) is also strange. > > > > What about this instead: > > > > macro cc-option > > bool $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) > > > > This makes it easier to extend as well if need be. > > > > > > Nicolas > > I haven't gone over the patchset in detail yet and might be missing > something here, but if this is just meant to be a textual shorthand, > then why give it a type at all? It is meant to be like a user-defined function. > Do you think a simpler syntax like this would make sense? > > macro cc-option "$(shell $CC -Werror $(1) -c -x c /dev/null -o > /dev/null)" > > That's the most general version, where you could use it for other stuff > besides $(shell ...) as well, just to keep parity. This is not extendable. Let's imagine that you might want to implement some kind of conditionals some day e.g.: macro complex_test bool $(shell foo) if LOCKDEP_SUPPORT bool y if DEBUG_DRIVER bool n There is no real advantage to simplify the macro definition to its simplest expression, unlike its actual usage. > Are there any cases where something more advanced than that might be > warranted (e.g., macros that expand to complete expressions)? Maybe not now, but there is no need to close the door on the possibility either. Nicolas
Re: [PATCH 08/23] kconfig: add 'macro' keyword to support user-defined function
On Sat, 17 Feb 2018, Ulf Magnusson wrote: > On Fri, Feb 16, 2018 at 02:49:31PM -0500, Nicolas Pitre wrote: > > On Sat, 17 Feb 2018, Masahiro Yamada wrote: > > > > > Now, we got a basic ability to test compiler capability in Kconfig. > > > > > > config CC_HAS_STACKPROTECTOR > > > bool > > > default $(shell $CC -Werror -fstack-protector -c -x c /dev/null > > > -o /dev/null) > > > > > > This works, but it is ugly to repeat this long boilerplate. > > > > > > We want to describe like this: > > > > > > config CC_HAS_STACKPROTECTOR > > > bool > > > default $(cc-option -fstack-protector) > > > > > > It is straight-forward to implement a new function, but I do not like > > > to hard-code specialized functions like this. Hence, here is another > > > feature to add functions from Kconfig files. > > > > > > A user-defined function can be defined as a string type symbol with > > > a special keyword 'macro'. It can be referenced in the same way as > > > built-in functions. This feature was also inspired by Makefile where > > > user-defined functions are referenced by $(call func-name, args...), > > > but I omitted the 'call' to makes it shorter. > > > > > > The macro definition can contain $(1), $(2), ... which will be replaced > > > with arguments from the caller. > > > > > > Example code: > > > > > > config cc-option > > > string > > > macro $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) > > > > I think this syntax for defining a macro shouldn't start with the > > "config" keyword, unless you want it to be part of the config symbol > > space and land it in .config. And typing it as a "string" while it > > actually returns y/n (hence a bool) is also strange. > > > > What about this instead: > > > > macro cc-option > > bool $(shell $CC -Werror $(1) -c -x c /dev/null -o /dev/null) > > > > This makes it easier to extend as well if need be. > > > > > > Nicolas > > I haven't gone over the patchset in detail yet and might be missing > something here, but if this is just meant to be a textual shorthand, > then why give it a type at all? It is meant to be like a user-defined function. > Do you think a simpler syntax like this would make sense? > > macro cc-option "$(shell $CC -Werror $(1) -c -x c /dev/null -o > /dev/null)" > > That's the most general version, where you could use it for other stuff > besides $(shell ...) as well, just to keep parity. This is not extendable. Let's imagine that you might want to implement some kind of conditionals some day e.g.: macro complex_test bool $(shell foo) if LOCKDEP_SUPPORT bool y if DEBUG_DRIVER bool n There is no real advantage to simplify the macro definition to its simplest expression, unlike its actual usage. > Are there any cases where something more advanced than that might be > warranted (e.g., macros that expand to complete expressions)? Maybe not now, but there is no need to close the door on the possibility either. Nicolas
Re: [PATCH v3 08/11] watchdog/hpwdt: Programable Pretimeout NMI
On 02/16/2018 05:56 PM, Jerry Hoemann wrote: On Fri, Feb 16, 2018 at 03:55:06PM -0800, Guenter Roeck wrote: On Fri, Feb 16, 2018 at 04:46:17PM -0700, Jerry Hoemann wrote: On Fri, Feb 16, 2018 at 12:34:40PM -0800, Guenter Roeck wrote: On Thu, Feb 15, 2018 at 04:43:57PM -0700, Jerry Hoemann wrote: ... @@ -98,12 +106,21 @@ static int hpwdt_settimeout(struct watchdog_device *dev, unsigned int val) } #ifdef CONFIG_HPWDT_NMI_DECODING /* { */ +static int hpwdt_set_pretimeout(struct watchdog_device *dev, unsigned int val) +{ + if (val && (val != PRETIMEOUT_SEC)) { Unnecessary ( ) There are several things going on here. I'm not sure which one the above comment is intended. The "Unnecessary" refers to the ( ) around the second part of the expression above. While there may be valid reasons to include extra ( ), I think we can trust the C compiler to get it right here. Okay, wasn't sure what you were getting at here. I trust the C compiler, I don't trust humans. In compound conditionals, I'll add parens so that the meaning is clear. ... and I don't accept patches with excessive ( ) if I catch them, because it confuses me since I start looking for a meaning that isn't there. While a pretimeout NMI isn't required by the HW to be enabled, if enabled the length of pretimeout is fixed by HW. I didn't see anything in the API that would allow us to communicate to the user this "feature." timeout at leasst has both min_timeout and max_timeout, but I didn't see similar for pretimeout. I also don't think its reasonable to fail here if the requested value is not 9 as the user really has no way of knowing what the valid range of pretimeout values are. So I accept, any non-zero value for pretimeout, but then set pretimeout to be 9. But at the same time, I don't like to silently change a human request w/o at least warning. Sorry, I lost you here. I wasn't sure to what you were objecting to. I thought you might not have understood why I was converting non-zero values of "pretimeout" to 9. Was trying to explain the reasoning. > A problem I see with the watchdog API is that users don't know what is an acceptable range of values for pretimeout. For HPE proliant systems, one cannot just choose an arbitrary value for pretimeout. I don't see a reasonable way that a user can determine the valid range for pretimeout for HPE systems given our hardware restrictions. I fully understand this, and I did not object to it. Watchdog drivers may and are expected to adjust timeouts and pretimeouts to match hardware constraints, and user space programs are expected to check the actual values after setting timeouts. That is nothing unexpected or special. The actual timeout can be a value smaller than 9 seconds. Minimum is 1 second. What happens if the user configures a timeout of less than 9 seconds as well as a pretimeout ? Will it fire immediately ? The architecture is silent on this issue. My experience with this is that if timeout < 9 seconds, the NMI is not issued. System resets when the timeout expires. This could be implementation dependent. Note, this is not a new issue. Bad argument. Not sure exactly to what your objections are. I'll point out that: 1) hpwdt has been using pretimeout NMI for watchdog for > 10 years. 2) For 8 years, its been possible to have a timeout < 9 seconds. 3) AFAIK this hasn't proven to be a big issue. 4) I have real questions as to how (or if) to address the issue. and I don't see the problem with returning EINVAL, ie rejecting the pretimeout, if the selected timeout value is <= 9 seconds. So the review found a problem (or maybe inconsistency), and you refuse to fix it because you don't see the fix as part of this patch set, even though it would literally require one or two lines of code. Hmm. I am perfectly willing to discuss the problem, but I don't think it is a requirement for this patch set. Well, I think it is. But see below. I thought about setting the min timeout to ten seconds to avoid this situation. You could reject reject request to set the pretimeout to a value <= the timeout. I think you mis-communicated here. I think you understand what I mean: reject setting a a pretimeout if the timeout is set to be <= 9 seconds. It is perfectly fine to have a timeout of 30 seconds with the pretimeout of 9 seconds. I haven't dug into the various user level clients of watchdog so I'm not sure what the impact of making this change would be to them. + dev_info(dev->parent, "Setting pretimeout to %d\n", PRETIMEOUT_SEC); Please no ongoing logging noise. This can easily be abused to clog the kernel log. Good point. I will look at WARN_ONCE or something similar. A traceback if someone sets a bad timeout ? That would be even worse. I am thinking something more in line with setting a static variable if the message had already been printed and not reprinting it. That is ok, and different to
Re: [PATCH v3 08/11] watchdog/hpwdt: Programable Pretimeout NMI
On 02/16/2018 05:56 PM, Jerry Hoemann wrote: On Fri, Feb 16, 2018 at 03:55:06PM -0800, Guenter Roeck wrote: On Fri, Feb 16, 2018 at 04:46:17PM -0700, Jerry Hoemann wrote: On Fri, Feb 16, 2018 at 12:34:40PM -0800, Guenter Roeck wrote: On Thu, Feb 15, 2018 at 04:43:57PM -0700, Jerry Hoemann wrote: ... @@ -98,12 +106,21 @@ static int hpwdt_settimeout(struct watchdog_device *dev, unsigned int val) } #ifdef CONFIG_HPWDT_NMI_DECODING /* { */ +static int hpwdt_set_pretimeout(struct watchdog_device *dev, unsigned int val) +{ + if (val && (val != PRETIMEOUT_SEC)) { Unnecessary ( ) There are several things going on here. I'm not sure which one the above comment is intended. The "Unnecessary" refers to the ( ) around the second part of the expression above. While there may be valid reasons to include extra ( ), I think we can trust the C compiler to get it right here. Okay, wasn't sure what you were getting at here. I trust the C compiler, I don't trust humans. In compound conditionals, I'll add parens so that the meaning is clear. ... and I don't accept patches with excessive ( ) if I catch them, because it confuses me since I start looking for a meaning that isn't there. While a pretimeout NMI isn't required by the HW to be enabled, if enabled the length of pretimeout is fixed by HW. I didn't see anything in the API that would allow us to communicate to the user this "feature." timeout at leasst has both min_timeout and max_timeout, but I didn't see similar for pretimeout. I also don't think its reasonable to fail here if the requested value is not 9 as the user really has no way of knowing what the valid range of pretimeout values are. So I accept, any non-zero value for pretimeout, but then set pretimeout to be 9. But at the same time, I don't like to silently change a human request w/o at least warning. Sorry, I lost you here. I wasn't sure to what you were objecting to. I thought you might not have understood why I was converting non-zero values of "pretimeout" to 9. Was trying to explain the reasoning. > A problem I see with the watchdog API is that users don't know what is an acceptable range of values for pretimeout. For HPE proliant systems, one cannot just choose an arbitrary value for pretimeout. I don't see a reasonable way that a user can determine the valid range for pretimeout for HPE systems given our hardware restrictions. I fully understand this, and I did not object to it. Watchdog drivers may and are expected to adjust timeouts and pretimeouts to match hardware constraints, and user space programs are expected to check the actual values after setting timeouts. That is nothing unexpected or special. The actual timeout can be a value smaller than 9 seconds. Minimum is 1 second. What happens if the user configures a timeout of less than 9 seconds as well as a pretimeout ? Will it fire immediately ? The architecture is silent on this issue. My experience with this is that if timeout < 9 seconds, the NMI is not issued. System resets when the timeout expires. This could be implementation dependent. Note, this is not a new issue. Bad argument. Not sure exactly to what your objections are. I'll point out that: 1) hpwdt has been using pretimeout NMI for watchdog for > 10 years. 2) For 8 years, its been possible to have a timeout < 9 seconds. 3) AFAIK this hasn't proven to be a big issue. 4) I have real questions as to how (or if) to address the issue. and I don't see the problem with returning EINVAL, ie rejecting the pretimeout, if the selected timeout value is <= 9 seconds. So the review found a problem (or maybe inconsistency), and you refuse to fix it because you don't see the fix as part of this patch set, even though it would literally require one or two lines of code. Hmm. I am perfectly willing to discuss the problem, but I don't think it is a requirement for this patch set. Well, I think it is. But see below. I thought about setting the min timeout to ten seconds to avoid this situation. You could reject reject request to set the pretimeout to a value <= the timeout. I think you mis-communicated here. I think you understand what I mean: reject setting a a pretimeout if the timeout is set to be <= 9 seconds. It is perfectly fine to have a timeout of 30 seconds with the pretimeout of 9 seconds. I haven't dug into the various user level clients of watchdog so I'm not sure what the impact of making this change would be to them. + dev_info(dev->parent, "Setting pretimeout to %d\n", PRETIMEOUT_SEC); Please no ongoing logging noise. This can easily be abused to clog the kernel log. Good point. I will look at WARN_ONCE or something similar. A traceback if someone sets a bad timeout ? That would be even worse. I am thinking something more in line with setting a static variable if the message had already been printed and not reprinting it. That is ok, and different to
[PATCH 4/4] Staging: ks7010: hostif: Fix multiple use of arguments in ps_confirm_wait_inc() macro.
Use GCC extensions to prevent macro arguments from accidentally being evaluated multiple times when the macro is called. Signed-off-by: Quytelda Kahja--- drivers/staging/ks7010/ks_hostif.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/staging/ks7010/ks_hostif.c b/drivers/staging/ks7010/ks_hostif.c index 30c9592b3a00..92035e8ac843 100644 --- a/drivers/staging/ks7010/ks_hostif.c +++ b/drivers/staging/ks7010/ks_hostif.c @@ -1306,11 +1306,10 @@ int hostif_data_request(struct ks_wlan_private *priv, struct sk_buff *skb) return ret; } -#define ps_confirm_wait_inc(priv) \ - do { \ - if (atomic_read(>psstatus.status) > PS_ACTIVE_SET) \ - atomic_inc(>psstatus.confirm_wait);\ - } while (0) +#define ps_confirm_wait_inc(priv) \ + ({ typeof(priv) priv_ = (priv); \ + if (atomic_read(_->psstatus.status) > PS_ACTIVE_SET) \ + atomic_inc(_->psstatus.confirm_wait); }) static void hostif_mib_get_request(struct ks_wlan_private *priv, -- 2.16.1
[PATCH 4/4] Staging: ks7010: hostif: Fix multiple use of arguments in ps_confirm_wait_inc() macro.
Use GCC extensions to prevent macro arguments from accidentally being evaluated multiple times when the macro is called. Signed-off-by: Quytelda Kahja --- drivers/staging/ks7010/ks_hostif.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/staging/ks7010/ks_hostif.c b/drivers/staging/ks7010/ks_hostif.c index 30c9592b3a00..92035e8ac843 100644 --- a/drivers/staging/ks7010/ks_hostif.c +++ b/drivers/staging/ks7010/ks_hostif.c @@ -1306,11 +1306,10 @@ int hostif_data_request(struct ks_wlan_private *priv, struct sk_buff *skb) return ret; } -#define ps_confirm_wait_inc(priv) \ - do { \ - if (atomic_read(>psstatus.status) > PS_ACTIVE_SET) \ - atomic_inc(>psstatus.confirm_wait);\ - } while (0) +#define ps_confirm_wait_inc(priv) \ + ({ typeof(priv) priv_ = (priv); \ + if (atomic_read(_->psstatus.status) > PS_ACTIVE_SET) \ + atomic_inc(_->psstatus.confirm_wait); }) static void hostif_mib_get_request(struct ks_wlan_private *priv, -- 2.16.1
[PATCH 3/4] Staging: ks7010: hostif: Fix multiple use of arguments in rate and event masking macros.
Use GCC extensions to prevent macro arguments from accidentally being evaluated multiple times when the macro is called. Signed-off-by: Quytelda Kahja--- drivers/staging/ks7010/ks_hostif.h | 74 +- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/drivers/staging/ks7010/ks_hostif.h b/drivers/staging/ks7010/ks_hostif.h index 5bae8d468e23..750ac86cee77 100644 --- a/drivers/staging/ks7010/ks_hostif.h +++ b/drivers/staging/ks7010/ks_hostif.h @@ -599,19 +599,39 @@ struct hostif_mic_failure_confirm_t { #define TX_RATE_48M(uint8_t)(480 / 5) #define TX_RATE_54M(uint8_t)(540 / 5) -#define IS_11B_RATE(A) (((A & RATE_MASK) == TX_RATE_1M) || ((A & RATE_MASK) == TX_RATE_2M) || \ - ((A & RATE_MASK) == TX_RATE_5M) || ((A & RATE_MASK) == TX_RATE_11M)) - -#define IS_OFDM_RATE(A) (((A & RATE_MASK) == TX_RATE_6M) || ((A & RATE_MASK) == TX_RATE_12M) || \ -((A & RATE_MASK) == TX_RATE_24M) || ((A & RATE_MASK) == TX_RATE_9M) || \ -((A & RATE_MASK) == TX_RATE_18M) || ((A & RATE_MASK) == TX_RATE_36M) || \ -((A & RATE_MASK) == TX_RATE_48M) || ((A & RATE_MASK) == TX_RATE_54M)) - -#define IS_11BG_RATE(A) (IS_11B_RATE(A) || IS_OFDM_RATE(A)) - -#define IS_OFDM_EXT_RATE(A) (((A & RATE_MASK) == TX_RATE_9M) || ((A & RATE_MASK) == TX_RATE_18M) || \ -((A & RATE_MASK) == TX_RATE_36M) || ((A & RATE_MASK) == TX_RATE_48M) || \ -((A & RATE_MASK) == TX_RATE_54M)) +#define IS_11B_RATE(A) \ + ({ \ + typeof(A) A_ = (A); \ + ((A_ & RATE_MASK) == TX_RATE_1M) || \ + ((A_ & RATE_MASK) == TX_RATE_2M) || \ + ((A_ & RATE_MASK) == TX_RATE_5M) || \ + ((A_ & RATE_MASK) == TX_RATE_11M); }) + +#define IS_OFDM_RATE(A) \ + ({ \ + typeof(A) A_ = (A); \ + ((A_ & RATE_MASK) == TX_RATE_6M) || \ + ((A_ & RATE_MASK) == TX_RATE_12M) ||\ + ((A_ & RATE_MASK) == TX_RATE_24M) ||\ + ((A_ & RATE_MASK) == TX_RATE_9M) || \ + ((A_ & RATE_MASK) == TX_RATE_18M) ||\ + ((A_ & RATE_MASK) == TX_RATE_36M) ||\ + ((A_ & RATE_MASK) == TX_RATE_48M) ||\ + ((A_ & RATE_MASK) == TX_RATE_54M); }) + +#define IS_11BG_RATE(A)\ + ({ \ + typeof(A) A_ = (A); \ + IS_11B_RATE(A_) || IS_OFDM_RATE(A_); }) + +#define IS_OFDM_EXT_RATE(A)\ + ({ \ + typeof(A) A_ = (A); \ + ((A_ & RATE_MASK) == TX_RATE_9M) || \ + ((A_ & RATE_MASK) == TX_RATE_18M) ||\ + ((A_ & RATE_MASK) == TX_RATE_36M) ||\ + ((A_ & RATE_MASK) == TX_RATE_48M) ||\ + ((A_ & RATE_MASK) == TX_RATE_54M); }) enum connect_status_type { CONNECT_STATUS, @@ -633,17 +653,23 @@ enum multicast_filter_type { /* macro function */ #define HIF_EVENT_MASK 0xE800 -#define IS_HIF_IND(_EVENT) ((_EVENT & HIF_EVENT_MASK) == 0xE800 && \ -((_EVENT & ~HIF_EVENT_MASK) == 0x0001 || \ -(_EVENT & ~HIF_EVENT_MASK) == 0x0006 || \ -(_EVENT & ~HIF_EVENT_MASK) == 0x000C || \ -(_EVENT & ~HIF_EVENT_MASK) == 0x0011 || \ -(_EVENT & ~HIF_EVENT_MASK) == 0x0012)) - -#define IS_HIF_CONF(_EVENT) ((_EVENT & HIF_EVENT_MASK) == 0xE800 && \ -(_EVENT & ~HIF_EVENT_MASK) > 0x && \ -(_EVENT & ~HIF_EVENT_MASK) < 0x0012 && \ -!IS_HIF_IND(_EVENT)) +#define IS_HIF_IND(_EVENT) \ + ({\ + typeof(_EVENT) EVENT_ = (_EVENT); \ + (EVENT_ & HIF_EVENT_MASK) == 0xE800 && \ + ((EVENT_ & ~HIF_EVENT_MASK) == 0x0001 || \ +(EVENT_ & ~HIF_EVENT_MASK) ==
[PATCH 3/4] Staging: ks7010: hostif: Fix multiple use of arguments in rate and event masking macros.
Use GCC extensions to prevent macro arguments from accidentally being evaluated multiple times when the macro is called. Signed-off-by: Quytelda Kahja --- drivers/staging/ks7010/ks_hostif.h | 74 +- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/drivers/staging/ks7010/ks_hostif.h b/drivers/staging/ks7010/ks_hostif.h index 5bae8d468e23..750ac86cee77 100644 --- a/drivers/staging/ks7010/ks_hostif.h +++ b/drivers/staging/ks7010/ks_hostif.h @@ -599,19 +599,39 @@ struct hostif_mic_failure_confirm_t { #define TX_RATE_48M(uint8_t)(480 / 5) #define TX_RATE_54M(uint8_t)(540 / 5) -#define IS_11B_RATE(A) (((A & RATE_MASK) == TX_RATE_1M) || ((A & RATE_MASK) == TX_RATE_2M) || \ - ((A & RATE_MASK) == TX_RATE_5M) || ((A & RATE_MASK) == TX_RATE_11M)) - -#define IS_OFDM_RATE(A) (((A & RATE_MASK) == TX_RATE_6M) || ((A & RATE_MASK) == TX_RATE_12M) || \ -((A & RATE_MASK) == TX_RATE_24M) || ((A & RATE_MASK) == TX_RATE_9M) || \ -((A & RATE_MASK) == TX_RATE_18M) || ((A & RATE_MASK) == TX_RATE_36M) || \ -((A & RATE_MASK) == TX_RATE_48M) || ((A & RATE_MASK) == TX_RATE_54M)) - -#define IS_11BG_RATE(A) (IS_11B_RATE(A) || IS_OFDM_RATE(A)) - -#define IS_OFDM_EXT_RATE(A) (((A & RATE_MASK) == TX_RATE_9M) || ((A & RATE_MASK) == TX_RATE_18M) || \ -((A & RATE_MASK) == TX_RATE_36M) || ((A & RATE_MASK) == TX_RATE_48M) || \ -((A & RATE_MASK) == TX_RATE_54M)) +#define IS_11B_RATE(A) \ + ({ \ + typeof(A) A_ = (A); \ + ((A_ & RATE_MASK) == TX_RATE_1M) || \ + ((A_ & RATE_MASK) == TX_RATE_2M) || \ + ((A_ & RATE_MASK) == TX_RATE_5M) || \ + ((A_ & RATE_MASK) == TX_RATE_11M); }) + +#define IS_OFDM_RATE(A) \ + ({ \ + typeof(A) A_ = (A); \ + ((A_ & RATE_MASK) == TX_RATE_6M) || \ + ((A_ & RATE_MASK) == TX_RATE_12M) ||\ + ((A_ & RATE_MASK) == TX_RATE_24M) ||\ + ((A_ & RATE_MASK) == TX_RATE_9M) || \ + ((A_ & RATE_MASK) == TX_RATE_18M) ||\ + ((A_ & RATE_MASK) == TX_RATE_36M) ||\ + ((A_ & RATE_MASK) == TX_RATE_48M) ||\ + ((A_ & RATE_MASK) == TX_RATE_54M); }) + +#define IS_11BG_RATE(A)\ + ({ \ + typeof(A) A_ = (A); \ + IS_11B_RATE(A_) || IS_OFDM_RATE(A_); }) + +#define IS_OFDM_EXT_RATE(A)\ + ({ \ + typeof(A) A_ = (A); \ + ((A_ & RATE_MASK) == TX_RATE_9M) || \ + ((A_ & RATE_MASK) == TX_RATE_18M) ||\ + ((A_ & RATE_MASK) == TX_RATE_36M) ||\ + ((A_ & RATE_MASK) == TX_RATE_48M) ||\ + ((A_ & RATE_MASK) == TX_RATE_54M); }) enum connect_status_type { CONNECT_STATUS, @@ -633,17 +653,23 @@ enum multicast_filter_type { /* macro function */ #define HIF_EVENT_MASK 0xE800 -#define IS_HIF_IND(_EVENT) ((_EVENT & HIF_EVENT_MASK) == 0xE800 && \ -((_EVENT & ~HIF_EVENT_MASK) == 0x0001 || \ -(_EVENT & ~HIF_EVENT_MASK) == 0x0006 || \ -(_EVENT & ~HIF_EVENT_MASK) == 0x000C || \ -(_EVENT & ~HIF_EVENT_MASK) == 0x0011 || \ -(_EVENT & ~HIF_EVENT_MASK) == 0x0012)) - -#define IS_HIF_CONF(_EVENT) ((_EVENT & HIF_EVENT_MASK) == 0xE800 && \ -(_EVENT & ~HIF_EVENT_MASK) > 0x && \ -(_EVENT & ~HIF_EVENT_MASK) < 0x0012 && \ -!IS_HIF_IND(_EVENT)) +#define IS_HIF_IND(_EVENT) \ + ({\ + typeof(_EVENT) EVENT_ = (_EVENT); \ + (EVENT_ & HIF_EVENT_MASK) == 0xE800 && \ + ((EVENT_ & ~HIF_EVENT_MASK) == 0x0001 || \ +(EVENT_ & ~HIF_EVENT_MASK) == 0x0006 || \ +
[PATCH 2/4] Staging: ks7010: hostif: Fix multiple use of arguments in SME queue macros.
Use GCC extensions to prevent macro arguments from accidentally being evaluated multiple times when the macro is called. Signed-off-by: Quytelda Kahja--- drivers/staging/ks7010/ks_hostif.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/staging/ks7010/ks_hostif.c b/drivers/staging/ks7010/ks_hostif.c index 975dbbb3abd0..30c9592b3a00 100644 --- a/drivers/staging/ks7010/ks_hostif.c +++ b/drivers/staging/ks7010/ks_hostif.c @@ -22,12 +22,19 @@ #include /* New driver API */ /* macro */ -#define inc_smeqhead(priv) \ - (priv->sme_i.qhead = (priv->sme_i.qhead + 1) % SME_EVENT_BUFF_SIZE) -#define inc_smeqtail(priv) \ - (priv->sme_i.qtail = (priv->sme_i.qtail + 1) % SME_EVENT_BUFF_SIZE) -#define cnt_smeqbody(priv) \ - (((priv->sme_i.qtail + SME_EVENT_BUFF_SIZE) - (priv->sme_i.qhead)) % SME_EVENT_BUFF_SIZE) +#define inc_smeqhead(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qhead = priv_->sme_i.qhead + 1; \ + priv_->sme_i.qhead = next_qhead % SME_EVENT_BUFF_SIZE; }) +#define inc_smeqtail(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qtail = priv_->sme_i.qtail + 1; \ + priv_->sme_i.qtail = next_qtail % SME_EVENT_BUFF_SIZE; }) +#define cnt_smeqbody(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int left_cnt = \ + priv_->sme_i.qtail + SME_EVENT_BUFF_SIZE; \ +(left_cnt - (priv_->sme_i.qhead)) % SME_EVENT_BUFF_SIZE; }) #define KS_WLAN_MEM_FLAG (GFP_ATOMIC) -- 2.16.1
[PATCH 2/4] Staging: ks7010: hostif: Fix multiple use of arguments in SME queue macros.
Use GCC extensions to prevent macro arguments from accidentally being evaluated multiple times when the macro is called. Signed-off-by: Quytelda Kahja --- drivers/staging/ks7010/ks_hostif.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/staging/ks7010/ks_hostif.c b/drivers/staging/ks7010/ks_hostif.c index 975dbbb3abd0..30c9592b3a00 100644 --- a/drivers/staging/ks7010/ks_hostif.c +++ b/drivers/staging/ks7010/ks_hostif.c @@ -22,12 +22,19 @@ #include /* New driver API */ /* macro */ -#define inc_smeqhead(priv) \ - (priv->sme_i.qhead = (priv->sme_i.qhead + 1) % SME_EVENT_BUFF_SIZE) -#define inc_smeqtail(priv) \ - (priv->sme_i.qtail = (priv->sme_i.qtail + 1) % SME_EVENT_BUFF_SIZE) -#define cnt_smeqbody(priv) \ - (((priv->sme_i.qtail + SME_EVENT_BUFF_SIZE) - (priv->sme_i.qhead)) % SME_EVENT_BUFF_SIZE) +#define inc_smeqhead(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qhead = priv_->sme_i.qhead + 1; \ + priv_->sme_i.qhead = next_qhead % SME_EVENT_BUFF_SIZE; }) +#define inc_smeqtail(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qtail = priv_->sme_i.qtail + 1; \ + priv_->sme_i.qtail = next_qtail % SME_EVENT_BUFF_SIZE; }) +#define cnt_smeqbody(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int left_cnt = \ + priv_->sme_i.qtail + SME_EVENT_BUFF_SIZE; \ +(left_cnt - (priv_->sme_i.qhead)) % SME_EVENT_BUFF_SIZE; }) #define KS_WLAN_MEM_FLAG (GFP_ATOMIC) -- 2.16.1
[PATCH 1/4] Staging: ks7010: sdio: Fix multiple use of arguments in RX/TX queue macros.
Use GCC extensions to prevent macro arguments from accidentally being evaluated multiple times when the macro is called. Signed-off-by: Quytelda Kahja--- drivers/staging/ks7010/ks7010_sdio.c | 40 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/staging/ks7010/ks7010_sdio.c b/drivers/staging/ks7010/ks7010_sdio.c index 8cfdff198334..ffa7e2382353 100644 --- a/drivers/staging/ks7010/ks7010_sdio.c +++ b/drivers/staging/ks7010/ks7010_sdio.c @@ -32,19 +32,33 @@ static const struct sdio_device_id ks7010_sdio_ids[] = { }; MODULE_DEVICE_TABLE(sdio, ks7010_sdio_ids); -#define inc_txqhead(priv) \ - (priv->tx_dev.qhead = (priv->tx_dev.qhead + 1) % TX_DEVICE_BUFF_SIZE) -#define inc_txqtail(priv) \ - (priv->tx_dev.qtail = (priv->tx_dev.qtail + 1) % TX_DEVICE_BUFF_SIZE) -#define cnt_txqbody(priv) \ - (((priv->tx_dev.qtail + TX_DEVICE_BUFF_SIZE) - (priv->tx_dev.qhead)) % TX_DEVICE_BUFF_SIZE) - -#define inc_rxqhead(priv) \ - (priv->rx_dev.qhead = (priv->rx_dev.qhead + 1) % RX_DEVICE_BUFF_SIZE) -#define inc_rxqtail(priv) \ - (priv->rx_dev.qtail = (priv->rx_dev.qtail + 1) % RX_DEVICE_BUFF_SIZE) -#define cnt_rxqbody(priv) \ - (((priv->rx_dev.qtail + RX_DEVICE_BUFF_SIZE) - (priv->rx_dev.qhead)) % RX_DEVICE_BUFF_SIZE) +#define inc_txqhead(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qhead = priv_->tx_dev.qhead + 1; \ + priv_->tx_dev.qhead = next_qhead % TX_DEVICE_BUFF_SIZE; }) +#define inc_txqtail(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qtail = priv_->tx_dev.qtail + 1; \ + priv_->tx_dev.qtail = next_qtail % TX_DEVICE_BUFF_SIZE; }) +#define cnt_txqbody(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int left_cnt = \ + priv_->tx_dev.qtail + TX_DEVICE_BUFF_SIZE; \ + (left_cnt - (priv_->tx_dev.qhead)) % TX_DEVICE_BUFF_SIZE; }) + +#define inc_rxqhead(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qhead = priv_->rx_dev.qhead + 1; \ + priv_->rx_dev.qhead = next_qhead % RX_DEVICE_BUFF_SIZE; }) +#define inc_rxqtail(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qtail = priv_->rx_dev.qtail + 1; \ + priv_->rx_dev.qtail = next_qtail % RX_DEVICE_BUFF_SIZE; }) +#define cnt_rxqbody(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int left_cnt = \ + priv_->rx_dev.qtail + RX_DEVICE_BUFF_SIZE; \ + (left_cnt - (priv_->rx_dev.qhead)) % RX_DEVICE_BUFF_SIZE; }) /* Read single byte from device address into byte (CMD52) */ static int ks7010_sdio_readb(struct ks_wlan_private *priv, unsigned int address, -- 2.16.1
[PATCH 1/4] Staging: ks7010: sdio: Fix multiple use of arguments in RX/TX queue macros.
Use GCC extensions to prevent macro arguments from accidentally being evaluated multiple times when the macro is called. Signed-off-by: Quytelda Kahja --- drivers/staging/ks7010/ks7010_sdio.c | 40 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/staging/ks7010/ks7010_sdio.c b/drivers/staging/ks7010/ks7010_sdio.c index 8cfdff198334..ffa7e2382353 100644 --- a/drivers/staging/ks7010/ks7010_sdio.c +++ b/drivers/staging/ks7010/ks7010_sdio.c @@ -32,19 +32,33 @@ static const struct sdio_device_id ks7010_sdio_ids[] = { }; MODULE_DEVICE_TABLE(sdio, ks7010_sdio_ids); -#define inc_txqhead(priv) \ - (priv->tx_dev.qhead = (priv->tx_dev.qhead + 1) % TX_DEVICE_BUFF_SIZE) -#define inc_txqtail(priv) \ - (priv->tx_dev.qtail = (priv->tx_dev.qtail + 1) % TX_DEVICE_BUFF_SIZE) -#define cnt_txqbody(priv) \ - (((priv->tx_dev.qtail + TX_DEVICE_BUFF_SIZE) - (priv->tx_dev.qhead)) % TX_DEVICE_BUFF_SIZE) - -#define inc_rxqhead(priv) \ - (priv->rx_dev.qhead = (priv->rx_dev.qhead + 1) % RX_DEVICE_BUFF_SIZE) -#define inc_rxqtail(priv) \ - (priv->rx_dev.qtail = (priv->rx_dev.qtail + 1) % RX_DEVICE_BUFF_SIZE) -#define cnt_rxqbody(priv) \ - (((priv->rx_dev.qtail + RX_DEVICE_BUFF_SIZE) - (priv->rx_dev.qhead)) % RX_DEVICE_BUFF_SIZE) +#define inc_txqhead(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qhead = priv_->tx_dev.qhead + 1; \ + priv_->tx_dev.qhead = next_qhead % TX_DEVICE_BUFF_SIZE; }) +#define inc_txqtail(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qtail = priv_->tx_dev.qtail + 1; \ + priv_->tx_dev.qtail = next_qtail % TX_DEVICE_BUFF_SIZE; }) +#define cnt_txqbody(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int left_cnt = \ + priv_->tx_dev.qtail + TX_DEVICE_BUFF_SIZE; \ + (left_cnt - (priv_->tx_dev.qhead)) % TX_DEVICE_BUFF_SIZE; }) + +#define inc_rxqhead(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qhead = priv_->rx_dev.qhead + 1; \ + priv_->rx_dev.qhead = next_qhead % RX_DEVICE_BUFF_SIZE; }) +#define inc_rxqtail(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int next_qtail = priv_->rx_dev.qtail + 1; \ + priv_->rx_dev.qtail = next_qtail % RX_DEVICE_BUFF_SIZE; }) +#define cnt_rxqbody(priv) \ + ({ typeof(priv) priv_ = (priv); \ + unsigned int left_cnt = \ + priv_->rx_dev.qtail + RX_DEVICE_BUFF_SIZE; \ + (left_cnt - (priv_->rx_dev.qhead)) % RX_DEVICE_BUFF_SIZE; }) /* Read single byte from device address into byte (CMD52) */ static int ks7010_sdio_readb(struct ks_wlan_private *priv, unsigned int address, -- 2.16.1
Re: [PATCH v3 01/15] Documentation: add newcx initramfs format description
On February 16, 2018 1:47:35 PM PST, Victor Kamenskywrote: > > >On Fri, 16 Feb 2018, Rob Landley wrote: > >> >> On 02/16/2018 02:59 PM, H. Peter Anvin wrote: >>> On 02/16/18 12:33, Taras Kondratiuk wrote: Many of the Linux security/integrity features are dependent on file metadata, stored as extended attributes (xattrs), for making >decisions. These features need to be initialized during initcall and enabled >as early as possible for complete security coverage. Initramfs (tmpfs) supports xattrs, but newc CPIO archive format >does not support including them into the archive. This patch describes "extended" newc format (newcx) that is based >on newc and has following changes: - extended attributes support - increased size of filesize to support files >4GB - increased mtime field size to have 64 bits of seconds and added a field for nanoseconds - removed unused checksum field >>> >>> If you are going to implement a new, non-backwards-compatible >format, >>> you shouldn't replicate the mistakes of the current format. >Specifically: >> >> So rather than make minimal changes to the existing format and >continue to >> support the existing format (sharing as much code as possible), you >recommend >> gratuitous aesthetic changes? >> >>> 1. The use of ASCII-encoded fixed-length numbers is an idiotic >legacy >>> from an era before there were any portable way of dealing with >numbers >>> with prespecified endianness. >> >> It lets encoders and decoders easily share code with the existing >cpio format, >> which we still intend to be able to read and write. >> >>> If you are going to use ASCII, make them >>> delimited so that they don't have fixed limits, or just use binary. >> >> When it's gzipped this accomplishes what? (Other than being >gratuitously >> different from the previous iteration?) >> >>> The cpio header isn't fixed size, so that argument goes away, in >fact >>> the only way to determine the end of the header is to scan forward. >>> >>> 2. Alignment sensitivity! Because there is no header length >>> information, the above scan tells you where the header ends, but >there >>> is padding before the data, and the size of that padding is only >defined >>> by alignment. >> >> Again, these are minimal changes to the existing cpio format. You're >complaining >> about _cpio_, and that the new stuff isn't _different_ enough from >it. >> >>> 3. Inband encoding of EOF: if you actually have a filename >"TRAILER!!!" >>> you have problems. >> >> Been there, done that: >> >> http://lkml.iu.edu/hypermail/linux/kernel/1801.3/01791.html >> >>> But first, before you define a whole new format for which no tools >exist >>> (you will have to work with the maintainers of the GNU tools to add >>> support) >> >> No, he's been working with the maintainer of toybox to add support >(for about a >> year now), which gets him the Android command line. And the kernel >has its own >> built-in tool to generate cpio images anyway. >> >> Why would anyone care what the GNU project thinks? > >In our internal use of this patch series we do use gnu cpio >to create initramfs.cpio. > >And reference to gnu cpio patch that supports newcx format is >posted in description for this serieis: > >https://raw.githubusercontent.com/victorkamensky/initramfs-xattrs-poky/rocko/meta/recipes-extended/cpio/cpio-2.12/cpio-xattrs.patch > >Whether GNU cpio maintainers will accept it is different matter. >We will try, but we need to start somewhere and agree on >new format first. > >Thanks, >Victor > >>> you should see how complex it would be to support the POSIX >>> tar/pax format, >> >> That argument was had (at length) when initramfs went in over a >decade ago. >> There are links in >Documentation/filesystems/ramfs-rootfs-initramfs.txt to the >> mailing list entries about it. >> >>> which already has all the features you are seeking, and >>> by now is well-supported. >> >> So... tar wasn't well-supported 15 years ago? (Hasn't the kernel >source always >> been distributed via tarball back since 0.0.1?) >> >> You're suggesting having a whole second codepath that shares no code >with the >> existing cpio extractor. Are you suggesting abandoning support for >the existing >> initramfs.cpio.gz file format? >> >> Rob >> Introducing new, incompatible data formats is an inherently *very* costly operation; unfortunately many engineers don't seem to have a good grip of just *how* expensive it is (see "silly embedded nonsense hacks", "too little, too soon".) Cpio itself is a great horror show of just how bad this gets: a bunch of minor tweaks without finding underlying design bugs resulting in a ton of mutually incompatible formats. "They are almost the same" doesn't help: they are still incompatible. Introducing a new incompatible data format without strong justification is engineering malpractice. Doing it under the non-justification of expedience ("oh, we
Re: [PATCH v3 01/15] Documentation: add newcx initramfs format description
On February 16, 2018 1:47:35 PM PST, Victor Kamensky wrote: > > >On Fri, 16 Feb 2018, Rob Landley wrote: > >> >> On 02/16/2018 02:59 PM, H. Peter Anvin wrote: >>> On 02/16/18 12:33, Taras Kondratiuk wrote: Many of the Linux security/integrity features are dependent on file metadata, stored as extended attributes (xattrs), for making >decisions. These features need to be initialized during initcall and enabled >as early as possible for complete security coverage. Initramfs (tmpfs) supports xattrs, but newc CPIO archive format >does not support including them into the archive. This patch describes "extended" newc format (newcx) that is based >on newc and has following changes: - extended attributes support - increased size of filesize to support files >4GB - increased mtime field size to have 64 bits of seconds and added a field for nanoseconds - removed unused checksum field >>> >>> If you are going to implement a new, non-backwards-compatible >format, >>> you shouldn't replicate the mistakes of the current format. >Specifically: >> >> So rather than make minimal changes to the existing format and >continue to >> support the existing format (sharing as much code as possible), you >recommend >> gratuitous aesthetic changes? >> >>> 1. The use of ASCII-encoded fixed-length numbers is an idiotic >legacy >>> from an era before there were any portable way of dealing with >numbers >>> with prespecified endianness. >> >> It lets encoders and decoders easily share code with the existing >cpio format, >> which we still intend to be able to read and write. >> >>> If you are going to use ASCII, make them >>> delimited so that they don't have fixed limits, or just use binary. >> >> When it's gzipped this accomplishes what? (Other than being >gratuitously >> different from the previous iteration?) >> >>> The cpio header isn't fixed size, so that argument goes away, in >fact >>> the only way to determine the end of the header is to scan forward. >>> >>> 2. Alignment sensitivity! Because there is no header length >>> information, the above scan tells you where the header ends, but >there >>> is padding before the data, and the size of that padding is only >defined >>> by alignment. >> >> Again, these are minimal changes to the existing cpio format. You're >complaining >> about _cpio_, and that the new stuff isn't _different_ enough from >it. >> >>> 3. Inband encoding of EOF: if you actually have a filename >"TRAILER!!!" >>> you have problems. >> >> Been there, done that: >> >> http://lkml.iu.edu/hypermail/linux/kernel/1801.3/01791.html >> >>> But first, before you define a whole new format for which no tools >exist >>> (you will have to work with the maintainers of the GNU tools to add >>> support) >> >> No, he's been working with the maintainer of toybox to add support >(for about a >> year now), which gets him the Android command line. And the kernel >has its own >> built-in tool to generate cpio images anyway. >> >> Why would anyone care what the GNU project thinks? > >In our internal use of this patch series we do use gnu cpio >to create initramfs.cpio. > >And reference to gnu cpio patch that supports newcx format is >posted in description for this serieis: > >https://raw.githubusercontent.com/victorkamensky/initramfs-xattrs-poky/rocko/meta/recipes-extended/cpio/cpio-2.12/cpio-xattrs.patch > >Whether GNU cpio maintainers will accept it is different matter. >We will try, but we need to start somewhere and agree on >new format first. > >Thanks, >Victor > >>> you should see how complex it would be to support the POSIX >>> tar/pax format, >> >> That argument was had (at length) when initramfs went in over a >decade ago. >> There are links in >Documentation/filesystems/ramfs-rootfs-initramfs.txt to the >> mailing list entries about it. >> >>> which already has all the features you are seeking, and >>> by now is well-supported. >> >> So... tar wasn't well-supported 15 years ago? (Hasn't the kernel >source always >> been distributed via tarball back since 0.0.1?) >> >> You're suggesting having a whole second codepath that shares no code >with the >> existing cpio extractor. Are you suggesting abandoning support for >the existing >> initramfs.cpio.gz file format? >> >> Rob >> Introducing new, incompatible data formats is an inherently *very* costly operation; unfortunately many engineers don't seem to have a good grip of just *how* expensive it is (see "silly embedded nonsense hacks", "too little, too soon".) Cpio itself is a great horror show of just how bad this gets: a bunch of minor tweaks without finding underlying design bugs resulting in a ton of mutually incompatible formats. "They are almost the same" doesn't help: they are still incompatible. Introducing a new incompatible data format without strong justification is engineering malpractice. Doing it under the non-justification of expedience ("oh, we can share most of
Re: [PATCH v2] reset: add support for non-DT systems
On 02/13/2018 12:39 PM, Bartosz Golaszewski wrote: From: Bartosz GolaszewskiThe reset framework only supports device-tree. There are some platforms however, which need to use it even in legacy, board-file based mode. An example of such architecture is the DaVinci family of SoCs which supports both device tree and legacy boot modes and we don't want to introduce any regressions. We're currently working on converting the platform from its hand-crafted clock API to using the common clock framework. Part of the overhaul will be representing the chip's power sleep controller's reset lines using the reset framework. This changeset extends the core reset code with a new field in the reset controller struct which contains an array of lookup entries. Each entry contains the device name and an additional, optional identifier string. Drivers can register a set of reset lines using this lookup table and concerned devices can access them using the regular reset_control API. This new function is only called as a fallback in case the of_node field is NULL and doesn't change anything for current users. Tested with a dummy reset driver with several lookup entries. An example lookup table can look like this: static const struct reset_lookup foobar_reset_lookup[] = { [FOO_RESET] = { .dev = "foo", .id = "foo_id" }, [BAR_RESET] = { .dev = "bar", .id = NULL }, { } }; where FOO_RESET and BAR_RESET will correspond with the id parameters of reset callbacks. Cc: Sekhar Nori Cc: Kevin Hilman Cc: David Lechner Signed-off-by: Bartosz Golaszewski --- v1 -> v2: - renamed the new function to __reset_control_get_from_lookup() - added a missing break; when a matching entry is found - rearranged the code in __reset_control_get() - we can no longer get to the return at the bottom, so remove it and return from __reset_control_get_from_lookup() if __of_reset_control_get() fails - return -ENOENT from reset_contol_get() if we can't find a matching entry, prevously returned -EINVAL referred to the fact that we passed a device without the of_node which is no longer an error condition - add a comment about needing a sentinel in the lookup table drivers/reset/core.c | 40 +++- include/linux/reset-controller.h | 14 ++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index da4292e9de97..b104a0c5c511 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -493,6 +493,44 @@ struct reset_control *__of_reset_control_get(struct device_node *node, } EXPORT_SYMBOL_GPL(__of_reset_control_get); +static struct reset_control * +__reset_control_get_from_lookup(struct device *dev, const char *id, + bool shared, bool optional) +{ + struct reset_controller_dev *rcdev; + const char *dev_id = dev_name(dev); + struct reset_control *rstc = NULL; + const struct reset_lookup *lookup; + int index; + + mutex_lock(_list_mutex); + + list_for_each_entry(rcdev, _controller_list, list) { + if (!rcdev->lookup) + continue; + + lookup = rcdev->lookup; + for (index = 0; lookup->dev; index++, lookup++) {> + if (strcmp(dev_id, lookup->dev)) + continue; + + if ((!id && !lookup->id) || + (id && lookup->id && !strcmp(id, lookup->id))) { + rstc = __reset_control_get_internal(rcdev, + index, shared); + break; + } + } + } This method of determining the index is not very useful. In the case of the DSP reset on OMAP-L138, the index *must* be the LPSC module domain number, which is 15. This would require us to create 15 dummy entries in the rcdev->lookup array so that we get the correct index in order to get the correct reset control. I think it would be better to just store the index in struct reset_lookup. Another option would be to require the length of lookup to be rcdev->nr_resets instead of using a sentinel.
Re: [PATCH v2] reset: add support for non-DT systems
On 02/13/2018 12:39 PM, Bartosz Golaszewski wrote: From: Bartosz Golaszewski The reset framework only supports device-tree. There are some platforms however, which need to use it even in legacy, board-file based mode. An example of such architecture is the DaVinci family of SoCs which supports both device tree and legacy boot modes and we don't want to introduce any regressions. We're currently working on converting the platform from its hand-crafted clock API to using the common clock framework. Part of the overhaul will be representing the chip's power sleep controller's reset lines using the reset framework. This changeset extends the core reset code with a new field in the reset controller struct which contains an array of lookup entries. Each entry contains the device name and an additional, optional identifier string. Drivers can register a set of reset lines using this lookup table and concerned devices can access them using the regular reset_control API. This new function is only called as a fallback in case the of_node field is NULL and doesn't change anything for current users. Tested with a dummy reset driver with several lookup entries. An example lookup table can look like this: static const struct reset_lookup foobar_reset_lookup[] = { [FOO_RESET] = { .dev = "foo", .id = "foo_id" }, [BAR_RESET] = { .dev = "bar", .id = NULL }, { } }; where FOO_RESET and BAR_RESET will correspond with the id parameters of reset callbacks. Cc: Sekhar Nori Cc: Kevin Hilman Cc: David Lechner Signed-off-by: Bartosz Golaszewski --- v1 -> v2: - renamed the new function to __reset_control_get_from_lookup() - added a missing break; when a matching entry is found - rearranged the code in __reset_control_get() - we can no longer get to the return at the bottom, so remove it and return from __reset_control_get_from_lookup() if __of_reset_control_get() fails - return -ENOENT from reset_contol_get() if we can't find a matching entry, prevously returned -EINVAL referred to the fact that we passed a device without the of_node which is no longer an error condition - add a comment about needing a sentinel in the lookup table drivers/reset/core.c | 40 +++- include/linux/reset-controller.h | 14 ++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index da4292e9de97..b104a0c5c511 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -493,6 +493,44 @@ struct reset_control *__of_reset_control_get(struct device_node *node, } EXPORT_SYMBOL_GPL(__of_reset_control_get); +static struct reset_control * +__reset_control_get_from_lookup(struct device *dev, const char *id, + bool shared, bool optional) +{ + struct reset_controller_dev *rcdev; + const char *dev_id = dev_name(dev); + struct reset_control *rstc = NULL; + const struct reset_lookup *lookup; + int index; + + mutex_lock(_list_mutex); + + list_for_each_entry(rcdev, _controller_list, list) { + if (!rcdev->lookup) + continue; + + lookup = rcdev->lookup; + for (index = 0; lookup->dev; index++, lookup++) {> + if (strcmp(dev_id, lookup->dev)) + continue; + + if ((!id && !lookup->id) || + (id && lookup->id && !strcmp(id, lookup->id))) { + rstc = __reset_control_get_internal(rcdev, + index, shared); + break; + } + } + } This method of determining the index is not very useful. In the case of the DSP reset on OMAP-L138, the index *must* be the LPSC module domain number, which is 15. This would require us to create 15 dummy entries in the rcdev->lookup array so that we get the correct index in order to get the correct reset control. I think it would be better to just store the index in struct reset_lookup. Another option would be to require the length of lookup to be rcdev->nr_resets instead of using a sentinel.
Re: [PATCH v3 08/11] watchdog/hpwdt: Programable Pretimeout NMI
On Fri, Feb 16, 2018 at 03:55:06PM -0800, Guenter Roeck wrote: > On Fri, Feb 16, 2018 at 04:46:17PM -0700, Jerry Hoemann wrote: > > On Fri, Feb 16, 2018 at 12:34:40PM -0800, Guenter Roeck wrote: > > > On Thu, Feb 15, 2018 at 04:43:57PM -0700, Jerry Hoemann wrote: ... > > > > @@ -98,12 +106,21 @@ static int hpwdt_settimeout(struct watchdog_device > > > > *dev, unsigned int val) > > > > } > > > > > > > > #ifdef CONFIG_HPWDT_NMI_DECODING /* { */ > > > > +static int hpwdt_set_pretimeout(struct watchdog_device *dev, unsigned > > > > int val) > > > > +{ > > > > + if (val && (val != PRETIMEOUT_SEC)) { > > > > > > Unnecessary ( ) > > > > > > There are several things going on here. I'm not sure which one the above > > comment is intended. > > > The "Unnecessary" refers to the ( ) around the second part of the expression > above. While there may be valid reasons to include extra ( ), I think we > can trust the C compiler to get it right here. Okay, wasn't sure what you were getting at here. I trust the C compiler, I don't trust humans. In compound conditionals, I'll add parens so that the meaning is clear. > > While a pretimeout NMI isn't required by the HW to be enabled, if enabled > > the > > length of pretimeout is fixed by HW. > > > > I didn't see anything in the API that would allow us to communicate to > > the user this "feature." timeout at leasst has both min_timeout and > > max_timeout, but > > I didn't see similar for pretimeout. I also don't think its reasonable to > > fail > > here if the requested value is not 9 as the user really has no way of > > knowing what > > the valid range of pretimeout values are. So I accept, any non-zero value > > for pretimeout, but then set pretimeout to be 9. > > > > But at the same time, I don't like to silently change a human request > > w/o at least warning. > > > Sorry, I lost you here. I wasn't sure to what you were objecting to. I thought you might not have understood why I was converting non-zero values of "pretimeout" to 9. Was trying to explain the reasoning. A problem I see with the watchdog API is that users don't know what is an acceptable range of values for pretimeout. For HPE proliant systems, one cannot just choose an arbitrary value for pretimeout. I don't see a reasonable way that a user can determine the valid range for pretimeout for HPE systems given our hardware restrictions. > > > > > > > The actual timeout can be a value smaller than 9 seconds. > > > Minimum is 1 second. What happens if the user configures > > > a timeout of less than 9 seconds as well as a pretimeout ? > > > Will it fire immediately ? > > > > The architecture is silent on this issue. My experience with > > this is that if timeout < 9 seconds, the NMI is not issued. > > System resets when the timeout expires. This could be implementation > > dependent. > > > > Note, this is not a new issue. > > > Bad argument. Not sure exactly to what your objections are. I'll point out that: 1) hpwdt has been using pretimeout NMI for watchdog for > 10 years. 2) For 8 years, its been possible to have a timeout < 9 seconds. 3) AFAIK this hasn't proven to be a big issue. 4) I have real questions as to how (or if) to address the issue. I am perfectly willing to discuss the problem, but I don't think it is a requirement for this patch set. > > > I thought about setting the min timeout to ten seconds to avoid this > > situation. > > > You could reject reject request to set the pretimeout to a value <= the > timeout. I think you mis-communicated here. It is perfectly fine to have a timeout of 30 seconds with the pretimeout of 9 seconds. > > > I haven't dug into the various user level clients of watchdog so I'm not > > sure > > what the impact of making this change would be to them. > > > > > > > > > > > + dev_info(dev->parent, "Setting pretimeout to %d\n", > > > > PRETIMEOUT_SEC); > > > > > > Please no ongoing logging noise. This can easily be abused to clog > > > the kernel log. > > > > Good point. I will look at WARN_ONCE or something similar. > > > A traceback if someone sets a bad timeout ? That would be even worse. I am thinking something more in line with setting a static variable if the message had already been printed and not reprinting it. -- - Jerry Hoemann Software Engineer Hewlett Packard Enterprise -
Re: [PATCH v3 08/11] watchdog/hpwdt: Programable Pretimeout NMI
On Fri, Feb 16, 2018 at 03:55:06PM -0800, Guenter Roeck wrote: > On Fri, Feb 16, 2018 at 04:46:17PM -0700, Jerry Hoemann wrote: > > On Fri, Feb 16, 2018 at 12:34:40PM -0800, Guenter Roeck wrote: > > > On Thu, Feb 15, 2018 at 04:43:57PM -0700, Jerry Hoemann wrote: ... > > > > @@ -98,12 +106,21 @@ static int hpwdt_settimeout(struct watchdog_device > > > > *dev, unsigned int val) > > > > } > > > > > > > > #ifdef CONFIG_HPWDT_NMI_DECODING /* { */ > > > > +static int hpwdt_set_pretimeout(struct watchdog_device *dev, unsigned > > > > int val) > > > > +{ > > > > + if (val && (val != PRETIMEOUT_SEC)) { > > > > > > Unnecessary ( ) > > > > > > There are several things going on here. I'm not sure which one the above > > comment is intended. > > > The "Unnecessary" refers to the ( ) around the second part of the expression > above. While there may be valid reasons to include extra ( ), I think we > can trust the C compiler to get it right here. Okay, wasn't sure what you were getting at here. I trust the C compiler, I don't trust humans. In compound conditionals, I'll add parens so that the meaning is clear. > > While a pretimeout NMI isn't required by the HW to be enabled, if enabled > > the > > length of pretimeout is fixed by HW. > > > > I didn't see anything in the API that would allow us to communicate to > > the user this "feature." timeout at leasst has both min_timeout and > > max_timeout, but > > I didn't see similar for pretimeout. I also don't think its reasonable to > > fail > > here if the requested value is not 9 as the user really has no way of > > knowing what > > the valid range of pretimeout values are. So I accept, any non-zero value > > for pretimeout, but then set pretimeout to be 9. > > > > But at the same time, I don't like to silently change a human request > > w/o at least warning. > > > Sorry, I lost you here. I wasn't sure to what you were objecting to. I thought you might not have understood why I was converting non-zero values of "pretimeout" to 9. Was trying to explain the reasoning. A problem I see with the watchdog API is that users don't know what is an acceptable range of values for pretimeout. For HPE proliant systems, one cannot just choose an arbitrary value for pretimeout. I don't see a reasonable way that a user can determine the valid range for pretimeout for HPE systems given our hardware restrictions. > > > > > > > The actual timeout can be a value smaller than 9 seconds. > > > Minimum is 1 second. What happens if the user configures > > > a timeout of less than 9 seconds as well as a pretimeout ? > > > Will it fire immediately ? > > > > The architecture is silent on this issue. My experience with > > this is that if timeout < 9 seconds, the NMI is not issued. > > System resets when the timeout expires. This could be implementation > > dependent. > > > > Note, this is not a new issue. > > > Bad argument. Not sure exactly to what your objections are. I'll point out that: 1) hpwdt has been using pretimeout NMI for watchdog for > 10 years. 2) For 8 years, its been possible to have a timeout < 9 seconds. 3) AFAIK this hasn't proven to be a big issue. 4) I have real questions as to how (or if) to address the issue. I am perfectly willing to discuss the problem, but I don't think it is a requirement for this patch set. > > > I thought about setting the min timeout to ten seconds to avoid this > > situation. > > > You could reject reject request to set the pretimeout to a value <= the > timeout. I think you mis-communicated here. It is perfectly fine to have a timeout of 30 seconds with the pretimeout of 9 seconds. > > > I haven't dug into the various user level clients of watchdog so I'm not > > sure > > what the impact of making this change would be to them. > > > > > > > > > > > + dev_info(dev->parent, "Setting pretimeout to %d\n", > > > > PRETIMEOUT_SEC); > > > > > > Please no ongoing logging noise. This can easily be abused to clog > > > the kernel log. > > > > Good point. I will look at WARN_ONCE or something similar. > > > A traceback if someone sets a bad timeout ? That would be even worse. I am thinking something more in line with setting a static variable if the message had already been printed and not reprinting it. -- - Jerry Hoemann Software Engineer Hewlett Packard Enterprise -
Re: [PATCH 1/1] perf: Add CPU hotplug support for events
On 02/16/2018 12:39 PM, Peter Zijlstra wrote: On Fri, Feb 16, 2018 at 10:06:29AM -0800, Raghavendra Rao Ananta wrote: No this is absolutely disguisting. You can simply keep the events in the dead CPU's context. It's really not that hard. Keeping the events in the dead CPU's context was also an idea that we had. However, detaching that event from the PMU when the CPU is offline would be a pain. Consider the scenario in which an event is about to be destroyed when the CPU is offline (yet still attached to the CPU). During it's destruction, a cross-cpu call is made (from perf_remove_from_context()) to the offlined CPU to detach the event from the CPU's PMU. As the CPU is offline, that would not be possible, and again a separate logic has to be written for cleaning up the events whose CPUs are offlined. That is actually really simple to deal with. The real problems are with semantics, is an event enabled when the CPU is dead? Can you disable/enable an event on a dead CPU. The below patch (_completely_ untested) should do most of it, but needs help with the details. I suspect we want to allow enable/disable on events that are on a dead CPU, and equally I think we want to account the time an enabled event spends on a dead CPU to go towards the 'enabled' bucket. I've gone through your diff, and it gave me a hint of similar texture what we are trying to do (except for maintaining an offline event list). Nevertheless, I tried to test your patch. I created an hw event, and tried to offline the CPU in parallel, and I immediately hit a watchdog soft lockup bug! Tried the same this by first switching off the CPU (without any event created), and I hit into similar issue. I am sure we can fix it, but apart from the "why we are doing hotplug?" question, was was there specifically any issue with our patch? Also, you _still_ don't explain why you care about dead CPUs. I wanted to understand, if we no longer care about hotplugging of CPUs, then why do we still have exported symbols such as cpu_up() and cpu_down()? Moreover, we also have the hotplug interface exposed to users-space as well (through sysfs). As long as these interfaces exist, there's always a potential chance of bringing the CPU up/down. Can you please clear this thing up for me? -- Raghavendra -- Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project
Re: [PATCH 1/1] perf: Add CPU hotplug support for events
On 02/16/2018 12:39 PM, Peter Zijlstra wrote: On Fri, Feb 16, 2018 at 10:06:29AM -0800, Raghavendra Rao Ananta wrote: No this is absolutely disguisting. You can simply keep the events in the dead CPU's context. It's really not that hard. Keeping the events in the dead CPU's context was also an idea that we had. However, detaching that event from the PMU when the CPU is offline would be a pain. Consider the scenario in which an event is about to be destroyed when the CPU is offline (yet still attached to the CPU). During it's destruction, a cross-cpu call is made (from perf_remove_from_context()) to the offlined CPU to detach the event from the CPU's PMU. As the CPU is offline, that would not be possible, and again a separate logic has to be written for cleaning up the events whose CPUs are offlined. That is actually really simple to deal with. The real problems are with semantics, is an event enabled when the CPU is dead? Can you disable/enable an event on a dead CPU. The below patch (_completely_ untested) should do most of it, but needs help with the details. I suspect we want to allow enable/disable on events that are on a dead CPU, and equally I think we want to account the time an enabled event spends on a dead CPU to go towards the 'enabled' bucket. I've gone through your diff, and it gave me a hint of similar texture what we are trying to do (except for maintaining an offline event list). Nevertheless, I tried to test your patch. I created an hw event, and tried to offline the CPU in parallel, and I immediately hit a watchdog soft lockup bug! Tried the same this by first switching off the CPU (without any event created), and I hit into similar issue. I am sure we can fix it, but apart from the "why we are doing hotplug?" question, was was there specifically any issue with our patch? Also, you _still_ don't explain why you care about dead CPUs. I wanted to understand, if we no longer care about hotplugging of CPUs, then why do we still have exported symbols such as cpu_up() and cpu_down()? Moreover, we also have the hotplug interface exposed to users-space as well (through sysfs). As long as these interfaces exist, there's always a potential chance of bringing the CPU up/down. Can you please clear this thing up for me? -- Raghavendra -- Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project
[no subject]
Hi, I am Ryan. I consider myself an easy-going man,honest and loving person. I am currently looking for a relationship in which i feel loved. Please tell me more about yourself, if you do not mind. Regards, Ryan Ellis.
[no subject]
Hi, I am Ryan. I consider myself an easy-going man,honest and loving person. I am currently looking for a relationship in which i feel loved. Please tell me more about yourself, if you do not mind. Regards, Ryan Ellis.
syscon regmap for disabled node?
Hi Pankaj, Arnd, Lee, I am testing some code to use a syscon/regmap interface and I find that the syscon/regmap is initialized even on a disabled device node using a "syscon" compatible property when I have expected it to fail. Prior to commit bdb0066df96e ("mfd: syscon: Decouple syscon interface from platform devices"), the driver would have never probed, and the of_syscon_register() only checks for the compatible, but not if the device node is available. Is this intentional or a bug? regards Suman
syscon regmap for disabled node?
Hi Pankaj, Arnd, Lee, I am testing some code to use a syscon/regmap interface and I find that the syscon/regmap is initialized even on a disabled device node using a "syscon" compatible property when I have expected it to fail. Prior to commit bdb0066df96e ("mfd: syscon: Decouple syscon interface from platform devices"), the driver would have never probed, and the of_syscon_register() only checks for the compatible, but not if the device node is available. Is this intentional or a bug? regards Suman
Re: [PATCH 3/3] taint: Add taint for randstruct
On Fri, Feb 16, 2018 at 1:02 PM, Andrew Mortonwrote: > On Thu, 15 Feb 2018 19:37:44 -0800 Kees Cook wrote: > >> --- a/Documentation/sysctl/kernel.txt >> +++ b/Documentation/sysctl/kernel.txt >> @@ -991,6 +991,7 @@ ORed together. The letters are seen in "Tainted" line of >> Oops reports. >> 16384 (L): A soft lockup has previously occurred on the system. >> 32768 (K): The kernel has been live patched. >> 65536 (X): Auxiliary taint, defined and used by for distros. >> +131072 (T): The kernel was built with the struct randomization plugin. > > Uncle. > > > From: Andrew Morton > Subject: Documentation/sysctl/kernel.txt: show taint codes in hex > > The decimal representation is getting a bit hard to follow. The rationale, AIUI, is that /proc/sys/kernel/tainted prints the values in decimal. If we change the docs to be hex and leave the output decimal, that makes it even harder to examine. If we change the proc output, will we break userspace? And if we change it, maybe avoid numbers at all, and proc should bring the same thing that Oops does (the letter codes)? (But then the sysctl would need to parse the letters...) -Kees -- Kees Cook Pixel Security
Re: [PATCH 3/3] taint: Add taint for randstruct
On Fri, Feb 16, 2018 at 1:02 PM, Andrew Morton wrote: > On Thu, 15 Feb 2018 19:37:44 -0800 Kees Cook wrote: > >> --- a/Documentation/sysctl/kernel.txt >> +++ b/Documentation/sysctl/kernel.txt >> @@ -991,6 +991,7 @@ ORed together. The letters are seen in "Tainted" line of >> Oops reports. >> 16384 (L): A soft lockup has previously occurred on the system. >> 32768 (K): The kernel has been live patched. >> 65536 (X): Auxiliary taint, defined and used by for distros. >> +131072 (T): The kernel was built with the struct randomization plugin. > > Uncle. > > > From: Andrew Morton > Subject: Documentation/sysctl/kernel.txt: show taint codes in hex > > The decimal representation is getting a bit hard to follow. The rationale, AIUI, is that /proc/sys/kernel/tainted prints the values in decimal. If we change the docs to be hex and leave the output decimal, that makes it even harder to examine. If we change the proc output, will we break userspace? And if we change it, maybe avoid numbers at all, and proc should bring the same thing that Oops does (the letter codes)? (But then the sysctl would need to parse the letters...) -Kees -- Kees Cook Pixel Security
[PATCH] arm64: Add support for new control bits CTR_EL0.IDC and CTR_EL0.IDC
Two point of unification cache maintenance operations 'DC CVAU' and 'IC IVAU' are optional for implementors as per ARMv8 specification. This patch parses the updated CTR_EL0 register definition and adds the required changes to skip POU operations if the hardware reports CTR_EL0.IDC and/or CTR_EL0.IDC. CTR_EL0.DIC: Instruction cache invalidation requirements for instruction to data coherence. The meaning of this bit[29]. 0: Instruction cache invalidation to the point of unification is required for instruction to data coherence. 1: Instruction cache cleaning to the point of unification is not required for instruction to data coherence. CTR_EL0.IDC: Data cache clean requirements for instruction to data coherence. The meaning of this bit[28]. 0: Data cache clean to the point of unification is required for instruction to data coherence, unless CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000). 1: Data cache clean to the point of unification is not required for instruction to data coherence. Signed-off-by: Philip ElcanSigned-off-by: Shanker Donthineni --- arch/arm64/include/asm/assembler.h | 48 -- arch/arm64/include/asm/cache.h | 2 ++ arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/mm/cache.S | 26 ++--- 4 files changed, 51 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 3c78835..9eaa948 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -30,6 +30,7 @@ #include #include #include +#include .macro save_and_disable_daif, flags mrs \flags, daif @@ -334,9 +335,9 @@ * raw_dcache_line_size - get the minimum D-cache line size on this CPU * from the CTR register. */ - .macro raw_dcache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - ubfm\tmp, \tmp, #16, #19// cache line size encoding + .macro raw_dcache_line_size, reg, tmp, ctr + mrs \ctr, ctr_el0 // read CTR + ubfm\tmp, \ctr, #16, #19// cache line size encoding mov \reg, #4// bytes per word lsl \reg, \reg, \tmp// actual cache line size .endm @@ -344,9 +345,9 @@ /* * dcache_line_size - get the safe D-cache line size across all CPUs */ - .macro dcache_line_size, reg, tmp - read_ctr\tmp - ubfm\tmp, \tmp, #16, #19// cache line size encoding + .macro dcache_line_size, reg, tmp, ctr + read_ctr\ctr + ubfm\tmp, \ctr, #16, #19// cache line size encoding mov \reg, #4// bytes per word lsl \reg, \reg, \tmp// actual cache line size .endm @@ -355,9 +356,9 @@ * raw_icache_line_size - get the minimum I-cache line size on this CPU * from the CTR register. */ - .macro raw_icache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - and \tmp, \tmp, #0xf// cache line size encoding + .macro raw_icache_line_size, reg, tmp, ctr + mrs \ctr, ctr_el0 // read CTR + and \tmp, \ctr, #0xf// cache line size encoding mov \reg, #4// bytes per word lsl \reg, \reg, \tmp// actual cache line size .endm @@ -365,9 +366,9 @@ /* * icache_line_size - get the safe I-cache line size across all CPUs */ - .macro icache_line_size, reg, tmp - read_ctr\tmp - and \tmp, \tmp, #0xf// cache line size encoding + .macro icache_line_size, reg, tmp, ctr + read_ctr\ctr + and \tmp, \ctr, #0xf// cache line size encoding mov \reg, #4// bytes per word lsl \reg, \reg, \tmp// actual cache line size .endm @@ -408,13 +409,21 @@ * size: size of the region * Corrupts: kaddr, size, tmp1, tmp2 */ - .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 - dcache_line_size \tmp1, \tmp2 + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2, tmp3 + dcache_line_size \tmp1, \tmp2, \tmp3 add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 9998: - .if (\op == cvau || \op == cvac) + .if (\op == cvau) +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + tbnz\tmp3, #CTR_IDC_SHIFT, 9997f + dc cvau, \kaddr +alternative_else + dc civac, \kaddr + nop +alternative_endif + .elseif (\op == cvac)
[PATCH] arm64: Add support for new control bits CTR_EL0.IDC and CTR_EL0.IDC
Two point of unification cache maintenance operations 'DC CVAU' and 'IC IVAU' are optional for implementors as per ARMv8 specification. This patch parses the updated CTR_EL0 register definition and adds the required changes to skip POU operations if the hardware reports CTR_EL0.IDC and/or CTR_EL0.IDC. CTR_EL0.DIC: Instruction cache invalidation requirements for instruction to data coherence. The meaning of this bit[29]. 0: Instruction cache invalidation to the point of unification is required for instruction to data coherence. 1: Instruction cache cleaning to the point of unification is not required for instruction to data coherence. CTR_EL0.IDC: Data cache clean requirements for instruction to data coherence. The meaning of this bit[28]. 0: Data cache clean to the point of unification is required for instruction to data coherence, unless CLIDR_EL1.LoC == 0b000 or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000). 1: Data cache clean to the point of unification is not required for instruction to data coherence. Signed-off-by: Philip Elcan Signed-off-by: Shanker Donthineni --- arch/arm64/include/asm/assembler.h | 48 -- arch/arm64/include/asm/cache.h | 2 ++ arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/mm/cache.S | 26 ++--- 4 files changed, 51 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 3c78835..9eaa948 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -30,6 +30,7 @@ #include #include #include +#include .macro save_and_disable_daif, flags mrs \flags, daif @@ -334,9 +335,9 @@ * raw_dcache_line_size - get the minimum D-cache line size on this CPU * from the CTR register. */ - .macro raw_dcache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - ubfm\tmp, \tmp, #16, #19// cache line size encoding + .macro raw_dcache_line_size, reg, tmp, ctr + mrs \ctr, ctr_el0 // read CTR + ubfm\tmp, \ctr, #16, #19// cache line size encoding mov \reg, #4// bytes per word lsl \reg, \reg, \tmp// actual cache line size .endm @@ -344,9 +345,9 @@ /* * dcache_line_size - get the safe D-cache line size across all CPUs */ - .macro dcache_line_size, reg, tmp - read_ctr\tmp - ubfm\tmp, \tmp, #16, #19// cache line size encoding + .macro dcache_line_size, reg, tmp, ctr + read_ctr\ctr + ubfm\tmp, \ctr, #16, #19// cache line size encoding mov \reg, #4// bytes per word lsl \reg, \reg, \tmp// actual cache line size .endm @@ -355,9 +356,9 @@ * raw_icache_line_size - get the minimum I-cache line size on this CPU * from the CTR register. */ - .macro raw_icache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - and \tmp, \tmp, #0xf// cache line size encoding + .macro raw_icache_line_size, reg, tmp, ctr + mrs \ctr, ctr_el0 // read CTR + and \tmp, \ctr, #0xf// cache line size encoding mov \reg, #4// bytes per word lsl \reg, \reg, \tmp// actual cache line size .endm @@ -365,9 +366,9 @@ /* * icache_line_size - get the safe I-cache line size across all CPUs */ - .macro icache_line_size, reg, tmp - read_ctr\tmp - and \tmp, \tmp, #0xf// cache line size encoding + .macro icache_line_size, reg, tmp, ctr + read_ctr\ctr + and \tmp, \ctr, #0xf// cache line size encoding mov \reg, #4// bytes per word lsl \reg, \reg, \tmp// actual cache line size .endm @@ -408,13 +409,21 @@ * size: size of the region * Corrupts: kaddr, size, tmp1, tmp2 */ - .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 - dcache_line_size \tmp1, \tmp2 + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2, tmp3 + dcache_line_size \tmp1, \tmp2, \tmp3 add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 9998: - .if (\op == cvau || \op == cvac) + .if (\op == cvau) +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + tbnz\tmp3, #CTR_IDC_SHIFT, 9997f + dc cvau, \kaddr +alternative_else + dc civac, \kaddr + nop +alternative_endif + .elseif (\op == cvac) alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE dc