[PATCH tip/locking/core v5 2/6] atomics: Add test for atomic operations with _relaxed variants
Some atomic operations now have _relaxed/_acquire/_release variants, so this patch adds some trivial tests for two purposes: 1. test the behavior of these new operations in a single-CPU environment. 2. have their code generated before we actually use them somewhere, so that we can examine their assembly code. Signed-off-by: Boqun Feng--- lib/atomic64_test.c | 120 ++-- 1 file changed, 79 insertions(+), 41 deletions(-) diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 83c33a5b..18e422b 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -27,6 +27,65 @@ do { \ (unsigned long long)r); \ } while (0) +/* + * Test for a atomic operation family, + * @test should be a macro accepting parameters (bit, op, ...) + */ + +#define FAMILY_TEST(test, bit, op, args...)\ +do { \ + test(bit, op, ##args); \ + test(bit, op##_acquire, ##args);\ + test(bit, op##_release, ##args);\ + test(bit, op##_relaxed, ##args);\ +} while (0) + +#define TEST_RETURN(bit, op, c_op, val)\ +do { \ + atomic##bit##_set(, v0); \ + r = v0; \ + r c_op val; \ + BUG_ON(atomic##bit##_##op(val, ) != r); \ + BUG_ON(atomic##bit##_read() != r);\ +} while (0) + +#define RETURN_FAMILY_TEST(bit, op, c_op, val) \ +do { \ + FAMILY_TEST(TEST_RETURN, bit, op, c_op, val); \ +} while (0) + +#define TEST_ARGS(bit, op, init, ret, expect, args...) 
\ +do { \ + atomic##bit##_set(, init);\ + BUG_ON(atomic##bit##_##op(, ##args) != ret); \ + BUG_ON(atomic##bit##_read() != expect); \ +} while (0) + +#define XCHG_FAMILY_TEST(bit, init, new) \ +do { \ + FAMILY_TEST(TEST_ARGS, bit, xchg, init, init, new, new);\ +} while (0) + +#define CMPXCHG_FAMILY_TEST(bit, init, new, wrong) \ +do { \ + FAMILY_TEST(TEST_ARGS, bit, cmpxchg,\ + init, init, new, init, new);\ + FAMILY_TEST(TEST_ARGS, bit, cmpxchg,\ + init, init, init, wrong, new); \ +} while (0) + +#define INC_RETURN_FAMILY_TEST(bit, i) \ +do { \ + FAMILY_TEST(TEST_ARGS, bit, inc_return, \ + i, (i) + one, (i) + one); \ +} while (0) + +#define DEC_RETURN_FAMILY_TEST(bit, i) \ +do { \ + FAMILY_TEST(TEST_ARGS, bit, dec_return, \ + i, (i) - one, (i) - one); \ +} while (0) + static __init void test_atomic(void) { int v0 = 0xaaa31337; @@ -45,6 +104,18 @@ static __init void test_atomic(void) TEST(, and, &=, v1); TEST(, xor, ^=, v1); TEST(, andnot, &= ~, v1); + + RETURN_FAMILY_TEST(, add_return, +=, onestwos); + RETURN_FAMILY_TEST(, add_return, +=, -one); + RETURN_FAMILY_TEST(, sub_return, -=, onestwos); + RETURN_FAMILY_TEST(, sub_return, -=, -one); + + INC_RETURN_FAMILY_TEST(, v0); + DEC_RETURN_FAMILY_TEST(, v0); + + XCHG_FAMILY_TEST(, v0, v1); + CMPXCHG_FAMILY_TEST(, v0, v1, onestwos); + } #define INIT(c) do { atomic64_set(, c); r = c; } while (0) @@ -74,25 +145,10 @@ static __init void test_atomic64(void) TEST(64, xor, ^=, v1); TEST(64, andnot, &= ~, v1); - INIT(v0); - r += onestwos; - BUG_ON(atomic64_add_return(onestwos, ) != r); - BUG_ON(v.counter != r); - - INIT(v0); - r += -one; - BUG_ON(atomic64_add_return(-one, ) != r); - BUG_ON(v.counter != r); - - INIT(v0); - r -= onestwos; - BUG_ON(atomic64_sub_return(onestwos, ) != r); - BUG_ON(v.counter != r); - - INIT(v0); - r -= -one; - BUG_ON(atomic64_sub_return(-one, ) != r); - BUG_ON(v.counter != r); + RETURN_FAMILY_TEST(64, add_return, +=, onestwos); + RETURN_FAMILY_TEST(64, add_return, +=, -one); + RETURN_FAMILY_TEST(64, 
sub_return, -=, onestwos); + RETURN_FAMILY_TEST(64, sub_return, -=, -one);
[PATCH tip/locking/core v5 6/6] powerpc: atomic: Implement cmpxchg{, 64}_* and atomic{, 64}_cmpxchg_* variants
Implement cmpxchg{,64}_relaxed and atomic{,64}_cmpxchg_relaxed, based on which _release variants can be built. To avoid superfluous barriers in _acquire variants, we implement these operations with assembly code rather than using __atomic_op_acquire() to build them automatically. For the same reason, we keep the assembly implementation of fully ordered cmpxchg operations. However, we don't do the same for _release, because that would require putting barriers in the middle of ll/sc loops, which is probably a bad idea. Note that cmpxchg{,64}_relaxed and atomic{,64}_cmpxchg_relaxed are not compiler barriers. Signed-off-by: Boqun Feng--- arch/powerpc/include/asm/atomic.h | 10 +++ arch/powerpc/include/asm/cmpxchg.h | 149 - 2 files changed, 158 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 2c3d4f0..195dc85 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -176,6 +176,11 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v) #define atomic_dec_return_relaxed atomic_dec_return_relaxed #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg_relaxed(v, o, n) \ + cmpxchg_relaxed(&((v)->counter), (o), (n)) +#define atomic_cmpxchg_acquire(v, o, n) \ + cmpxchg_acquire(&((v)->counter), (o), (n)) + #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) @@ -444,6 +449,11 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v) } #define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_cmpxchg_relaxed(v, o, n) \ + cmpxchg_relaxed(&((v)->counter), (o), (n)) +#define atomic64_cmpxchg_acquire(v, o, n) \ + cmpxchg_acquire(&((v)->counter), (o), (n)) + #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) diff --git 
a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 17c7e14..cae4fa8 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -181,6 +181,56 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, return prev; } +static __always_inline unsigned long +__cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1:lwarx %0,0,%2 # __cmpxchg_u32_relaxed\n" +" cmpw0,%0,%3\n" +" bne-2f\n" + PPC405_ERR77(0, %2) +" stwcx. %4,0,%2\n" +" bne-1b\n" +"2:" + : "=" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +/* + * cmpxchg family don't have order guarantee if cmp part fails, therefore we + * can avoid superfluous barriers if we use assembly code to implement + * cmpxchg() and cmpxchg_acquire(), however we don't do the similar for + * cmpxchg_release() because that will result in putting a barrier in the + * middle of a ll/sc loop, which is probably a bad idea. For example, this + * might cause the conditional store more likely to fail. + */ +static __always_inline unsigned long +__cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1:lwarx %0,0,%2 # __cmpxchg_u32_acquire\n" +" cmpw0,%0,%3\n" +" bne-2f\n" + PPC405_ERR77(0, %2) +" stwcx. 
%4,0,%2\n" +" bne-1b\n" + PPC_ACQUIRE_BARRIER + "\n" +"2:" + : "=" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __always_inline unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) @@ -224,6 +274,46 @@ __cmpxchg_u64_local(volatile unsigned long *p, unsigned long old, return prev; } + +static __always_inline unsigned long +__cmpxchg_u64_relaxed(u64 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1:ldarx %0,0,%2 # __cmpxchg_u64_relaxed\n" +" cmpd0,%0,%3\n" +" bne-2f\n" +" stdcx. %4,0,%2\n" +" bne-1b\n" +"2:" + : "=" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1:ldarx %0,0,%2 # __cmpxchg_u64_acquire\n" +" cmpd0,%0,%3\n" +" bne-2f\n" +" stdcx. %4,0,%2\n" +" bne-1b\n" + PPC_ACQUIRE_BARRIER + "\n" +"2:" + : "=" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc",
[PATCH tip/locking/core v5 4/6] powerpc: atomic: Implement atomic{, 64}_*_return_* variants
On powerpc, acquire and release semantics can be achieved with lightweight barriers ("lwsync" and "ctrl+isync"), which can be used to implement __atomic_op_{acquire,release}. For release semantics, since we only need to ensure that all memory accesses issued before the atomic take effect before the -store- part of the atomic, "lwsync" is all we need. On platforms without "lwsync", "sync" should be used. Therefore, smp_lwsync() is used here. For acquire semantics, "lwsync" is all we need, for a similar reason. However, on platforms without "lwsync", we can use "isync" rather than "sync" as an acquire barrier. Therefore, in __atomic_op_acquire() we use PPC_ACQUIRE_BARRIER, which is barrier() on UP, "lwsync" if available and "isync" otherwise. Implement atomic{,64}_{add,sub,inc,dec}_return_relaxed, and build other variants with these helpers. Signed-off-by: Boqun Feng--- arch/powerpc/include/asm/atomic.h | 107 +++--- 1 file changed, 65 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 55f106e..f9c0c6c 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -12,6 +12,24 @@ #define ATOMIC_INIT(i) { (i) } +/* + * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with + * a "bne-" instruction at the end, so an isync is enough as a acquire barrier + * on the platform without lwsync. + */ +#define __atomic_op_acquire(op, args...) \ +({ \ + typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ + __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory");\ + __ret; \ +}) + +#define __atomic_op_release(op, args...) 
\ +({ \ + smp_lwsync(); \ + op##_relaxed(args); \ +}) + static __inline__ int atomic_read(const atomic_t *v) { int t; @@ -42,27 +60,27 @@ static __inline__ void atomic_##op(int a, atomic_t *v) \ : "cc");\ } \ -#define ATOMIC_OP_RETURN(op, asm_op) \ -static __inline__ int atomic_##op##_return(int a, atomic_t *v) \ +#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ +static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ { \ int t; \ \ __asm__ __volatile__( \ - PPC_ATOMIC_ENTRY_BARRIER\ -"1:lwarx %0,0,%2 # atomic_" #op "_return\n" \ - #asm_op " %0,%1,%0\n" \ - PPC405_ERR77(0,%2) \ -" stwcx. %0,0,%2 \n" \ +"1:lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \ + #asm_op " %0,%2,%0\n" \ + PPC405_ERR77(0, %3) \ +" stwcx. %0,0,%3\n" \ " bne-1b\n" \ - PPC_ATOMIC_EXIT_BARRIER \ - : "=" (t) \ + : "=" (t), "+m" (v->counter) \ : "r" (a), "r" (>counter)\ - : "cc", "memory"); \ + : "cc");\ \ return t; \ } -#define ATOMIC_OPS(op, asm_op) ATOMIC_OP(op, asm_op) ATOMIC_OP_RETURN(op, asm_op) +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_OP_RETURN_RELAXED(op, asm_op) ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) @@ -71,8 +89,11 @@ ATOMIC_OP(and, and) ATOMIC_OP(or, or) ATOMIC_OP(xor, xor) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed + #undef ATOMIC_OPS -#undef ATOMIC_OP_RETURN
[PATCH tip/locking/core v5 0/6] atomics: powerpc: Implement relaxed/acquire/release variants of some atomics
Hi all, This is v5 of the series. Link for v1: https://lkml.org/lkml/2015/8/27/798 Link for v2: https://lkml.org/lkml/2015/9/16/527 Link for v3: https://lkml.org/lkml/2015/10/12/368 Link for v4: https://lkml.org/lkml/2015/10/14/670 Changes since v4: * define PPC_ATOMIC_ENTRY_BARRIER as "sync" (Paul E. McKenney) * remove PPC-specific __atomic_op_fence(). Relaxed/acquire/release variants of atomic operations {add,sub}_return and {cmp,}xchg were introduced by commit: "atomics: add acquire/release/relaxed variants of some atomic operations" and those of {inc,dec}_return were introduced by commit: "locking/asm-generic: Add _{relaxed|acquire|release}() variants for inc/dec atomics" Both of these are in the current locking/core branch of the tip tree. By default, the generic code will implement a relaxed variant as a fully ordered atomic operation, and a release/acquire variant as a relaxed variant with the necessary general barrier before or after. On PPC, which has a weak memory ordering model, a relaxed variant can be implemented in a more lightweight way than a fully ordered one. Furthermore, release and acquire variants can be implemented with arch-specific lightweight barriers. Besides, cmpxchg, xchg and their atomic_ versions are only RELEASE+ACQUIRE rather than fully ordered in the current PPC implementation, which is incorrect according to memory-barriers.txt. Furthermore, PPC_ATOMIC_ENTRY_BARRIER, the leading barrier of fully ordered atomics, should be "sync" rather than "lwsync" if SMP=y, to guarantee fully ordered semantics. Therefore, this patchset fixes the ordering guarantee of cmpxchg, xchg and value-returning atomics on PPC and implements the relaxed/acquire/release variants based on the PPC memory model and its specific barriers. Some trivial tests for these new variants are also included in this series: because some of these variants are not used in the kernel for now, I think it is a good idea to at least generate the code for these variants somewhere. The patchset consists of 6 parts: 1. 
Make value-returning atomics, futex atomics, xchg and cmpxchg fully ordered 2. Add trivial tests for the new variants in lib/atomic64_test.c 3. Allow architectures to define their own __atomic_op_*() helpers to build other variants based on the relaxed ones. 4. Implement atomic{,64}_{add,sub,inc,dec}_return_* variants 5. Implement xchg_* and atomic{,64}_xchg_* variants 6. Implement cmpxchg_* and atomic{,64}_cmpxchg_* variants This patchset is based on the current locking/core branch of the tip tree; all patches were built and boot tested on little-endian pseries, and also tested by 0day. Looking forward to any suggestions, questions and comments ;-) Regards, Boqun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH tip/locking/core v5 3/6] atomics: Allow architectures to define their own __atomic_op_* helpers
Some architectures may have their own special barriers for acquire, release and fence semantics, in which case the general memory barriers (smp_mb__*_atomic()) in the default __atomic_op_*() may be too strong. So allow architectures to define their own helpers, which override the default helpers. Signed-off-by: Boqun Feng--- include/linux/atomic.h | 10 ++ 1 file changed, 10 insertions(+) diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 27e580d..947c1dc 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -43,20 +43,29 @@ static inline int atomic_read_ctrl(const atomic_t *v) * The idea here is to build acquire/release variants by adding explicit * barriers on top of the relaxed variant. In the case where the relaxed * variant is already fully ordered, no additional barriers are needed. + * + * Besides, if an arch has a special barrier for acquire/release, it could + * implement its own __atomic_op_* and use the same framework for building + * variants */ +#ifndef __atomic_op_acquire #define __atomic_op_acquire(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ smp_mb__after_atomic(); \ __ret; \ }) +#endif +#ifndef __atomic_op_release #define __atomic_op_release(op, args...) \ ({ \ smp_mb__before_atomic();\ op##_relaxed(args); \ }) +#endif +#ifndef __atomic_op_fence #define __atomic_op_fence(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret; \ @@ -65,6 +74,7 @@ static inline int atomic_read_ctrl(const atomic_t *v) smp_mb__after_atomic(); \ __ret; \ }) +#endif /* atomic_add_return_relaxed */ #ifndef atomic_add_return_relaxed -- 2.6.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH tip/locking/core v5 1/6] powerpc: atomic: Make _return atomics and *{cmp}xchg fully ordered
On Mon, Oct 26, 2015 at 05:50:52PM +0800, Boqun Feng wrote: > This patch fixes two problems to make value-returning atomics and > {cmp}xchg fully ordered on PPC. > > According to memory-barriers.txt: > > > Any atomic operation that modifies some state in memory and returns > > information about the state (old or new) implies an SMP-conditional > > general memory barrier (smp_mb()) on each side of the actual > > operation ... > > which means these operations should be fully ordered. However on PPC, > PPC_ATOMIC_ENTRY_BARRIER is the barrier before the actual operation, > which is currently "lwsync" if SMP=y. The leading "lwsync" can not > guarantee fully ordered atomics, according to Paul Mckenney: > > https://lkml.org/lkml/2015/10/14/970 > > To fix this, we define PPC_ATOMIC_ENTRY_BARRIER as "sync" to guarantee > the fully-ordered semantics. > > This also makes futex atomics fully ordered, which can avoid possible > memory ordering problems if userspace code relies on futex system call > for fully ordered semantics. > > Another thing to fix is that xchg, cmpxchg and their atomic{64}_ > versions are currently RELEASE+ACQUIRE, which are not fully ordered. > > So also replace PPC_RELEASE_BARRIER and PPC_ACQUIRE_BARRIER with > PPC_ATOMIC_ENTRY_BARRIER and PPC_ATOMIC_EXIT_BARRIER in > __{cmp,}xchg_{u32,u64} respectively to guarantee fully ordered semantics > of atomic{,64}_{cmp,}xchg() and {cmp,}xchg(), as a complement of commit > b97021f85517 ("powerpc: Fix atomic_xxx_return barrier semantics"). > > Cc:# 3.4+ Hmm.. I use the same Cc tag as v4, seems my git(2.6.2) send-email has a weird behavior of composing Cc address? I will resend this one soon, sorry ;-( Regards, Boqun > Signed-off-by: Boqun Feng > --- > > Michael, I also change PPC_ATOMIC_ENTRY_BARRIER as "sync" if SMP=y in this > version , which is different from the previous one, so request for a new ack. 
> Thank you ;-) > > arch/powerpc/include/asm/cmpxchg.h | 16 > arch/powerpc/include/asm/synch.h | 2 +- > 2 files changed, 9 insertions(+), 9 deletions(-) > > diff --git a/arch/powerpc/include/asm/cmpxchg.h > b/arch/powerpc/include/asm/cmpxchg.h > index ad6263c..d1a8d93 100644 > --- a/arch/powerpc/include/asm/cmpxchg.h > +++ b/arch/powerpc/include/asm/cmpxchg.h > @@ -18,12 +18,12 @@ __xchg_u32(volatile void *p, unsigned long val) > unsigned long prev; > > __asm__ __volatile__( > - PPC_RELEASE_BARRIER > + PPC_ATOMIC_ENTRY_BARRIER > "1: lwarx %0,0,%2 \n" > PPC405_ERR77(0,%2) > "stwcx. %3,0,%2 \n\ > bne-1b" > - PPC_ACQUIRE_BARRIER > + PPC_ATOMIC_EXIT_BARRIER > : "=" (prev), "+m" (*(volatile unsigned int *)p) > : "r" (p), "r" (val) > : "cc", "memory"); > @@ -61,12 +61,12 @@ __xchg_u64(volatile void *p, unsigned long val) > unsigned long prev; > > __asm__ __volatile__( > - PPC_RELEASE_BARRIER > + PPC_ATOMIC_ENTRY_BARRIER > "1: ldarx %0,0,%2 \n" > PPC405_ERR77(0,%2) > "stdcx. %3,0,%2 \n\ > bne-1b" > - PPC_ACQUIRE_BARRIER > + PPC_ATOMIC_EXIT_BARRIER > : "=" (prev), "+m" (*(volatile unsigned long *)p) > : "r" (p), "r" (val) > : "cc", "memory"); > @@ -151,14 +151,14 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long > old, unsigned long new) > unsigned int prev; > > __asm__ __volatile__ ( > - PPC_RELEASE_BARRIER > + PPC_ATOMIC_ENTRY_BARRIER > "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ > cmpw0,%0,%3\n\ > bne-2f\n" > PPC405_ERR77(0,%2) > "stwcx. %4,0,%2\n\ > bne-1b" > - PPC_ACQUIRE_BARRIER > + PPC_ATOMIC_EXIT_BARRIER > "\n\ > 2:" > : "=" (prev), "+m" (*p) > @@ -197,13 +197,13 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long > old, unsigned long new) > unsigned long prev; > > __asm__ __volatile__ ( > - PPC_RELEASE_BARRIER > + PPC_ATOMIC_ENTRY_BARRIER > "1: ldarx %0,0,%2 # __cmpxchg_u64\n\ > cmpd0,%0,%3\n\ > bne-2f\n\ > stdcx. 
%4,0,%2\n\ > bne-1b" > - PPC_ACQUIRE_BARRIER > + PPC_ATOMIC_EXIT_BARRIER > "\n\ > 2:" > : "=" (prev), "+m" (*p) > diff --git a/arch/powerpc/include/asm/synch.h > b/arch/powerpc/include/asm/synch.h > index e682a71..c508686 100644 > --- a/arch/powerpc/include/asm/synch.h > +++ b/arch/powerpc/include/asm/synch.h > @@ -44,7 +44,7 @@ static inline void isync(void) > MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup); > #define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER) > #define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" > -#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n" > +#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n" > #define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n" > #else > #define PPC_ACQUIRE_BARRIER > -- > 2.6.2 >
[PATCH RESEND tip/locking/core v5 1/6] powerpc: atomic: Make _return atomics and *{cmp}xchg fully ordered
This patch fixes two problems to make value-returning atomics and {cmp}xchg fully ordered on PPC. According to memory-barriers.txt: > Any atomic operation that modifies some state in memory and returns > information about the state (old or new) implies an SMP-conditional > general memory barrier (smp_mb()) on each side of the actual > operation ... which means these operations should be fully ordered. However, on PPC, PPC_ATOMIC_ENTRY_BARRIER is the barrier before the actual operation, which is currently "lwsync" if SMP=y. The leading "lwsync" cannot guarantee fully ordered atomics, according to Paul McKenney: https://lkml.org/lkml/2015/10/14/970 To fix this, we define PPC_ATOMIC_ENTRY_BARRIER as "sync" to guarantee the fully-ordered semantics. This also makes futex atomics fully ordered, which can avoid possible memory ordering problems if userspace code relies on the futex system call for fully ordered semantics. Another thing to fix is that xchg, cmpxchg and their atomic{,64}_ versions are currently RELEASE+ACQUIRE, which is not fully ordered. So also replace PPC_RELEASE_BARRIER and PPC_ACQUIRE_BARRIER with PPC_ATOMIC_ENTRY_BARRIER and PPC_ATOMIC_EXIT_BARRIER in __{cmp,}xchg_{u32,u64} respectively to guarantee fully ordered semantics of atomic{,64}_{cmp,}xchg() and {cmp,}xchg(), as a complement of commit b97021f85517 ("powerpc: Fix atomic_xxx_return barrier semantics"). Cc:# 3.4+ Signed-off-by: Boqun Feng --- Michael, I also changed PPC_ATOMIC_ENTRY_BARRIER to "sync" if SMP=y in this version, which is different from the previous one, so I am requesting a new ack. 
Thank you ;-) arch/powerpc/include/asm/cmpxchg.h | 16 arch/powerpc/include/asm/synch.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index ad6263c..d1a8d93 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -18,12 +18,12 @@ __xchg_u32(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1:lwarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stwcx. %3,0,%2 \n\ bne-1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=" (prev), "+m" (*(volatile unsigned int *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -61,12 +61,12 @@ __xchg_u64(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1:ldarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stdcx. %3,0,%2 \n\ bne-1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=" (prev), "+m" (*(volatile unsigned long *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -151,14 +151,14 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) unsigned int prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1:lwarx %0,0,%2 # __cmpxchg_u32\n\ cmpw0,%0,%3\n\ bne-2f\n" PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne-1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=" (prev), "+m" (*p) @@ -197,13 +197,13 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) unsigned long prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1:ldarx %0,0,%2 # __cmpxchg_u64\n\ cmpd0,%0,%3\n\ bne-2f\n\ stdcx. 
%4,0,%2\n\ bne-1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=" (prev), "+m" (*p) diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index e682a71..c508686 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -44,7 +44,7 @@ static inline void isync(void) MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup); #define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER) #define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" -#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n" +#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n" #define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n" #else #define PPC_ACQUIRE_BARRIER -- 2.6.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH tip/locking/core v5 1/6] powerpc: atomic: Make _return atomics and *{cmp}xchg fully ordered
This patch fixes two problems to make value-returning atomics and {cmp}xchg fully ordered on PPC. According to memory-barriers.txt: > Any atomic operation that modifies some state in memory and returns > information about the state (old or new) implies an SMP-conditional > general memory barrier (smp_mb()) on each side of the actual > operation ... which means these operations should be fully ordered. However, on PPC, PPC_ATOMIC_ENTRY_BARRIER is the barrier before the actual operation, which is currently "lwsync" if SMP=y. The leading "lwsync" cannot guarantee fully ordered atomics, according to Paul McKenney: https://lkml.org/lkml/2015/10/14/970 To fix this, we define PPC_ATOMIC_ENTRY_BARRIER as "sync" to guarantee the fully-ordered semantics. This also makes futex atomics fully ordered, which can avoid possible memory ordering problems if userspace code relies on the futex system call for fully ordered semantics. Another thing to fix is that xchg, cmpxchg and their atomic{,64}_ versions are currently RELEASE+ACQUIRE, which is not fully ordered. So also replace PPC_RELEASE_BARRIER and PPC_ACQUIRE_BARRIER with PPC_ATOMIC_ENTRY_BARRIER and PPC_ATOMIC_EXIT_BARRIER in __{cmp,}xchg_{u32,u64} respectively to guarantee fully ordered semantics of atomic{,64}_{cmp,}xchg() and {cmp,}xchg(), as a complement of commit b97021f85517 ("powerpc: Fix atomic_xxx_return barrier semantics"). Cc:# 3.4+ Signed-off-by: Boqun Feng --- Michael, I also changed PPC_ATOMIC_ENTRY_BARRIER to "sync" if SMP=y in this version, which is different from the previous one, so I am requesting a new ack. 
Thank you ;-) arch/powerpc/include/asm/cmpxchg.h | 16 arch/powerpc/include/asm/synch.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index ad6263c..d1a8d93 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -18,12 +18,12 @@ __xchg_u32(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1:lwarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stwcx. %3,0,%2 \n\ bne-1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=" (prev), "+m" (*(volatile unsigned int *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -61,12 +61,12 @@ __xchg_u64(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1:ldarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stdcx. %3,0,%2 \n\ bne-1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=" (prev), "+m" (*(volatile unsigned long *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -151,14 +151,14 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) unsigned int prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1:lwarx %0,0,%2 # __cmpxchg_u32\n\ cmpw0,%0,%3\n\ bne-2f\n" PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne-1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=" (prev), "+m" (*p) @@ -197,13 +197,13 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) unsigned long prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1:ldarx %0,0,%2 # __cmpxchg_u64\n\ cmpd0,%0,%3\n\ bne-2f\n\ stdcx. 
%4,0,%2\n\ bne-1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=" (prev), "+m" (*p) diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index e682a71..c508686 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -44,7 +44,7 @@ static inline void isync(void) MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup); #define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER) #define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" -#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n" +#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n" #define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n" #else #define PPC_ACQUIRE_BARRIER -- 2.6.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH tip/locking/core v5 5/6] powerpc: atomic: Implement xchg_* and atomic{, 64}_xchg_* variants
Implement xchg_relaxed and atomic{,64}_xchg_relaxed, based on these _relaxed variants, release/acquire variants and fully ordered versions can be built. Note that xchg_relaxed and atomic_{,64}_xchg_relaxed are not compiler barriers. Signed-off-by: Boqun Feng--- arch/powerpc/include/asm/atomic.h | 2 ++ arch/powerpc/include/asm/cmpxchg.h | 69 +- 2 files changed, 32 insertions(+), 39 deletions(-) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index f9c0c6c..2c3d4f0 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -177,6 +177,7 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v) #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) /** * __atomic_add_unless - add unless the number is a given value @@ -444,6 +445,7 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v) #define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) /** * atomic64_add_unless - add unless the number is a given value diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index d1a8d93..17c7e14 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -9,21 +9,20 @@ /* * Atomic exchange * - * Changes the memory location '*ptr' to be val and returns + * Changes the memory location '*p' to be val and returns * the previous value stored there. */ + static __always_inline unsigned long -__xchg_u32(volatile void *p, unsigned long val) +__xchg_u32_local(volatile void *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER "1:lwarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stwcx. 
%3,0,%2 \n\ bne-1b" - PPC_ATOMIC_EXIT_BARRIER : "=" (prev), "+m" (*(volatile unsigned int *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -31,42 +30,34 @@ __xchg_u32(volatile void *p, unsigned long val) return prev; } -/* - * Atomic exchange - * - * Changes the memory location '*ptr' to be val and returns - * the previous value stored there. - */ static __always_inline unsigned long -__xchg_u32_local(volatile void *p, unsigned long val) +__xchg_u32_relaxed(u32 *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( -"1:lwarx %0,0,%2 \n" - PPC405_ERR77(0,%2) -" stwcx. %3,0,%2 \n\ - bne-1b" - : "=" (prev), "+m" (*(volatile unsigned int *)p) +"1:lwarx %0,0,%2\n" + PPC405_ERR77(0, %2) +" stwcx. %3,0,%2\n" +" bne-1b" + : "=" (prev), "+m" (*p) : "r" (p), "r" (val) - : "cc", "memory"); + : "cc"); return prev; } #ifdef CONFIG_PPC64 static __always_inline unsigned long -__xchg_u64(volatile void *p, unsigned long val) +__xchg_u64_local(volatile void *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER "1:ldarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stdcx. %3,0,%2 \n\ bne-1b" - PPC_ATOMIC_EXIT_BARRIER : "=" (prev), "+m" (*(volatile unsigned long *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -75,18 +66,18 @@ __xchg_u64(volatile void *p, unsigned long val) } static __always_inline unsigned long -__xchg_u64_local(volatile void *p, unsigned long val) +__xchg_u64_relaxed(u64 *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( -"1:ldarx %0,0,%2 \n" - PPC405_ERR77(0,%2) -" stdcx. %3,0,%2 \n\ - bne-1b" - : "=" (prev), "+m" (*(volatile unsigned long *)p) +"1:ldarx %0,0,%2\n" + PPC405_ERR77(0, %2) +" stdcx. 
%3,0,%2\n" +" bne-1b" + : "=" (prev), "+m" (*p) : "r" (p), "r" (val) - : "cc", "memory"); + : "cc"); return prev; } @@ -99,14 +90,14 @@ __xchg_u64_local(volatile void *p, unsigned long val) extern void __xchg_called_with_bad_pointer(void); static __always_inline unsigned long -__xchg(volatile void *ptr, unsigned long x, unsigned int size) +__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) { switch (size) { case 4: - return __xchg_u32(ptr, x); + return __xchg_u32_local(ptr, x); #ifdef CONFIG_PPC64 case 8: - return __xchg_u64(ptr, x); + return __xchg_u64_local(ptr, x); #endif } __xchg_called_with_bad_pointer(); @@ -114,25 +105,19 @@ __xchg(volatile void *ptr, unsigned long x, unsigned int size) } static
Re: [PATCH tip/locking/core v4 1/6] powerpc: atomic: Make *xchg and *cmpxchg a full barrier
On Mon, Oct 26, 2015 at 11:20:01AM +0900, Michael Ellerman wrote: > > Sorry guys, these threads are so long I tend not to read them very actively :} > > Looking at the system call path, the straight line path does not include any > barriers. I can't see any hidden in macros either. > > We also have an explicit sync in the switch_to() path, which suggests that we > know system call is not a full barrier. > > Also looking at the architecture, section 1.5 which talks about the > synchronisation that occurs on system calls, defines nothing in terms of > memory ordering, and includes a programming note which says "Unlike the > Synchronize instruction, a context synchronizing operation does not affect the > order in which storage accesses are performed.". > Thank you, Michael. So IIUC, "sc" and "rfid" just imply an execution barrier like "isync" rather than a memory barrier. So memory barriers are needed if a system call needs a memory ordering guarantee. Regards, Boqun > Whether that's actually how it's implemented I don't know, I'll see if I can > find out. > > cheers > signature.asc Description: PGP signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v9 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h
Hi Arnaldo, Could you please take a look at this series and pull it? --- Thanks, Hemant On 10/07/2015 07:55 AM, Hemant Kumar wrote: Its better to remove the dependency on uapi/kvm_perf.h to allow dynamic discovery of kvm events (if its needed). To do this, some extern variables have been introduced with which we can keep the generic functions generic. Signed-off-by: Hemant Kumar--- Changelog: v8 to v9: - Removed the macro definitions. - Changed the access of kvm_entry_trace and kvm_exit_trace - Removed unnecessary formatting. v7 to v8: - Removed unnecessary __unused_parameter modifiers. tools/perf/arch/s390/util/kvm-stat.c | 8 +++- tools/perf/arch/x86/util/kvm-stat.c | 14 +++--- tools/perf/builtin-kvm.c | 32 ++-- tools/perf/util/kvm-stat.h | 5 + 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index a5dbc07..b85a94b 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -10,7 +10,7 @@ */ #include "../../util/kvm-stat.h" -#include +#include define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes); @@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes); define_exit_reasons_table(sie_diagnose_codes, diagnose_codes); define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes); +const char *vcpu_id_str = "id"; +const int decode_str_len = 40; +const char *kvm_exit_reason = "icptcode"; +const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter"; +const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit"; + static void event_icpt_insn_get_key(struct perf_evsel *evsel, struct perf_sample *sample, struct event_key *key) diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 14e4e66..babefda 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -1,5 +1,7 @@ #include 
"../../util/kvm-stat.h" -#include +#include +#include +#include define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS); define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS); @@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = { .name = "VM-EXIT" }; +const char *vcpu_id_str = "vcpu_id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "exit_reason"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + /* * For the mmio events, we treat: * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry @@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#lx:%s", + scnprintf(decode, decode_str_len, "%#lx:%s", (unsigned long)key->key, key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R"); } @@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#llx:%s", + scnprintf(decode, decode_str_len, "%#llx:%s", (unsigned long long)key->key, key->info ? 
"POUT" : "PIN"); } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index fc1cffb..5104c7e 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -31,7 +31,6 @@ #include #ifdef HAVE_KVM_STAT_SUPPORT -#include #include "util/kvm-stat.h" void exit_event_get_key(struct perf_evsel *evsel, @@ -39,12 +38,12 @@ void exit_event_get_key(struct perf_evsel *evsel, struct event_key *key) { key->info = 0; - key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON); + key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason); } bool kvm_exit_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_EXIT_TRACE); + return !strcmp(evsel->name, kvm_exit_trace); } bool exit_event_begin(struct perf_evsel *evsel, @@ -60,7 +59,7 @@ bool exit_event_begin(struct perf_evsel *evsel, bool kvm_entry_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_ENTRY_TRACE); + return !strcmp(evsel->name, kvm_entry_trace); } bool exit_event_end(struct perf_evsel *evsel, @@ -92,7 +91,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm, const char *exit_reason = get_exit_reason(kvm, key->exit_reasons, key->key); -
[PATCH v4] Documentation: dt: binding: fsl: add devicetree binding for describing RCPM
From: Wang Dongsheng RCPM is the Run Control and Power Management module performs all device-level tasks associated with device run control and power management. Add this for freescale powerpc platform and layerscape platform. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian Signed-off-by: Wang Dongsheng --- *v4* - Change patch subject. - A few grammatical mistakes. - Change "rcpm-wakeup" property to "fsl,rcpm-wakeup" property. - Remove a few "fsl,-rcpm" examples. - Now the value of "fsl,#rcpm-wakeup-cells" is not contain rcpm node. - Add a NOTE to describe IPPDEXPCR register. *v3* - Add "fsl,#rcpm-wakeup-cells" for rcpm node. The number of cells correspond rcpm-wakeup property. - Modify rcpm-wakeup property description. *v2* - Remove P4080 example. - Modify rcpm-wakeup property description. diff --git a/Documentation/devicetree/bindings/soc/fsl/rcpm.txt b/Documentation/devicetree/bindings/soc/fsl/rcpm.txt new file mode 100644 index 000..757e0eb --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/rcpm.txt @@ -0,0 +1,64 @@ +* Run Control and Power Management +--- +The RCPM performs all device-level tasks associated with device run control +and power management. + +Required properties: + - reg : Offset and length of the register set of the RCPM block. + - fsl,#rcpm-wakeup-cells : The number of IPPDEXPCR register cells in the + fsl,rcpm-wakeup property. + - compatible : Must contain a chip-specific RCPM block compatible string + and (if applicable) may contain a chassis-version RCPM compatible + string. Chip-specific strings are of the form "fsl,-rcpm", + such as: + * "fsl,p2041-rcpm" + * "fsl,p5020-rcpm" + * "fsl,t4240-rcpm" + + Chassis-version strings are of the form "fsl,qoriq-rcpm-", + such as: + * "fsl,qoriq-rcpm-1.0": for chassis 1.0 rcpm + * "fsl,qoriq-rcpm-2.0": for chassis 2.0 rcpm + * "fsl,qoriq-rcpm-2.1": for chassis 2.1 rcpm + +All references to "1.0" and "2.0" refer to the QorIQ chassis version to +which the chip complies.
+Chassis VersionExample Chips +------ +1.0p4080, p5020, p5040, p2041, p3041 +2.0t4240, b4860, b4420 +2.1t1040, ls1021 + +Example: +The RCPM node for T4240: + rcpm: global-utilities@e2000 { + compatible = "fsl,t4240-rcpm", "fsl,qoriq-rcpm-2.0"; + reg = <0xe2000 0x1000>; + fsl,#rcpm-wakeup-cells = <2>; + }; + +* Freescale RCPM Wakeup Source Device Tree Bindings +--- +Required fsl,rcpm-wakeup property should be added to a device node if the device +can be used as a wakeup source. + + - fsl,rcpm-wakeup: Consists of a pointer to the rcpm node and the IPPDEXPCR + register cells. The number of IPPDEXPCR register cells is defined in + "fsl,#rcpm-wakeup-cells" in the rcpm node. The first register cell is + the bit mask that should be set in IPPDEXPCR0, and the second register + cell is for IPPDEXPCR1, and so on. + + Note: IPPDEXPCR(IP Powerdown Exception Control Register) provides a + mechanism for keeping certain blocks awake during STANDBY and MEM, in + order to use them as wake-up sources. + +Example: + lpuart0: serial@295 { + compatible = "fsl,ls1021a-lpuart"; + reg = <0x0 0x295 0x0 0x1000>; + interrupts = ; + clocks = <>; + clock-names = "ipg"; + fsl,rcpm-wakeup = < 0x0 0x4000>; + status = "disabled"; + }; -- 2.1.0.27.g96db324 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH tip/locking/core v4 1/6] powerpc: atomic: Make *xchg and *cmpxchg a full barrier
On Mon, Oct 26, 2015 at 02:20:21PM +1100, Paul Mackerras wrote: > On Wed, Oct 21, 2015 at 10:18:33AM +0200, Peter Zijlstra wrote: > > On Tue, Oct 20, 2015 at 02:28:35PM -0700, Paul E. McKenney wrote: > > > I am not seeing a sync there, but I really have to defer to the > > > maintainers on this one. I could easily have missed one. > > > > So x86 implies a full barrier for everything that changes the CPL; and > > some form of implied ordering seems a must if you change the privilege > > level unless you tag every single load/store with the priv level at that > > time, which seems the more expensive option. > > > > So I suspect the typical implementation will flush all load/stores, > > change the effective priv level and continue. > > > > This can of course be implemented at a pure per CPU ordering (RCpc), > > which would be in line with the rest of Power, in which case you do > > indeed need an explicit sync to make it visible to other CPUs. > > Right - interrupts and returns from interrupt are context > synchronizing operations, which means they wait until all outstanding > instructions have got to the point where they have reported any > exceptions they're going to report, which means in turn that loads and > stores have completed address translation. But all of that doesn't > imply anything about the visibility of the loads and stores. > > There is a full barrier in the context switch path, but not in the > system call entry/exit path. > Thank you, Paul. That's much clearer now ;-) Regards, Boqun signature.asc Description: PGP signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v18 05/19] perf, tools: Support CPU id matching for x86 v2
From: Andi KleenImplement the code to match CPU types to mapfile types for x86 based on CPUID. This extends an existing similar function, but changes it to use the x86 mapfile cpu description. This allows to resolve event lists generated by jevents. Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- v2: Update to new get_cpuid_str() interface --- tools/perf/arch/x86/util/header.c | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 146d12a..a74a48d 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -19,8 +19,8 @@ cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, : "a" (op)); } -int -get_cpuid(char *buffer, size_t sz) +static int +__get_cpuid(char *buffer, size_t sz, const char *fmt) { unsigned int a, b, c, d, lvl; int family = -1, model = -1, step = -1; @@ -48,7 +48,7 @@ get_cpuid(char *buffer, size_t sz) if (family >= 0x6) model += ((a >> 16) & 0xf) << 4; } - nb = scnprintf(buffer, sz, "%s,%u,%u,%u$", vendor, family, model, step); + nb = scnprintf(buffer, sz, fmt, vendor, family, model, step); /* look for end marker to ensure the entire data fit */ if (strchr(buffer, '$')) { @@ -57,3 +57,21 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +int +get_cpuid(char *buffer, size_t sz) +{ + return __get_cpuid(buffer, sz, "%s,%u,%u,%u$"); +} + +char * +get_cpuid_str(void) +{ + char *buf = malloc(128); + + if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) { + free(buf); + return NULL; + } + return buf; +} -- 2.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v18 02/19] perf, tools, jevents: Program to convert JSON file to C style file
From: Andi KleenThis is a modified version of an earlier patch by Andi Kleen. We expect architectures to describe the performance monitoring events for each CPU in a corresponding JSON file, which look like: [ { "EventCode": "0x00", "UMask": "0x01", "EventName": "INST_RETIRED.ANY", "BriefDescription": "Instructions retired from execution.", "PublicDescription": "Instructions retired from execution.", "Counter": "Fixed counter 1", "CounterHTOff": "Fixed counter 1", "SampleAfterValue": "203", "SampleAfterValue": "203", "MSRIndex": "0", "MSRValue": "0", "TakenAlone": "0", "CounterMask": "0", "Invert": "0", "AnyThread": "0", "EdgeDetect": "0", "PEBS": "0", "PRECISE_STORE": "0", "Errata": "null", "Offcore": "0" } ] We also expect the architectures to provide a mapping between individual CPUs to their JSON files. Eg: GenuineIntel-6-1E,V1,/NHM-EP/NehalemEP_core_V1.json,core which maps each CPU, identified by [vendor, family, model, version, type] to a JSON file. Given these files, the program, jevents:: - locates all JSON files for the architecture, - parses each JSON file and generates a C-style "PMU-events table" (pmu-events.c) - locates a mapfile for the architecture - builds a global table, mapping each model of CPU to the corresponding PMU-events table. The 'pmu-events.c' is generated when building perf and added to libperf.a. The global table pmu_events_map[] table in this pmu-events.c will be used in perf in a follow-on patch. If the architecture does not have any JSON files or there is an error in processing them, an empty mapping file is created. This would allow the build of perf to proceed even if we are not able to provide aliases for events. The parser for JSON files allows parsing Intel style JSON event files. This allows to use an Intel event list directly with perf. The Intel event lists can be quite large and are too big to store in unswappable kernel memory. The conversion from JSON to C-style is straight forward. 
The parser knows (very little) Intel specific information, and can be easily extended to handle fields for other CPUs. The parser code is partially shared with an independent parsing library, which is 2-clause BSD licenced. To avoid any conflicts I marked those files as BSD licenced too. As part of perf they become GPLv2. Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- v2: Address review feedback. Rename option to --event-files v3: Add JSON example v4: Update manpages. v5: Don't remove dot in fixname. Fix compile error. Add include protection. Comment realloc. v6: Include debug/util.h v7: (Sukadev Bhattiprolu) Rebase to 4.0 and fix some conflicts. v8: (Sukadev Bhattiprolu) Move jevents.[hc] to tools/perf/pmu-events/ Rewrite to locate and process arch specific JSON and "map" files; and generate a C file. (Removed acked-by Namhyung Kim due to modest changes to patch) Compile the generated pmu-events.c and add the pmu-events.o to libperf.a v9: [Sukadev Bhattiprolu/Andi Kleen] Rename ->vfm to ->cpuid and use that field to encode the PVR in Power. Allow blank lines in mapfile. [Jiri Olsa] Pass ARCH as a parameter to jevents so we don't have to detect it. [Jiri Olsa] Use the infrastructure to build pmu-events/perf (Makefile changes from Jiri included in this patch). [Jiri Olsa, Andi Kleen] Detect changes to JSON files and rebuild pmu-events.o only if necessary. v11:- [Andi Kleen] Add mapfile, jevents dependency on pmu-events.c - [Jiri Olsa] Be silent if arch doesn't have JSON files - Also silence 'jevents' when parsing JSON files unless V=1 is specified during build. Cleanup error messages. v14:- - [Jiri Olsa] Fix compile error with DEBUG=1; drop unlink() and use "w" mode with fopen(); simplify file_name_to_table_name() v15:- Fix minor conflict in tools/perf/Makefile.perf when rebasing to recent perf/core.
v16:- Rebase to upstream; fix conflicts in tools/perf/Makefile.perf v18:- Rebase to upstream; fix conflicts in tools/perf/Makefile.perf --- tools/perf/Makefile.perf | 28 +- tools/perf/pmu-events/Build| 11 + tools/perf/pmu-events/jevents.c| 686 + tools/perf/pmu-events/jevents.h| 17 + tools/perf/pmu-events/json.h | 3 + tools/perf/pmu-events/pmu-events.h | 35 ++ 6 files changed, 776 insertions(+), 4 deletions(-) create mode 100644 tools/perf/pmu-events/Build create mode 100644 tools/perf/pmu-events/jevents.c
[PATCH v18 11/19] perf, tools: Add alias support for long descriptions
Previously we were dropping the useful longer descriptions that some events have in the event list completely. Now that jevents provides support for longer descriptions (see previous patch), add support for parsing the long descriptions Signed-off-by: Andi KleenSigned-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/util/parse-events.c | 5 +++-- tools/perf/util/parse-events.h | 3 ++- tools/perf/util/pmu.c | 15 ++- tools/perf/util/pmu.h | 4 +++- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8ec909e..f05d4c4 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1795,7 +1795,8 @@ out_enomem: /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only, bool quiet_flag) +void print_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1805,7 +1806,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag) print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only, quiet_flag); + print_pmu_events(event_glob, name_only, quiet_flag, long_desc); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d78d34a..6524918 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -140,7 +140,8 @@ void parse_events_update_lists(struct list_head *list_event, void parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); -void print_events(const char *event_glob, bool name_only, bool quiet); +void print_events(const char *event_glob, bool name_only, bool quiet, + bool long_desc); struct event_symbol { const char 
*symbol; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index eacc733..1e3b3bf1 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -210,7 +210,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc, char *val) +char *desc, char *val, char *long_desc) { struct perf_pmu_alias *alias; int ret; @@ -243,6 +243,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, } alias->desc = desc ? strdup(desc) : NULL; + alias->long_desc = long_desc ? strdup(long_desc) : + desc ? strdup(desc) : NULL; list_add_tail(>list, list); @@ -260,7 +262,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; - return __perf_pmu__new_alias(list, dir, name, NULL, buf); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -508,7 +510,8 @@ static int pmu_add_cpu_aliases(struct list_head *head) /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, - (char *)pe->desc, (char *)pe->event); + (char *)pe->desc, (char *)pe->event, + (char *)pe->long_desc); } out: @@ -1068,7 +1071,8 @@ static void wordwrap(char *s, int start, int max, int corr) } } -void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag) +void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1115,7 +1119,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag) if (!aliases[j].name) goto out_enomem; - aliases[j].desc = alias->desc; + aliases[j].desc = long_desc ? 
alias->long_desc : + alias->desc; j++; } if (pmu->selectable && diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 42999c7..1aa614e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -39,6 +39,7 @@ struct perf_pmu_info { struct perf_pmu_alias { char *name; char *desc; + char *long_desc;
[PATCH v18 16/19] perf, tools: Add README for info on parsing JSON/map files
Signed-off-by: Sukadev BhattiproluAcked-by: Jiri Olsa --- tools/perf/pmu-events/README | 122 +++ 1 file changed, 122 insertions(+) create mode 100644 tools/perf/pmu-events/README diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README new file mode 100644 index 000..da57cb5 --- /dev/null +++ b/tools/perf/pmu-events/README @@ -0,0 +1,122 @@ + +The contents of this directory allow users to specify PMU events in +their CPUs by their symbolic names rather than raw event codes (see +example below). + +The main program in this directory, is the 'jevents', which is built and +executed _before_ the perf binary itself is built. + +The 'jevents' program tries to locate and process JSON files in the directory +tree tools/perf/pmu-events/arch/foo. + + - Regular files with '.json' extension in the name are assumed to be + JSON files, each of which describes a set of PMU events. + + - Regular files with basename starting with 'mapfile.csv' are assumed + to be a CSV file that maps a specific CPU to its set of PMU events. + (see below for mapfile format) + + - Directories are traversed, but all other files are ignored. + +Using the JSON files and the mapfile, 'jevents' generates the C source file, +'pmu-events.c', which encodes the two sets of tables: + + - Set of 'PMU events tables' for all known CPUs in the architecture, + (one table like the following, per JSON file; table name 'pme_power8' + is derived from JSON file name, 'power8.json'). + + struct pmu_event pme_power8[] = { + + ... + + { + .name = "pm_1plus_ppc_cmpl", + .event = "event=0x100f2", + .desc = "1 or more ppc insts finished,", + }, + + ... + } + + - A 'mapping table' that maps each CPU of the architecture, to its + 'PMU events table' + + struct pmu_events_map pmu_events_map[] = { + { + .cpuid = "004b", + .version = "1", + .type = "core", + .table = pme_power8 + }, + ... 
+ + }; + +After the 'pmu-events.c' is generated, it is compiled and the resulting +'pmu-events.o' is added to 'libperf.a' which is then used to build perf. + +NOTES: + 1. Several CPUs can support same set of events and hence use a common + JSON file. Hence several entries in the pmu_events_map[] could map + to a single 'PMU events table'. + + 2. The 'pmu-events.h' has an extern declaration for the mapping table + and the generated 'pmu-events.c' defines this table. + + 3. _All_ known CPU tables for architecture are included in the perf + binary. + +At run time, perf determines the actual CPU it is running on, finds the +matching events table and builds aliases for those events. This allows +users to specify events by their name: + + $ perf stat -e pm_1plus_ppc_cmpl sleep 1 + +where 'pm_1plus_ppc_cmpl' is a Power8 PMU event. + +In case of errors when processing files in the tools/perf/pmu-events/arch +directory, 'jevents' tries to create an empty mapping file to allow the perf +build to succeed even if the PMU event aliases cannot be used. + +However some errors in processing may cause the perf build to fail. + +Mapfile format +=== + +The mapfile.csv format is expected to be: + + Header line + CPUID,Version,File/path/name.json,Type + +where: + + Comma: + is the required field delimiter (i.e other fields cannot + have commas within them). + + Comments: + Lines in which the first character is either '\n' or '#' + are ignored. + + Header line + The header line is the first line in the file, which is + _IGNORED_. It can be a comment (begin with '#') or empty. + + CPUID: + CPUID is an arch-specific char string, that can be used + to identify CPU (and associate it with a set of PMU events + it supports). Multiple CPUIDS can point to the same + File/path/name.json. + + Example: + CPUID == 'GenuineIntel-6-2E' (on x86). + CPUID == '004b0100' (PVR value in Powerpc) + Version: + is the Version of the mapfile. 
+ + File/path/name.json: + is the pathname for the JSON file, relative to the directory + containing the mapfile.csv + + Type: + indicates whether the events are "core" or "uncore" events. -- 2.5.3 ___ Linuxppc-dev
[PATCH v18 06/19] perf, tools: Support alias descriptions
From: Andi KleenAdd support to print alias descriptions in perf list, which are taken from the generated event files. The sorting code is changed to put the events with descriptions at the end. The descriptions are printed as possibly multiple word wrapped lines. Example output: % perf list ... arith.fpu_div [Divide operations executed] arith.fpu_div_active [Cycles when divider is busy executing divide operations] Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- Changelog - Delete a redundant free() Changelog[v14] - [Jiri Olsa] Fail, rather than continue if strdup() returns NULL; remove unnecessary __maybe_unused. --- tools/perf/util/pmu.c | 82 +-- tools/perf/util/pmu.h | 1 + 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 57059ea..e011398 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -209,7 +209,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc __maybe_unused, char *val) +char *desc, char *val) { struct perf_pmu_alias *alias; int ret; @@ -241,6 +241,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, perf_pmu__parse_snapshot(alias, dir, name); } + alias->desc = desc ? 
strdup(desc) : NULL; + list_add_tail(>list, list); return 0; @@ -1021,11 +1023,42 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, return buf; } -static int cmp_string(const void *a, const void *b) +struct pair { + char *name; + char *desc; +}; + +static int cmp_pair(const void *a, const void *b) +{ + const struct pair *as = a; + const struct pair *bs = b; + + /* Put extra events last */ + if (!!as->desc != !!bs->desc) + return !!as->desc - !!bs->desc; + return strcmp(as->name, bs->name); +} + +static void wordwrap(char *s, int start, int max, int corr) { - const char * const *as = a; - const char * const *bs = b; - return strcmp(*as, *bs); + int column = start; + int n; + + while (*s) { + int wlen = strcspn(s, " \t"); + + if (column + wlen >= max && column > start) { + printf("\n%*s", start, ""); + column = start + corr; + } + n = printf("%s%.*s", column > start ? " " : "", wlen, s); + if (n <= 0) + break; + s += wlen; + column += n; + while (isspace(*s)) + s++; + } } void print_pmu_events(const char *event_glob, bool name_only) @@ -1035,7 +1068,9 @@ void print_pmu_events(const char *event_glob, bool name_only) char buf[1024]; int printed = 0; int len, j; - char **aliases; + struct pair *aliases; + int numdesc = 0; + int columns = 78; pmu = NULL; len = 0; @@ -1045,14 +1080,15 @@ void print_pmu_events(const char *event_glob, bool name_only) if (pmu->selectable) len++; } - aliases = zalloc(sizeof(char *) * len); + aliases = zalloc(sizeof(struct pair) * len); if (!aliases) goto out_enomem; pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { list_for_each_entry(alias, >aliases, list) { - char *name = format_alias(buf, sizeof(buf), pmu, alias); + char *name = alias->desc ? 
alias->name : + format_alias(buf, sizeof(buf), pmu, alias); bool is_cpu = !strcmp(pmu->name, "cpu"); if (event_glob != NULL && @@ -1061,12 +1097,18 @@ void print_pmu_events(const char *event_glob, bool name_only) event_glob continue; - if (is_cpu && !name_only) + if (is_cpu && !name_only && !alias->desc) name = format_alias_or(buf, sizeof(buf), pmu, alias); - aliases[j] = strdup(name); - if (aliases[j] == NULL) + aliases[j].name = name; + if (is_cpu && !name_only && !alias->desc) + aliases[j].name = format_alias_or(buf, sizeof(buf), + pmu, alias); +
[PATCH v17 00/19] perf, tools: Add support for PMU events in JSON format
CPUs support a large number of performance monitoring events (PMU events) and often these events are very specific to an architecture/model of the CPU. To use most of these PMU events with perf, we currently have to identify them by their raw codes: perf stat -e r100f2 sleep 1 This patchset allows architectures to specify these PMU events in JSON files located in 'tools/perf/pmu-events/arch/' of the mainline tree. The events from the JSON files for the architecture are then built into the perf binary. At run time, perf identifies the specific set of events for the CPU and creates "event aliases". These aliases allow users to specify events by "name" as: perf stat -e pm_1plus_ppc_cmpl sleep 1 The file, 'tools/perf/pmu-events/README' in [PATCH 16/16] gives more details. Note: - All known events tables for the architecture are included in the perf binary. - For architectures that don't have any JSON files, an empty mapping table is created and they should continue to build. Thanks to input from Andi Kleen, Jiri Olsa, Namhyung Kim and Ingo Molnar. These patches are available from: https://github.com/sukadev/linux.git Branch Description -- json-code-v18 Source Code only json-data-9 x86 and Powerpc datafiles only json-code+data-v18 Both code and data (for build/test) NOTE: Only "source code" patches (i.e those in json-code-v18) are being emailed. Please pull the "data files" from the json-data-8 branch. Changelog[v18] Rebase to recent perf/core; fix minor merge conflicts. Changelog[v17] Rebase to recent perf/core; couple of small fixes to processing Intel JSON files; allow case-insensitive PMU event names. Changelog[v16] Rebase to recent perf/core; fix minor merge conflicts; drop 3 patches that were merged into perf/core. Changelog[v15] Code changes: - Fix 'perf list' usage string and update man page. - Remove a redundant __maybe_unused tag. - Rebase to recent perf/core branch. 
Data files updates: json-files-5 branch - Rebase to perf/intel-json-files-5 from Andi Kleen - Add patch from Madhavan Srinivasan for couple more Powerpc models Changelog[v14] Comments from Jiri Olsa: - Change parameter name/type for pmu_add_cpu_aliases (from void *data to list_head *head) - Use asprintf() in file_name_to_tablename() and simplify/reorg code. - Use __weak definition from - Use fopen() with mode "w" and eliminate unlink() - Remove minor TODO. - Add error check for return value from strdup() in print_pmu_events(). - Move independent changes from patches 3,11,12 .. to separate patches for easier review/backport. - Clarify mapfile's "header line support" in patch description. - Fix build failure with DEBUG=1 Comment from Andi Kleen: - In tools/perf/pmu-events/Build, check for 'mapfile.csv' rather than 'mapfile*' Misc: - Minor changes/clarifications to tools/perf/pmu-events/README. Changelog[v13] Version: Individual patches have their own history :-) that I am preserving. Patchset version (v13) is for overall patchset and is somewhat arbitrary. 
- Added support for "categories" of events to perf - Add mapfile, jevents build dependency on pmu-events.c - Silence jevents when parsing JSON files unless V=1 is specified - Cleanup error messages - Fix memory leak with ->cpuid - Rebase to Arnaldo's tree - Allow overriding CPUID via environment variable - Support long descriptions for events - Handle header line in mapfile.csv - Cleanup JSON files (trim PublicDescription if identical to/prefix of BriefDescription field) Andi Kleen (12): perf, tools: Add jsmn `jasmine' JSON parser perf, tools, jevents: Program to convert JSON file to C style file perf, tools: Support CPU id matching for x86 v2 perf, tools: Support alias descriptions perf, tools: Query terminal width and use in perf list perf, tools: Add a --no-desc flag to perf list perf, tools: Add override support for event list CPUID perf, tools: Add support for event list topics perf, tools: Handle header line in mapfile perf, tools: Make alias matching case-insensitive perf, tools, pmu-events: Fix fixed counters on Intel perf, tools, pmu-events: Add Skylake frontend MSR support Sukadev Bhattiprolu (7): perf, tools: Use pmu_events table to create aliases perf, tools: Support CPU ID matching for Powerpc perf, tools, jevents: Add support for long descriptions perf, tools: Add alias support for long descriptions perf, tools: Support long descriptions with perf
[PATCH v18 03/19] perf, tools: Use pmu_events table to create aliases
At run time (when 'perf' is starting up), locate the specific table of PMU events that corresponds to the current CPU. Using that table, create aliases for each of the PMU events in the CPU. Then use these aliases to parse the user specified perf event. In short this would allow the user to specify events using their aliases rather than raw event codes. Based on input and some earlier patches from Andi Kleen, Jiri Olsa. Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- Changelog[v4] - Split off unrelated code into separate patches. Changelog[v3] - [Jiri Olsa] Fix memory leak in cpuid Changelog[v2] - [Andi Kleen] Replace pmu_events_map->vfm with a generic "cpuid". --- tools/perf/util/header.h | 1 + tools/perf/util/pmu.c| 61 2 files changed, 62 insertions(+) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 05f27cb..d02b2f9 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -133,4 +133,5 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned); */ int get_cpuid(char *buffer, size_t sz); +char *get_cpuid_str(void); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e4b173d..57059ea 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,8 @@ #include "pmu.h" #include "parse-events.h" #include "cpumap.h" +#include "header.h" +#include "pmu-events/pmu-events.h" struct perf_pmu_format { char *name; @@ -449,6 +451,62 @@ static struct cpu_map *pmu_cpumask(const char *name) return cpus; } +/* + * Return the CPU id as a raw string. + * + * Each architecture should provide a more precise id string that + * can be use to match the architecture's "mapfile". + */ +char * __weak get_cpuid_str(void) +{ + return NULL; +} + +/* + * From the pmu_events_map, find the table of PMU events that corresponds + * to the current running CPU. Then, add all PMU events from that table + * as aliases. 
+ */ +static int pmu_add_cpu_aliases(struct list_head *head) +{ + int i; + struct pmu_events_map *map; + struct pmu_event *pe; + char *cpuid; + + cpuid = get_cpuid_str(); + if (!cpuid) + return 0; + + i = 0; + while (1) { + map = &pmu_events_map[i++]; + if (!map->table) + goto out; + + if (!strcmp(map->cpuid, cpuid)) + break; + } + + /* +* Found a matching PMU events table. Create aliases +*/ + i = 0; + while (1) { + pe = &map->table[i++]; + if (!pe->name) + break; + + /* need type casts to override 'const' */ + __perf_pmu__new_alias(head, NULL, (char *)pe->name, + (char *)pe->desc, (char *)pe->event); + } + +out: + free(cpuid); + return 0; +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { @@ -473,6 +531,9 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, &aliases)) return NULL; + if (!strcmp(name, "cpu")) + (void)pmu_add_cpu_aliases(&aliases); + if (pmu_type(name, &type)) return NULL; -- 2.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v18 15/19] perf, tools: Handle header line in mapfile
From: Andi Kleen To work with existing mapfiles, assume that the first line in 'mapfile.csv' is a header line and skip over it. Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- Changelog[v2] All architectures may not use the "Family" to identify. So, assume first line is header. --- tools/perf/pmu-events/jevents.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index ea3474b..7347cca 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -462,7 +462,12 @@ static int process_mapfile(FILE *outfp, char *fpath) print_mapping_table_prefix(outfp); - line_num = 0; + /* Skip first line (header) */ + p = fgets(line, n, mapfp); + if (!p) + goto out; + + line_num = 1; while (1) { char *cpuid, *version, *type, *fname; @@ -506,8 +511,8 @@ static int process_mapfile(FILE *outfp, char *fpath) fprintf(outfp, "},\n"); } +out: print_mapping_table_suffix(outfp); - return 0; } -- 2.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/pseries: Correct string length in pseries_of_derive_parent()
Commit a030e1e4bbd085bbcfd0a23f8d355fcd41f39bed made a change to use kstrndup() instead of kmalloc() + strlcpy() in pseries_of_derive_parent() which introduces a subtle change in the parent path name generated. The kstrndup() routine will copy n characters followed by a terminating null, whereas strlcpy() will copy n-1 characters and add a terminating null. This slight difference results in having a parent path that includes the trailing '/' character, i.e. "/cpus/" vs. "/cpus". This then causes the subsequent call to of_find_node_by_path() to fail, and in the case of DLPAR add operations, the DLPAR request fails. This patch reduces the total length of the string to copy in kstrndup() by 1 so we no longer copy the trailing '/'. Signed-off-by: Nathan Fontenot --- arch/powerpc/platforms/pseries/of_helpers.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c index 4417afe..6d90378 100644 --- a/arch/powerpc/platforms/pseries/of_helpers.c +++ b/arch/powerpc/platforms/pseries/of_helpers.c @@ -24,7 +24,7 @@ struct device_node *pseries_of_derive_parent(const char *path) return ERR_PTR(-EINVAL); if (tail > path + 1) { - parent_path = kstrndup(path, tail - path, GFP_KERNEL); + parent_path = kstrndup(path, (tail - 1) - path, GFP_KERNEL); if (!parent_path) return ERR_PTR(-ENOMEM); } ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/pseries: Verify CPU doesn't exist before adding
On 10/25/2015 11:30 AM, Denis Kirjanov wrote: > On 10/23/15, Nathan Fontenot wrote: >> When DLPAR adding a CPU we should verify that the CPU does not already >> exist. Failure to do so can generate a kernel oops; >> >> [9.465585] kernel BUG at arch/powerpc/platforms/pseries/dlpar.c:382! >> [9.465796] Oops: Exception in kernel mode, sig: 5 [#1] >> >> This oops can be generated by causing a probe to be performed on a cpu >> by writing to the sysfs cpu probe file (/sys/devices/system/cpu/probe). >> This patch adds a check for the existence of cpu prior to probing the cpu >> so userspace doing the wrong thing won't trigger a BUG_ON(). > > Hi Nathan, > > Can you please tell how to trigger the oops manually since I've tried to write > a core number to the probe file, but with no luck. Always get -EINVAL. Triggering the oops manually may be a bit trickier than just writing to the sysfs file. First, make sure you are writing the drc index of a cpu that is already present. You can see a list of present cpus with 'lsslot -c cpu'. You may be able to trigger the oops by just writing the drc index to the sysfs file but will likely get -EINVAL since the probe code tries to acquire the cpu from firmware and call configure-connector, both of which will likely fail before we try to online the cpu and see the BUG_ON. For a kvm guest, you should be able to generate the oops by killing the rtas_errd daemon, doing a cpu hotplug add from the host, then rebooting the guest. If the rtas_errd daemon is set to autostart you should see the oops at boot, otherwise you can manually start rtas_errd and see the oops. For a Power LPAR, the process would be much trickier. You would have to do some hacking to the drmgr command (which writes the drc index to the probe file) to trick it into trying to add the same cpu twice when invoking cpu hotplug from the HMC. Hope that helps. -Nathan > > Thanks! 
>> >> Signed-off-by: Nathan Fontenot >> --- >> arch/powerpc/platforms/pseries/dlpar.c | 43 >> +--- >> 1 file changed, 39 insertions(+), 4 deletions(-) >> >> diff --git a/arch/powerpc/platforms/pseries/dlpar.c >> b/arch/powerpc/platforms/pseries/dlpar.c >> index f244dcb..fe6320d 100644 >> --- a/arch/powerpc/platforms/pseries/dlpar.c >> +++ b/arch/powerpc/platforms/pseries/dlpar.c >> @@ -381,6 +381,32 @@ out: >> >> } >> >> +static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index) >> +{ >> +struct device_node *child = NULL; >> +u32 my_drc_index; >> +bool found; >> +int rc; >> + >> +/* Assume cpu doesn't exist */ >> +found = false; >> + >> +for_each_child_of_node(parent, child) { >> +rc = of_property_read_u32(child, "ibm,my-drc-index", >> + _drc_index); >> +if (rc) >> +continue; >> + >> +if (my_drc_index == drc_index) { >> +of_node_put(child); >> +found = true; >> +break; >> +} >> +} >> + >> +return found; >> +} >> + >> static ssize_t dlpar_cpu_probe(const char *buf, size_t count) >> { >> struct device_node *dn, *parent; >> @@ -391,14 +417,23 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t >> count) >> if (rc) >> return -EINVAL; >> >> -rc = dlpar_acquire_drc(drc_index); >> -if (rc) >> -return -EINVAL; >> - >> parent = of_find_node_by_path("/cpus"); >> if (!parent) >> return -ENODEV; >> >> +if (dlpar_cpu_exists(parent, drc_index)) { >> +of_node_put(parent); >> +printk(KERN_WARNING "CPU with drc index %x already exists\n", >> + drc_index); >> +return -EINVAL; >> +} >> + >> +rc = dlpar_acquire_drc(drc_index); >> +if (rc) { >> +of_node_put(parent); >> +return -EINVAL; >> +} >> + >> dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); >> of_node_put(parent); >> if (!dn) { >> >> ___ >> Linuxppc-dev mailing list >> Linuxppc-dev@lists.ozlabs.org >> https://lists.ozlabs.org/listinfo/linuxppc-dev > ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v18 10/19] perf, tools, jevents: Add support for long descriptions
Implement support in jevents to parse long descriptions for events that may have them in the JSON files. A follow on patch will make this long description available to user through the 'perf list' command. Signed-off-by: Andi KleenSigned-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. --- tools/perf/pmu-events/jevents.c| 31 +++ tools/perf/pmu-events/jevents.h| 3 ++- tools/perf/pmu-events/pmu-events.h | 1 + 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 5f7603b..a8507c9 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -203,7 +203,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) } static int print_events_table_entry(void *data, char *name, char *event, - char *desc) + char *desc, char *long_desc) { FILE *outfp = data; /* @@ -215,6 +215,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.name = \"%s\",\n", name); fprintf(outfp, "\t.event = \"%s\",\n", event); fprintf(outfp, "\t.desc = \"%s\",\n", desc); + if (long_desc && long_desc[0]) + fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc); fprintf(outfp, "},\n"); @@ -235,7 +237,8 @@ static void print_events_table_suffix(FILE *outfp) /* Call func with each event in the json file */ int json_events(const char *fn, - int (*func)(void *data, char *name, char *event, char *desc), + int (*func)(void *data, char *name, char *event, char *desc, + char *long_desc), void *data) { int err = -EIO; @@ -254,6 +257,8 @@ int json_events(const char *fn, tok = tokens + 1; for (i = 0; i < tokens->size; i++) { char *event = NULL, *desc = NULL, *name = NULL; + char *long_desc = NULL; + char *extra_desc = NULL; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; @@ -279,6 +284,9 @@ int json_events(const char *fn, } else if 
(json_streq(map, field, "BriefDescription")) { addfield(map, , "", "", val); fixdesc(desc); + } else if (json_streq(map, field, "PublicDescription")) { + addfield(map, _desc, "", "", val); + fixdesc(long_desc); } else if (json_streq(map, field, "PEBS") && nz) { precise = val; } else if (json_streq(map, field, "MSRIndex") && nz) { @@ -287,10 +295,10 @@ int json_events(const char *fn, msrval = val; } else if (json_streq(map, field, "Errata") && !json_streq(map, val, "null")) { - addfield(map, , ". ", + addfield(map, _desc, ". ", " Spec update: ", val); } else if (json_streq(map, field, "Data_LA") && nz) { - addfield(map, , ". ", + addfield(map, _desc, ". ", " Supports address when precise", NULL); } @@ -298,19 +306,26 @@ int json_events(const char *fn, } if (precise && !strstr(desc, "(Precise Event)")) { if (json_streq(map, precise, "2")) - addfield(map, , " ", "(Must be precise)", - NULL); + addfield(map, _desc, " ", + "(Must be precise)", NULL); else - addfield(map, , " ", + addfield(map, _desc, " ", "(Precise event)", NULL); } + if (desc && extra_desc) + addfield(map, , " ", extra_desc, NULL); + if (long_desc && extra_desc) + addfield(map, _desc, " ", extra_desc, NULL); if (msr != NULL) addfield(map, , ",", msr->pname, msrval); fixname(name); - err = func(data, name, event, desc); + + err = func(data, name, event, desc, long_desc); free(event);
[PATCH v18 12/19] perf, tools: Support long descriptions with perf list
Previously we were completely dropping the useful longer descriptions that some events have in the event list. This patch makes them appear with perf list. Old perf list: baclears: baclears.all [Counts the number of baclears] vs new: perf list -v: ... baclears: baclears.all [The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end. The BACLEARS.ANY event counts the number of baclears for any type of branch] Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- Changelog[v15] - [Jiri Olsa, Andi Kleen] Fix usage strings; update man page. Changelog[v14] - [Jiri Olsa] Break up independent parts of the patch into separate patches. Changelog[v18]: - Fix minor conflict in tools/perf/builtin-list.c; add long_desc_flag parameter to new print_pmu_events() call site. --- tools/perf/Documentation/perf-list.txt | 6 +- tools/perf/builtin-list.c | 16 +++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 13aeb09..4664643 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,7 @@ perf-list - List all symbolic event types SYNOPSIS [verse] -'perf list' [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob] DESCRIPTION --- @@ -20,6 +20,10 @@ OPTIONS --no-desc:: Don't print descriptions. +-v:: +--long-desc:: +Print longer event descriptions. 
+ [[EVENT_MODIFIERS]] EVENT MODIFIERS diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 43b375d..6b5d0a4 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -22,14 +22,17 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; bool raw_dump = false; + bool long_desc_flag = false; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", _dump, "Dump raw events"), OPT_BOOLEAN('d', "desc", _flag, "Print extra event descriptions. --no-desc to not print."), + OPT_BOOLEAN('v', "long-desc", _desc_flag, + "Print longer event descriptions."), OPT_END() }; const char * const list_usage[] = { - "perf list [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob]", + "perf list [] [hw|sw|cache|tracepoint|pmu|event_glob]", NULL }; @@ -44,7 +47,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, raw_dump, !desc_flag); + print_events(NULL, raw_dump, !desc_flag, long_desc_flag); return 0; } @@ -65,12 +68,14 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) strcmp(argv[i], "hwcache") == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, raw_dump, !desc_flag); + print_pmu_events(NULL, raw_dump, !desc_flag, + long_desc_flag); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; if (sep == NULL) { - print_events(argv[i], raw_dump, !desc_flag); + print_events(argv[i], raw_dump, !desc_flag, + long_desc_flag); continue; } sep_idx = sep - argv[i]; @@ -91,7 +96,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_symbol_events(s, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); print_hwcache_events(s, raw_dump); - print_pmu_events(s, raw_dump, !desc_flag); + print_pmu_events(s, raw_dump, !desc_flag, + long_desc_flag); 
print_tracepoint_events(NULL, s, raw_dump); free(s); } -- 2.5.3 ___ Linuxppc-dev mailing list
[PATCH v18 13/19] perf, tools, jevents: Add support for event topics
Allow assigning categories "Topics" field to the PMU events i.e. process the topic field from the JSON file and add a corresponding topic field to the generated C events tables. Signed-off-by: Andi KleenSigned-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- Changelog[v14] [Jiri Olsa] Move this independent code off into a separate patch. --- tools/perf/pmu-events/jevents.c| 12 +--- tools/perf/pmu-events/jevents.h| 2 +- tools/perf/pmu-events/pmu-events.h | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index a8507c9..ea3474b 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -203,7 +203,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) } static int print_events_table_entry(void *data, char *name, char *event, - char *desc, char *long_desc) + char *desc, char *long_desc, char *topic) { FILE *outfp = data; /* @@ -217,6 +217,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.desc = \"%s\",\n", desc); if (long_desc && long_desc[0]) fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc); + if (topic) + fprintf(outfp, "\t.topic = \"%s\",\n", topic); fprintf(outfp, "},\n"); @@ -238,7 +240,7 @@ static void print_events_table_suffix(FILE *outfp) /* Call func with each event in the json file */ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc), + char *long_desc, char *topic), void *data) { int err = -EIO; @@ -259,6 +261,7 @@ int json_events(const char *fn, char *event = NULL, *desc = NULL, *name = NULL; char *long_desc = NULL; char *extra_desc = NULL; + char *topic = NULL; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; @@ -297,6 +300,8 @@ int json_events(const char *fn, !json_streq(map, val, "null")) { addfield(map, _desc, ". 
", " Spec update: ", val); + } else if (json_streq(map, field, "Topic")) { + addfield(map, , "", "", val); } else if (json_streq(map, field, "Data_LA") && nz) { addfield(map, _desc, ". ", " Supports address when precise", @@ -320,12 +325,13 @@ int json_events(const char *fn, addfield(map, , ",", msr->pname, msrval); fixname(name); - err = func(data, name, event, desc, long_desc); + err = func(data, name, event, desc, long_desc, topic); free(event); free(desc); free(name); free(long_desc); free(extra_desc); + free(topic); if (err) break; tok += j; diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index b0eb274..9ffcb89 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -3,7 +3,7 @@ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc), + char *long_desc, char *topic), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 711f049..6b69f4b 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -9,6 +9,7 @@ struct pmu_event { const char *event; const char *desc; const char *long_desc; + const char *topic; }; /* -- 2.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v18 08/19] perf, tools: Add a --no-desc flag to perf list
From: Andi KleenAdd a --no-desc flag to perf list to not print the event descriptions that were earlier added for JSON events. This may be useful to get a less crowded listing. It's still default to print descriptions as that is the more useful default for most users. Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- v2: Rename --quiet to --no-desc. Add option to man page. v18: Fix minor conflict in tools/perf/builtin-list.c; Add !desc_flag to the newly introduced print_pmu_events() call site. --- tools/perf/Documentation/perf-list.txt | 8 +++- tools/perf/builtin-list.c | 14 +- tools/perf/util/parse-events.c | 4 ++-- tools/perf/util/parse-events.h | 2 +- tools/perf/util/pmu.c | 4 ++-- tools/perf/util/pmu.h | 2 +- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 79483f4..13aeb09 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,13 +8,19 @@ perf-list - List all symbolic event types SYNOPSIS [verse] -'perf list' [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob] DESCRIPTION --- This command displays the symbolic event types which can be selected in the various perf commands with the -e option. +OPTIONS +--- +--no-desc:: +Don't print descriptions. + + [[EVENT_MODIFIERS]] EVENT MODIFIERS --- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index bf679e2..43b375d 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -16,16 +16,20 @@ #include "util/pmu.h" #include "util/parse-options.h" +static bool desc_flag = true; + int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; bool raw_dump = false; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", _dump, "Dump raw events"), + OPT_BOOLEAN('d', "desc", _flag, + "Print extra event descriptions. 
--no-desc to not print."), OPT_END() }; const char * const list_usage[] = { - "perf list [hw|sw|cache|tracepoint|pmu|event_glob]", + "perf list [--no-desc] [hw|sw|cache|tracepoint|pmu|event_glob]", NULL }; @@ -40,7 +44,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, raw_dump); + print_events(NULL, raw_dump, !desc_flag); return 0; } @@ -61,12 +65,12 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) strcmp(argv[i], "hwcache") == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, raw_dump); + print_pmu_events(NULL, raw_dump, !desc_flag); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; if (sep == NULL) { - print_events(argv[i], raw_dump); + print_events(argv[i], raw_dump, !desc_flag); continue; } sep_idx = sep - argv[i]; @@ -87,7 +91,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_symbol_events(s, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); print_hwcache_events(s, raw_dump); - print_pmu_events(s, raw_dump); + print_pmu_events(s, raw_dump, !desc_flag); print_tracepoint_events(NULL, s, raw_dump); free(s); } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 991bbd4..8ec909e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1795,7 +1795,7 @@ out_enomem: /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only) +void print_events(const char *event_glob, bool name_only, bool quiet_flag) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1805,7 +1805,7 @@ void print_events(const char *event_glob, bool name_only) print_hwcache_events(event_glob, name_only);
[PATCH v18 09/19] perf, tools: Add override support for event list CPUID
From: Andi Kleen Add a PERF_CPUID environment variable to override the CPUID of the current CPU (within the current architecture). This is useful for testing, so that all event lists can be tested on a single system. Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- v2: Fix double free in earlier version. Print actual CPUID being used with verbose option. --- tools/perf/util/pmu.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a209246..eacc733 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -477,10 +477,16 @@ static int pmu_add_cpu_aliases(struct list_head *head) struct pmu_event *pe; char *cpuid; - cpuid = get_cpuid_str(); + cpuid = getenv("PERF_CPUID"); + if (cpuid) + cpuid = strdup(cpuid); + if (!cpuid) + cpuid = get_cpuid_str(); if (!cpuid) return 0; + pr_debug("Using CPUID %s\n", cpuid); + i = 0; while (1) { map = &pmu_events_map[i++]; -- 2.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v18 01/19] perf, tools: Add jsmn `jasmine' JSON parser
From: Andi KleenI need a JSON parser. This adds the simplest JSON parser I could find -- Serge Zaitsev's jsmn `jasmine' -- to the perf library. I merely converted it to (mostly) Linux style and added support for non 0 terminated input. The parser is quite straight forward and does not copy any data, just returns tokens with offsets into the input buffer. So it's relatively efficient and simple to use. The code is not fully checkpatch clean, but I didn't want to completely fork the upstream code. Original source: http://zserge.bitbucket.org/jsmn.html In addition I added a simple wrapper that mmaps a json file and provides some straight forward access functions. Used in follow-on patches to parse event files. Acked-by: Namhyung Kim Acked-by: Jiri Olsa Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu --- v2: Address review feedback. v3: Minor checkpatch fixes. v4 (by Sukadev Bhattiprolu) - Rebase to 4.0 and fix minor conflicts in tools/perf/Makefile.perf - Report error if specified events file is invalid. v5 (Sukadev Bhattiprolu) - Move files to tools/perf/pmu-events/ since parsing of JSON file now occurs when _building_ rather than running perf. --- tools/perf/pmu-events/jsmn.c | 313 +++ tools/perf/pmu-events/jsmn.h | 67 + tools/perf/pmu-events/json.c | 162 ++ tools/perf/pmu-events/json.h | 36 + 4 files changed, 578 insertions(+) create mode 100644 tools/perf/pmu-events/jsmn.c create mode 100644 tools/perf/pmu-events/jsmn.h create mode 100644 tools/perf/pmu-events/json.c create mode 100644 tools/perf/pmu-events/json.h diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c new file mode 100644 index 000..11d1fa1 --- /dev/null +++ b/tools/perf/pmu-events/jsmn.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2010 Serge A. 
Zaitsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Slightly modified by AK to not assume 0 terminated input. + */ + +#include +#include "jsmn.h" + +/* + * Allocates a fresh unused token from the token pool. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *tok; + + if ((unsigned)parser->toknext >= num_tokens) + return NULL; + tok = [parser->toknext++]; + tok->start = tok->end = -1; + tok->size = 0; + return tok; +} + +/* + * Fills token type and boundaries. + */ +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, + int start, int end) +{ + token->type = type; + token->start = start; + token->end = end; + token->size = 0; +} + +/* + * Fills next available token with JSON primitive. 
+ */ +static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js, + size_t len, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *token; + int start; + + start = parser->pos; + + for (; parser->pos < len; parser->pos++) { + switch (js[parser->pos]) { +#ifndef JSMN_STRICT + /* +* In strict mode primitive must be followed by "," +* or "}" or "]" +*/ + case ':': +#endif + case '\t': + case '\r': + case '\n': + case ' ': + case ',': + case ']': + case '}': + goto found; + default: + break; + } + if (js[parser->pos] < 32 || js[parser->pos] >= 127) { + parser->pos = start; + return
[PATCH v18 17/19] perf, tools: Make alias matching case-insensitive
From: Andi Kleen Make alias matching in the events parser case-insensitive. This is useful with the JSON events. perf uses lower case events, but the CPU manuals generally use upper case event names. The JSON files use lower case by default too. But if we search case insensitively then users can cut-n-paste the upper case event names. So the following works: % perf stat -e BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL true Performance counter stats for 'true': 305 BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL 0.000492799 seconds time elapsed Signed-off-by: Andi Kleen --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f05d4c4..16329d0 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1098,7 +1098,7 @@ comp_pmu(const void *p1, const void *p2) struct perf_pmu_event_symbol *pmu1 = (struct perf_pmu_event_symbol *) p1; struct perf_pmu_event_symbol *pmu2 = (struct perf_pmu_event_symbol *) p2; - return strcmp(pmu1->symbol, pmu2->symbol); + return strcasecmp(pmu1->symbol, pmu2->symbol); } static void perf_pmu__parse_cleanup(void) -- 2.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v18 07/19] perf, tools: Query terminal width and use in perf list
From: Andi KleenAutomatically adapt the now wider and word wrapped perf list output to wider terminals. This requires querying the terminal before the auto pager takes over, and exporting this information from the pager subsystem. Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu Acked-by: Namhyung Kim Acked-by: Jiri Olsa --- tools/perf/util/cache.h | 1 + tools/perf/util/pager.c | 15 +++ tools/perf/util/pmu.c | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index c861373..8e0d4b8 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -32,6 +32,7 @@ extern const char *perf_config_dirname(const char *, const char *); extern void setup_pager(void); extern int pager_in_use(void); extern int pager_use_color; +int pager_get_columns(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index 53ef006..1770c88 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -1,6 +1,7 @@ #include "cache.h" #include "run-command.h" #include "sigchain.h" +#include /* * This is split up from the rest of git so that we can do @@ -8,6 +9,7 @@ */ static int spawned_pager; +static int pager_columns; static void pager_preexec(void) { @@ -47,9 +49,12 @@ static void wait_for_pager_signal(int signo) void setup_pager(void) { const char *pager = getenv("PERF_PAGER"); + struct winsize sz; if (!isatty(1)) return; + if (ioctl(1, TIOCGWINSZ, ) == 0) + pager_columns = sz.ws_col; if (!pager) pager = getenv("PAGER"); if (!(pager || access("/usr/bin/pager", X_OK))) @@ -93,3 +98,13 @@ int pager_in_use(void) env = getenv("PERF_PAGER_IN_USE"); return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0; } + +int pager_get_columns(void) +{ + char *s; + + s = getenv("COLUMNS"); + if (s) + return atoi(s); + return (pager_columns ? 
pager_columns : 80) - 2; +} diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e011398..fcab857 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -14,6 +14,7 @@ #include "cpumap.h" #include "header.h" #include "pmu-events/pmu-events.h" +#include "cache.h" struct perf_pmu_format { char *name; @@ -1070,7 +1071,7 @@ void print_pmu_events(const char *event_glob, bool name_only) int len, j; struct pair *aliases; int numdesc = 0; - int columns = 78; + int columns = pager_get_columns(); pmu = NULL; len = 0; -- 2.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v18 14/19] perf, tools: Add support for event list topics
From: Andi KleenAdd support to group the output of perf list by the Topic field in the JSON file. Example output: % perf list ... Cache: l1d.replacement [L1D data line replacements] l1d_pend_miss.pending [L1D miss oustandings duration in cycles] l1d_pend_miss.pending_cycles [Cycles with L1D load Misses outstanding] l2_l1d_wb_rqsts.all [Not rejected writebacks from L1D to L2 cache lines in any state] l2_l1d_wb_rqsts.hit_e [Not rejected writebacks from L1D to L2 cache lines in E state] l2_l1d_wb_rqsts.hit_m [Not rejected writebacks from L1D to L2 cache lines in M state] ... Pipeline: arith.fpu_div [Divide operations executed] arith.fpu_div_active [Cycles when divider is busy executing divide operations] baclears.any [Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end] br_inst_exec.all_branches [Speculative and retired branches] br_inst_exec.all_conditional [Speculative and retired macro-conditional branches] br_inst_exec.all_direct_jmp [Speculative and retired macro-unconditional branches excluding calls and indirects] br_inst_exec.all_direct_near_call [Speculative and retired direct near calls] br_inst_exec.all_indirect_jump_non_call_ret Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- Changelog[v14] - [Jiri Olsa] Move jevents support for Topic to a separate patch. 
--- tools/perf/util/pmu.c | 37 +++-- tools/perf/util/pmu.h | 1 + 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 1e3b3bf1..5745139 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -210,7 +210,8 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, -char *desc, char *val, char *long_desc) +char *desc, char *val, char *long_desc, +char *topic) { struct perf_pmu_alias *alias; int ret; @@ -245,6 +246,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, alias->desc = desc ? strdup(desc) : NULL; alias->long_desc = long_desc ? strdup(long_desc) : desc ? strdup(desc) : NULL; + alias->topic = topic ? strdup(topic) : NULL; list_add_tail(>list, list); @@ -262,7 +264,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; - return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -511,7 +513,7 @@ static int pmu_add_cpu_aliases(struct list_head *head) /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, (char *)pe->desc, (char *)pe->event, - (char *)pe->long_desc); + (char *)pe->long_desc, (char *)pe->topic); } out: @@ -1033,19 +1035,26 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, return buf; } -struct pair { +struct sevent { char *name; char *desc; + char *topic; }; -static int cmp_pair(const void *a, const void *b) +static int cmp_sevent(const void *a, const void *b) { - const struct pair *as = a; - const struct pair *bs = b; + const struct sevent *as = a; + const struct sevent *bs = b; /* Put extra events last */ if (!!as->desc != !!bs->desc) return !!as->desc - !!bs->desc; + if (as->topic && bs->topic) { + int n = 
strcmp(as->topic, bs->topic); + + if (n) + return n; + } return strcmp(as->name, bs->name); } @@ -1079,9 +1088,10 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, char buf[1024]; int printed = 0; int len, j; - struct pair *aliases; + struct sevent *aliases; int numdesc = 0; int columns = pager_get_columns(); + char *topic = NULL; pmu = NULL; len = 0; @@ -1091,7 +1101,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, if (pmu->selectable) len++; } - aliases = zalloc(sizeof(struct pair) * len); + aliases = zalloc(sizeof(struct sevent)
[PATCH v18 18/19] perf, tools, pmu-events: Fix fixed counters on Intel
From: Andi Kleen The JSON event lists use a different encoding for fixed counters than perf for instructions and cycles (ref-cycles is ok). This led to some common events like inst_retired.any or cpu_clk_unhalted.thread not counting, when specified with their JSON name. Special case these events in the jevents conversion process. I prefer to not touch the JSON files for this, as it's intended that standard JSON files can be just dropped into the perf build without changes. Signed-off-by: Andi Kleen Signed-off-by: Sukadev Bhattiprolu [Fix minor compile error] --- tools/perf/pmu-events/jevents.c | 25 +++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 7347cca..247d777e 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -237,6 +237,28 @@ static void print_events_table_suffix(FILE *outfp) fprintf(outfp, "};\n"); } +static struct fixed { + const char *name; + const char *event; +} fixed[] = { + { "inst_retired.any", "event=0xc0" }, + { "cpu_clk_unhalted.thread", "event=0x3c" }, + { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" }, + { NULL, NULL}, +}; + +/* + * Handle different fixed counter encodings between JSON and perf.
+ */ +static char *real_event(const char *name, char *event) +{ + int i; + for (i = 0; fixed[i].name; i++) + if (!strcasecmp(name, fixed[i].name)) + return (char *)fixed[i].event; + return event; +} + /* Call func with each event in the json file */ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, @@ -324,8 +346,7 @@ int json_events(const char *fn, if (msr != NULL) addfield(map, , ",", msr->pname, msrval); fixname(name); - - err = func(data, name, event, desc, long_desc, topic); + err = func(data, name, real_event(name, event), desc, long_desc, topic); free(event); free(desc); free(name); -- 2.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 0/5 v3] Fix NVMe driver support on Power with 32-bit DMA
From: Nishanth Aravamudan Date: Fri, 23 Oct 2015 13:54:20 -0700 > 1) add a generic dma_get_page_shift implementation that just returns > PAGE_SHIFT I won't object to this patch series, but if I had implemented this I would have required the architectures to implement this explicitly, one-by-one. I think it is less error prone and more likely to end up with all the architectures setting this correctly. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v18 04/19] perf, tools: Support CPU ID matching for Powerpc
Implement code that returns the generic CPU ID string for Powerpc. This will be used to identify the specific table of PMU events to parse/compare user specified events against. Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa --- Changelog[v14] - [Jiri Olsa] Move this independent code off into a separate patch. --- tools/perf/arch/powerpc/util/header.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6c1b8a7..65f9391 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -32,3 +32,14 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +char * +get_cpuid_str(void) +{ + char *bufp; + + if (asprintf(&bufp, "%.8lx", mfspr(SPRN_PVR)) < 0) + bufp = NULL; + + return bufp; +} -- 2.5.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/eeh: eeh_pci_enable(): fix checking of post-request state
Andrew Donnellan writes: > diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c > index 40e4d4a..d757e7c 100644 > --- a/arch/powerpc/kernel/eeh.c > +++ b/arch/powerpc/kernel/eeh.c > @@ -677,7 +677,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) > /* Check if the request is finished successfully */ > if (active_flag) { > rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); > - if (rc <= 0) > + if (rc < 0) > return rc; > > if (rc & active_flag) Reviewed-by: Daniel Axtens > -- > Andrew Donnellan Software Engineer, OzLabs > andrew.donnel...@au1.ibm.com Australia Development Lab, Canberra > +61 2 6201 8874 (work)IBM Australia Limited > > ___ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev signature.asc Description: PGP signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/eeh: eeh_pci_enable(): fix checking of post-request state
Andrew Donnellan writes: > diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c > index 40e4d4a..d757e7c 100644 > --- a/arch/powerpc/kernel/eeh.c > +++ b/arch/powerpc/kernel/eeh.c > @@ -677,7 +677,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) > /* Check if the request is finished successfully */ > if (active_flag) { > rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); > - if (rc <= 0) > + if (rc < 0) > return rc; > > if (rc & active_flag) Reviewed-by: Daniel Axtens > -- > Andrew Donnellan Software Engineer, OzLabs > andrew.donnel...@au1.ibm.com Australia Development Lab, Canberra > +61 2 6201 8874 (work)IBM Australia Limited > > ___ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev signature.asc Description: PGP signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/dma: Basic DMA operations shouldn't be GPL only
When turning these from inline to exported functions I was a bit over-eager and made them GPL only. This basically prevents use of any non-GPL PCI driver, which is a bit over the top. Bring them in line with other architectures. Signed-off-by: Benjamin Herrenschmidt --- diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 59503ed..18ea012 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -303,7 +303,7 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) dev->coherent_dma_mask = mask; return 0; } -EXPORT_SYMBOL_GPL(dma_set_coherent_mask); +EXPORT_SYMBOL(dma_set_coherent_mask); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) @@ -362,7 +362,7 @@ u64 dma_get_required_mask(struct device *dev) return __dma_get_required_mask(dev); } -EXPORT_SYMBOL_GPL(dma_get_required_mask); +EXPORT_SYMBOL(dma_get_required_mask); static int __init dma_init(void) { ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RESEND, tip/locking/core, v5, 1/6] powerpc: atomic: Make _return atomics and *{cmp}xchg fully ordered
On Tue, Oct 27, 2015 at 01:33:47PM +1100, Michael Ellerman wrote: > On Mon, 2015-26-10 at 10:15:36 UTC, Boqun Feng wrote: > > This patch fixes two problems to make value-returning atomics and > > {cmp}xchg fully ordered on PPC. > > Hi Boqun, > > Can you please split this into two patches. One that does the cmpxchg change > and one that changes PPC_ATOMIC_ENTRY_BARRIER. > OK, makes sense ;-) > Also given how pervasive this change is I'd like to take it via the powerpc > next tree, so can you please send this patch (which will be two after you > split > it) as powerpc patches. And the rest can go via tip? > One problem is that patch 5 will remove __xchg_u32 and __xchg_64 entirely, which are modified in this patch (patch 1), so there will be some conflicts if the two branches get merged, I think. An alternative way is that all of this series goes to the powerpc next tree, as most of the dependent patches are already there. I just need to remove the inc/dec related code and resend it when appropriate. Besides, I can pull patch 2 out and send it as a tip patch because it's general code and nothing in this series depends on it. To summarize: patch 1 (split into two), 3, 4 (remove inc/dec implementation), 5, 6 sent as powerpc patches for powerpc next, patch 2 (unmodified) sent as a tip patch for locking/core. Peter and Michael, does this work for you both? Regards, signature.asc Description: PGP signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V10 03/12] powerpc/pci: Cache VF index in pci_dn
On 26/10/15 14:15, Wei Yang wrote: The patch caches the VF index in pci_dn, which can be used to calculate VF's bus, device and function number. That information helps to locate the VF's PCI device instance when doing hotplug during EEH recovery if necessary. Signed-off-by: Wei Yang Acked-by: Gavin Shan Reviewed-by: Andrew Donnellan -- Andrew Donnellan Software Engineer, OzLabs andrew.donnel...@au1.ibm.com Australia Development Lab, Canberra +61 2 6201 8874 (work)IBM Australia Limited ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V10 02/12] PCI: Add pcibios_bus_add_device() weak function
On 26/10/15 14:15, Wei Yang wrote: This patch adds a weak function pcibios_bus_add_device() so that arch-dependent code can do proper setup. For example, powerpc could setup EEH related resources. Signed-off-by: Wei Yang Acked-by: Bjorn Helgaas Reviewed-by: Andrew Donnellan -- Andrew Donnellan Software Engineer, OzLabs andrew.donnel...@au1.ibm.com Australia Development Lab, Canberra +61 2 6201 8874 (work)IBM Australia Limited ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v4 0/2] powerpc/512x: add LocalPlus Bus FIFO device driver
On Thu, Oct 22, 2015 at 01:15:03AM +0300, Alexander Popov wrote: > On 12.10.2015 00:08, Alexander Popov wrote: > > This driver for Freescale MPC512x LocalPlus Bus FIFO (called SCLPC > > in the Reference Manual) allows Direct Memory Access transfers > > between RAM and peripheral devices on LocalPlus Bus. > > > Changes in v4: > > - the race condition is fixed; > > - plenty of style fixes are made; > > - devm_* functions and EPROBE_DEFER are used in probe(). > > Hello, > > I've done my best to fix the issues pointed by Timur Tabi and Vinod Koul. > Could I have a feedback please? I don't seem to have v4 in my list :( Can you please repost? -- ~Vinod ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RESEND, tip/locking/core, v5, 1/6] powerpc: atomic: Make _return atomics and *{cmp}xchg fully ordered
On Mon, 2015-26-10 at 10:15:36 UTC, Boqun Feng wrote: > This patch fixes two problems to make value-returning atomics and > {cmp}xchg fully ordered on PPC. Hi Boqun, Can you please split this into two patches. One that does the cmpxchg change and one that changes PPC_ATOMIC_ENTRY_BARRIER. Also given how pervasive this change is I'd like to take it via the powerpc next tree, so can you please send this patch (which will be two after you split it) as powerpc patches. And the rest can go via tip? cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V10 01/12] PCI/IOV: Rename and export virtfn_add/virtfn_remove
On 26/10/15 14:15, Wei Yang wrote: During EEH recovery, hotplug is applied to the devices which don't have drivers or their drivers don't support EEH. However, the hotplug, which was implemented based on PCI bus, can't be applied to VF directly. The patch renames virtfn_{add,remove}() and exports them so that they can be used in PCI hotplug during EEH recovery. [gwshan: changelog] Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Bjorn Helgaas Reviewed-by: Andrew Donnellan -- Andrew Donnellan Software Engineer, OzLabs andrew.donnel...@au1.ibm.com Australia Development Lab, Canberra +61 2 6201 8874 (work)IBM Australia Limited ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v12 3/6] CPM/QE: use genalloc to manage CPM/QE muram
On Sun, 2015-10-25 at 22:15 -0500, Zhao Qiang-B45475 wrote: > On Sat, 2015-10-24 at 04:59 AM, Wood Scott-B07421> wrote: > > -Original Message- > > From: Wood Scott-B07421 > > Sent: Saturday, October 24, 2015 4:59 AM > > To: Zhao Qiang-B45475 > > Cc: linux-ker...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; > > lau...@codeaurora.org; Xie Xiaobo-R63061 ; > > b...@kernel.crashing.org; Li Yang-Leo-R58472 ; > > pau...@samba.org > > Subject: Re: [PATCH v12 3/6] CPM/QE: use genalloc to manage CPM/QE muram > > > > Don't send HTML e-mail. > > > > On Fri, 2015-10-23 at 02:06 -0500, Zhao Qiang-B45475 wrote: > > > On Fri, 2015-10-23 at 11:00 AM, Wood Scott-B07421 > > > > > > wrote: > > > > -Original Message- > > > > From: Wood Scott-B07421 > > > > Sent: Friday, October 23, 2015 11:00 AM > > > > To: Zhao Qiang-B45475 > > > > Cc: linux-ker...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; > > > > lau...@codeaurora.org; Xie Xiaobo-R63061 ; > > > > b...@kernel.crashing.org; Li Yang-Leo-R58472 ; > > > > pau...@samba.org > > > > Subject: Re: [PATCH v12 3/6] CPM/QE: use genalloc to manage CPM/QE > > > > muram > > > > > > > > On Wed, 2015-10-14 at 15:16 +0800, Zhao Qiang wrote: > > > > > -/** > > > > > +/* > > > > > * cpm_muram_alloc - allocate the requested size worth of > > > > > multi-user > > > ram > > > > > * @size: number of bytes to allocate > > > > > * @align: requested alignment, in bytes @@ -141,59 +151,102 @@ > > > > > out: > > > > > */ > > > > > unsigned long cpm_muram_alloc(unsigned long size, unsigned long > > > > > align) { > > > > > - unsigned long start; > > > > > unsigned long flags; > > > > > - > > > > > + unsigned long start; > > > > > + static struct genpool_data_align muram_pool_data; > > > > > spin_lock_irqsave(_muram_lock, flags); > > > > > - cpm_muram_info.alignment = align; > > > > > - start = rh_alloc(_muram_info, size, "commproc"); > > > > > - memset(cpm_muram_addr(start), 0, size); > > > > > + muram_pool_data.align = align; > > > > > + 
gen_pool_set_algo(muram_pool, gen_pool_first_fit_align, > > > > > + _pool_data); > > > > > + start = cpm_muram_alloc_common(size, _pool_data); > > > > > spin_unlock_irqrestore(_muram_lock, flags); > > > > > - > > > > > return start; > > > > > } > > > > > EXPORT_SYMBOL(cpm_muram_alloc); > > > > > > > > Why is muram_pool_data static? Why is it being passed to > > > > gen_pool_set_algo()? > > > Cpm_muram use both align algo and fixed algo, so we need to set > > > corresponding algo and Algo data. > > > > The data gets passed in via gen_pool_alloc_data(). The point was to > > allow it to > > be on the caller's stack, not a long-lived data structure shared by all > > callers and > > needing synchronization. > > You mean it is not necessary to point pool->data to data, just passing the > data to gen_pool_alloc_data()? > However, the algo it needed to be set. > > > > > > > The whole reason we're adding gen_pool_alloc_data() is to avoid > > > > that. Do we need gen_pool_alloc_algo() too? > > > > > > We add gen_pool_alloc_data() to pass data to algo, because align algo > > > and fixed algo, Because align and fixed algos need specific data. > > > > And my point is that because of that, it seems like we need a version that > > accepts an algorithm as well. > > It the user just use only one algo, it doesn’t need to set algo, > However, qe_muram use two algos with alloc_align function > And alloc_fixed function. Yes. That is why gen_pool_alloc_data() does not accomplish what we want. When we were discussing gen_pool_alloc_data(), you had not yet mentioned the need for fixed allocations. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v12 4/6] QE/CPM: move muram management functions to qe_common
On Sun, 2015-10-25 at 21:42 -0500, Zhao Qiang-B45475 wrote: > On Sat, Oct 24, 2015 at 04:56 AM, Wood Scott-B07421 wrote: > > -Original Message- > > From: Wood Scott-B07421 > > Sent: Saturday, October 24, 2015 4:56 AM > > To: Zhao Qiang-B45475> > Cc: linux-ker...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; > > lau...@codeaurora.org; Xie Xiaobo-R63061 ; > > b...@kernel.crashing.org; Li Yang-Leo-R58472 ; > > pau...@samba.org > > Subject: Re: [PATCH v12 4/6] QE/CPM: move muram management functions to > > qe_common > > > > On Fri, 2015-10-23 at 02:45 -0500, Zhao Qiang-B45475 wrote: > > > On Fri, 2015-10-23 at 11:10 AM, Wood Scott-B07421 > > > > > > wrote: > > > > -Original Message- > > > > From: Wood Scott-B07421 > > > > Sent: Friday, October 23, 2015 11:10 AM > > > > To: Zhao Qiang-B45475 > > > > Cc: linux-ker...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; > > > > lau...@codeaurora.org; Xie Xiaobo-R63061 ; > > > > b...@kernel.crashing.org; Li Yang-Leo-R58472 ; > > > > pau...@samba.org > > > > Subject: Re: [PATCH v12 4/6] QE/CPM: move muram management > > functions > > > > to qe_common > > > > > > > > On Wed, 2015-10-14 at 15:16 +0800, Zhao Qiang wrote: > > > > > QE and CPM have the same muram, they use the same management > > > > > functions. Now QE support both ARM and PowerPC, it is necessary to > > > > > move QE to "driver/soc", so move the muram management functions > > > > > from cpm_common to qe_common for preparing to move QE code to > > "driver/soc" > > > > > > > > > > Signed-off-by: Zhao Qiang > > > > > --- > > > > > Changes for v2: > > > > > - no changes > > > > > Changes for v3: > > > > > - no changes > > > > > Changes for v4: > > > > > - no changes > > > > > Changes for v5: > > > > > - no changes > > > > > Changes for v6: > > > > > - using genalloc instead rheap to manage QE MURAM > > > > > - remove qe_reset from platform file, using > > > > > - subsys_initcall to call qe_init function. 
> > > > > Changes for v7: > > > > > - move this patch from 3/3 to 2/3 > > > > > - convert cpm with genalloc > > > > > - check for gen_pool allocation failure Changes for v8: > > > > > - rebase > > > > > - move BD_SC_* macro instead of copy Changes for v9: > > > > > - doesn't modify CPM, add a new patch to modify. > > > > > - rebase > > > > > Changes for v10: > > > > > - rebase > > > > > Changes for v11: > > > > > - remove renaming > > > > > - delete removing qe_reset and delete adding qe_init. > > > > > Changes for v12: > > > > > - SPI_FSL_CPM depends on QE-MURAM, select QUICC_ENGINE for it. > > > > > > > > Why is the SPI change part of this patch? Why is it even part of > > > > this patchset, rather than an independent patch sent to the SPI list > > > > and maintainer? If it's tied to other changes you're making, > > > > explain that. As is, there is zero mention of the SPI change in the > > > > part of the e-mail that will become the git changelog. > > > > > > > This SPI_FSL_CPM is cpm-spi, it is part of CPM. > > > > So then why are you selecting QUICC_ENGINE? And again, what does it have > > to do with this patch? > > Cpm-spi is dependent on qe_muram, if not select it, Cpm-spi will failed to > build. Why QUICC_ENGINE and not CPM? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 4/7 v2] pseries/iommu: implement DDW-aware dma_get_page_shift
On 10/24/2015 07:59 AM, Nishanth Aravamudan wrote: When DDW (Dynamic DMA Windows) are present for a device, we have stored the TCE (Translation Control Entry) size in a special device tree property. Check if we have enabled DDW for the device and return the TCE size from that property if present. If the property isn't present, fallback to looking the value up in struct iommu_table. If we don't find a iommu_table, fallback to the kernel's page size. Signed-off-by: Nishanth Aravamudan--- arch/powerpc/platforms/pseries/iommu.c | 36 ++ 1 file changed, 36 insertions(+) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 0946b98..1bf6471 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1292,6 +1292,40 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev) return dma_iommu_ops.get_required_mask(dev); } +static unsigned long dma_get_page_shift_pSeriesLP(struct device *dev) +{ + struct iommu_table *tbl; + + if (!disable_ddw && dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + struct device_node *dn; + + dn = pci_device_to_OF_node(pdev); + + /* search upwards for ibm,dma-window */ + for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group; + dn = dn->parent) + if (of_get_property(dn, "ibm,dma-window", NULL)) + break; + /* +* if there is a DDW configuration, the TCE shift is stored in +* the property +*/ + if (dn && PCI_DN(dn)) { + const struct dynamic_dma_window_prop *direct64 = + of_get_property(dn, DIRECT64_PROPNAME, NULL); This DIRECT64_PROPNAME property is only present under pHyp, QEMU/KVM does not set it as 64bit windows are dynamic there so something like find_existing_ddw() needs to be used here. 
+ if (direct64) + return be32_to_cpu(direct64->tce_shift); + } + } + + tbl = get_iommu_table_base(dev); + if (tbl) + return tbl->it_page_shift; + + return PAGE_SHIFT; +} + #else /* CONFIG_PCI */ #define pci_dma_bus_setup_pSeries NULL #define pci_dma_dev_setup_pSeries NULL @@ -1299,6 +1333,7 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev) #define pci_dma_dev_setup_pSeriesLP NULL #define dma_set_mask_pSeriesLPNULL #define dma_get_required_mask_pSeriesLP NULL +#define dma_get_page_shift_pSeriesLP NULL #endif /* !CONFIG_PCI */ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, @@ -1395,6 +1430,7 @@ void iommu_init_early_pSeries(void) pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; ppc_md.dma_set_mask = dma_set_mask_pSeriesLP; ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP; + ppc_md.dma_get_page_shift = dma_get_page_shift_pSeriesLP; } else { pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; -- Alexey ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 2/3] tools/perf:Map the ID values with register names
The id values are mapped with the corresponding register names. This names are displayed while using a perf report/perf script command. Signed-off-by: Anju T--- tools/perf/arch/powerpc/include/perf_regs.h | 118 1 file changed, 118 insertions(+) create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h new file mode 100644 index 000..621aa94 --- /dev/null +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -0,0 +1,118 @@ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include +#include +#include + +void perf_regs_load(u64 *regs); + +#define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) +#define PERF_REGS_MAX PERF_REG_POWERPC_MAX +#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 + +#define PERF_REG_IP PERF_REG_POWERPC_NIP +#define PERF_REG_SP PERF_REG_POWERPC_R1 + +static inline const char *perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_POWERPC_GPR0: + return "gpr0"; + case PERF_REG_POWERPC_GPR1: + return "gpr1"; + case PERF_REG_POWERPC_GPR2: + return "gpr2"; + case PERF_REG_POWERPC_GPR3: + return "gpr3"; + case PERF_REG_POWERPC_GPR4: + return "gpr4"; + case PERF_REG_POWERPC_GPR5: + return "gpr5"; + case PERF_REG_POWERPC_GPR6: + return "gpr6"; + case PERF_REG_POWERPC_GPR7: + return "gpr7"; + case PERF_REG_POWERPC_GPR8: + return "gpr8"; + case PERF_REG_POWERPC_GPR9: + return "gpr9"; + case PERF_REG_POWERPC_GPR10: + return "gpr10"; + case PERF_REG_POWERPC_GPR11: + return "gpr11"; + case PERF_REG_POWERPC_GPR12: + return "gpr12"; + case PERF_REG_POWERPC_GPR13: + return "gpr13"; + case PERF_REG_POWERPC_GPR14: + return "gpr14"; + case PERF_REG_POWERPC_GPR15: + return "gpr15"; + case PERF_REG_POWERPC_GPR16: + return "gpr16"; + case PERF_REG_POWERPC_GPR17: + return "gpr17"; + case PERF_REG_POWERPC_GPR18: + return "gpr18"; + case PERF_REG_POWERPC_GPR19: + return "gpr19"; + case PERF_REG_POWERPC_GPR20: + return "gpr20"; + case 
PERF_REG_POWERPC_GPR21: + return "gpr21"; + case PERF_REG_POWERPC_GPR22: + return "gpr22"; + case PERF_REG_POWERPC_GPR23: + return "gpr23"; + case PERF_REG_POWERPC_GPR24: + return "gpr24"; + case PERF_REG_POWERPC_GPR25: + return "gpr25"; + case PERF_REG_POWERPC_GPR26: + return "gpr26"; + case PERF_REG_POWERPC_GPR27: + return "gpr27"; + case PERF_REG_POWERPC_GPR28: + return "gpr28"; + case PERF_REG_POWERPC_GPR29: + return "gpr29"; + case PERF_REG_POWERPC_GPR30: + return "gpr30"; + case PERF_REG_POWERPC_GPR31: + return "gpr31"; + case PERF_REG_POWERPC_NIP: + return "nip"; + case PERF_REG_POWERPC_MSR: + return "msr"; + case PERF_REG_POWERPC_ORIG_R3: + return "orig_r3"; + case PERF_REG_POWERPC_CTR: + return "ctr"; + case PERF_REG_POWERPC_LNK: + return "link"; + case PERF_REG_POWERPC_XER: + return "xer"; + case PERF_REG_POWERPC_CCR: + return "ccr"; +#ifdef __powerpc64__ + case PERF_REG_POWERPC_SOFTE: + return "softe"; +#else + case PERF_REG_POWERPC_MQ: + return "mq"; +#endif + case PERF_REG_POWERPC_TRAP: + return "trap"; + case PERF_REG_POWERPC_DAR: + return "dar"; + case PERF_REG_POWERPC_DSISR: + return "dsisr"; + case PERF_REG_POWERPC_RESULT: + return "result"; + default: + return NULL; + } + return NULL; +} +#endif /*ARCH_PERF_REGS_H */ -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 0/3] perf/powerpc:Add ability to sample intr machine state in powerpc
This short patch series adds the ability to sample the interrupted machine state for each hardware sample. Anju (3): perf/powerpc:add ability to sample intr machine state in power tools/perf:Map the ID values with register names perf/powerpc:add support for sampling intr machine state arch/powerpc/Kconfig| 1 + arch/powerpc/include/uapi/asm/perf_regs.h | 55 + arch/powerpc/perf/Makefile | 1 + arch/powerpc/perf/perf_regs.c | 87 tools/perf/arch/powerpc/include/perf_regs.h | 118 tools/perf/config/Makefile | 5 ++ 6 files changed, 267 insertions(+) create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h create mode 100644 arch/powerpc/perf/perf_regs.c create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 3/3] perf/powerpc :add support for sampling intr machine state
The registers to sample are passed through the sample_regs_intr bitmask. The name and bit position for each register is defined in asm/perf_regs.h. This feature can be enabled by using -I option with perf record command. To display the sampled register values use perf script -D. The kernel uses the "PERF" register ids to find offset of the register in 'struct pt_regs'. CONFIG_HAVE_PERF_REGS will enable sampling of the interrupted machine state. Signed-off-by: Anju T--- arch/powerpc/Kconfig | 1 + arch/powerpc/perf/Makefile| 1 + arch/powerpc/perf/perf_regs.c | 87 +++ tools/perf/config/Makefile| 5 +++ 4 files changed, 94 insertions(+) create mode 100644 arch/powerpc/perf/perf_regs.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5ef2711..768d700 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -116,6 +116,7 @@ config PPC select GENERIC_ATOMIC64 if PPC32 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_PERF_EVENTS + select HAVE_PERF_REGS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 select ARCH_WANT_IPC_PARSE_VERSION diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index f9c083a..0d53815 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_PPC64)+= $(obj64-y) obj-$(CONFIG_PPC32)+= $(obj32-y) diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c new file mode 100644 index 000..2474dc4 --- /dev/null +++ b/arch/powerpc/perf/perf_regs.c @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) + +#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1)) + +static unsigned int 
pt_regs_offset[PERF_REG_POWERPC_MAX] = { + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR3, gpr[3]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR4, gpr[4]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR5, gpr[5]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR6, gpr[6]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR7, gpr[7]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR8, gpr[8]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR9, gpr[9]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR10, gpr[10]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR11, gpr[11]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR12, gpr[12]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR13, gpr[13]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR14, gpr[14]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR15, gpr[15]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR16, gpr[16]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR17, gpr[17]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR18, gpr[18]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR19, gpr[19]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR20, gpr[20]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR21, gpr[21]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR22, gpr[22]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR23, gpr[23]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR24, gpr[24]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR25, gpr[25]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR26, gpr[26]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR27, gpr[27]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR28, gpr[28]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR29, gpr[29]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR30, gpr[30]), + PT_REGS_OFFSET(PERF_REG_POWERPC_GPR31, gpr[31]), + PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip), + PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr), + PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3), + PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr), + PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link), + PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer), + PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr), +#ifdef 
__powerpc64__ + PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, softe), +#else + PT_REGS_OFFSET(PERF_REG_POWERPC_MQ, mq), +#endif + PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap), + PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar), + PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr), + PT_REGS_OFFSET(PERF_REG_POWERPC_RESULT, result), +}; +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX)) + return 0; + return regs_get_register(regs, pt_regs_offset[idx]); +} +int
[PATCH V2 1/3] perf/powerpc:add ability to sample intr machine state in power
The enum definition assigns an 'id' to each register in "struct pt_regs" of arch/powerpc. The order of these values in the enum definition is based on the corresponding macros in arch/powerpc/include/uapi/asm/ptrace.h . Signed-off-by: Anju T--- arch/powerpc/include/uapi/asm/perf_regs.h | 55 +++ 1 file changed, 55 insertions(+) create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h new file mode 100644 index 000..b97727c --- /dev/null +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -0,0 +1,55 @@ +#ifndef _ASM_POWERPC_PERF_REGS_H +#define _ASM_POWERPC_PERF_REGS_H + +enum perf_event_powerpc_regs { + PERF_REG_POWERPC_GPR0, + PERF_REG_POWERPC_GPR1, + PERF_REG_POWERPC_GPR2, + PERF_REG_POWERPC_GPR3, + PERF_REG_POWERPC_GPR4, + PERF_REG_POWERPC_GPR5, + PERF_REG_POWERPC_GPR6, + PERF_REG_POWERPC_GPR7, + PERF_REG_POWERPC_GPR8, + PERF_REG_POWERPC_GPR9, + PERF_REG_POWERPC_GPR10, + PERF_REG_POWERPC_GPR11, + PERF_REG_POWERPC_GPR12, + PERF_REG_POWERPC_GPR13, + PERF_REG_POWERPC_GPR14, + PERF_REG_POWERPC_GPR15, + PERF_REG_POWERPC_GPR16, + PERF_REG_POWERPC_GPR17, + PERF_REG_POWERPC_GPR18, + PERF_REG_POWERPC_GPR19, + PERF_REG_POWERPC_GPR20, + PERF_REG_POWERPC_GPR21, + PERF_REG_POWERPC_GPR22, + PERF_REG_POWERPC_GPR23, + PERF_REG_POWERPC_GPR24, + PERF_REG_POWERPC_GPR25, + PERF_REG_POWERPC_GPR26, + PERF_REG_POWERPC_GPR27, + PERF_REG_POWERPC_GPR28, + PERF_REG_POWERPC_GPR29, + PERF_REG_POWERPC_GPR30, + PERF_REG_POWERPC_GPR31, + PERF_REG_POWERPC_NIP, + PERF_REG_POWERPC_MSR, + PERF_REG_POWERPC_ORIG_R3, + PERF_REG_POWERPC_CTR, + PERF_REG_POWERPC_LNK, + PERF_REG_POWERPC_XER, + PERF_REG_POWERPC_CCR, +#ifdef __powerpc64__ + PERF_REG_POWERPC_SOFTE, +#else + PERF_REG_POWERPC_MQ, +#endif + PERF_REG_POWERPC_TRAP, + PERF_REG_POWERPC_DAR, + PERF_REG_POWERPC_DSISR, + PERF_REG_POWERPC_RESULT, + PERF_REG_POWERPC_MAX, +}; +#endif /* _ASM_POWERPC_PERF_REGS_H */ -- 2.1.0 ___ Linuxppc-dev mailing list 
Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V2 0/3] perf/powerpc:Add ability to sample intr machine state in powerpc
On 10/26/15, Anju Twrote: > This short patch series add the ability to sample the interrupted > machine state for each hardware sample Hi, how can we check your patch series without testing details? > > Anju (3): > perf/powerpc:add ability to sample intr machine state in power > tools/perf:Map the ID values with register names > perf/powerpc:add support for sampling intr machine state > > > arch/powerpc/Kconfig| 1 + > arch/powerpc/include/uapi/asm/perf_regs.h | 55 + > arch/powerpc/perf/Makefile | 1 + > arch/powerpc/perf/perf_regs.c | 87 > tools/perf/arch/powerpc/include/perf_regs.h | 118 > > tools/perf/config/Makefile | 5 ++ > 6 files changed, 267 insertions(+) > create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h > create mode 100644 arch/powerpc/perf/perf_regs.c > create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h > > -- > 2.1.0 > > ___ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 1/8] ppc64le FTRACE_WITH_REGS implementation
Implement FTRACE_WITH_REGS for powerpc64, on ELF ABI v2. Initial work started by Vojtech Pavlik, used with permission. * arch/powerpc/kernel/entry_64.S: - enhance _mcount with a stub to support call sites generated by -mprofile-kernel. This is backward-compatible. - Implement an effective ftrace_caller that works from within the kernel binary as well as from modules. * arch/powerpc/kernel/ftrace.c: - be prepared to deal with ppc64 ELF ABI v2, especially calls to _mcount that result from gcc -mprofile-kernel - a little more error verbosity * arch/powerpc/kernel/module_64.c: - do not save the TOC pointer on the trampoline when the destination is ftrace_caller. This trampoline jump happens from a function prologue before a new stack frame is set up, so bad things may happen otherwise... - relax is_module_trampoline() to recognise the modified trampoline. Signed-off-by: Torsten Duwe--- arch/powerpc/include/asm/ftrace.h | 5 ++ arch/powerpc/kernel/entry_64.S| 113 +- arch/powerpc/kernel/ftrace.c | 72 +--- arch/powerpc/kernel/module_64.c | 39 - 4 files changed, 217 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ef89b14..6eb9fbc 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -46,6 +46,8 @@ extern void _mcount(void); #ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_ADDR ((unsigned long)ftrace_caller) +# define FTRACE_REGS_ADDR FTRACE_ADDR static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* reloction of mcount call site is the same as the address */ @@ -58,6 +60,9 @@ struct dyn_arch_ftrace { #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif #endif #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index a94f155..b0dfbfe 100644 --- 
a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1206,8 +1206,13 @@ _GLOBAL(enter_prom) #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) - blr + mflrr0 + mtctr r0 + ld r0,LRSAVE(r1) + mtlrr0 + bctr +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL_TOC(ftrace_caller) /* Taken from output of objdump from lib64/glibc */ mflrr3 @@ -1229,6 +1234,81 @@ _GLOBAL(ftrace_graph_stub) ld r0, 128(r1) mtlrr0 addir1, r1, 112 +#else +_GLOBAL(ftrace_caller) +#if defined(_CALL_ELF) && _CALL_ELF == 2 + mflrr0 + bl 2f +2: mflrr12 + mtlrr0 + mr r0,r2 // save callee's TOC + addis r2,r12,(.TOC.-ftrace_caller-8)@ha + addir2,r2,(.TOC.-ftrace_caller-8)@l +#else + mr r0,r2 +#endif + ld r12,LRSAVE(r1) // get caller's address + + stdur1,-SWITCH_FRAME_SIZE(r1) + + std r12, _LINK(r1) + SAVE_8GPRS(0,r1) + std r0, 24(r1) // save TOC + SAVE_8GPRS(8,r1) + SAVE_8GPRS(16,r1) + SAVE_8GPRS(24,r1) + + LOAD_REG_IMMEDIATE(r3,function_trace_op) + ld r5,0(r3) + + mflrr3 + std r3, _NIP(r1) + std r3, 16(r1) + subir3, r3, MCOUNT_INSN_SIZE + mfmsr r4 + std r4, _MSR(r1) + mfctr r4 + std r4, _CTR(r1) + mfxer r4 + std r4, _XER(r1) + mr r4, r12 + addir6, r1 ,STACK_FRAME_OVERHEAD + +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + ld r3, _NIP(r1) + mtlrr3 + + REST_8GPRS(0,r1) + REST_8GPRS(8,r1) + REST_8GPRS(16,r1) + REST_8GPRS(24,r1) + + addi r1, r1, SWITCH_FRAME_SIZE + + ld r12, LRSAVE(r1) // get caller's address + mtlrr12 + mr r2,r0 // restore callee's TOC + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + stdur1, -112(r1) +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) + addir1, r1, 112 +#endif + + mflrr0 // move this LR to CTR + mtctr r0 + + ld r0,LRSAVE(r1) // restore callee's lr at _mcount site + mtlrr0 + bctr// jump after _mcount site +#endif /* CC_USING_MPROFILE_KERNEL */ _GLOBAL(ftrace_stub) blr #else @@ -1262,6 +1342,19 @@ _GLOBAL(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) +#ifdef 
CC_USING_MPROFILE_KERNEL + // with -mprofile-kernel, parameter regs are still alive at _mcount + std r10, 104(r1) + std r9, 96(r1) + std r8, 88(r1) + std
[PATCH v3 2/8] ppc use ftrace_modify_all_code default
Convert ppc's arch_ftrace_update_code from its own function copy to use the generic default functionality (without stop_machine -- our instructions are properly aligned and the replacements atomic ;) With this we gain error checking and the much-needed function_trace_op handling. Signed-off-by: Torsten Duwe--- arch/powerpc/kernel/ftrace.c | 16 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 310137f..e419c7b 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -511,20 +511,12 @@ void ftrace_replace_code(int enable) } } +/* Use the default ftrace_modify_all_code, but without + * stop_machine(). + */ void arch_ftrace_update_code(int command) { - if (command & FTRACE_UPDATE_CALLS) - ftrace_replace_code(1); - else if (command & FTRACE_DISABLE_CALLS) - ftrace_replace_code(0); - - if (command & FTRACE_UPDATE_TRACE_FUNC) - ftrace_update_ftrace_func(ftrace_trace_function); - - if (command & FTRACE_START_FUNC_RET) - ftrace_enable_ftrace_graph_caller(); - else if (command & FTRACE_STOP_FUNC_RET) - ftrace_disable_ftrace_graph_caller(); + ftrace_modify_all_code(command); } int __init ftrace_dyn_arch_init(void) -- 1.8.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 0/8] ftrace with regs + live patching for ppc64 LE (ABI v2)
* Makefile: - globally use -mprofile-kernel in case it's configured. * arch/powerpc/Kconfig / kernel/trace/Kconfig: - declare that ppc64 HAVE_MPROFILE_KERNEL and HAVE_DYNAMIC_FTRACE_WITH_REGS, and use it. Signed-off-by: Torsten Duwe--- arch/powerpc/Kconfig | 2 ++ arch/powerpc/Makefile | 7 +++ kernel/trace/Kconfig | 5 + 3 files changed, 14 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9a7057e..0e6011c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -97,8 +97,10 @@ config PPC select OF_RESERVED_MEM select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_MPROFILE_KERNEL select SYSCTL_EXCEPTION_TRACE select ARCH_WANT_OPTIONAL_GPIOLIB select VIRT_TO_BUS if !PPC64 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index b9b4af2..25d0034 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -133,6 +133,13 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif +ifeq ($(CONFIG_PPC64),y) +ifdef CONFIG_HAVE_MPROFILE_KERNEL +CC_FLAGS_FTRACE:= -pg $(call cc-option,-mprofile-kernel) +KBUILD_CPPFLAGS+= -DCC_USING_MPROFILE_KERNEL +endif +endif + CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell) CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4) CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1153c43..dbcb635 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -52,6 +52,11 @@ config HAVE_FENTRY help Arch supports the gcc options -pg with -mfentry +config HAVE_MPROFILE_KERNEL + bool + help + Arch supports the gcc options -pg with -mprofile-kernel + config HAVE_C_RECORDMCOUNT bool help -- 1.8.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 5/8] ppc64 ftrace: disable profiling for some functions
At least POWER7/8 have MMUs that don't completely autoload; a normal, recoverable memory fault might pass through these functions. If a dynamic tracer function causes such a fault, any of these functions being traced with -mprofile-kernel may cause an endless recursion. Signed-off-by: Torsten Duwe--- arch/powerpc/kernel/process.c| 2 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 18 +- arch/powerpc/mm/hugetlbpage-hash64.c | 2 +- arch/powerpc/mm/hugetlbpage.c| 4 ++-- arch/powerpc/mm/mem.c| 2 +- arch/powerpc/mm/pgtable_64.c | 2 +- arch/powerpc/mm/slb.c| 6 +++--- arch/powerpc/mm/slice.c | 8 9 files changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 75b6676..c2900b9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -715,7 +715,7 @@ static inline void __switch_to_tm(struct task_struct *prev) * don't know which of the checkpointed state and the transactional * state to use. */ -void restore_tm_state(struct pt_regs *regs) +notrace void restore_tm_state(struct pt_regs *regs) { unsigned long msr_diff; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a67c6d7..125be37 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -205,7 +205,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here. 
*/ -int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, +notrace int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { enum ctx_state prev_state = exception_enter(); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index aee7017..90e89e7 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -849,7 +849,7 @@ void early_init_mmu_secondary(void) /* * Called by asm hashtable.S for doing lazy icache flush */ -unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) +notrace unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) { struct page *page; @@ -870,7 +870,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) } #ifdef CONFIG_PPC_MM_SLICES -static unsigned int get_paca_psize(unsigned long addr) +static notrace unsigned int get_paca_psize(unsigned long addr) { u64 lpsizes; unsigned char *hpsizes; @@ -899,7 +899,7 @@ unsigned int get_paca_psize(unsigned long addr) * For now this makes the whole process use 4k pages. */ #ifdef CONFIG_PPC_64K_PAGES -void demote_segment_4k(struct mm_struct *mm, unsigned long addr) +notrace void demote_segment_4k(struct mm_struct *mm, unsigned long addr) { if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; @@ -920,7 +920,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) * Result is 0: full permissions, _PAGE_RW: read-only, * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access. 
*/ -static int subpage_protection(struct mm_struct *mm, unsigned long ea) +static notrace int subpage_protection(struct mm_struct *mm, unsigned long ea) { struct subpage_prot_table *spt = >context.spt; u32 spp = 0; @@ -968,7 +968,7 @@ void hash_failure_debug(unsigned long ea, unsigned long access, trap, vsid, ssize, psize, lpsize, pte); } -static void check_paca_psize(unsigned long ea, struct mm_struct *mm, +static notrace void check_paca_psize(unsigned long ea, struct mm_struct *mm, int psize, bool user_region) { if (user_region) { @@ -990,7 +990,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page_mm(struct mm_struct *mm, unsigned long ea, +notrace int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap, unsigned long flags) { @@ -1186,7 +1186,7 @@ bail: } EXPORT_SYMBOL_GPL(hash_page_mm); -int hash_page(unsigned long ea, unsigned long access, unsigned long trap, +notrace int hash_page(unsigned long ea, unsigned long access, unsigned long trap, unsigned long dsisr) { unsigned long flags = 0; @@ -1288,7 +1288,7 @@ out_exit: /* WARNING: This is called from hash_low_64.S, if you change this prototype, * do not forget to update the assembly call site ! */ -void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, +notrace void flush_hash_page(unsigned long vpn, real_pte_t pte, int
[PATCH v3 0/8] ftrace with regs + live patching for ppc64 LE (ABI v2)
Hi all, here is the current status of ftrace with regs, trace ops and live patching for ppc64le. It seems I broke the ftrace graph caller and I spent most of last week trying to fix it; Steven, maybe you could have a look? I started out with -mprofile-kernel and now found that the ordinary -pg is very different. -mprofile-kernel emits only a bare-minimum prologue (set TOC, save LR) and then calls _mcount, which poses some problems. I managed to get them resolved up to the point of the graph return ... I tested intensively with the ftrace self tests, and, without the graph caller, this set passes all of them on ppc64le. I tried not to break BE, but may have missed an ifdef or two. Patch 2 (ftrace_modify_all_code default) is an independent prerequisite, I would even call it a fix -- please consider applying it even if you don't like the rest. Patch 5 has proven to be very useful during development; as mentioned earlier, many of these functions may get called during a recoverable fault. The whole recursion will probably terminate if all goes well, but I'd rather be defensive here. Torsten Duwe (8): ppc64le FTRACE_WITH_REGS implementation ppc use ftrace_modify_all_code default ppc64 ftrace_with_regs configuration variables ppc64 ftrace_with_regs: spare early boot and low level ppc64 ftrace: disable profiling for some functions ppc64 ftrace: disable profiling for some files Implement kernel live patching for ppc64le (ABIv2) Enable LIVEPATCH to be configured on ppc64le and add livepatch.o if it is selected. 
arch/powerpc/Kconfig | 7 ++ arch/powerpc/Makefile| 7 ++ arch/powerpc/include/asm/ftrace.h| 5 ++ arch/powerpc/include/asm/livepatch.h | 27 +++ arch/powerpc/kernel/Makefile | 13 +-- arch/powerpc/kernel/entry_64.S | 153 ++- arch/powerpc/kernel/ftrace.c | 88 +++- arch/powerpc/kernel/livepatch.c | 20 + arch/powerpc/kernel/module_64.c | 39 - arch/powerpc/kernel/process.c| 2 +- arch/powerpc/lib/Makefile| 4 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 18 ++--- arch/powerpc/mm/hugetlbpage-hash64.c | 2 +- arch/powerpc/mm/hugetlbpage.c| 4 +- arch/powerpc/mm/mem.c| 2 +- arch/powerpc/mm/pgtable_64.c | 2 +- arch/powerpc/mm/slb.c| 6 +- arch/powerpc/mm/slice.c | 8 +- kernel/trace/Kconfig | 5 ++ 20 files changed, 359 insertions(+), 55 deletions(-) create mode 100644 arch/powerpc/include/asm/livepatch.h create mode 100644 arch/powerpc/kernel/livepatch.c -- 1.8.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 4/8] ppc64 ftrace_with_regs: spare early boot and low level
Using -mprofile-kernel on early boot code not only confuses the checker but is also useless, as the infrastructure is not yet in place. Proceed like with -pg (remove it from CFLAGS), equally with time.o and ftrace itself. * arch/powerpc/kernel/Makefile: - remove -mprofile-kernel from low level and boot code objects' CFLAGS for FUNCTION_TRACER configurations. Signed-off-by: Torsten Duwe--- arch/powerpc/kernel/Makefile | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ba33693..0f417d5 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -16,14 +16,14 @@ endif ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog -mprofile-kernel +CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog -mprofile-kernel +CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog -mprofile-kernel +CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog -mprofile-kernel # do not trace tracer code -CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog -mprofile-kernel # timers used by tracing -CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog -mprofile-kernel endif obj-y := cputable.o ptrace.o syscalls.o \ -- 1.8.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 6/8] ppc64 ftrace: disable profiling for some files
This adds -mprofile-kernel to the cc flags to be stripped from the command line for code-patching.o and feature-fixups.o, in addition to "-pg" Signed-off-by: Torsten Duwe--- arch/powerpc/lib/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index a47e142..98e22b2 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -6,8 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64):= $(NO_MINIMAL_TOC) -CFLAGS_REMOVE_code-patching.o = -pg -CFLAGS_REMOVE_feature-fixups.o = -pg +CFLAGS_REMOVE_code-patching.o = -pg -mprofile-kernel +CFLAGS_REMOVE_feature-fixups.o = -pg -mprofile-kernel obj-y += string.o alloc.o crtsavres.o ppc_ksyms.o code-patching.o \ feature-fixups.o -- 1.8.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 7/8] Implement kernel live patching for ppc64le (ABIv2)
* create the appropriate files+functions arch/powerpc/include/asm/livepatch.h klp_check_compiler_support, klp_arch_set_pc arch/powerpc/kernel/livepatch.c with a stub for klp_write_module_reloc This is architecture-independent work in progress. * introduce a fixup in arch/powerpc/kernel/entry_64.S for local calls that are becoming global due to live patching. And of course do the main KLP thing: return to a maybe different address, possibly altered by the live patching ftrace op. Signed-off-by: Torsten Duwe--- arch/powerpc/include/asm/livepatch.h | 27 arch/powerpc/kernel/entry_64.S | 48 +--- arch/powerpc/kernel/livepatch.c | 20 +++ 3 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/include/asm/livepatch.h create mode 100644 arch/powerpc/kernel/livepatch.c diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h new file mode 100644 index 000..334eb55 --- /dev/null +++ b/arch/powerpc/include/asm/livepatch.h @@ -0,0 +1,27 @@ +#ifndef _ASM_POWERPC64_LIVEPATCH_H +#define _ASM_POWERPC64_LIVEPATCH_H + +#include +#include + +#ifdef CONFIG_LIVEPATCH +static inline int klp_check_compiler_support(void) +{ +#if !defined(_CALL_ELF) || _CALL_ELF != 2 + return 1; +#endif + return 0; +} + +extern int klp_write_module_reloc(struct module *mod, unsigned long type, + unsigned long loc, unsigned long value); + +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->nip = ip; +} +#else +#error Live patching support is disabled; check CONFIG_LIVEPATCH +#endif + +#endif /* _ASM_POWERPC64_LIVEPATCH_H */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b0dfbfe..2681601 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1264,6 +1264,9 @@ _GLOBAL(ftrace_caller) mflrr3 std r3, _NIP(r1) std r3, 16(r1) +#ifdef CONFIG_LIVEPATCH + mr r14,r3 // remember "old" NIP +#endif subir3, r3, MCOUNT_INSN_SIZE mfmsr r4 std r4, _MSR(r1) @@ -1280,7 
+1283,10 @@ ftrace_call: nop ld r3, _NIP(r1) - mtlrr3 + mtctr r3 // prepare to jump there +#ifdef CONFIG_LIVEPATCH + cmpdr14,r3 // has NIP been altered? +#endif REST_8GPRS(0,r1) REST_8GPRS(8,r1) @@ -1293,6 +1299,24 @@ ftrace_call: mtlrr12 mr r2,r0 // restore callee's TOC +#ifdef CONFIG_LIVEPATCH + beq+4f // likely(old_NIP == new_NIP) + + // For a local call, restore this TOC after calling the patch function. + // For a global call, it does not matter what we restore here, + // since the global caller does its own restore right afterwards, + // anyway. + // Just insert a KLP_return_helper frame in any case, + // so a patch function can always count on the changed stack offsets. + stdur1,-32(r1) // open new mini stack frame + std r0,24(r1) // save TOC now, unconditionally. + LOAD_REG_IMMEDIATE(r12,KLP_return_helper) + std r12,LRSAVE(r1) + mtlrr12 + bctr +4: +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER stdur1, -112(r1) .globl ftrace_graph_call @@ -1302,15 +1326,31 @@ _GLOBAL(ftrace_graph_stub) addir1, r1, 112 #endif - mflrr0 // move this LR to CTR - mtctr r0 - ld r0,LRSAVE(r1) // restore callee's lr at _mcount site mtlrr0 bctr// jump after _mcount site #endif /* CC_USING_MPROFILE_KERNEL */ _GLOBAL(ftrace_stub) blr + +#ifdef CONFIG_LIVEPATCH +/* Helper function for local calls that are becoming global + due to live patching. + We can't simply patch the NOP after the original call, + because, depending on the consistency model, some kernel + threads may still have called the original, local function + *without* saving their TOC in the respective stack frame slot, + so the decision is made per-thread during function return by + maybe inserting a KLP_return_helper frame or not. 
+*/ +KLP_return_helper: + ld r2,24(r1) // restore TOC (saved by ftrace_caller) + addi r1, r1, 32 // destroy mini stack frame + ld r0,LRSAVE(r1) // get the real return address + mtlrr0 + blr +#endif + #else _GLOBAL_TOC(_mcount) /* Taken from output of objdump from lib64/glibc */ diff --git a/arch/powerpc/kernel/livepatch.c b/arch/powerpc/kernel/livepatch.c new file mode 100644 index 000..9dace38 --- /dev/null +++ b/arch/powerpc/kernel/livepatch.c @@ -0,0 +1,20 @@ +#include +#include + +/** + * klp_write_module_reloc() - write a
[PATCH v3 3/8] ppc64 ftrace_with_regs configuration variables
* Makefile: - globally use -mprofile-kernel in case it's configured. * arch/powerpc/Kconfig / kernel/trace/Kconfig: - declare that ppc64 HAVE_MPROFILE_KERNEL and HAVE_DYNAMIC_FTRACE_WITH_REGS, and use it. Signed-off-by: Torsten Duwe--- arch/powerpc/Kconfig | 2 ++ arch/powerpc/Makefile | 7 +++ kernel/trace/Kconfig | 5 + 3 files changed, 14 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9a7057e..0e6011c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -97,8 +97,10 @@ config PPC select OF_RESERVED_MEM select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_MPROFILE_KERNEL select SYSCTL_EXCEPTION_TRACE select ARCH_WANT_OPTIONAL_GPIOLIB select VIRT_TO_BUS if !PPC64 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index b9b4af2..25d0034 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -133,6 +133,13 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif +ifeq ($(CONFIG_PPC64),y) +ifdef CONFIG_HAVE_MPROFILE_KERNEL +CC_FLAGS_FTRACE:= -pg $(call cc-option,-mprofile-kernel) +KBUILD_CPPFLAGS+= -DCC_USING_MPROFILE_KERNEL +endif +endif + CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell) CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4) CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1153c43..dbcb635 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -52,6 +52,11 @@ config HAVE_FENTRY help Arch supports the gcc options -pg with -mfentry +config HAVE_MPROFILE_KERNEL + bool + help + Arch supports the gcc options -pg with -mprofile-kernel + config HAVE_C_RECORDMCOUNT bool help -- 1.8.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 8/8] Enable LIVEPATCH to be configured on ppc64le and add livepatch.o if it is selected.
Signed-off-by: Torsten Duwe --- arch/powerpc/Kconfig | 5 +++++ arch/powerpc/kernel/Makefile | 1 + 2 files changed, 6 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 0e6011c..341ebe9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -163,6 +163,9 @@ config PPC select ARCH_HAS_DMA_SET_COHERENT_MASK select HAVE_ARCH_SECCOMP_FILTER +config HAVE_LIVEPATCH + def_bool PPC64 && CPU_LITTLE_ENDIAN + config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -1095,3 +1098,5 @@ config PPC_LIB_RHEAP bool source "arch/powerpc/kvm/Kconfig" + +source "kernel/livepatch/Kconfig" diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0f417d5..f9a2925 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -119,6 +119,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_TRACING) += trace_clock.o +obj-$(CONFIG_LIVEPATCH) += livepatch.o ifneq ($(CONFIG_PPC_INDIRECT_PIO),y) obj-y += iomap.o -- 1.8.5.6 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 1/8] ppc64le FTRACE_WITH_REGS implementation
Implement FTRACE_WITH_REGS for powerpc64, on ELF ABI v2. Initial work started by Vojtech Pavlik, used with permission. * arch/powerpc/kernel/entry_64.S: - enhance _mcount with a stub to support call sites generated by -mprofile-kernel. This is backward-compatible. - Implement an effective ftrace_caller that works from within the kernel binary as well as from modules. * arch/powerpc/kernel/ftrace.c: - be prepared to deal with ppc64 ELF ABI v2, especially calls to _mcount that result from gcc -mprofile-kernel - a little more error verbosity * arch/powerpc/kernel/module_64.c: - do not save the TOC pointer on the trampoline when the destination is ftrace_caller. This trampoline jump happens from a function prologue before a new stack frame is set up, so bad things may happen otherwise... - relax is_module_trampoline() to recognise the modified trampoline. Signed-off-by: Torsten Duwe--- arch/powerpc/include/asm/ftrace.h | 5 ++ arch/powerpc/kernel/entry_64.S| 113 +- arch/powerpc/kernel/ftrace.c | 72 +--- arch/powerpc/kernel/module_64.c | 39 - 4 files changed, 217 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ef89b14..6eb9fbc 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -46,6 +46,8 @@ extern void _mcount(void); #ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_ADDR ((unsigned long)ftrace_caller) +# define FTRACE_REGS_ADDR FTRACE_ADDR static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* reloction of mcount call site is the same as the address */ @@ -58,6 +60,9 @@ struct dyn_arch_ftrace { #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif #endif #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index a94f155..b0dfbfe 100644 --- 
a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1206,8 +1206,13 @@ _GLOBAL(enter_prom) #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) - blr + mflr r0 + mtctr r0 + ld r0,LRSAVE(r1) + mtlr r0 + bctr +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL_TOC(ftrace_caller) /* Taken from output of objdump from lib64/glibc */ mflr r3 @@ -1229,6 +1234,81 @@ _GLOBAL(ftrace_graph_stub) ld r0, 128(r1) mtlr r0 addi r1, r1, 112 +#else +_GLOBAL(ftrace_caller) +#if defined(_CALL_ELF) && _CALL_ELF == 2 + mflr r0 + bl 2f +2: mflr r12 + mtlr r0 + mr r0,r2 // save callee's TOC + addis r2,r12,(.TOC.-ftrace_caller-8)@ha + addi r2,r2,(.TOC.-ftrace_caller-8)@l +#else + mr r0,r2 +#endif + ld r12,LRSAVE(r1) // get caller's address + + stdu r1,-SWITCH_FRAME_SIZE(r1) + + std r12, _LINK(r1) + SAVE_8GPRS(0,r1) + std r0, 24(r1) // save TOC + SAVE_8GPRS(8,r1) + SAVE_8GPRS(16,r1) + SAVE_8GPRS(24,r1) + + LOAD_REG_IMMEDIATE(r3,function_trace_op) + ld r5,0(r3) + + mflr r3 + std r3, _NIP(r1) + std r3, 16(r1) + subi r3, r3, MCOUNT_INSN_SIZE + mfmsr r4 + std r4, _MSR(r1) + mfctr r4 + std r4, _CTR(r1) + mfxer r4 + std r4, _XER(r1) + mr r4, r12 + addi r6, r1 ,STACK_FRAME_OVERHEAD + +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + ld r3, _NIP(r1) + mtlr r3 + + REST_8GPRS(0,r1) + REST_8GPRS(8,r1) + REST_8GPRS(16,r1) + REST_8GPRS(24,r1) + + addi r1, r1, SWITCH_FRAME_SIZE + + ld r12, LRSAVE(r1) // get caller's address + mtlr r12 + mr r2,r0 // restore callee's TOC + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + stdu r1, -112(r1) +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) + addi r1, r1, 112 +#endif + + mflr r0 // move this LR to CTR + mtctr r0 + + ld r0,LRSAVE(r1) // restore callee's lr at _mcount site + mtlr r0 + bctr // jump after _mcount site +#endif /* CC_USING_MPROFILE_KERNEL */ _GLOBAL(ftrace_stub) blr #else @@ -1262,6 +1342,19 @@ _GLOBAL(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) +#ifdef
CC_USING_MPROFILE_KERNEL + // with -mprofile-kernel, parameter regs are still alive at _mcount + std r10, 104(r1) + std r9, 96(r1) + std r8, 88(r1) + std