[PATCH 0/5] powerpc: use jump label for cpu/mmu_has_feature

2013-08-25 Thread Kevin Hao
Inspired by Benjamin Herrenschmidt, this patch series tries to reduce the
cpu/mmu feature checking overhead by using jump labels. The following shows
how the run path of cpu_has_feature differs before and after applying these
patches:

   before                       after
  addis   r10,r2,1              b xxx
  addi    r9,r10,-2280          b xxx (This will also be omitted if the
  ld      r9,0(r9)                     feature is not set)
  ld      r9,16(r9)
  rldicl. r8,r9,55,63
  beq     c0037c94

This patch series passed the build test for almost all of the ppc defconfigs.
Some configs fail to build, but those failures are not related to this
change. It also passed allyesconfig for x86, and was boot tested on p2020rdb
and p5020ds boards.

Kevin Hao (5):
  jump_label: factor out the base part of jump_label.h to a separate
file
  jump_label: also include linux/atomic.h when jump label is enabled
  powerpc: move the cpu_has_feature to a separate file
  powerpc: use the jump label for cpu_has_feature
  powerpc: use jump label for mmu_has_feature

 arch/powerpc/include/asm/cacheflush.h   |   1 +
 arch/powerpc/include/asm/cpufeatures.h  |  42 ++
 arch/powerpc/include/asm/cputable.h |   8 --
 arch/powerpc/include/asm/cputime.h  |   1 +
 arch/powerpc/include/asm/dbell.h|   1 +
 arch/powerpc/include/asm/dcr-native.h   |   1 +
 arch/powerpc/include/asm/mman.h |   1 +
 arch/powerpc/include/asm/mmu.h  |  19 +
 arch/powerpc/include/asm/time.h |   1 +
 arch/powerpc/kernel/align.c |   1 +
 arch/powerpc/kernel/cputable.c  |  43 ++
 arch/powerpc/kernel/irq.c   |   1 +
 arch/powerpc/kernel/process.c   |   1 +
 arch/powerpc/kernel/setup-common.c  |   1 +
 arch/powerpc/kernel/setup_32.c  |   1 +
 arch/powerpc/kernel/smp.c   |   1 +
 arch/powerpc/oprofile/op_model_rs64.c   |   1 +
 arch/powerpc/platforms/cell/pervasive.c |   1 +
 arch/powerpc/xmon/ppc-dis.c |   1 +
 include/linux/jump_label.h  | 132 +
 include/linux/jump_label_base.h | 142 
 21 files changed, 263 insertions(+), 138 deletions(-)
 create mode 100644 arch/powerpc/include/asm/cpufeatures.h
 create mode 100644 include/linux/jump_label_base.h

-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 3/5] powerpc: move the cpu_has_feature to a separate file

2013-08-25 Thread Kevin Hao
We plan to use jump label for cpu_has_feature. In order to implement
this we need to include linux/jump_label_base.h in asm/cputable.h.
But asm/cputable.h is such a basic header file for ppc that it is
included by almost all the other header files. Including
linux/jump_label_base.h there would introduce various recursive inclusions,
which are very hard to fix. So we choose to move the function
cpu_has_feature to a separate header file before using the jump label
for it. No functional change.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
 arch/powerpc/include/asm/cacheflush.h   |  1 +
 arch/powerpc/include/asm/cpufeatures.h  | 14 ++
 arch/powerpc/include/asm/cputable.h |  8 
 arch/powerpc/include/asm/cputime.h  |  1 +
 arch/powerpc/include/asm/dbell.h|  1 +
 arch/powerpc/include/asm/dcr-native.h   |  1 +
 arch/powerpc/include/asm/mman.h |  1 +
 arch/powerpc/include/asm/time.h |  1 +
 arch/powerpc/kernel/align.c |  1 +
 arch/powerpc/kernel/irq.c   |  1 +
 arch/powerpc/kernel/process.c   |  1 +
 arch/powerpc/kernel/setup-common.c  |  1 +
 arch/powerpc/kernel/setup_32.c  |  1 +
 arch/powerpc/kernel/smp.c   |  1 +
 arch/powerpc/oprofile/op_model_rs64.c   |  1 +
 arch/powerpc/platforms/cell/pervasive.c |  1 +
 arch/powerpc/xmon/ppc-dis.c |  1 +
 17 files changed, 29 insertions(+), 8 deletions(-)
 create mode 100644 arch/powerpc/include/asm/cpufeatures.h

diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index b843e35..540b32e 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -11,6 +11,7 @@
 
 #include linux/mm.h
 #include asm/cputable.h
+#include asm/cpufeatures.h
 
 /*
  * No cache flushing is required when address mappings are changed,
diff --git a/arch/powerpc/include/asm/cpufeatures.h 
b/arch/powerpc/include/asm/cpufeatures.h
new file mode 100644
index 000..37650db
--- /dev/null
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -0,0 +1,14 @@
+#ifndef __ASM_POWERPC_CPUFEATURES_H
+#define __ASM_POWERPC_CPUFEATURES_H
+
+#include asm/cputable.h
+
+static inline int cpu_has_feature(unsigned long feature)
+{
+   return (CPU_FTRS_ALWAYS  feature) ||
+  (CPU_FTRS_POSSIBLE
+cur_cpu_spec-cpu_features
+feature);
+}
+
+#endif /* __ASM_POWERPC_CPUFEATURE_H */
diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index 6f3887d..ab0813d 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -541,14 +541,6 @@ enum {
 };
 #endif /* __powerpc64__ */
 
-static inline int cpu_has_feature(unsigned long feature)
-{
-   return (CPU_FTRS_ALWAYS  feature) ||
-  (CPU_FTRS_POSSIBLE
-cur_cpu_spec-cpu_features
-feature);
-}
-
 #define HBP_NUM 1
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/cputime.h 
b/arch/powerpc/include/asm/cputime.h
index 607559a..15481e2 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -28,6 +28,7 @@ static inline void setup_cputime_one_jiffy(void) { }
 #include asm/div64.h
 #include asm/time.h
 #include asm/param.h
+#include asm/cpufeatures.h
 
 typedef u64 __nocast cputime_t;
 typedef u64 __nocast cputime64_t;
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 5fa6b20..2d9eae3 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -16,6 +16,7 @@
 #include linux/threads.h
 
 #include asm/ppc-opcode.h
+#include asm/cpufeatures.h
 
 #define PPC_DBELL_MSG_BRDCAST  (0x0400)
 #define PPC_DBELL_TYPE(x)  (((x)  0xf)  (63-36))
diff --git a/arch/powerpc/include/asm/dcr-native.h 
b/arch/powerpc/include/asm/dcr-native.h
index 7d2e623..3372650 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -24,6 +24,7 @@
 
 #include linux/spinlock.h
 #include asm/cputable.h
+#include asm/cpufeatures.h
 
 typedef struct {
unsigned int base;
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 8565c25..74922ad 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,6 +13,7 @@
 
 #include asm/cputable.h
 #include linux/mm.h
+#include asm/cpufeatures.h
 
 /*
  * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index c1f2676..20e6ee9 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -18,6 +18,7 @@
 #include linux/percpu.h
 
 #include asm/processor.h
+#include asm/cpufeatures.h
 
 /* time.c */
 extern unsigned long tb_ticks_per_jiffy;
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index ee5b690..ca4169a 100644
--- 

[PATCH 2/5] jump_label: also include linux/atomic.h when jump label is enabled

2013-08-25 Thread Kevin Hao
The struct static_key has an atomic_t type member no matter
whether jump label is enabled or not. We already include linux/atomic.h
when jump label is not enabled, but it also makes sense to include
this header file when jump label is enabled.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
 include/linux/jump_label_base.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/jump_label_base.h b/include/linux/jump_label_base.h
index 20df08f..d5c8f4b 100644
--- a/include/linux/jump_label_base.h
+++ b/include/linux/jump_label_base.h
@@ -5,6 +5,8 @@
 #include linux/types.h
 #include linux/compiler.h
 
+#include linux/atomic.h
+
 #if defined(CC_HAVE_ASM_GOTO)  defined(CONFIG_JUMP_LABEL)
 
 struct static_key {
@@ -77,8 +79,6 @@ extern void jump_label_apply_nops(struct module *mod);
 
 #else  /* !HAVE_JUMP_LABEL */
 
-#include linux/atomic.h
-
 struct static_key {
atomic_t enabled;
 };
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 5/5] powerpc: use jump label for mmu_has_feature

2013-08-25 Thread Kevin Hao
The mmu features are fixed once the probing of mmu features is done,
and the function mmu_has_feature() is used in some hot paths.
Checking the mmu features on every invocation of
mmu_has_feature() seems suboptimal. This patch tries to reduce the
overhead of this check by using jump labels. But we can only use
the jump labels for this check after the execution of
jump_label_init(), so we introduce another jump label to
still do the feature check by default before all the mmu
feature jump labels are initialized.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
 arch/powerpc/include/asm/mmu.h | 19 +++
 arch/powerpc/kernel/cputable.c | 20 
 2 files changed, 39 insertions(+)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 691fd8a..163e9b1 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -121,10 +121,29 @@
 DECLARE_PER_CPU(int, next_tlbcam_idx);
 #endif
 
+#ifdef CONFIG_JUMP_LABEL
+#include linux/jump_label_base.h
+
+#define MAX_MMU_FEATURES   32
+
+extern struct static_key mmu_feat_keys[MAX_MMU_FEATURES];
+extern struct static_key mmu_feat_keys_enabled;
+
+static inline int mmu_has_feature(unsigned long feature)
+{
+   if (static_key_false(mmu_feat_keys_enabled)) {
+   int i = __builtin_ctzl(feature);
+
+   return static_key_false(mmu_feat_keys[i]);
+   } else
+   return !!(cur_cpu_spec-mmu_features  feature);
+}
+#else
 static inline int mmu_has_feature(unsigned long feature)
 {
return (cur_cpu_spec-mmu_features  feature);
 }
+#endif
 
 static inline void mmu_clear_feature(unsigned long feature)
 {
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 2014ab7..f25eb1d 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2280,4 +2280,24 @@ static __init int cpu_feat_keys_init(void)
return 0;
 }
 early_initcall(cpu_feat_keys_init);
+
+struct static_key mmu_feat_keys[MAX_MMU_FEATURES];
+struct static_key mmu_feat_keys_enabled;
+
+static __init int mmu_feat_keys_init(void)
+{
+   int i;
+
+   for (i = 0; i  MAX_MMU_FEATURES; i++) {
+   unsigned long f = 1  i;
+
+   if (cur_cpu_spec-mmu_features  f)
+   static_key_slow_inc(mmu_feat_keys[i]);
+   }
+
+   static_key_slow_inc(mmu_feat_keys_enabled);
+
+   return 0;
+}
+early_initcall(mmu_feat_keys_init);
 #endif
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 4/5] powerpc: use the jump label for cpu_has_feature

2013-08-25 Thread Kevin Hao
The cpu features are fixed once the probing of cpu features is done,
and the function cpu_has_feature() is used in some hot paths.
Checking the cpu features on every invocation of
cpu_has_feature() seems suboptimal. This patch tries to reduce the
overhead of this check by using jump labels. But we can only use
the jump labels for this check after the execution of
jump_label_init(), so we introduce another jump label to
still do the feature check by default before all the cpu
feature jump labels are initialized.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
 arch/powerpc/include/asm/cpufeatures.h | 28 
 arch/powerpc/kernel/cputable.c | 23 +++
 2 files changed, 51 insertions(+)

diff --git a/arch/powerpc/include/asm/cpufeatures.h 
b/arch/powerpc/include/asm/cpufeatures.h
index 37650db..598ac91 100644
--- a/arch/powerpc/include/asm/cpufeatures.h
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -2,7 +2,34 @@
 #define __ASM_POWERPC_CPUFEATURES_H
 
 #include asm/cputable.h
+#ifdef CONFIG_JUMP_LABEL
+#include linux/atomic.h
+#include linux/jump_label_base.h
 
+#ifdef __powerpc64__
+#define MAX_CPU_FEATURES   64
+#else
+#define MAX_CPU_FEATURES   32
+#endif
+extern struct static_key cpu_feat_keys[MAX_CPU_FEATURES];
+extern struct static_key cpu_feat_keys_enabled;
+
+static inline int cpu_has_feature(unsigned long feature)
+{
+   if (CPU_FTRS_ALWAYS  feature)
+   return 1;
+
+   if (!(CPU_FTRS_POSSIBLE | feature))
+   return 0;
+
+   if (static_key_false(cpu_feat_keys_enabled)) {
+   int i = __builtin_ctzl(feature);
+
+   return static_key_false(cpu_feat_keys[i]);
+   } else
+   return !!(cur_cpu_spec-cpu_features  feature);
+}
+#else
 static inline int cpu_has_feature(unsigned long feature)
 {
return (CPU_FTRS_ALWAYS  feature) ||
@@ -10,5 +37,6 @@ static inline int cpu_has_feature(unsigned long feature)
 cur_cpu_spec-cpu_features
 feature);
 }
+#endif
 
 #endif /* __ASM_POWERPC_CPUFEATURE_H */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 22973a7..2014ab7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -21,6 +21,7 @@
 #include asm/prom.h  /* for PTRRELOC on ARCH=ppc */
 #include asm/mmu.h
 #include asm/setup.h
+#include asm/cpufeatures.h
 
 struct cpu_spec* cur_cpu_spec = NULL;
 EXPORT_SYMBOL(cur_cpu_spec);
@@ -2258,3 +2259,25 @@ struct cpu_spec * __init identify_cpu(unsigned long 
offset, unsigned int pvr)
 
return NULL;
 }
+
+#ifdef CONFIG_JUMP_LABEL
+struct static_key cpu_feat_keys[MAX_CPU_FEATURES];
+struct static_key cpu_feat_keys_enabled;
+
+static __init int cpu_feat_keys_init(void)
+{
+   int i;
+
+   for (i = 0; i  MAX_CPU_FEATURES; i++) {
+   unsigned long f = 1  i;
+
+   if (cur_cpu_spec-cpu_features  f)
+   static_key_slow_inc(cpu_feat_keys[i]);
+   }
+
+   static_key_slow_inc(cpu_feat_keys_enabled);
+
+   return 0;
+}
+early_initcall(cpu_feat_keys_init);
+#endif
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 1/5] jump_label: factor out the base part of jump_label.h to a separate file

2013-08-25 Thread Kevin Hao
We plan to use the jump label in the cpu/mmu feature check on ppc.
This will need to include jump_label.h in several very basic header
files of ppc which seem to be included by most of the other header
files implicitly or explicitly. But the current jump_label.h
also includes linux/workqueue.h, and this will cause recursive
inclusion. In order to fix this, we choose to factor out the base
part of jump_label.h to a separate header file so that we can include
that file instead of jump_label.h to avoid the recursive inclusion.
No functional change.

Signed-off-by: Kevin Hao haoke...@gmail.com
---
 include/linux/jump_label.h  | 132 +
 include/linux/jump_label_base.h | 142 
 2 files changed, 144 insertions(+), 130 deletions(-)
 create mode 100644 include/linux/jump_label_base.h

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0976fc4..14bae65 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -46,20 +46,11 @@
  *
 */
 
-#include linux/types.h
-#include linux/compiler.h
 #include linux/workqueue.h
+#include linux/jump_label_base.h
 
-#if defined(CC_HAVE_ASM_GOTO)  defined(CONFIG_JUMP_LABEL)
 
-struct static_key {
-   atomic_t enabled;
-/* Set lsb bit to 1 if branch is default true, 0 ot */
-   struct jump_entry *entries;
-#ifdef CONFIG_MODULES
-   struct static_key_mod *next;
-#endif
-};
+#ifdef HAVE_JUMP_LABEL
 
 struct static_key_deferred {
struct static_key key;
@@ -67,145 +58,26 @@ struct static_key_deferred {
struct delayed_work work;
 };
 
-# include asm/jump_label.h
-# define HAVE_JUMP_LABEL
-#endif /* CC_HAVE_ASM_GOTO  CONFIG_JUMP_LABEL */
-
-enum jump_label_type {
-   JUMP_LABEL_DISABLE = 0,
-   JUMP_LABEL_ENABLE,
-};
-
-struct module;
-
-#ifdef HAVE_JUMP_LABEL
-
-#define JUMP_LABEL_TRUE_BRANCH 1UL
-
-static
-inline struct jump_entry *jump_label_get_entries(struct static_key *key)
-{
-   return (struct jump_entry *)((unsigned long)key-entries
-~JUMP_LABEL_TRUE_BRANCH);
-}
-
-static inline bool jump_label_get_branch_default(struct static_key *key)
-{
-   if ((unsigned long)key-entries  JUMP_LABEL_TRUE_BRANCH)
-   return true;
-   return false;
-}
-
-static __always_inline bool static_key_false(struct static_key *key)
-{
-   return arch_static_branch(key);
-}
-
-static __always_inline bool static_key_true(struct static_key *key)
-{
-   return !static_key_false(key);
-}
-
-extern struct jump_entry __start___jump_table[];
-extern struct jump_entry __stop___jump_table[];
-
-extern void jump_label_init(void);
-extern void jump_label_lock(void);
-extern void jump_label_unlock(void);
-extern void arch_jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type);
-extern void arch_jump_label_transform_static(struct jump_entry *entry,
-enum jump_label_type type);
-extern int jump_label_text_reserved(void *start, void *end);
-extern void static_key_slow_inc(struct static_key *key);
-extern void static_key_slow_dec(struct static_key *key);
 extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
-extern void jump_label_apply_nops(struct module *mod);
 extern void
 jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
 
-#define STATIC_KEY_INIT_TRUE ((struct static_key) \
-   { .enabled = ATOMIC_INIT(1), .entries = (void *)1 })
-#define STATIC_KEY_INIT_FALSE ((struct static_key) \
-   { .enabled = ATOMIC_INIT(0), .entries = (void *)0 })
-
 #else  /* !HAVE_JUMP_LABEL */
 
-#include linux/atomic.h
-
-struct static_key {
-   atomic_t enabled;
-};
-
-static __always_inline void jump_label_init(void)
-{
-}
-
 struct static_key_deferred {
struct static_key  key;
 };
 
-static __always_inline bool static_key_false(struct static_key *key)
-{
-   if (unlikely(atomic_read(key-enabled))  0)
-   return true;
-   return false;
-}
-
-static __always_inline bool static_key_true(struct static_key *key)
-{
-   if (likely(atomic_read(key-enabled))  0)
-   return true;
-   return false;
-}
-
-static inline void static_key_slow_inc(struct static_key *key)
-{
-   atomic_inc(key-enabled);
-}
-
-static inline void static_key_slow_dec(struct static_key *key)
-{
-   atomic_dec(key-enabled);
-}
-
 static inline void static_key_slow_dec_deferred(struct static_key_deferred 
*key)
 {
static_key_slow_dec(key-key);
 }
 
-static inline int jump_label_text_reserved(void *start, void *end)
-{
-   return 0;
-}
-
-static inline void jump_label_lock(void) {}
-static inline void jump_label_unlock(void) {}
-
-static inline int jump_label_apply_nops(struct module *mod)
-{
-   return 0;
-}
-
 static inline void
 jump_label_rate_limit(struct static_key_deferred *key,
unsigned 

Re: [PATCH 2/2] ppc: kvm: use anon_inode_getfd() with O_CLOEXEC flag

2013-08-25 Thread Alexander Graf

On 24.08.2013, at 21:14, Yann Droneaud wrote:

 KVM uses anon_inode_get() to allocate file descriptors as part
 of some of its ioctls. But those ioctls are lacking a flag argument
 allowing userspace to choose options for the newly opened file descriptor.
 
 In such case it's advised to use O_CLOEXEC by default so that
 userspace is allowed to choose, without race, if the file descriptor
 is going to be inherited across exec().
 
 This patch set O_CLOEXEC flag on all file descriptors created
 with anon_inode_getfd() to not leak file descriptors across exec().
 
 Signed-off-by: Yann Droneaud ydrone...@opteya.com
 Link: http://lkml.kernel.org/r/cover.1377372576.git.ydrone...@opteya.com

Reviewed-by: Alexander Graf ag...@suse.de

Would it make sense to simply inherit the O_CLOEXEC flag from the parent kvm fd 
instead? That would give user space the power to keep fds across exec() if it 
wants to.


Alex

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/kvm: Handle the boundary condition correctly

2013-08-25 Thread Alexander Graf

On 23.08.2013, at 05:28, Benjamin Herrenschmidt wrote:

 On Fri, 2013-08-23 at 09:01 +0530, Aneesh Kumar K.V wrote:
 Alexander Graf ag...@suse.de writes:
 
 On 22.08.2013, at 12:37, Aneesh Kumar K.V wrote:
 
 From: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
 
 Isn't this you?
 
 Yes. The patches are generated using git format-patch and sent by
 git send-email. That's how it always created patches for me. I am not sure if
 there is a config I can change to avoid having From:
 
 Don't bother, that's perfectly fine, and git am will do the right thing.

It will, but it's an indicator that something in his git config is 
misconfigured. Usually when git sees Author == Sender it will omit the From: 
line.


Alex

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/kvm: Handle the boundary condition correctly

2013-08-25 Thread Alexander Graf

On 23.08.2013, at 04:31, Aneesh Kumar K.V wrote:

 Alexander Graf ag...@suse.de writes:
 
 On 22.08.2013, at 12:37, Aneesh Kumar K.V wrote:
 
 From: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
 
 Isn't this you?
 
 Yes. The patches are generated using git format-patch and sent by
 git send-email. That's how it always created patches for me. I am not sure if
 there is a config I can change to avoid having From:
 
 
 
 We should be able to copy upto count bytes
 
 Why?
 
 
 Without this we end up doing
 
 +struct kvm_get_htab_buf {
 +struct kvm_get_htab_header header;
 +/*
 + * Older kernel required one extra byte.
 + */
 +unsigned long hpte[3];
 +} hpte_buf;
 
 
 even though we are only looking for one hpte entry.

Ok, please give me an example with real numbers and why it breaks.


Alex

 
 http://mid.gmane.org/1376995766-16526-4-git-send-email-aneesh.ku...@linux.vnet.ibm.com
 
 
 Alex
 
 
 Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
 ---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
 b/arch/powerpc/kvm/book3s_64_mmu_hv.c
 index 710d313..0ae6bb6 100644
 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
 +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
 @@ -1362,7 +1362,7 @@ static ssize_t kvm_htab_read(struct file *file, char 
 __user *buf,
 lbuf = (unsigned long __user *)buf;
 
 nb = 0;
 -   while (nb + sizeof(hdr) + HPTE_SIZE  count) {
 +   while (nb + sizeof(hdr) + HPTE_SIZE = count) {
 /* Initialize header */
 hptr = (struct kvm_get_htab_header __user *)buf;
 hdr.n_valid = 0;
 @@ -1385,7 +1385,7 @@ static ssize_t kvm_htab_read(struct file *file, char 
 __user *buf,
 /* Grab a series of valid entries */
 while (i  kvm-arch.hpt_npte 
hdr.n_valid  0x 
 -  nb + HPTE_SIZE  count 
 +  nb + HPTE_SIZE = count 
record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
 /* valid entry, write it out */
 ++hdr.n_valid;
 -- 
 1.8.1.2
 
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Ethernet over PCIe driver for Inter-Processor Communication

2013-08-25 Thread David Hawkins

Hi S.Saravanan,


Root complexes would normally interrupt a device via a PCIe write
to a register in a BAR on the end-point (or in extended configuration
space registers depending on the hardware implementation).


MPC8640 End point implements only the Type 0 header (Page 1116) . The
header implements five BARs (Page 1165).


One of those BARs typically provides access to the PowerPC memory
mapped registers (or at least a 1MB window onto those registers).
This is how your root complex can write to some form of messaging
register.


PCIe drivers need some way to interrupt the processor, so there must
be an option somewhere ... for example, what are the message register
interrupts intended for? See p479

http://cache.freescale.com/files/32bit/doc/ref_manual/MPC8641DRM.pdf

(Ira and myself have not used the MPC8640 so are not familiar with
its user manual).


Message registers are for interrupting the processor. A write to
them sends an interrupt to the processor.  Actually message registers
are used by the RC to enable interrupts to the processor when an EP
sends an MSI transaction to RC. In RC driver i register separately for
the msi interrupts from all three EPs.


This is pretty much what you are looking for then right?

The end-points interrupt the root-complex using PCIe MSI interrupts,
whereas the root-complex interrupts an end-point by writing directly
to its MSI interrupt.


To access them in the EP from the RC  i will have to set an inbound
window mapping the PIC register space in the EP to the PCI mem space
assigned to it . An inbound window maps a PCI address on the bus
received by the PCIe controller to a platform address. I will try that
and let u know .


Right, as I comment above, one of the BARs typically exposes the PowerPC
internal registers.


Feel free to discuss your ideas for your PCIe driver (eg., why start
with rionet rather than Ira's driver), either on-list, or email Ira
and myself directly


To be frank with you there was no particular reason in starting with
rionet. Maybe because our board also had SRIO interface and we are using
rionet driver successfully. I had looked at Ira's driver later. I will
study that also and try   to come back with a skeleton for my driver.


It's always a good idea to discuss different options, and to stub out
drivers or create minimal (but functional) drivers. That way you'll
be able to see how similar your new driver is to other drivers, and
you'll quickly discover if there is a hardware feature in the
existing driver that you cannot emulate (eg., some SRIO feature
used by the rionet driver).


One further note. You might want to look at rproc/rpmsg and their virtio
driver support. That seems to be where the Linux world is moving for
inter-processor communications. See for example the ARM CPUs interfacing
with DSPs.


I will study that as i am not familiar with virtio .


Follow Ira's advice. Talk to the guys working on virtio, tell them what
you are trying to do. They'll likely have good advice for you.

Good luck!

Cheers,
Dave


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Detecting LD/ST instruction

2013-08-25 Thread Michael Neuling
Sukadev Bhattiprolu suka...@linux.vnet.ibm.com wrote:

 Michael Neuling [mi...@neuling.org] wrote:
 |  I am working on implementing the 'perf mem' command for Power
 |  systems. This would for instance, let us know where in the memory
 |  hierarchy (L1, L2, Local RAM etc) the data for a load/store
 |  instruction was found (hit).
 |  
 |  On Power7, if the mcmcra[DCACHE_MISS] is clear _and_ the
 |  instruction is a load/store, then it implies a L1-hit.
 |  
 |  Unlike on Power8, the Power7 event vector has no indication
 |  if the instruction was load/store.
 |  
 |  In the context of a PMU interrupt, is there any way to determine
 |  if an instruction is a load/store ?
 | 
 | You could read the instruction from memory and work it out.  
 | 
 | We do something similar to this in power_pmu_bhrb_to() where we read the
 | instruction and work out where the branch is going to.
 | 
 | If you do this, please use and/or extend the functions in
 | arch/powerpc/lib/code-patching.c
 
 Here is a draft of what I could come up with.  With this patch, 
 the number of L1 hits on Power7 matches that on Power8 for one
 application.

Nice, the approach is along the lines of what I was thinking.

 But, wondering if there is a more efficient way to do this - there
 are over 50 flavors of load and store!

I dunno, there might be.  If you look at all the opcodes in binary,
there's often a nice little pattern you can use. 

Did you catch all the VSX and VMX loads/stores?

snip
 + if (op == 31) {
 + n = sizeof(x_form_load_store) / sizeof(int);
 +
 + for (i = 0; i  n; i++) {

Yeah, this might be a bit slow... Are there any instructions with op ==
31 that aren't a load/store?

 
 + if (x_form_load_store[i] == load_store_xval(*instr))
 + return 1;
 + }
 + }
 +
 + return 0;
 +}
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 1/2] powerpc/85xx: add hardware automatically enter altivec idle state

2013-08-25 Thread Wang Dongsheng-B40534


 -Original Message-
 From: Wood Scott-B07421
 Sent: Friday, August 23, 2013 11:31 PM
 To: Wang Dongsheng-B40534
 Cc: Wood Scott-B07421; Kumar Gala; Zhao Chenhui-B35336; linuxppc-
 d...@lists.ozlabs.org
 Subject: Re: [PATCH 1/2] powerpc/85xx: add hardware automatically enter
 altivec idle state
 
 On Thu, 2013-08-22 at 21:52 -0500, Wang Dongsheng-B40534 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Thursday, August 22, 2013 11:19 PM
   To: Wang Dongsheng-B40534
   Cc: Wood Scott-B07421; Kumar Gala; Zhao Chenhui-B35336; linuxppc-
   d...@lists.ozlabs.org
   Subject: Re: [PATCH 1/2] powerpc/85xx: add hardware automatically
 enter
   altivec idle state
  
   On Wed, 2013-08-21 at 22:13 -0500, Wang Dongsheng-B40534 wrote:
   
 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, August 20, 2013 8:39 AM
 To: Wang Dongsheng-B40534
 Cc: Wood Scott-B07421; Kumar Gala; linuxppc-dev@lists.ozlabs.org
 Subject: Re: [PATCH 1/2] powerpc/85xx: add hardware automatically
 enter altivec idle state

 It just seems wrong to have an ad-hoc mechanism for running
 core-specific code when we have cputable...  If we really need
 this,
 maybe we should add a cpu_setup_late function pointer.

 With your patch, when does the power management register get set
 when hot plugging a cpu?

Um.. I don't deal with this situation. I will fix it.
__setup/restore_cpu_e6500 looks good. But only bootcpu call
   __setup_cpu_e6500, not on each cpu.
I think this is a bug.
  
   Other CPUs call __restore_cpu_e6500.
  
  No, there is bootcore of secondary thread, and other cores of first
 thread call __restore_cpu_e6500.
 
 This is the upstream list -- there is no e6500 thread support yet. :-)
 
 But in the SDK I do see generic_secondary_common_init being called from
 generic_secondary_thread_init, which means __restore_cpu_e6500 will be
 called.

Thanks.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [v3] powerpc/mpc85xx: Update the clock device tree nodes

2013-08-25 Thread Tang Yuantian-B29983
 
  clockgen: global-utilities@e1000 {
  -   compatible = fsl,b4420-clockgen, fsl,qoriq-clockgen-2.0;
  +   compatible = fsl,b4420-clockgen, fsl,qoriq-clockgen-2.0,
  +  fixed-clock;
  +   clock-output-names = sysclk;
  +   #clock-cells = 0;
 
 Does U-Boot fill in clock-frequency here?
 
Yes, clock-frequency will be filled by uboot.
You suggested we'd better not add it here.

  +   #address-cells = 1;
  +   #size-cells = 0;
  +   pll0: pll0@800 {
  +   #clock-cells = 1;
  +   reg = 0x800;
  +   compatible = fsl,core-pll-clock;
  +   clocks = clockgen;
  +   clock-output-names = pll0, pll0-div2, pll0-div4;
  +   };
  +   pll1: pll1@820 {
  +   #clock-cells = 1;
  +   reg = 0x820;
  +   compatible = fsl,core-pll-clock;
  +   clocks = clockgen;
  +   clock-output-names = pll1, pll1-div2, pll1-div4;
  +   };
 
 Please leave a blank line between properties and nodes, and between nodes.
 
OK, will add.

 What does reg represent?  Where is the binding for this?
 
 The compatible is too vague.
Reg is the register offset. I should have provided a binding document.
About the compatible, you should have pointed it out earlier in the SDK review.
It is too late to change it now since the clock driver was merged months ago,
although I sent this patch first.
Besides, it is not too bad because other archs use similar names.

Regards,
Yuantian

 
  +   mux0: mux0@0 {
  +   #clock-cells = 0;
  +   reg = 0x0;
  +   compatible = fsl,core-mux-clock;
  +   clocks = pll0 0, pll0 1, pll0 2,
  +pll1 0, pll1 1, pll1 2;
  +   clock-names = pll0_0, pll0_1, pll0_2,
  +   pll1_0, pll1_1, pll1_2;
  +   clock-output-names = cmux0;
  +   };
 
 What does reg represent?  Where is the binding for this?
 
 The compatible is too vague.
 
 -Scott

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/kvm: Handle the boundary condition correctly

2013-08-25 Thread Aneesh Kumar K.V
Alexander Graf ag...@suse.de writes:

 On 23.08.2013, at 04:31, Aneesh Kumar K.V wrote:

 Alexander Graf ag...@suse.de writes:
 
 On 22.08.2013, at 12:37, Aneesh Kumar K.V wrote:
 
 From: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
 
 Isn't this you?
 
 Yes. The patches are generated using git format-patch and sent by
 git send-email. That's how it always created patches for me. I am not sure if
 there is a config I can change to avoid having From:
 
 
 
 We should be able to copy upto count bytes
 
 Why?
 
 
 Without this we end up doing
 
 +struct kvm_get_htab_buf {
 +struct kvm_get_htab_header header;
 +/*
 + * Older kernel required one extra byte.
 + */
 +unsigned long hpte[3];
 +} hpte_buf;
 
 
 even though we are only looking for one hpte entry.

 Ok, please give me an example with real numbers and why it breaks.

 
 http://mid.gmane.org/1376995766-16526-4-git-send-email-aneesh.ku...@linux.vnet.ibm.com
 

Didn't quite get what you are looking for. As explained before, we now
need to pass an array of size 3 even though we know we need to
read only 2 entries, because the kernel doesn't loop correctly.

-aneesh

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: DSCR FSCR cleanup

2013-08-25 Thread Michael Neuling
As suggested by paulus we can simplify the Data Stream Control Register
(DSCR) Facility Status and Control Register (FSCR) handling.

Firstly, we simplify the asm by using a rldimi.

Secondly, we now use the FSCR only to control the DSCR facility, rather
than both the FSCR and HFSCR.  Users will see no functional change from
this but will get a minor speedup as they will trap into the kernel only
once (rather than twice) when they first touch the DSCR.  Also, this
change removes a bunch of ugly FTR_SECTION code.

Signed-off-by: Michael Neuling mi...@neuling.org
--
 arch/powerpc/kernel/entry_64.S | 31 ++-
 arch/powerpc/kernel/traps.c|  7 ++-
 2 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2bd0b88..513dc4d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -575,34 +575,15 @@ BEGIN_FTR_SECTION
ld  r7,DSCR_DEFAULT@toc(2)
ld  r0,THREAD_DSCR(r4)
cmpwi   r6,0
-   li  r8, FSCR_DSCR
bne 1f
ld  r0,0(r7)
-   b   3f
 1:
-  BEGIN_FTR_SECTION_NESTED(70)
-   mfspr   r6, SPRN_FSCR
-   or  r6, r6, r8
-   mtspr   SPRN_FSCR, r6
-BEGIN_FTR_SECTION_NESTED(69)
-   mfspr   r6, SPRN_HFSCR
-   or  r6, r6, r8
-   mtspr   SPRN_HFSCR, r6
-END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69)
-   b   4f
-  END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
-3:
-  BEGIN_FTR_SECTION_NESTED(70)
-   mfspr   r6, SPRN_FSCR
-   andcr6, r6, r8
-   mtspr   SPRN_FSCR, r6
-BEGIN_FTR_SECTION_NESTED(69)
-   mfspr   r6, SPRN_HFSCR
-   andcr6, r6, r8
-   mtspr   SPRN_HFSCR, r6
-END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69)
-  END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
-4: cmpdr0,r25
+BEGIN_FTR_SECTION_NESTED(70)
+   mfspr   r8, SPRN_FSCR
+   rldimi  r8, r6, FSCR_DSCR_LG, (63 - FSCR_DSCR_LG)
+   mtspr   SPRN_FSCR, r8
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
+   cmpdr0,r25
beq 2f
mtspr   SPRN_DSCR,r0
 2:
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index e435bc0..0ba68a2 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1322,13 +1322,10 @@ void facility_unavailable_exception(struct pt_regs 
*regs)
if (status == FSCR_DSCR_LG) {
/* User is acessing the DSCR.  Set the inherit bit and allow
 * the user to set it directly in future by setting via the
-* H/FSCR DSCR bit.
+* FSCR DSCR bit.  We always leave HFSCR DSCR set.
 */
current-thread.dscr_inherit = 1;
-   if (hv)
-   mtspr(SPRN_HFSCR, value | HFSCR_DSCR);
-   else
-   mtspr(SPRN_FSCR,  value | FSCR_DSCR);
+   mtspr(SPRN_FSCR, value | FSCR_DSCR);
return;
}
 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev