[PATCH v2] include: mman: Use bool instead of int for the return value of arch_validate_prot

2016-07-23 Thread chengang
From: Chen Gang 

For a function whose return value is purely boolean, bool is a
slightly better return type than int.

Signed-off-by: Chen Gang 
---
 arch/powerpc/include/asm/mman.h | 8 
 include/linux/mman.h| 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 2563c43..fc420ce 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -31,13 +31,13 @@ static inline pgprot_t arch_vm_get_page_prot(unsigned long 
vm_flags)
 }
 #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
 
-static inline int arch_validate_prot(unsigned long prot)
+static inline bool arch_validate_prot(unsigned long prot)
 {
if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))
-   return 0;
+   return false;
if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO))
-   return 0;
-   return 1;
+   return false;
+   return true;
 }
 #define arch_validate_prot(prot) arch_validate_prot(prot)
 
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 33e17f6..634c4c5 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -49,7 +49,7 @@ static inline void vm_unacct_memory(long pages)
  *
  * Returns true if the prot flags are valid
  */
-static inline int arch_validate_prot(unsigned long prot)
+static inline bool arch_validate_prot(unsigned long prot)
 {
return (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) == 0;
 }
-- 
1.9.3


[v3] UCC_GETH/UCC_FAST: Use IS_ERR_VALUE_U32 API to avoid IS_ERR_VALUE abuses.

2016-07-23 Thread Arvind Yadav
IS_ERR_VALUE() assumes that its parameter is an unsigned long, so it
can not be used to check whether an 'unsigned int' reflects an error.
Passing a signed 'int' happens to work because the value is
sign-extended on 64-bit architectures before it gets converted into an
unsigned type, but anything that passes an 'unsigned short' or
'unsigned int' argument into IS_ERR_VALUE() is guaranteed to be
broken, as are 8-bit integers and types that are wider than
'unsigned long'.

The ucc_geth and ucc_fast code does exactly that: it passes 'u32'
muram offsets into IS_ERR_VALUE(). Move the IS_ERR_VALUE_U32() helper
from drivers/bcma/scan.c into include/linux/err.h and use it for those
checks instead.
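
To make the sign-extension point concrete, here is a minimal
standalone C demonstration (userspace code, not part of the patch;
IS_ERR_VALUE() is simplified, without the (void *) cast and
unlikely(), and IS_ERR_VALUE_U32() mirrors the helper this patch moves
into include/linux/err.h):

#include <stdio.h>
#include <stdint.h>

#define MAX_ERRNO 4095
/* Simplified version of the kernel's unsigned-long based check. */
#define IS_ERR_VALUE(x)     ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)
/* The u32 variant used by this patch. */
#define IS_ERR_VALUE_U32(x) ((x) >= (uint32_t)-MAX_ERRNO)

int main(void)
{
        /* An error code (-12, i.e. -ENOMEM) stored in a u32, as the
         * muram offsets in ucc_geth/ucc_fast are. */
        uint32_t offset = (uint32_t)-12;

        /* Zero-extended to unsigned long on 64-bit, so the generic
         * check misses the error value (prints 0)... */
        printf("IS_ERR_VALUE:     %d\n", IS_ERR_VALUE(offset));
        /* ...while the u32 comparison still catches it (prints 1). */
        printf("IS_ERR_VALUE_U32: %d\n", IS_ERR_VALUE_U32(offset));
        return 0;
}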

Signed-off-by: Arvind Yadav 
---
 drivers/bcma/scan.c   |  2 --
 drivers/net/ethernet/freescale/ucc_geth.c | 30 +++---
 drivers/soc/fsl/qe/ucc_fast.c |  4 ++--
 include/linux/err.h   |  1 +
 4 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c
index 4a2d1b2..319d78e 100644
--- a/drivers/bcma/scan.c
+++ b/drivers/bcma/scan.c
@@ -272,8 +272,6 @@ static struct bcma_device *bcma_find_core_reverse(struct 
bcma_bus *bus, u16 core
return NULL;
 }
 
-#define IS_ERR_VALUE_U32(x) ((x) >= (u32)-MAX_ERRNO)
-
 static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr,
  struct bcma_device_id *match, int core_num,
  struct bcma_device *core)
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c 
b/drivers/net/ethernet/freescale/ucc_geth.c
index 5bf1ade..d290dea 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -289,7 +289,7 @@ static int fill_init_enet_entries(struct ucc_geth_private 
*ugeth,
else {
init_enet_offset =
qe_muram_alloc(thread_size, thread_alignment);
-   if (IS_ERR_VALUE(init_enet_offset)) {
+   if (IS_ERR_VALUE_U32(init_enet_offset)) {
if (netif_msg_ifup(ugeth))
pr_err("Can not allocate DPRAM 
memory\n");
qe_put_snum((u8) snum);
@@ -2234,7 +2234,7 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private 
*ugeth)
ugeth->tx_bd_ring_offset[j] =
qe_muram_alloc(length,
   UCC_GETH_TX_BD_RING_ALIGNMENT);
-   if (!IS_ERR_VALUE(ugeth->tx_bd_ring_offset[j]))
+   if (!IS_ERR_VALUE_U32(ugeth->tx_bd_ring_offset[j]))
ugeth->p_tx_bd_ring[j] =
(u8 __iomem *) qe_muram_addr(ugeth->
 tx_bd_ring_offset[j]);
@@ -2311,7 +2311,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private 
*ugeth)
ugeth->rx_bd_ring_offset[j] =
qe_muram_alloc(length,
   UCC_GETH_RX_BD_RING_ALIGNMENT);
-   if (!IS_ERR_VALUE(ugeth->rx_bd_ring_offset[j]))
+   if (!IS_ERR_VALUE_U32(ugeth->rx_bd_ring_offset[j]))
ugeth->p_rx_bd_ring[j] =
(u8 __iomem *) qe_muram_addr(ugeth->
 rx_bd_ring_offset[j]);
@@ -2521,7 +2521,7 @@ static int ucc_geth_startup(struct ucc_geth_private 
*ugeth)
ugeth->tx_glbl_pram_offset =
qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram),
   UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT);
-   if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) {
+   if (IS_ERR_VALUE_U32(ugeth->tx_glbl_pram_offset)) {
if (netif_msg_ifup(ugeth))
pr_err("Can not allocate DPRAM memory for 
p_tx_glbl_pram\n");
return -ENOMEM;
@@ -2541,7 +2541,7 @@ static int ucc_geth_startup(struct ucc_geth_private 
*ugeth)
   sizeof(struct ucc_geth_thread_data_tx) +
   32 * (numThreadsTxNumerical == 1),
   UCC_GETH_THREAD_DATA_ALIGNMENT);
-   if (IS_ERR_VALUE(ugeth->thread_dat_tx_offset)) {
+   if (IS_ERR_VALUE_U32(ugeth->thread_dat_tx_offset)) {
if (netif_msg_ifup(ugeth))

Re: [PATCH] powerpc/64: implement a slice mask cache

2016-07-23 Thread Balbir Singh
On Sat, Jul 23, 2016 at 05:10:36PM +1000, Nicholas Piggin wrote:
> On Sat, 23 Jul 2016 12:19:37 +1000
> Balbir Singh  wrote:
> 
> > On Fri, Jul 22, 2016 at 10:57:28PM +1000, Nicholas Piggin wrote:
> > > Calculating the slice mask can become a significant overhead for
> > > get_unmapped_area. The mask is relatively small and does not change
> > > frequently, so we can cache it in the mm context.
> > > 
> > > This saves about 30% kernel time on a 4K user address allocation
> > > in a microbenchmark.
> > > 
> > > Comments on the approach taken? I think there is the option for
> > > fixed allocations to avoid some of the slice calculation entirely,
> > > but first I think it will be good to have a general speedup that
> > > covers all mmaps.
> > > 
> > > Cc: Benjamin Herrenschmidt 
> > > Cc: Anton Blanchard 
> > > ---
> > >  arch/powerpc/include/asm/book3s/64/mmu.h |  8 +++
> > >  arch/powerpc/mm/slice.c  | 39
> > > ++-- 2 files changed, 45 insertions(+),
> > > 2 deletions(-)
> > > 
> > > diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h
> > > b/arch/powerpc/include/asm/book3s/64/mmu.h index 5854263..0d15af4
> > > 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> > > +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> > > @@ -71,6 +71,14 @@ typedef struct {
> > >  #ifdef CONFIG_PPC_MM_SLICES
> > >   u64 low_slices_psize;   /* SLB page size encodings */
> > >   unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
> > > + struct slice_mask mask_4k;
> > > +# ifdef CONFIG_PPC_64K_PAGES
> > > + struct slice_mask mask_64k;
> > > +# endif
> > > +# ifdef CONFIG_HUGETLB_PAGE
> > > + struct slice_mask mask_16m;
> > > + struct slice_mask mask_16g;
> > > +# endif  
> > 
> > Should we cache these in mmu_psize_defs? I am not 100% sure
> > if we want to overload that structure, but it provides a convenient
> > way of saying mmu_psize_defs[psize].mask instead of all
> > the if checks
> 
> I'm not sure if we can, can we? mmu_psize_defs is global
> whereas we need per-process structure.
>

Oh! sorry, I meant a structure like mmu_psize_defs.
 
> The branches are a bit annoying, but we can't directly use an array
> because it's too big. But see the comment at MMU_PAGE_* defines.
> Perhaps we could change this structure to be sized at compile time to
> > only include possible page sizes, which would enable building a
> structure like the above with simply
> 
> struct type blah[MMU_POSSIBLE_PAGE_COUNT];
> 
> Perhaps we can consider that as a follow on patch? It's probably a bit
> more work to implement.
> 


Yeah.. good idea
MMU_PAGE_COUNT is 15, the size is going to be 15*8 bytes?


> 
> > >  #else
> > >   u16 sllp;   /* SLB page size encoding */
> > >  #endif
> > > diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
> > > index 2b27458..559ea5f 100644
> > > --- a/arch/powerpc/mm/slice.c
> > > +++ b/arch/powerpc/mm/slice.c
> > > @@ -147,7 +147,7 @@ static struct slice_mask
> > > slice_mask_for_free(struct mm_struct *mm) return ret;
> > >  }
> > >  
> > > -static struct slice_mask slice_mask_for_size(struct mm_struct *mm,
> > > int psize) +static struct slice_mask
> > > calc_slice_mask_for_size(struct mm_struct *mm, int psize) {
> > >   unsigned char *hpsizes;
> > >   int index, mask_index;
> > > @@ -171,6 +171,36 @@ static struct slice_mask
> > > slice_mask_for_size(struct mm_struct *mm, int psize) return ret;
> > >  }
> > >  
> > > +static void recalc_slice_mask_cache(struct mm_struct *mm)
> > > +{
> > > + mm->context.mask_4k = calc_slice_mask_for_size(mm,
> > > MMU_PAGE_4K); +#ifdef CONFIG_PPC_64K_PAGES
> > > + mm->context.mask_64k = calc_slice_mask_for_size(mm,
> > > MMU_PAGE_64K); +#endif
> > > +# ifdef CONFIG_HUGETLB_PAGE
> > > + /* Radix does not come here */
> > > + mm->context.mask_16m = calc_slice_mask_for_size(mm,
> > > MMU_PAGE_16M);
> > > + mm->context.mask_16g = calc_slice_mask_for_size(mm,
> > > MMU_PAGE_16G); +# endif
> > > +}  
> > 
> > Should the function above be called under slice_convert_lock?
> 
> Good question. The slice_convert_lock is... interesting. It only
> protects the update-side of the slice page size arrays. I thought
> this was okay last time I looked, but now you make me think again
> maybe it is not. I need to check again what's providing exclusion
> on the read side too.
> 
> I wanted to avoid doing more work under slice_convert_lock, but
> we should just make that a per-mm lock anyway shouldn't we?
>

Yeah, and Ben's comment in the reply suggests we already hold a
per-mm lock on the read side.

Balbir Singh

 

[PATCH for-4.8 V2 10/10] powerpc/mm: Catch the usage of cpu/mmu_has_feature before jump label init

2016-07-23 Thread Aneesh Kumar K.V
This enables us to catch wrong usage of cpu_has_feature() and
mmu_has_feature() in the code. We need to use the feature-bit based
check in show_regs() because it is used in the reporting code.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/Kconfig.debug | 11 +++
 arch/powerpc/include/asm/cpufeatures.h |  6 ++
 arch/powerpc/include/asm/mmu.h | 13 +
 arch/powerpc/kernel/process.c  |  2 +-
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 8243ada23237..e4da4e4985fe 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -60,6 +60,17 @@ config CODE_PATCHING_SELFTEST
depends on DEBUG_KERNEL
default n
 
+config FEATURE_FIXUP_DEBUG
+   bool "Do extra check on feature fixup calls"
+   depends on DEBUG_KERNEL
+   default n
+   help
+ This catches the wrong usage of cpu_has_feature and mmu_has_feature
+ in the code.
+
+ If you don't know what this means, say N
+
+
 config FTR_FIXUP_SELFTEST
bool "Run self-tests of the feature-fixup code"
depends on DEBUG_KERNEL
diff --git a/arch/powerpc/include/asm/cpufeatures.h 
b/arch/powerpc/include/asm/cpufeatures.h
index 4a4a0b898463..93e7e3e87af4 100644
--- a/arch/powerpc/include/asm/cpufeatures.h
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -22,6 +22,12 @@ static __always_inline bool cpu_has_feature(unsigned long 
feature)
 {
int i;
 
+#ifdef CONFIG_FEATURE_FIXUP_DEBUG
+   if (!static_key_initialized) {
+   WARN_ON(1);
+   return __cpu_has_feature(feature);
+   }
+#endif
if (CPU_FTRS_ALWAYS & feature)
return true;
 
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 3726161f6a8d..5c1f3a4cb99f 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -152,6 +152,12 @@ static __always_inline bool mmu_has_feature(unsigned long 
feature)
 {
int i;
 
+#ifdef CONFIG_FEATURE_FIXUP_DEBUG
+   if (!static_key_initialized) {
+   WARN_ON(1);
+   return __mmu_has_feature(feature);
+   }
+#endif
if (!(MMU_FTRS_POSSIBLE & feature))
return false;
 
@@ -163,6 +169,13 @@ static inline void mmu_clear_feature(unsigned long feature)
 {
int i;
 
+#ifdef CONFIG_FEATURE_FIXUP_DEBUG
+   if (!static_key_initialized) {
+   WARN_ON(1);
+   cur_cpu_spec->mmu_features &= ~feature;
+   return;
+   }
+#endif
i = __builtin_ctzl(feature);
cur_cpu_spec->mmu_features &= ~feature;
static_branch_disable(&mmu_feat_keys[i]);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 75611b984faa..5a08cff6621c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1315,7 +1315,7 @@ void show_regs(struct pt_regs * regs)
print_msr_bits(regs->msr);
printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
-   if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+   if ((regs->trap != 0xc00) && __cpu_has_feature(CPU_FTR_CFAR))
printk("CFAR: "REG" ", regs->orig_gpr3);
if (trap == 0x200 || trap == 0x300 || trap == 0x600)
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-- 
2.7.4


[PATCH for-4.8 V2 09/10] powerpc: use jump label for mmu_has_feature

2016-07-23 Thread Aneesh Kumar K.V
From: Kevin Hao 

The mmu features are fixed once the mmu feature probe is done, and
mmu_has_feature() is used in some hot paths. Re-checking the mmu
feature bits on every invocation of mmu_has_feature() is therefore
suboptimal. Reduce the overhead of this check by using a jump label.

The generated assemble code of the following c program:
if (mmu_has_feature(MMU_FTR_XXX))
xxx()
Before:
lis r9,-16230
lwz r9,12324(r9)
lwz r9,24(r9)
andi.   r10,r9,16
beqlr+

After:
nop if MMU_FTR_XXX is enabled
b xxx   if MMU_FTR_XXX is not enabled
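
For reference, the generic static-key pattern this relies on looks
roughly like the sketch below (the names here are illustrative only;
the real patch indexes an array of keys by feature bit):

#include <linux/init.h>
#include <linux/jump_label.h>

/* Defaults to true; patched once the feature probe has run. */
static DEFINE_STATIC_KEY_TRUE(demo_feat_key);

static bool demo_feat_present;  /* hypothetical probe result */

static __always_inline bool demo_has_feature(void)
{
        /* Compiles down to a nop (feature present) or an unconditional
         * branch (feature absent) instead of load + mask + test. */
        return static_branch_likely(&demo_feat_key);
}

static void __init demo_feat_key_init(void)
{
        if (!demo_feat_present)
                static_branch_disable(&demo_feat_key);
}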

Signed-off-by: Kevin Hao 
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/mmu.h| 36 
 arch/powerpc/kernel/cputable.c| 17 +
 arch/powerpc/lib/feature-fixups.c |  1 +
 3 files changed, 54 insertions(+)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 828b92faec91..3726161f6a8d 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -139,6 +139,41 @@ static inline bool __mmu_has_feature(unsigned long feature)
return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
 }
 
+#ifdef CONFIG_JUMP_LABEL
+#include 
+
+#define MAX_MMU_FEATURES   (8 * sizeof(((struct cpu_spec 
*)0)->mmu_features))
+
+extern struct static_key_true mmu_feat_keys[MAX_MMU_FEATURES];
+
+extern void mmu_feat_keys_init(void);
+
+static __always_inline bool mmu_has_feature(unsigned long feature)
+{
+   int i;
+
+   if (!(MMU_FTRS_POSSIBLE & feature))
+   return false;
+
+   i = __builtin_ctzl(feature);
+   return static_branch_likely(&mmu_feat_keys[i]);
+}
+
+static inline void mmu_clear_feature(unsigned long feature)
+{
+   int i;
+
+   i = __builtin_ctzl(feature);
+   cur_cpu_spec->mmu_features &= ~feature;
+   static_branch_disable(&mmu_feat_keys[i]);
+}
+#else
+
+static inline void mmu_feat_keys_init(void)
+{
+
+}
+
 static inline bool mmu_has_feature(unsigned long feature)
 {
return __mmu_has_feature(feature);
@@ -148,6 +183,7 @@ static inline void mmu_clear_feature(unsigned long feature)
 {
cur_cpu_spec->mmu_features &= ~feature;
 }
+#endif /* CONFIG_JUMP_LABEL */
 
 extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
 
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 67ce4816998e..fa1580788eda 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2243,4 +2243,21 @@ void __init cpu_feat_keys_init(void)
static_branch_disable(&cpu_feat_keys[i]);
}
 }
+
+struct static_key_true mmu_feat_keys[MAX_MMU_FEATURES] = {
+   [0 ... MAX_MMU_FEATURES - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(mmu_feat_keys);
+
+void __init mmu_feat_keys_init(void)
+{
+   int i;
+
+   for (i = 0; i < MAX_MMU_FEATURES; i++) {
+   unsigned long f = 1ul << i;
+
+   if (!(cur_cpu_spec->mmu_features & f))
+   static_branch_disable(&mmu_feat_keys[i]);
+   }
+}
 #endif
diff --git a/arch/powerpc/lib/feature-fixups.c 
b/arch/powerpc/lib/feature-fixups.c
index ec698b9e6238..7c29906cf8e9 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -184,6 +184,7 @@ void apply_feature_fixups(void)
 */
jump_label_init();
cpu_feat_keys_init();
+   mmu_feat_keys_init();
 }
 
 #ifdef CONFIG_FTR_FIXUP_SELFTEST
-- 
2.7.4


[PATCH for-4.8 V2 08/10] powerpc: use the jump label for cpu_has_feature

2016-07-23 Thread Aneesh Kumar K.V
From: Kevin Hao 

The cpu features are fixed once the cpu feature probe is done, and
cpu_has_feature() is used in some hot paths. Re-checking the cpu
feature bits on every invocation of cpu_has_feature() is therefore
suboptimal. Reduce the overhead of this check by using a jump label.

The generated assemble code of the following c program:
if (cpu_has_feature(CPU_FTR_XXX))
xxx()

Before:
lis r9,-16230
lwz r9,12324(r9)
lwz r9,12(r9)
andi.   r10,r9,512
beqlr-

After:
nop if CPU_FTR_XXX is enabled
b xxx   if CPU_FTR_XXX is not enabled

Signed-off-by: Kevin Hao 
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/cpufeatures.h | 21 +
 arch/powerpc/include/asm/cputable.h|  8 
 arch/powerpc/kernel/cputable.c | 20 
 arch/powerpc/lib/feature-fixups.c  |  1 +
 4 files changed, 50 insertions(+)

diff --git a/arch/powerpc/include/asm/cpufeatures.h 
b/arch/powerpc/include/asm/cpufeatures.h
index bfa6cb8f5629..4a4a0b898463 100644
--- a/arch/powerpc/include/asm/cpufeatures.h
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -13,10 +13,31 @@ static inline bool __cpu_has_feature(unsigned long feature)
return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
 }
 
+#ifdef CONFIG_JUMP_LABEL
+#include 
+
+extern struct static_key_true cpu_feat_keys[MAX_CPU_FEATURES];
+
+static __always_inline bool cpu_has_feature(unsigned long feature)
+{
+   int i;
+
+   if (CPU_FTRS_ALWAYS & feature)
+   return true;
+
+   if (!(CPU_FTRS_POSSIBLE & feature))
+   return false;
+
+   i = __builtin_ctzl(feature);
+   return static_branch_likely(&cpu_feat_keys[i]);
+}
+#else
 static inline bool cpu_has_feature(unsigned long feature)
 {
 
return __cpu_has_feature(feature);
 }
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_POWERPC_CPUFEATURE_H */
diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index a49ea95849f8..6c161e456759 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -122,6 +122,12 @@ extern void do_feature_fixups(unsigned long value, void 
*fixup_start,
 
 extern const char *powerpc_base_platform;
 
+#ifdef CONFIG_JUMP_LABEL
+extern void cpu_feat_keys_init(void);
+#else
+static inline void cpu_feat_keys_init(void) { }
+#endif
+
 /* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
 enum {
TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */
@@ -132,6 +138,8 @@ enum {
 
 /* CPU kernel features */
 
+#define MAX_CPU_FEATURES   (8 * sizeof(((struct cpu_spec 
*)0)->cpu_features))
+
 /* Retain the 32b definitions all use bottom half of word */
 #define CPU_FTR_COHERENT_ICACHEASM_CONST(0x0001)
 #define CPU_FTR_L2CR   ASM_CONST(0x0002)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index d81f826d1029..67ce4816998e 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -2224,3 +2225,22 @@ struct cpu_spec * __init identify_cpu(unsigned long 
offset, unsigned int pvr)
 
return NULL;
 }
+
+#ifdef CONFIG_JUMP_LABEL
+struct static_key_true cpu_feat_keys[MAX_CPU_FEATURES] = {
+   [0 ... MAX_CPU_FEATURES - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(cpu_feat_keys);
+
+void __init cpu_feat_keys_init(void)
+{
+   int i;
+
+   for (i = 0; i < MAX_CPU_FEATURES; i++) {
+   unsigned long f = 1ul << i;
+
+   if (!(cur_cpu_spec->cpu_features & f))
+   static_branch_disable(&cpu_feat_keys[i]);
+   }
+}
+#endif
diff --git a/arch/powerpc/lib/feature-fixups.c 
b/arch/powerpc/lib/feature-fixups.c
index 8b0b0b51e8aa..ec698b9e6238 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -183,6 +183,7 @@ void apply_feature_fixups(void)
 * by now.
 */
jump_label_init();
+   cpu_feat_keys_init();
 }
 
 #ifdef CONFIG_FTR_FIXUP_SELFTEST
-- 
2.7.4


[PATCH for-4.8 V2 07/10] powerpc: move the cpu_has_feature to a separate file

2016-07-23 Thread Aneesh Kumar K.V
From: Kevin Hao 

We plan to use a jump label for cpu_has_feature(). In order to
implement this we need to include linux/jump_label.h in
asm/cputable.h, but asm/cputable.h is such a basic header file for ppc
that it is included by almost all other header files, and pulling
linux/jump_label.h into it introduces various recursive inclusions
that are very hard to fix. So move cpu_has_feature() to a separate
header file before switching it to a jump label. No functional
change.

Signed-off-by: Kevin Hao 
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  1 +
 arch/powerpc/include/asm/cacheflush.h |  1 +
 arch/powerpc/include/asm/cpufeatures.h| 22 ++
 arch/powerpc/include/asm/cputable.h   | 13 -
 arch/powerpc/include/asm/cputime.h|  1 +
 arch/powerpc/include/asm/dbell.h  |  1 +
 arch/powerpc/include/asm/dcr-native.h |  1 +
 arch/powerpc/include/asm/mman.h   |  1 +
 arch/powerpc/include/asm/time.h   |  1 +
 arch/powerpc/include/asm/xor.h|  1 +
 arch/powerpc/kernel/align.c   |  1 +
 arch/powerpc/kernel/irq.c |  1 +
 arch/powerpc/kernel/process.c |  1 +
 arch/powerpc/kernel/setup-common.c|  1 +
 arch/powerpc/kernel/setup_32.c|  1 +
 arch/powerpc/kernel/smp.c |  1 +
 arch/powerpc/platforms/cell/pervasive.c   |  1 +
 arch/powerpc/xmon/ppc-dis.c   |  1 +
 18 files changed, 38 insertions(+), 13 deletions(-)
 create mode 100644 arch/powerpc/include/asm/cpufeatures.h

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index ceba5472fe58..b396c6a8b3de 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * SLB
diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index 69fb16d7a811..e650819acc95 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -11,6 +11,7 @@
 
 #include 
 #include 
+#include 
 
 /*
  * No cache flushing is required when address mappings are changed,
diff --git a/arch/powerpc/include/asm/cpufeatures.h 
b/arch/powerpc/include/asm/cpufeatures.h
new file mode 100644
index ..bfa6cb8f5629
--- /dev/null
+++ b/arch/powerpc/include/asm/cpufeatures.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_POWERPC_CPUFEATURES_H
+#define __ASM_POWERPC_CPUFEATURES_H
+
+#ifndef __ASSEMBLY__
+
+#include 
+
+static inline bool __cpu_has_feature(unsigned long feature)
+{
+   if (CPU_FTRS_ALWAYS & feature)
+   return true;
+
+   return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
+}
+
+static inline bool cpu_has_feature(unsigned long feature)
+{
+
+   return __cpu_has_feature(feature);
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_POWERPC_CPUFEATURE_H */
diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index dfdf36bc2664..a49ea95849f8 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -576,19 +576,6 @@ enum {
 };
 #endif /* __powerpc64__ */
 
-static inline bool __cpu_has_feature(unsigned long feature)
-{
-   if (CPU_FTRS_ALWAYS & feature)
-   return true;
-
-   return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
-}
-
-static inline bool cpu_has_feature(unsigned long feature)
-{
-   return __cpu_has_feature(feature);
-}
-
 #define HBP_NUM 1
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/cputime.h 
b/arch/powerpc/include/asm/cputime.h
index e2452550bcb1..b91837865c0e 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -28,6 +28,7 @@ static inline void setup_cputime_one_jiffy(void) { }
 #include 
 #include 
 #include 
+#include 
 
 typedef u64 __nocast cputime_t;
 typedef u64 __nocast cputime64_t;
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 5fa6b20eba10..2d9eae338f70 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -16,6 +16,7 @@
 #include 
 
 #include 
+#include 
 
 #define PPC_DBELL_MSG_BRDCAST  (0x0400)
 #define PPC_DBELL_TYPE(x)  (((x) & 0xf) << (63-36))
diff --git a/arch/powerpc/include/asm/dcr-native.h 
b/arch/powerpc/include/asm/dcr-native.h
index 4efc11dacb98..0186ba05bfe1 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -24,6 +24,7 @@
 
 #include 
 #include 
+#include 
 
 typedef struct {
unsigned int base;
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 

[PATCH for-4.8 V2 06/10] powerpc: kill mfvtb()

2016-07-23 Thread Aneesh Kumar K.V
From: Kevin Hao 

This function is only used by get_vtb(), and the two are almost
identical apart from the actual read of the register. Move the mfspr()
into get_vtb() and kill mfvtb(). With this we eliminate the use of
cpu_has_feature() in a very core header file like reg.h, as a
preparation for switching cpu_has_feature() to a jump label.

Signed-off-by: Kevin Hao 
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/reg.h  | 9 -
 arch/powerpc/include/asm/time.h | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d7e9ab5e4709..817c005205f0 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1256,15 +1256,6 @@ static inline void msr_check_and_clear(unsigned long 
bits)
__msr_check_and_clear(bits);
 }
 
-static inline unsigned long mfvtb (void)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
-   if (cpu_has_feature(CPU_FTR_ARCH_207S))
-   return mfspr(SPRN_VTB);
-#endif
-   return 0;
-}
-
 #ifdef __powerpc64__
 #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
 #define mftb() ({unsigned long rval;   \
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 09211640a0e0..cbbeaf0a6597 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -103,7 +103,7 @@ static inline u64 get_vtb(void)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_ARCH_207S))
-   return mfvtb();
+   return mfspr(SPRN_VTB);
 #endif
return 0;
 }
-- 
2.7.4


[PATCH for-4.8 V2 05/10] powerpc: Call jump_label_init early

2016-07-23 Thread Aneesh Kumar K.V
Call jump_label_init() early so that we can use static keys for the
cpu and mmu feature checks. All cpu/mmu features should be finalized
by the time we call setup_system(), and the feature fixup for
ASM-based code has been done by then as well.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/lib/feature-fixups.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/lib/feature-fixups.c 
b/arch/powerpc/lib/feature-fixups.c
index defb2998b818..8b0b0b51e8aa 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -177,6 +177,12 @@ void apply_feature_fixups(void)
  &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
 #endif
do_final_fixups();
+   /*
+* init jump label so that cpu and mmu feature check can be optimized
+* using jump label. We should have all the cpu/mmu features finalized
+* by now.
+*/
+   jump_label_init();
 }
 
 #ifdef CONFIG_FTR_FIXUP_SELFTEST
-- 
2.7.4


[PATCH for-4.8 V2 04/10] jump_label: make it possible for the archs to invoke jump_label_init() much earlier

2016-07-23 Thread Aneesh Kumar K.V
From: Kevin Hao 

Some archs (such as powerpc) want to invoke jump_label_init() at a
much earlier stage. Check static_key_initialized to make sure this
function runs only once.

Signed-off-by: Kevin Hao 
Signed-off-by: Aneesh Kumar K.V 
---
 kernel/jump_label.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 05254eeb4b4e..14d81315fd7e 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -205,6 +205,9 @@ void __init jump_label_init(void)
struct static_key *key = NULL;
struct jump_entry *iter;
 
+   if (static_key_initialized)
+   return;
+
jump_label_lock();
jump_label_sort_entries(iter_start, iter_stop);
 
-- 
2.7.4


[PATCH for-4.8 V2 03/10] powerpc/mm/radix: Add radix_set_pte to use in early init

2016-07-23 Thread Aneesh Kumar K.V
We want to use the static key based feature check in set_pte_at().
Since we call radix__map_kernel_page() early in boot, before jump
label is initialized, we can't call set_pte_at() there. Add
radix__set_pte() for that purpose.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/pgtable-radix.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 003ff48a11b6..6d2eb76b508e 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -39,6 +39,27 @@ static __ref void *early_alloc_pgtable(unsigned long size)
 
return pt;
 }
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+static void radix__set_pte(struct mm_struct *mm, unsigned long addr, pte_t 
*ptep,
+  pte_t pte)
+{
+   /*
+* When handling numa faults, we already have the pte marked
+* _PAGE_PRESENT, but we can be sure that it is not in hpte.
+* Hence we can use set_pte_at for them.
+*/
+   VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+
+   /*
+* Add the pte bit when trying to set a pte
+*/
+   pte = __pte(pte_val(pte) | _PAGE_PTE);
+
+   /* Perform the setting of the PTE */
+   radix__set_pte_at(mm, addr, ptep, pte, 0);
+}
 
 int radix__map_kernel_page(unsigned long ea, unsigned long pa,
  pgprot_t flags,
@@ -102,7 +123,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long 
pa,
}
 
 set_the_pte:
-   set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+   radix__set_pte(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
smp_wmb();
return 0;
 }
-- 
2.7.4


[PATCH for-4.8 V2 02/10] powerpc/mm: Convert early cpu/mmu feature check to use the new helpers

2016-07-23 Thread Aneesh Kumar K.V
This switches the early feature checks to use the non-static-key
variants of the functions. In later patches we will switch
cpu_has_feature() and mmu_has_feature() to use static keys, and those
can be used only after the static key/jump label infrastructure has
been initialized. Any feature check before jump label init should be
done using these new helpers.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  4 ++--
 arch/powerpc/kernel/paca.c|  2 +-
 arch/powerpc/kernel/setup_64.c|  4 ++--
 arch/powerpc/mm/hash_native_64.c  |  2 +-
 arch/powerpc/mm/hash_utils_64.c   | 10 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e4e1e64e2c8d..ceba5472fe58 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -278,7 +278,7 @@ static inline unsigned long hpte_encode_avpn(unsigned long 
vpn, int psize,
 */
v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
v <<= HPTE_V_AVPN_SHIFT;
-   if (!cpu_has_feature(CPU_FTR_ARCH_300))
+   if (!__cpu_has_feature(CPU_FTR_ARCH_300))
v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
return v;
 }
@@ -306,7 +306,7 @@ static inline unsigned long hpte_encode_r(unsigned long pa, 
int base_psize,
  int actual_psize, int ssize)
 {
 
-   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   if (__cpu_has_feature(CPU_FTR_ARCH_300))
pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
 
/* A 4K page needs no special encoding */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 93dae296b6be..1b0b89e80824 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -184,7 +184,7 @@ void setup_paca(struct paca_struct *new_paca)
 * if we do a GET_PACA() before the feature fixups have been
 * applied
 */
-   if (cpu_has_feature(CPU_FTR_HVMODE))
+   if (__cpu_has_feature(CPU_FTR_HVMODE))
mtspr(SPRN_SPRG_HPACA, local_paca);
 #endif
mtspr(SPRN_SPRG_PACA, local_paca);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index d8216aed22b7..042d20a740ab 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -227,8 +227,8 @@ static void __init configure_exceptions(void)
opal_configure_cores();
 
/* Enable AIL if supported, and we are in hypervisor mode */
-   if (cpu_has_feature(CPU_FTR_HVMODE) &&
-   cpu_has_feature(CPU_FTR_ARCH_207S)) {
+   if (__cpu_has_feature(CPU_FTR_HVMODE) &&
+   __cpu_has_feature(CPU_FTR_ARCH_207S)) {
unsigned long lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index d2d8efd79cbf..b6565c50cabf 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -746,6 +746,6 @@ void __init hpte_init_native(void)
mmu_hash_ops.flush_hash_range = native_flush_hash_range;
mmu_hash_ops.hugepage_invalidate   = native_hugepage_invalidate;
 
-   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   if (__cpu_has_feature(CPU_FTR_ARCH_300))
ppc_md.register_process_table = native_register_proc_table;
 }
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 341632471b9d..a688f6c2b403 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -530,7 +530,7 @@ static bool might_have_hea(void)
 * we will never see an HEA ethernet device.
 */
 #ifdef CONFIG_IBMEBUS
-   return !cpu_has_feature(CPU_FTR_ARCH_207S) &&
+   return !__cpu_has_feature(CPU_FTR_ARCH_207S) &&
!firmware_has_feature(FW_FEATURE_SPLPAR);
 #else
return false;
@@ -561,7 +561,7 @@ static void __init htab_init_page_sizes(void)
 * Not in the device-tree, let's fallback on known size
 * list for 16M capable GP & GR
 */
-   if (mmu_has_feature(MMU_FTR_16M_PAGE))
+   if (__mmu_has_feature(MMU_FTR_16M_PAGE))
memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
   sizeof(mmu_psize_defaults_gp));
 found:
@@ -591,7 +591,7 @@ found:
mmu_vmalloc_psize = MMU_PAGE_64K;
if (mmu_linear_psize == MMU_PAGE_4K)
mmu_linear_psize = MMU_PAGE_64K;
-   if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
+   if (__mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
/*
 * When running on pSeries using 64k pages for 

[PATCH for-4.8 V2 01/10] powerpc/mm: Add __cpu/__mmu_has_feature

2016-07-23 Thread Aneesh Kumar K.V
In later patches, we will switch the cpu and mmu feature checks to
use static keys. This requires a variant of the feature check that can
be used in early boot, before jump label is initialized. This patch
adds such variants, along with a __radix_enabled() variant of the
radix_enabled() check.

We also update the return type to bool.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 19 +++
 arch/powerpc/include/asm/cputable.h  | 15 ++-
 arch/powerpc/include/asm/mmu.h   | 13 +++--
 arch/powerpc/xmon/ppc-dis.c  |  1 +
 4 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h 
b/arch/powerpc/include/asm/book3s/64/mmu.h
index 6d8306d9aa7a..1bb0e536c76b 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -24,9 +24,20 @@ struct mmu_psize_def {
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 
 #ifdef CONFIG_PPC_RADIX_MMU
-#define radix_enabled() mmu_has_feature(MMU_FTR_TYPE_RADIX)
+static inline bool radix_enabled(void)
+{
+   return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+#define radix_enabled radix_enabled
+
+static inline bool __radix_enabled(void)
+{
+   return __mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+#define __radix_enabled __radix_enabled
 #else
 #define radix_enabled() (0)
+#define __radix_enabled() (0)
 #endif
 
 #endif /* __ASSEMBLY__ */
@@ -111,7 +122,7 @@ extern void hash__early_init_mmu(void);
 extern void radix__early_init_mmu(void);
 static inline void early_init_mmu(void)
 {
-   if (radix_enabled())
+   if (__radix_enabled())
return radix__early_init_mmu();
return hash__early_init_mmu();
 }
@@ -119,7 +130,7 @@ extern void hash__early_init_mmu_secondary(void);
 extern void radix__early_init_mmu_secondary(void);
 static inline void early_init_mmu_secondary(void)
 {
-   if (radix_enabled())
+   if (__radix_enabled())
return radix__early_init_mmu_secondary();
return hash__early_init_mmu_secondary();
 }
@@ -131,7 +142,7 @@ extern void radix__setup_initial_memory_limit(phys_addr_t 
first_memblock_base,
 static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
  phys_addr_t first_memblock_size)
 {
-   if (radix_enabled())
+   if (__radix_enabled())
return radix__setup_initial_memory_limit(first_memblock_base,
   first_memblock_size);
return hash__setup_initial_memory_limit(first_memblock_base,
diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index df4fb5faba43..dfdf36bc2664 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -576,12 +576,17 @@ enum {
 };
 #endif /* __powerpc64__ */
 
-static inline int cpu_has_feature(unsigned long feature)
+static inline bool __cpu_has_feature(unsigned long feature)
 {
-   return (CPU_FTRS_ALWAYS & feature) ||
-  (CPU_FTRS_POSSIBLE
-   & cur_cpu_spec->cpu_features
-   & feature);
+   if (CPU_FTRS_ALWAYS & feature)
+   return true;
+
+   return !!(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature);
+}
+
+static inline bool cpu_has_feature(unsigned long feature)
+{
+   return __cpu_has_feature(feature);
 }
 
 #define HBP_NUM 1
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 0e7c1a262075..828b92faec91 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -134,9 +134,14 @@ enum {
0,
 };
 
-static inline int mmu_has_feature(unsigned long feature)
+static inline bool __mmu_has_feature(unsigned long feature)
 {
-   return (MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+   return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+}
+
+static inline bool mmu_has_feature(unsigned long feature)
+{
+   return __mmu_has_feature(feature);
 }
 
 static inline void mmu_clear_feature(unsigned long feature)
@@ -232,5 +237,9 @@ extern void setup_initial_memory_limit(phys_addr_t 
first_memblock_base,
 #define radix_enabled() (0)
 #endif
 
+#ifndef __radix_enabled
+#define __radix_enabled() (0)
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c
index 89098f320ad5..acad77b4f7b6 100644
--- a/arch/powerpc/xmon/ppc-dis.c
+++ b/arch/powerpc/xmon/ppc-dis.c
@@ -19,6 +19,7 @@ You should have received a copy of the GNU General Public 
License
 along with this file; see the file COPYING.  If not, write to the Free
 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, 
USA.  */
 
+#include 
 #include 
 #include "nonstdio.h"
 #include "ansidecl.h"
-- 
2.7.4


[PATCH for-4.8 V2 00/10] Use jump label for cpu/mmu_has_feature

2016-07-23 Thread Aneesh Kumar K.V
Changes from V1:
* Update "powerpc/mm: Convert early cpu/mmu feature check to use the new 
helpers"
  based on resend code changes in this area.

We now do feature fixup early and hence we can reduce the usage of
 __cpu/__mmu_has_feature.

Aneesh Kumar K.V (5):
  powerpc/mm: Add __cpu/__mmu_has_feature
  powerpc/mm: Convert early cpu/mmu feature check to use the new helpers
  powerpc/mm/radix: Add radix_set_pte to use in early init
  powerpc: Call jump_label_init early
  powerpc/mm: Catch the usage of cpu/mmu_has_feature before jump label
init

Kevin Hao (5):
  jump_label: make it possible for the archs to invoke jump_label_init()
much earlier
  powerpc: kill mfvtb()
  powerpc: move the cpu_has_feature to a separate file
  powerpc: use the jump label for cpu_has_feature
  powerpc: use jump label for mmu_has_feature

 arch/powerpc/Kconfig.debug| 11 +
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  5 ++-
 arch/powerpc/include/asm/book3s/64/mmu.h  | 19 ++--
 arch/powerpc/include/asm/cacheflush.h |  1 +
 arch/powerpc/include/asm/cpufeatures.h| 49 +
 arch/powerpc/include/asm/cputable.h   | 16 +++
 arch/powerpc/include/asm/cputime.h|  1 +
 arch/powerpc/include/asm/dbell.h  |  1 +
 arch/powerpc/include/asm/dcr-native.h |  1 +
 arch/powerpc/include/asm/mman.h   |  1 +
 arch/powerpc/include/asm/mmu.h| 62 ++-
 arch/powerpc/include/asm/reg.h|  9 
 arch/powerpc/include/asm/time.h   |  3 +-
 arch/powerpc/include/asm/xor.h|  1 +
 arch/powerpc/kernel/align.c   |  1 +
 arch/powerpc/kernel/cputable.c| 37 
 arch/powerpc/kernel/irq.c |  1 +
 arch/powerpc/kernel/paca.c|  2 +-
 arch/powerpc/kernel/process.c |  3 +-
 arch/powerpc/kernel/setup-common.c|  1 +
 arch/powerpc/kernel/setup_32.c|  1 +
 arch/powerpc/kernel/setup_64.c|  4 +-
 arch/powerpc/kernel/smp.c |  1 +
 arch/powerpc/lib/feature-fixups.c |  8 
 arch/powerpc/mm/hash_native_64.c  |  2 +-
 arch/powerpc/mm/hash_utils_64.c   | 10 ++---
 arch/powerpc/mm/pgtable-radix.c   | 23 +-
 arch/powerpc/platforms/cell/pervasive.c   |  1 +
 arch/powerpc/xmon/ppc-dis.c   |  2 +
 kernel/jump_label.c   |  3 ++
 30 files changed, 243 insertions(+), 37 deletions(-)
 create mode 100644 arch/powerpc/include/asm/cpufeatures.h

-- 
2.7.4


Re: [PATCH] powerpc/64: implement a slice mask cache

2016-07-23 Thread Benjamin Herrenschmidt
On Sat, 2016-07-23 at 17:10 +1000, Nicholas Piggin wrote:
> I wanted to avoid doing more work under slice_convert_lock, but
> we should just make that a per-mm lock anyway shouldn't we?

Aren't the readers under the mm sem taken for writing, or has this
changed?

Cheers,
Ben.


[PATCH] Optimise syscall entry for virtual, relocatable case

2016-07-23 Thread Nicholas Piggin
The mflr r10 instruction is left over from saving lr, back when the
code used lr to branch to system_call_entry from the exception
handler. That was changed by commit 6a404806d to use the count
register instead.

The saved value is never used now, so the mflr can be removed, and r10
can be used for storage rather than spilling to a scratch register.

This brings getppid syscall cost from 406 to 376 cycles on a POWER8.
Non-relocatable case is 371 cycles.

Cc: Michael Neuling 
Cc: Benjamin Herrenschmidt 
Signed-off-by: Nick Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 4c94406..eb0cac2 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -63,15 +63,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)  
\
 * is volatile across system calls.
 */
 #define SYSCALL_PSERIES_2_DIRECT   \
-   mflrr10 ;   \
ld  r12,PACAKBASE(r13) ;\
LOAD_HANDLER(r12, system_call_entry) ;  \
mtctr   r12 ;   \
mfspr   r12,SPRN_SRR1 ; \
-   /* Re-use of r13... No spare regs to do this */ \
-   li  r13,MSR_RI ;\
-   mtmsrd  r13,1 ; \
-   GET_PACA(r13) ; /* get r13 back */  \
+   li  r10,MSR_RI ;\
+   mtmsrd  r10,1 ; \
bctr ;
 #else
/* We can branch directly */
-- 
2.8.1


Re: [PATCH] powerpc/64: implement a slice mask cache

2016-07-23 Thread Nicholas Piggin
On Sat, 23 Jul 2016 12:19:37 +1000
Balbir Singh  wrote:

> On Fri, Jul 22, 2016 at 10:57:28PM +1000, Nicholas Piggin wrote:
> > Calculating the slice mask can become a significant overhead for
> > get_unmapped_area. The mask is relatively small and does not change
> > frequently, so we can cache it in the mm context.
> > 
> > This saves about 30% kernel time on a 4K user address allocation
> > in a microbenchmark.
> > 
> > Comments on the approach taken? I think there is the option for
> > fixed allocations to avoid some of the slice calculation entirely,
> > but first I think it will be good to have a general speedup that
> > covers all mmaps.
> > 
> > Cc: Benjamin Herrenschmidt 
> > Cc: Anton Blanchard 
> > ---
> >  arch/powerpc/include/asm/book3s/64/mmu.h |  8 +++
> >  arch/powerpc/mm/slice.c  | 39
> > ++-- 2 files changed, 45 insertions(+),
> > 2 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h
> > b/arch/powerpc/include/asm/book3s/64/mmu.h index 5854263..0d15af4
> > 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> > +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> > @@ -71,6 +71,14 @@ typedef struct {
> >  #ifdef CONFIG_PPC_MM_SLICES
> > u64 low_slices_psize;   /* SLB page size encodings */
> > unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
> > +   struct slice_mask mask_4k;
> > +# ifdef CONFIG_PPC_64K_PAGES
> > +   struct slice_mask mask_64k;
> > +# endif
> > +# ifdef CONFIG_HUGETLB_PAGE
> > +   struct slice_mask mask_16m;
> > +   struct slice_mask mask_16g;
> > +# endif  
> 
> Should we cache these in mmu_psize_defs? I am not 100% sure
> if we want to overload that structure, but it provides a convenient
> way of saying mmu_psize_defs[psize].mask instead of all
> the if checks

I'm not sure if we can, can we? mmu_psize_defs is global
whereas we need per-process structure.

The branches are a bit annoying, but we can't directly use an array
because it's too big. But see the comment at MMU_PAGE_* defines.
Perhaps we could change this structure to be sized at compile time to
only include possible page sizes, which would enable building a
structure like the above with simply

struct type blah[MMU_POSSIBLE_PAGE_COUNT];

Perhaps we can consider that as a follow on patch? It's probably a bit
more work to implement.
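
Roughly something like this, as a sketch only (MMU_POSSIBLE_PAGE_COUNT
and the context field name are hypothetical, and it assumes the
MMU_PAGE_* numbering itself gets compacted to only the possible
sizes):

#define MMU_POSSIBLE_PAGE_COUNT 4   /* sized at compile time per config */

struct slice_mask_cache {
        /* One cached mask per page size this kernel build can use. */
        struct slice_mask mask[MMU_POSSIBLE_PAGE_COUNT];
};

static void recalc_slice_mask_cache(struct mm_struct *mm)
{
        int psize;

        for (psize = 0; psize < MMU_POSSIBLE_PAGE_COUNT; psize++)
                mm->context.slice_masks.mask[psize] =
                        calc_slice_mask_for_size(mm, psize);
}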


> >  #else
> > u16 sllp;   /* SLB page size encoding */
> >  #endif
> > diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
> > index 2b27458..559ea5f 100644
> > --- a/arch/powerpc/mm/slice.c
> > +++ b/arch/powerpc/mm/slice.c
> > @@ -147,7 +147,7 @@ static struct slice_mask
> > slice_mask_for_free(struct mm_struct *mm) return ret;
> >  }
> >  
> > -static struct slice_mask slice_mask_for_size(struct mm_struct *mm,
> > int psize) +static struct slice_mask
> > calc_slice_mask_for_size(struct mm_struct *mm, int psize) {
> > unsigned char *hpsizes;
> > int index, mask_index;
> > @@ -171,6 +171,36 @@ static struct slice_mask
> > slice_mask_for_size(struct mm_struct *mm, int psize) return ret;
> >  }
> >  
> > +static void recalc_slice_mask_cache(struct mm_struct *mm)
> > +{
> > +   mm->context.mask_4k = calc_slice_mask_for_size(mm,
> > MMU_PAGE_4K); +#ifdef CONFIG_PPC_64K_PAGES
> > +   mm->context.mask_64k = calc_slice_mask_for_size(mm,
> > MMU_PAGE_64K); +#endif
> > +# ifdef CONFIG_HUGETLB_PAGE
> > +   /* Radix does not come here */
> > +   mm->context.mask_16m = calc_slice_mask_for_size(mm,
> > MMU_PAGE_16M);
> > +   mm->context.mask_16g = calc_slice_mask_for_size(mm,
> > MMU_PAGE_16G); +# endif
> > +}  
> 
> Should the function above be called under slice_convert_lock?

Good question. The slice_convert_lock is... interesting. It only
protects the update-side of the slice page size arrays. I thought
this was okay last time I looked, but now you make me think again
maybe it is not. I need to check again what's providing exclusion
on the read side too.

I wanted to avoid doing more work under slice_convert_lock, but
we should just make that a per-mm lock anyway shouldn't we?

Thanks,
Nick

Re: [kernel,v2,1/2] powerpc/iommu: Stop using @current in mm_iommu_xxx

2016-07-23 Thread Nicholas Piggin
On Wed, 20 Jul 2016 14:34:30 +1000
Alexey Kardashevskiy  wrote:



>  static long tce_iommu_register_pages(struct tce_container *container,
> @@ -128,10 +129,17 @@ static long tce_iommu_register_pages(struct
> tce_container *container, ((vaddr + size) < vaddr))
>   return -EINVAL;
>  
> - ret = mm_iommu_get(vaddr, entries, );
> + if (!container->mm) {
> + if (!current->mm)
> + return -ESRCH; /* process exited */

This shouldn't happen if we're a userspace process.

> +
> + atomic_inc(&current->mm->mm_count);
> + container->mm = current->mm;
> + }
> +
> + ret = mm_iommu_get(container->mm, vaddr, entries, );

Is it possible for processes (different mm) to be using the same
container? 


> @@ -354,6 +362,8 @@ static void tce_iommu_release(void *iommu_data)
>   tce_iommu_free_table(tbl);
>   }
>  
> + if (container->mm)
> + mmdrop(container->mm);
>   tce_iommu_disable(container);
>   mutex_destroy(&container->lock);

I'm wondering why keep the mm around at all. There is a bit of
locked_vm accounting there (which maybe doesn't do exactly the right
thing with the current task's rlimit when the mm does not belong to
current anyway).

The interesting cases are only the ones where a thread does
something with container->mm when current->mm != container->mm
(either a different process or a kernel thread). In what
situations does that happen?
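
For reference, the lifetime pattern in question is roughly this
(sketch only; the helper names are made up, and the refcounting
mirrors the quoted patch, i.e. it pins the mm_struct, not the address
space):

/* Pin current's mm the first time a container needs it. */
static int tce_container_pin_mm(struct tce_container *container)
{
        if (container->mm)
                return container->mm == current->mm ? 0 : -EPERM;
        if (!current->mm)
                return -ESRCH;          /* process exited */
        atomic_inc(&current->mm->mm_count);     /* mmgrab() on later kernels */
        container->mm = current->mm;
        return 0;
}

/* Drop the pin when the container goes away. */
static void tce_container_unpin_mm(struct tce_container *container)
{
        if (container->mm)
                mmdrop(container->mm);
}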

Thanks,
Nick