Re: [PATCH 10/13] powerpc: Add "bitmask" parameter to MASKABLE_* macros

2016-09-18 Thread Madhavan Srinivasan



On Friday 16 September 2016 04:33 PM, Nicholas Piggin wrote:

On Thu, 15 Sep 2016 18:32:00 +0530
Madhavan Srinivasan  wrote:


Make explicit the interrupt masking supported
by a given interrupt handler. The patch correspondingly
extends the MASKABLE_* macros with an additional parameter.
The "bitmask" parameter is passed to the SOFTEN_TEST macro to decide
on masking the interrupt.

Signed-off-by: Madhavan Srinivasan 
---
  arch/powerpc/include/asm/exception-64s.h | 62 
  arch/powerpc/kernel/exceptions-64s.S | 36 ---
  2 files changed, 54 insertions(+), 44 deletions(-)
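
A rough sketch of the intended shape (illustrative only; the exact macro
bodies are in the full patch, of which only the prolog hunk is quoted below,
and register usage follows the existing __SOFTEN_TEST):

/* hypothetical sketch: "bitmask" selects which soft-mask bits block this vector */
#define __SOFTEN_TEST(h, vec, bitmask)					\
	lbz	r10,PACASOFTIRQEN(r13);					\
	andi.	r10,r10,bitmask;					\
	li	r10,SOFTEN_VALUE_##vec;					\
	bne	masked_##h##interrupt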

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 1eea4ab75607..41be0c2d7658 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -179,9 +179,9 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
   * checking of the interrupt maskable level in the SOFTEN_TEST.
   * Intended to be used in MASKABLE_EXCPETION_* macros.
   */
-#define __EXCEPTION_PROLOG_1(area, extra, vec) \
+#define __EXCEPTION_PROLOG_1(area, extra, vec, bitmask)
\
__EXCEPTION_PROLOG_1_PRE(area); \
-   extra(vec); \
+   extra(vec, bitmask);\
__EXCEPTION_PROLOG_1_POST(area);
  
  /*

Is __EXCEPTION_PROLOG_1 now for maskable exceptions, and EXCEPTION_PROLOG_1
for unmaskable? Does it make sense to rename __EXCEPTION_PROLOG_1 to
MASKABLE_EXCEPTION_PROLOG_1? Reducing the mystery underscores in this file would
be nice!


Yes. That is true. Will make the changes.

Maddy



This worked out nicely with the mask bit being passed in by the exception handlers.
Very neat.


Thanks.


Reviewed-by: Nicholas Piggin 





Re: [PATCH 09/13] powerpc: Introduce new mask bit for soft_enabled

2016-09-18 Thread Madhavan Srinivasan



On Friday 16 September 2016 03:46 PM, Nicholas Piggin wrote:

On Thu, 15 Sep 2016 18:31:59 +0530
Madhavan Srinivasan  wrote:


Currently soft_enabled is used as the flag to determine
the interrupt state. Patch extends the soft_enabled
to be used as a mask instead of a flag.

This should be the title of the patch, IMO. Introducing the new
mask bit is incidental (and could be moved to patch 11). The key
here of course is switching the tests from a flag to a mask.

Very cool that you got this all worked out without adding any
new instructions.


Will make the changes accordingly. Thanks



Reviewed-by: Nicholas Piggin 


Signed-off-by: Madhavan Srinivasan 
---
  arch/powerpc/include/asm/exception-64s.h | 4 ++--
  arch/powerpc/include/asm/hw_irq.h| 1 +
  arch/powerpc/include/asm/irqflags.h  | 4 ++--
  arch/powerpc/kernel/entry_64.S   | 4 ++--
  arch/powerpc/kernel/exceptions-64e.S | 6 +++---
  5 files changed, 10 insertions(+), 9 deletions(-)
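
In C terms, the flag-to-mask switch made by the hunks below amounts to the
following (sketch with invented helper names, not part of the patch):

/* sketch, not from the patch: C-level equivalent of the asm tests below */
static inline bool soft_masked_old(unsigned long soft_enabled)
{
	return soft_enabled == IRQ_DISABLE_MASK_LINUX;		/* cmpwi ; beq */
}

static inline bool soft_masked_new(unsigned long soft_enabled)
{
	/* andi. ; bne -- any set bit means masked, so further bits such as
	 * IRQ_DISABLE_MASK_PMU can be added without new instructions */
	return (soft_enabled & IRQ_DISABLE_MASK_LINUX) != 0;
}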

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index dd3253bd0d8e..1eea4ab75607 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -430,9 +430,9 @@ label##_relon_hv:   
\
  
  #define __SOFTEN_TEST(h, vec)		\

lbz r10,PACASOFTIRQEN(r13); \
-   cmpwi   r10,IRQ_DISABLE_MASK_LINUX; 
\
+   andi.   r10,r10,IRQ_DISABLE_MASK_LINUX; \
li  r10,SOFTEN_VALUE_##vec; \
-   beq masked_##h##interrupt
+   bne masked_##h##interrupt
  #define _SOFTEN_TEST(h, vec)  __SOFTEN_TEST(h, vec)
  
  #define SOFTEN_TEST_PR(vec)		\

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index fd9b421f9020..245262c02bab 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -32,6 +32,7 @@
   */
  #define IRQ_DISABLE_MASK_NONE 0
  #define IRQ_DISABLE_MASK_LINUX1
+#define IRQ_DISABLE_MASK_PMU   2
  
  #endif /* CONFIG_PPC64 */
  
diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h

index d0ed2a7d7d10..9ff09747a226 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -48,11 +48,11 @@
  #define RECONCILE_IRQ_STATE(__rA, __rB)   \
lbz __rA,PACASOFTIRQEN(r13);\
lbz __rB,PACAIRQHAPPENED(r13);  \
-   cmpwi   cr0,__rA,IRQ_DISABLE_MASK_LINUX;\
+   andi.   __rA,__rA,IRQ_DISABLE_MASK_LINUX;\
li  __rA,IRQ_DISABLE_MASK_LINUX;\
ori __rB,__rB,PACA_IRQ_HARD_DIS;\
stb __rB,PACAIRQHAPPENED(r13);  \
-   beq 44f;\
+   bne 44f;\
stb __rA,PACASOFTIRQEN(r13);\
TRACE_DISABLE_INTS; \
  44:
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 879aeb11ad29..533e363914a9 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -764,8 +764,8 @@ restore:
 */
ld  r5,SOFTE(r1)
lbz r6,PACASOFTIRQEN(r13)
-   cmpwi   cr0,r5,IRQ_DISABLE_MASK_LINUX
-   beq restore_irq_off
+   andi.   r5,r5,IRQ_DISABLE_MASK_LINUX
+   bne restore_irq_off
  
  	/* We are enabling, were we already enabled ? Yes, just return */

cmpwi   cr0,r6,IRQ_DISABLE_MASK_NONE
diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 5c628b5696f6..8e40df2c2f30 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -212,8 +212,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
/* Interrupts had better not already be enabled... */
twnei   r6,IRQ_DISABLE_MASK_LINUX
  
-	cmpwi	cr0,r5,IRQ_DISABLE_MASK_LINUX

-   beq 1f
+   andi.   r5,r5,IRQ_DISABLE_MASK_LINUX
+   bne 1f
  
  	TRACE_ENABLE_INTS

stb r5,PACASOFTIRQEN(r13)
@@ -352,7 +352,7 @@ ret_from_mc_except:
  
  #define PROLOG_ADDITION_MASKABLE_GEN(n)	\

lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */  \
-   cmpwi   cr0,r10,IRQ_DISABLE_MASK_LINUX;/* yes -> go out of line */ \
+   andi.   r10,r10,IRQ_DISABLE_MASK_LINUX;/* yes -> go out of line */ \
beq masked_interrupt_book3e_##n
  
  #define PROLOG_ADDITION_2REGS_GEN(n)	\




Re: [PATCH 08/13] powerpc: Add new _EXCEPTION_PROLOG_1 macro

2016-09-18 Thread Madhavan Srinivasan



On Friday 16 September 2016 03:42 PM, Nicholas Piggin wrote:

On Thu, 15 Sep 2016 18:31:58 +0530
Madhavan Srinivasan  wrote:


To support the addition of "bitmask" to the MASKABLE_* macros,
factor out the EXCEPTION_PROLOG_1 macro.

Signed-off-by: Madhavan Srinivasan 

Really minor nit, but as a matter of readability of the series,
would you consider moving this next to patch 10 where it's used,
if you submit again?

Yes, sure. Makes sense. Will do it.

Maddy



Reviewed-by: Nicholas Piggin 





Re: [PATCH v2 2/3] powerpc: get hugetlbpage handling more generic

2016-09-18 Thread Aneesh Kumar K.V

Christophe Leroy  writes:
> +#else
> +static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
> +{
> + BUG();
> +}
> +
>  #endif


I was expecting that BUG() would get removed in the next patch, but I don't
see it there. Considering:

@@ -475,11 +453,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, 
hugepd_t *hpdp, int pdshif
for (i = 0; i < num_hugepd; i++, hpdp++)
hpdp->pd = 0;

-#ifdef CONFIG_PPC_FSL_BOOK3E
-   hugepd_free(tlb, hugepte);
-#else
-   pgtable_free_tlb(tlb, hugepte, pdshift - shift);
-#endif
+   if (shift >= pdshift)
+   hugepd_free(tlb, hugepte);
+   else
+   pgtable_free_tlb(tlb, hugepte, pdshift - shift);
 }

What is it that I am missing?

-aneesh



Re: [PATCH v2 2/3] powerpc: get hugetlbpage handling more generic

2016-09-18 Thread Aneesh Kumar K.V
Christophe Leroy  writes:

> Today there are two implementations of hugetlbpages which are managed
> by exclusive #ifdefs:
> * FSL_BOOKE: several directory entries point to the same single hugepage
> * BOOK3S: one upper level directory entry points to a table of hugepages
>
> In preparation for the implementation of hugepage support on the 8xx, we
> need a mix of the two above solutions, because the 8xx needs both cases
> depending on the size of pages:
> * In 4k page size mode, each PGD entry covers a 4M bytes area. It means
> that 2 PGD entries will be necessary to cover an 8M hugepage while a
> single PGD entry will cover 8x 512k hugepages.
> * In 16k page size mode, each PGD entry covers a 64M bytes area. It means
> that 8x 8M hugepages will be covered by one PGD entry and 64x 512k
> hugepages will be covered by one PGD entry.
>
> This patch:
> * removes #ifdefs in favor of if/else based on the range sizes
> * merges the two huge_pte_alloc() functions as they are pretty similar
> * merges the two hugetlbpage_init() functions as they are pretty similar
>
> Signed-off-by: Christophe Leroy 
> ---
> v2: This part is new and results from a split of the last patch of the v1
> series into two parts
>
>  arch/powerpc/mm/hugetlbpage.c | 189 
> +-
>  1 file changed, 77 insertions(+), 112 deletions(-)
>
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 8a512b1..2119f00 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -64,14 +64,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t 
> *hpdp,
>  {
>   struct kmem_cache *cachep;
>   pte_t *new;
> -
> -#ifdef CONFIG_PPC_FSL_BOOK3E
>   int i;
> - int num_hugepd = 1 << (pshift - pdshift);
> - cachep = hugepte_cache;
> -#else
> - cachep = PGT_CACHE(pdshift - pshift);
> -#endif
> + int num_hugepd;
> +
> + if (pshift >= pdshift) {
> + cachep = hugepte_cache;
> + num_hugepd = 1 << (pshift - pdshift);
> + } else {
> + cachep = PGT_CACHE(pdshift - pshift);
> + num_hugepd = 1;
> + }

Is there a way to hint the likely/unlikely branch based on the page size
selected at build time?
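
One hypothetical way to get a compile-time hint (helper name and placement
invented here, not part of the patch) is to fold the known-constant config
into the test, since IS_ENABLED() evaluates to a constant:

/* hypothetical sketch: on FSL_BOOK3E builds, where the hugepte_cache path
 * was previously unconditional, the other arm can be optimized away; on
 * other builds this is just the run-time test from the hunk above.
 */
static inline bool hugepd_populates_several_entries(unsigned int pshift,
						    unsigned int pdshift)
{
	if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E))
		return true;
	return pshift >= pdshift;
}

The call site in __hugepte_alloc() would then branch on
hugepd_populates_several_entries(pshift, pdshift) instead of the open-coded
comparison.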



>  
>   new = kmem_cache_zalloc(cachep, GFP_KERNEL);
>  
> @@ -89,7 +91,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t 
> *hpdp,
>   smp_wmb();
>  
>   spin_lock(&mm->page_table_lock);
> -#ifdef CONFIG_PPC_FSL_BOOK3E
> +
>   /*
>* We have multiple higher-level entries that point to the same
>* actual pte location.  Fill in each as we go and backtrack on error.
> @@ -100,8 +102,13 @@ static int __hugepte_alloc(struct mm_struct *mm, 
> hugepd_t *hpdp,
>   if (unlikely(!hugepd_none(*hpdp)))
>   break;
>   else



> -#ifdef CONFIG_PPC_FSL_BOOK3E
>  struct kmem_cache *hugepte_cache;
>  static int __init hugetlbpage_init(void)
>  {
>   int psize;
>  
> - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
> - unsigned shift;
> -
> - if (!mmu_psize_defs[psize].shift)
> - continue;
> -
> - shift = mmu_psize_to_shift(psize);
> -
> - /* Don't treat normal page sizes as huge... */
> - if (shift != PAGE_SHIFT)
> - if (add_huge_page_size(1ULL << shift) < 0)
> - continue;
> - }
> -
> - /*
> -  * Create a kmem cache for hugeptes.  The bottom bits in the pte have
> -  * size information encoded in them, so align them to allow this
> -  */
> - hugepte_cache =  kmem_cache_create("hugepte-cache", sizeof(pte_t),
> -HUGEPD_SHIFT_MASK + 1, 0, NULL);
> - if (hugepte_cache == NULL)
> - panic("%s: Unable to create kmem cache for hugeptes\n",
> -   __func__);
> -
> - /* Default hpage size = 4M */
> - if (mmu_psize_defs[MMU_PAGE_4M].shift)
> - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
> - else
> - panic("%s: Unable to set default huge page size\n", __func__);
> -
> -
> - return 0;
> -}
> -#else
> -static int __init hugetlbpage_init(void)
> -{
> - int psize;
> -
> +#if !defined(CONFIG_PPC_FSL_BOOK3E)
>   if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
>   return -ENODEV;
> -
> +#endif

Do we need that #if? radix_enabled() should become 0 and that if
condition should be removed at compile time, shouldn't it? Or are you
finding errors with that?


>   for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
>   unsigned shift;
>   unsigned pdshift;
> @@ -860,16 +807,31 @@ static int __init hugetlbpage_init(void)
>* if we have pdshift and shift value same, we don't
>* use pgt cache for hugepd.
>*/
> - if (pdshift != 

Re: [PATCH 06/13] powerpc: reverse the soft_enable logic

2016-09-18 Thread Madhavan Srinivasan



On Friday 16 September 2016 03:35 PM, Nicholas Piggin wrote:

On Thu, 15 Sep 2016 18:31:56 +0530
Madhavan Srinivasan  wrote:


"paca->soft_enabled" is used as a flag to mask some of interrupts.
Currently supported flag values and their details:

soft_enabled    MSR[EE]

0               0       Disabled (PMI and HMI not masked)
1               1       Enabled

"paca->soft_enabled" is initialized to 1 to mark the interrupts as
enabled. arch_local_irq_disable() will toggle the value when interrupts
need to be disabled. At this point the interrupts are not actually disabled;
instead, the interrupt vector has code to check for the flag and mask an
interrupt when it occurs. By "mask it", it updates paca->irq_happened and
returns. arch_local_irq_restore() is called to re-enable interrupts, which
checks and replays interrupts if any occurred.

Now, as mentioned, the current logic does not mask "performance monitoring
interrupts", and PMIs are implemented as NMIs. But this patchset depends on
local_irq_* for a successful local_* update. Meaning, mask all possible
interrupts during the local_* update and replay them after the update.

So the idea here is to reverse the "paca->soft_enabled" logic. New values and
details:

soft_enabled    MSR[EE]

1               0       Disabled (PMI and HMI not masked)
0               1       Enabled

The reason for this change is to create the foundation for a third flag
value "2" for "soft_enabled", to add support for masking PMIs. When
->soft_enabled is set to a value of "2", PMIs are masked, and when set to a
value of "1", PMIs are not masked.

Signed-off-by: Madhavan Srinivasan 
---
  arch/powerpc/include/asm/hw_irq.h | 4 ++--
  arch/powerpc/kernel/entry_64.S| 5 ++---
  2 files changed, 4 insertions(+), 5 deletions(-)
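
A C-level sketch of the reversed sense (illustrative only; the helper name
is invented here and is not part of the diff below):

/* hypothetical sketch of the new convention:
 *   IRQ_DISABLE_MASK_NONE  (0) -> interrupts soft-enabled
 *   IRQ_DISABLE_MASK_LINUX (1) -> Linux interrupts soft-disabled
 * A zero "enabled" value is what later lets extra mask bits (e.g. a PMU
 * bit) be ORed in without changing the "enabled" test.
 */
static inline bool lazy_irqs_disabled(unsigned long soft_enabled)
{
	return soft_enabled != IRQ_DISABLE_MASK_NONE;
}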

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index dc3c248f9244..fd9b421f9020 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -30,8 +30,8 @@
  /*
   * flags for paca->soft_enabled
   */
-#define IRQ_DISABLE_MASK_NONE  1
-#define IRQ_DISABLE_MASK_LINUX 0
+#define IRQ_DISABLE_MASK_NONE  0
+#define IRQ_DISABLE_MASK_LINUX 1
  
  #endif /* CONFIG_PPC64 */
  
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S

index aef7b64cbbeb..879aeb11ad29 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -131,8 +131,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 */
  #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
lbz r10,PACASOFTIRQEN(r13)
-   xorir10,r10,IRQ_DISABLE_MASK_NONE
-1: tdnei   r10,0
+1: tdnei   r10,IRQ_DISABLE_MASK_NONE
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
  #endif
  
@@ -1012,7 +1011,7 @@ _GLOBAL(enter_rtas)

 * check it with the asm equivalent of WARN_ON
 */
lbz r0,PACASOFTIRQEN(r13)
-1: tdnei   r0,IRQ_DISABLE_MASK_LINUX
+1: tdeqi   r0,IRQ_DISABLE_MASK_NONE
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
  #endif


We specifically want to ensure that _LINUX interrupts are disabled
here. Not that we allow masking of others without _LINUX now, but
current behavior is checking that LINUX ones are masked.

Otherwise it seems okay.

It might be nice after this series to do a pass and rename
soft_enabled to soft_masked.


Yes definitely, I do have it in my todo.

Maddy



Reviewed-by: Nicholas Piggin 





Re: [PATCH 05/13] powerpc: Add soft_enabled manipulation functions

2016-09-18 Thread Madhavan Srinivasan



On Friday 16 September 2016 03:27 PM, Nicholas Piggin wrote:

On Thu, 15 Sep 2016 18:31:55 +0530
Madhavan Srinivasan  wrote:


Add new soft_enabled_* manipulation functions and implement
arch_local_* using the soft_enabled_* wrappers.

Signed-off-by: Madhavan Srinivasan 
---
  arch/powerpc/include/asm/hw_irq.h | 32 ++--
  1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index f828b8f8df02..dc3c248f9244 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -53,21 +53,7 @@ static inline notrace void soft_enabled_set(unsigned long 
enable)
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
  }
  
-static inline notrace unsigned long soft_enabled_set_return(unsigned long enable)

-{
-   unsigned long flags;
-
-   asm volatile(
-   "lbz %0,%1(13); stb %2,%1(13)"
-   : "=r" (flags)
-   : "i" (offsetof(struct paca_struct, soft_enabled)),\
- "r" (enable)
-   : "memory");
-
-   return flags;
-}
-
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long soft_enabled_return(void)
  {
unsigned long flags;
  
@@ -79,20 +65,30 @@ static inline unsigned long arch_local_save_flags(void)

return flags;
  }
  
-static inline unsigned long arch_local_irq_disable(void)

+static inline notrace unsigned long soft_enabled_set_return(unsigned long 
enable)
  {
unsigned long flags, zero;
  
  	asm volatile(

-   "li %1,%3; lbz %0,%2(13); stb %1,%2(13)"
+   "mr %1,%3; lbz %0,%2(13); stb %1,%2(13)"
: "=r" (flags), "=&r" (zero)
: "i" (offsetof(struct paca_struct, soft_enabled)),\
- "i" (IRQ_DISABLE_MASK_LINUX)
+ "r" (enable)
: "memory");
  
  	return flags;

  }

As we talked about earlier, it would be nice to add builtin_constant
variants to avoid the extra instruction. If you prefer to do that
after this series, that's fine.
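
A sketch of what such a variant might look like (illustrative only,
combining the "li"/"i"-constraint form being removed above with the
"mr"/"r"-constraint form being added):

static inline notrace unsigned long soft_enabled_set_return(unsigned long enable)
{
	unsigned long flags, zero;

	if (__builtin_constant_p(enable))
		/* constant new value: load it directly, no extra move */
		asm volatile(
			"li %1,%3; lbz %0,%2(13); stb %1,%2(13)"
			: "=r" (flags), "=&r" (zero)
			: "i" (offsetof(struct paca_struct, soft_enabled)),
			  "i" (enable)
			: "memory");
	else
		asm volatile(
			"mr %1,%3; lbz %0,%2(13); stb %1,%2(13)"
			: "=r" (flags), "=&r" (zero)
			: "i" (offsetof(struct paca_struct, soft_enabled)),
			  "r" (enable)
			: "memory");

	return flags;
}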


True. my bad. Will look into this.

Maddy



Reviewed-by: Nicholas Piggin 





Re: [PATCH 04/13] powerpc: Use soft_enabled_set api to update paca->soft_enabled

2016-09-18 Thread Madhavan Srinivasan



On Monday 19 September 2016 08:22 AM, Nicholas Piggin wrote:

On Fri, 16 Sep 2016 13:22:24 +
David Laight  wrote:


From: Nicholas Piggin

Sent: 16 September 2016 12:59
On Fri, 16 Sep 2016 11:43:13 +
David Laight  wrote:
   

From: Nicholas Piggin

Sent: 16 September 2016 10:53
On Thu, 15 Sep 2016 18:31:54 +0530
Madhavan Srinivasan  wrote:
  

Force use of the soft_enabled_set() wrapper to update paca->soft_enabled
wherever possible. Also add a new wrapper function, soft_enabled_set_return(),
to force the paca->soft_enabled updates.

...

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index 8fad8c24760b..f828b8f8df02 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -53,6 +53,20 @@ static inline notrace void soft_enabled_set(unsigned long 
enable)
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
  }

+static inline notrace unsigned long soft_enabled_set_return(unsigned long 
enable)
+{
+   unsigned long flags;
+
+   asm volatile(
+   "lbz %0,%1(13); stb %2,%1(13)"
+   : "=r" (flags)
+   : "i" (offsetof(struct paca_struct, soft_enabled)),\
+ "r" (enable)
+   : "memory");
+
+   return flags;
+}

Why do you have the "memory" clobber here while soft_enabled_set() does not?

I wondered about the missing memory clobber earlier.

Any 'clobber' ought to be restricted to the referenced memory area.
If the structure is only referenced by r13 through 'asm volatile' it isn't 
needed.

Well a clobber (compiler barrier) at some point is needed in irq_disable and
irq_enable paths, so we correctly open and close the critical section vs 
interrupts.
I just wonder about these helpers. It might be better to take the clobbers out 
of
there and add barrier(); in callers, which would make it more obvious.

If the memory clobber is needed to synchronise with the rest of the code
rather than just ensuring the compiler doesn't reorder accesses via r13
then I'd add an explicit barrier() somewhere - even if in these helpers.

Potentially the helper wants a memory clobber for the (r13) area
and a separate barrier() to ensure the interrupts are masked for the
right code.
Even if both are together in the same helper.

Good point. Some of the existing modification helpers don't seem to have
clobbers for modifying the r13->soft_enabled memory itself, but they do
have the memory clobber where a critical section barrier is required.

The former may not be a problem if the helpers are used very carefully,
but probably should be commented at best, if not fixed.


Yes. Agreed. Will add comments


  So after Madhi's
patches, we should make all accesses go via the helper functions, so a
clobber for the soft_enabled modification may not be required (this should
be commented). I think it may be cleaner to specify the location in the
constraints, but maybe that doesn't generate the best code -- something to
investigate.

Then, I'd like to see barrier()s for interrupt critical sections placed in
the callers of these helpers, which will make the code more obvious.


Ok will look into this.



Thanks,
Nick





Re: [PATCH 2/2] powerpc: tm: Enable transactional memory (TM) lazily for userspace

2016-09-18 Thread Cyril Bur
On Mon, 2016-09-19 at 12:47 +0800, Simon Guo wrote:
> On Wed, Sep 14, 2016 at 06:02:16PM +1000, Cyril Bur wrote:
> > 
> > @@ -954,8 +963,16 @@ static inline void __switch_to_tm(struct
> > task_struct *prev,
> >     struct task_struct *new)
> >  {
> >     if (cpu_has_feature(CPU_FTR_TM)) {
> > -   tm_enable();
> > -   tm_reclaim_task(prev);
> > +   if (tm_enabled(prev) || tm_enabled(new))
> > +   tm_enable();
> > +
> > +   if (tm_enabled(prev)) {
> > +   prev->thread.load_tm++;
> > +   tm_reclaim_task(prev);
> > +   if (!MSR_TM_ACTIVE(prev->thread.regs->msr) 
> > && prev->thread.load_tm == 0)
> > +   prev->thread.regs->msr &= ~MSR_TM;
> > +   }
> Hi Cyril,
> 
> If MSR_TM_ACTIVE(), is it better to reset load_tm to 0?
> Other looks good to me.
> 

Doing so would extend the window that we keep TM enabled for when we
might not need to. It is possible that we could assume that if
MSR_TM_ACTIVE() then they're in codepaths that will reuse TM again
soon so load_tm = 0 could be a good idea but there's really no way to
know. Food for thought I guess...

Maybe?

Good thought,
Cyril

> Thanks,
> - Simon


[PATCH v7 6/6] powerpc: pSeries: Add pv-qspinlock build config/make

2016-09-18 Thread Pan Xinhui
pSeries runs as a guest and might need pv-qspinlock.

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/kernel/Makefile   | 1 +
 arch/powerpc/platforms/pseries/Kconfig | 8 
 2 files changed, 9 insertions(+)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index fe4c075..efd2f3d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
 obj-$(CONFIG_PPC_P7_NAP)   += idle_book3s.o
 procfs-y   := proc_powerpc.o
 obj-$(CONFIG_PROC_FS)  += $(procfs-y)
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)   += paravirt.o
 rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI)  := rtas_pci.o
 obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
 obj-$(CONFIG_PPC_RTAS_DAEMON)  += rtasd.o
diff --git a/arch/powerpc/platforms/pseries/Kconfig 
b/arch/powerpc/platforms/pseries/Kconfig
index f669323..46632e4 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -128,3 +128,11 @@ config HV_PERF_CTRS
  systems. 24x7 is available on Power 8 systems.
 
   If unsure, select Y.
+
+config PARAVIRT_SPINLOCKS
+   bool "Paravirtialization support for qspinlock"
+   depends on PPC_SPLPAR && QUEUED_SPINLOCKS
+   default y
+   help
+ If platform supports virtualization, for example PowerVM, this option
+ can let guest have a better performace.
-- 
2.4.11



[PATCH v7 5/6] powerpc/pv-qspinlock: powerpc support pv-qspinlock

2016-09-18 Thread Pan Xinhui
The default pv-qspinlock uses qspinlock (the native version of pv-qspinlock).
pv_lock initialization should be done in the boot stage with irqs disabled.
And if we run as a guest with PowerKVM/pHyp shared_processor mode, restore
the pv_lock_ops callbacks to pv-qspinlock (the pv version), which makes
full use of virtualization.

There is a hash table; we store the cpu number into it and the key is the
lock. So every time, pv_wait can know who the lock holder is by searching
for the lock. Also store the lock in a per_cpu struct, and remove it when we
own the lock. Then pv_wait can know which lock we are spinning on. But the
cpu in the hash table might not be the correct lock holder, as for
performance reasons we do not take care of hash conflicts.

Also introduce spin_lock_holder, which tells who owns the lock now.
Currently the only user is spin_unlock_wait.

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/include/asm/qspinlock.h   |  29 +++-
 arch/powerpc/include/asm/qspinlock_paravirt.h  |  36 +
 .../powerpc/include/asm/qspinlock_paravirt_types.h |  13 ++
 arch/powerpc/kernel/paravirt.c | 153 +
 arch/powerpc/lib/locks.c   |   8 +-
 arch/powerpc/platforms/pseries/setup.c |   5 +
 6 files changed, 241 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h
 create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt_types.h
 create mode 100644 arch/powerpc/kernel/paravirt.c
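
Illustrative sketch only -- the real implementation lives in the new
arch/powerpc/kernel/paravirt.c, which is not quoted in full below. It shows
the idea described above: remember "cpu X holds (or last held) lock L" in a
small hash keyed by the lock address, so pv_wait() can pick a cpu to confer
slices to. Hash conflicts are tolerated because the answer is only a hint;
names and sizes here are invented.

#include <linux/hash.h>

struct pv_hash_entry {
	struct qspinlock *lock;
	int		  cpu;
};

static struct pv_hash_entry pv_lock_hash[1 << 8];

static void pv_hash_set(struct qspinlock *lock, int cpu)
{
	struct pv_hash_entry *e = &pv_lock_hash[hash_ptr(lock, 8)];

	e->lock = lock;
	e->cpu = cpu;
}

static int pv_hash_find(struct qspinlock *lock)
{
	struct pv_hash_entry *e = &pv_lock_hash[hash_ptr(lock, 8)];

	/* may be stale or a conflicting entry: callers treat it as a hint */
	return e->lock == lock ? e->cpu : -1;
}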

diff --git a/arch/powerpc/include/asm/qspinlock.h 
b/arch/powerpc/include/asm/qspinlock.h
index 881a186..23459fb 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -15,7 +15,7 @@ static inline u8 * __qspinlock_lock_byte(struct qspinlock 
*lock)
return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
 }
 
-static inline void queued_spin_unlock(struct qspinlock *lock)
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
 {
/* release semantics is required */
smp_store_release(__qspinlock_lock_byte(lock), 0);
@@ -27,6 +27,33 @@ static inline int queued_spin_is_locked(struct qspinlock 
*lock)
	return atomic_read(&lock->val);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#include 
+/*
+ * try to know who is the lock holder, however it is not always true
+ * Return:
+ * -1, we did not know the lock holder.
+ * other value, likely is the lock holder.
+ */
+extern int spin_lock_holder(void *lock);
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+   pv_queued_spin_lock(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+   pv_queued_spin_unlock(lock);
+}
+#else
+#define spin_lock_holder(l) (-1)
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+   native_queued_spin_unlock(lock);
+}
+#endif
+
 #include 
 
 /* we need override it as ppc has io_sync stuff */
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h 
b/arch/powerpc/include/asm/qspinlock_paravirt.h
new file mode 100644
index 000..d87cda0
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,36 @@
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+#error "do not include this file"
+#endif
+
+#ifndef _ASM_QSPINLOCK_PARAVIRT_H
+#define _ASM_QSPINLOCK_PARAVIRT_H
+
+#include  
+
+extern void pv_lock_init(void);
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void pv_queued_spin_lock(struct qspinlock *lock, u32 val)
+{
+   pv_lock_op.lock(lock, val);
+}
+
+static inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+   pv_lock_op.unlock(lock);
+}
+
+static inline void pv_wait(u8 *ptr, u8 val)
+{
+   pv_lock_op.wait(ptr, val);
+}
+
+static inline void pv_kick(int cpu)
+{
+   pv_lock_op.kick(cpu);
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt_types.h 
b/arch/powerpc/include/asm/qspinlock_paravirt_types.h
new file mode 100644
index 000..83611ed
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt_types.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_QSPINLOCK_PARAVIRT_TYPES_H
+#define _ASM_QSPINLOCK_PARAVIRT_TYPES_H
+
+struct pv_lock_ops {
+   void (*lock)(struct qspinlock *lock, u32 val);
+   void (*unlock)(struct qspinlock *lock);
+   void (*wait)(u8 *ptr, u8 val);
+   void (*kick)(int cpu);
+};
+
+extern struct pv_lock_ops pv_lock_op;
+
+#endif
diff --git a/arch/powerpc/kernel/paravirt.c b/arch/powerpc/kernel/paravirt.c
new file mode 100644
index 000..e697b17
--- /dev/null
+++ b/arch/powerpc/kernel/paravirt.c
@@ -0,0 +1,153 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * 

[PATCH v7 4/6] powerpc: lib/locks.c: Add cpu yield/wake helper function

2016-09-18 Thread Pan Xinhui
Add two corresponding helper functions to support pv-qspinlock.

For normal use, __spin_yield_cpu will confer the current vcpu's slices to the
target vcpu (say, a lock holder). If the target vcpu is not specified or it
is in the running state, whether such conferring to the lpar happens depends
on the second parameter.

Because the hcall itself introduces latency and a little overhead, and we do
NOT want to suffer any latency in some cases, e.g. in an interrupt handler,
the second parameter *confer* can indicate such a case.

__spin_wake_cpu is simpler; it will wake up one vcpu regardless of its
current state.

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/include/asm/spinlock.h |  4 +++
 arch/powerpc/lib/locks.c| 59 +
 2 files changed, 63 insertions(+)

diff --git a/arch/powerpc/include/asm/spinlock.h 
b/arch/powerpc/include/asm/spinlock.h
index 6aef8dd..abb6b0f 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -56,9 +56,13 @@
 /* We only yield to the hypervisor if we are in shared processor mode */
 #define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
 extern void __spin_yield(arch_spinlock_t *lock);
+extern void __spin_yield_cpu(int cpu, int confer);
+extern void __spin_wake_cpu(int cpu);
 extern void __rw_yield(arch_rwlock_t *lock);
 #else /* SPLPAR */
 #define __spin_yield(x)barrier()
+#define __spin_yield_cpu(x,y) barrier()
+#define __spin_wake_cpu(x) barrier()
 #define __rw_yield(x)  barrier()
 #define SHARED_PROCESSOR   0
 #endif
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 6574626..892df7d 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,65 @@
 #include 
 #include 
 
+/*
+ * confer our slices to a specified cpu and return. If it is already running or
+ * cpu is -1, then we will check confer. If confer is NULL, we will return
+ * otherwise we confer our slices to lpar.
+ */
+void __spin_yield_cpu(int cpu, int confer)
+{
+   unsigned int holder_cpu = cpu, yield_count;
+
+   if (cpu == -1)
+   goto yield_to_lpar;
+
+   BUG_ON(holder_cpu >= nr_cpu_ids);
+   yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+   /* if cpu is running, confer slices to lpar conditionally*/
+   if ((yield_count & 1) == 0)
+   goto yield_to_lpar;
+
+   plpar_hcall_norets(H_CONFER,
+   get_hard_smp_processor_id(holder_cpu), yield_count);
+   return;
+
+yield_to_lpar:
+   if (confer)
+   plpar_hcall_norets(H_CONFER, -1, 0);
+}
+EXPORT_SYMBOL_GPL(__spin_yield_cpu);
+
+void __spin_wake_cpu(int cpu)
+{
+   unsigned int holder_cpu = cpu;
+
+   BUG_ON(holder_cpu >= nr_cpu_ids);
+   /*
+* NOTE: we should always do this hcall regardless of
+* the yield_count of the holder_cpu.
+* as thers might be a case like below;
+* CPU  1   2
+*  yielded = true
+*  if (yielded)
+*  __spin_wake_cpu()
+*  __spin_yield_cpu()
+*
+* So we might lose a wake if we check the yield_count and
+* return directly if the holder_cpu is running.
+* IOW. do NOT code like below.
+*  yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+*  if ((yield_count & 1) == 0)
+*  return;
+*
+* a PROD hcall marks the target_cpu proded, which cause the next cede 
or confer
+* called on the target_cpu invalid.
+*/
+   plpar_hcall_norets(H_PROD,
+   get_hard_smp_processor_id(holder_cpu));
+}
+EXPORT_SYMBOL_GPL(__spin_wake_cpu);
+
 #ifndef CONFIG_QUEUED_SPINLOCKS
 void __spin_yield(arch_spinlock_t *lock)
 {
-- 
2.4.11



[PATCH v7 3/6] powerpc: pseries/Kconfig: Add qspinlock build config

2016-09-18 Thread Pan Xinhui
pseries will use qspinlock by default.

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/platforms/pseries/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/pseries/Kconfig 
b/arch/powerpc/platforms/pseries/Kconfig
index bec90fb..f669323 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -21,6 +21,7 @@ config PPC_PSERIES
select HOTPLUG_CPU if SMP
select ARCH_RANDOM
select PPC_DOORBELL
+   select ARCH_USE_QUEUED_SPINLOCKS
default y
 
 config PPC_SPLPAR
-- 
2.4.11



[PATCH v7 2/6] powerpc/qspinlock: powerpc support qspinlock

2016-09-18 Thread Pan Xinhui
This patch adds basic code to enable qspinlock on powerpc. qspinlock is
one kind of fair-lock implementation, and we have seen some performance
improvement under some scenarios.

queued_spin_unlock() releases the lock by just one write of NULL to the
->locked field, which sits at different places in the two endianness
systems.

We override some arch_spin_xxx as powerpc has io_sync stuff which makes
sure the io operations are protected by the lock correctly.

There is another special case, see commit
2c610022711 ("locking/qspinlock: Fix spin_unlock_wait() some more")

Signed-off-by: Pan Xinhui 
---
 arch/powerpc/include/asm/qspinlock.h  | 66 +++
 arch/powerpc/include/asm/spinlock.h   | 31 +--
 arch/powerpc/include/asm/spinlock_types.h |  4 ++
 arch/powerpc/lib/locks.c  | 59 +++
 4 files changed, 147 insertions(+), 13 deletions(-)
 create mode 100644 arch/powerpc/include/asm/qspinlock.h
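
Why the unlock below targets a single byte at an endian-dependent offset
(illustrative note, not from the patch): the locked byte is the
least-significant byte of the 32-bit lock word.

/* illustrative only: where the "locked" byte of the 32-bit val lives */
union qspinlock_view {
	u32 val;
	u8  bytes[4];
};
/* little-endian: bytes[0] is the locked byte
 * big-endian:    bytes[3] is the locked byte
 * hence the "+ 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN)" in
 * __qspinlock_lock_byte() below.
 */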

diff --git a/arch/powerpc/include/asm/qspinlock.h 
b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 000..881a186
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,66 @@
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include 
+
+#define SPIN_THRESHOLD (1 << 15)
+#define queued_spin_unlock queued_spin_unlock
+#define queued_spin_is_locked queued_spin_is_locked
+#define queued_spin_unlock_wait queued_spin_unlock_wait
+
+extern void queued_spin_unlock_wait(struct qspinlock *lock);
+
+static inline u8 * __qspinlock_lock_byte(struct qspinlock *lock)
+{
+   return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+   /* release semantics is required */
+   smp_store_release(__qspinlock_lock_byte(lock), 0);
+}
+
+static inline int queued_spin_is_locked(struct qspinlock *lock)
+{
+   smp_mb();
+   return atomic_read(&lock->val);
+}
+
+#include 
+
+/* we need override it as ppc has io_sync stuff */
+#undef arch_spin_trylock
+#undef arch_spin_lock
+#undef arch_spin_lock_flags
+#undef arch_spin_unlock
+#define arch_spin_trylock arch_spin_trylock
+#define arch_spin_lock arch_spin_lock
+#define arch_spin_lock_flags arch_spin_lock_flags
+#define arch_spin_unlock arch_spin_unlock
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+   CLEAR_IO_SYNC;
+   return queued_spin_trylock(lock);
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+   CLEAR_IO_SYNC;
+   queued_spin_lock(lock);
+}
+
+static inline
+void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
+{
+   CLEAR_IO_SYNC;
+   queued_spin_lock(lock);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+   SYNC_IO;
+   queued_spin_unlock(lock);
+}
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock.h 
b/arch/powerpc/include/asm/spinlock.h
index fa37fe9..6aef8dd 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -52,6 +52,23 @@
 #define SYNC_IO
 #endif
 
+#if defined(CONFIG_PPC_SPLPAR)
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
+extern void __spin_yield(arch_spinlock_t *lock);
+extern void __rw_yield(arch_rwlock_t *lock);
+#else /* SPLPAR */
+#define __spin_yield(x)barrier()
+#define __rw_yield(x)  barrier()
+#define SHARED_PROCESSOR   0
+#endif
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include 
+#else
+
+#define arch_spin_relax(lock)  __spin_yield(lock)
+
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
return lock.slock == 0;
@@ -106,18 +123,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
  * held.  Conveniently, we have a word in the paca that holds this
  * value.
  */
-
-#if defined(CONFIG_PPC_SPLPAR)
-/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-#define __spin_yield(x)barrier()
-#define __rw_yield(x)  barrier()
-#define SHARED_PROCESSOR   0
-#endif
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
CLEAR_IO_SYNC;
@@ -195,6 +200,7 @@ out:
smp_mb();
 }
 
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
@@ -330,7 +336,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
-#define arch_spin_relax(lock)  __spin_yield(lock)
 #define arch_read_relax(lock)  __rw_yield(lock)
 #define arch_write_relax(lock) __rw_yield(lock)
 
diff --git 

[PATCH v7 1/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock

2016-09-18 Thread Pan Xinhui
cmpxchg_release() is more lightweight than cmpxchg() on some archs (e.g.
PPC). Moreover, in __pv_queued_spin_unlock() we only need a RELEASE in
the fast path (pairing with *_try_lock() or *_lock()), and the slow path
has smp_store_release too. So it's safe to use cmpxchg_release here.

Suggested-by:  Boqun Feng 
Signed-off-by: Pan Xinhui 
---
 kernel/locking/qspinlock_paravirt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/locking/qspinlock_paravirt.h 
b/kernel/locking/qspinlock_paravirt.h
index 8a99abf..ce655aa 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -544,7 +544,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock 
*lock)
 * unhash. Otherwise it would be possible to have multiple @lock
 * entries, which would be BAD.
 */
-   locked = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
+   locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
if (likely(locked == _Q_LOCKED_VAL))
return;
 
-- 
2.4.11



[PATCH v7 0/6] Implement qspinlock/pv-qspinlock on ppc

2016-09-18 Thread Pan Xinhui
Hi All,
  this is the fair-lock patchset. You can apply the patches and build
successfully; they are based on 4.8-rc4.
  qspinlock can avoid the waiter-starvation issue. It has about the same
speed in the single-thread case and it can be much faster in high contention
situations, especially when the spinlock is embedded within the data
structure to be protected.

v6 -> v7:
rebase onto 4.8-rc4
no changelog anymore, sorry for that. I hope there is a very careful review.

Todo:
we can save one function call of overhead, as we can use feature-fixup to
patch the binary code. Currently there is pv_lock_ops->lock(lock) and
->unlock(lock) to acquire/release the lock.

some benchmark results below

perf bench
these numbers are ops per sec, so the higher the better.
***
on pSeries with 32 vcpus, 32Gb memory, pHyp.

test case       | pv-qspinlock | qspinlock | current-spinlock

futex hash      | 618572       | 552332    | 553788
futex lock-pi   | 364          | 364       | 364
sched pipe      | 78984        | 76060     | 81454


unix bench:
these numbers are scores, so the higher the better.

on PowerNV with 16 cores (cpus) (smt off), 32Gb memory:
-
pv-qspinlock and qspinlock have very similar results because pv-qspinlock
uses the native version, which only has one callback of overhead

test case                             | pv-qspinlock and qspinlock | current-spinlock

Execl Throughput                          761.1    761.4
File Copy 1024 bufsize 2000 maxblocks    1259.8   1286.6
File Copy 256 bufsize 500 maxblocks       782.2    790.3
File Copy 4096 bufsize 8000 maxblocks    2741.5   2817.4
Pipe Throughput                          1063.2   1036.7
Pipe-based Context Switching              284.7    281.1
Process Creation                          679.6    649.1
Shell Scripts (1 concurrent)             1933.2   1922.9
Shell Scripts (8 concurrent)             5003.3   4899.8
System Call Overhead                      900.6    896.8
                                         ======   ======
System Benchmarks Index Score            1139.3   1133.0
-

***
on pSeries with 32 vcpus, 32Gb memory, pHyp.

test case                             | pv-qspinlock | qspinlock | current-spinlock

Execl Throughput                         877.1    891.2    872.8
File Copy 1024 bufsize 2000 maxblocks   1390.4   1399.2   1395.0
File Copy 256 bufsize 500 maxblocks      882.4    889.5    881.8
File Copy 4096 bufsize 8000 maxblocks   3112.3   3113.4   3121.7
Pipe Throughput                         1095.8   1162.6   1158.5
Pipe-based Context Switching             194.9    192.7    200.7
Process Creation                         518.4    526.4    509.1
Shell Scripts (1 concurrent)            1401.9   1413.9   1402.2
Shell Scripts (8 concurrent)            3215.6   3246.6   3229.1
System Call Overhead                     833.2    892.4    888.1
                                        ======   ======   ======
System Benchmarks Index Score           1033.7   1052.5   1047.8


**
on pSeries with 32 vcpus, 16Gb memory, KVM.

test case                             | pv-qspinlock | qspinlock | current-spinlock

Execl Throughput                         497.4    518.7    497.8
File Copy 1024 bufsize 2000 maxblocks   1368.8   1390.1   1343.3
File Copy 256 bufsize 500 maxblocks      857.7    859.8    831.4
File Copy 4096 bufsize 8000 maxblocks   2851.7   2838.1   2785.5
Pipe Throughput                         1221.9

Re: [PATCH v2 1/3] powerpc: port 64 bits pgtable_cache to 32 bits

2016-09-18 Thread Aneesh Kumar K.V
Christophe Leroy  writes:

> Today powerpc64 uses a set of pgtable_caches while powerpc32 uses
> standard pages when using 4k pages and a single pgtable_cache
> if using other size pages.
>
> In preparation for implementing huge pages on the 8xx, this patch
> replaces the specific powerpc32 handling by the 64 bits approach.
>
> This is done by:
> * moving 64 bits pgtable_cache_add() and pgtable_cache_init()
> in a new file called init-common.c
> * modifying pgtable_cache_init() to also handle the case
> without PMD
> * removing the 32 bits version of pgtable_cache_add() and
> pgtable_cache_init()
> * copying related header contents from 64 bits into both the
> book3s/32 and nohash/32 header files
>
> On the 8xx, the following cache sizes will be used:
> * 4k pages mode:
> - PGT_CACHE(10) for PGD
> - PGT_CACHE(3) for 512k hugepage tables
> * 16k pages mode:
> - PGT_CACHE(6) for PGD
> - PGT_CACHE(7) for 512k hugepage tables
> - PGT_CACHE(3) for 8M hugepage tables
>
> Signed-off-by: Christophe Leroy 
> ---
> v2: in v1, hugepte_cache was wrongly replaced by PGT_CACHE(1).
> This modification has been removed from v2.
>
>  arch/powerpc/include/asm/book3s/32/pgalloc.h |  44 ++--
>  arch/powerpc/include/asm/book3s/32/pgtable.h |  43 
>  arch/powerpc/include/asm/book3s/64/pgtable.h |   3 -
>  arch/powerpc/include/asm/nohash/32/pgalloc.h |  44 ++--
>  arch/powerpc/include/asm/nohash/32/pgtable.h |  45 
>  arch/powerpc/include/asm/nohash/64/pgtable.h |   2 -
>  arch/powerpc/include/asm/pgtable.h   |   2 +
>  arch/powerpc/mm/Makefile |   3 +-
>  arch/powerpc/mm/init-common.c| 147 
> +++
>  arch/powerpc/mm/init_64.c|  77 --
>  arch/powerpc/mm/pgtable_32.c |  37 ---
>  11 files changed, 273 insertions(+), 174 deletions(-)
>  create mode 100644 arch/powerpc/mm/init-common.c
>
> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h 
> b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> index 8e21bb4..d310546 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
> @@ -2,14 +2,42 @@
>  #define _ASM_POWERPC_BOOK3S_32_PGALLOC_H
>  
>  #include 
> +#include 
>  
> -/* For 32-bit, all levels of page tables are just drawn from get_free_page() 
> */
> -#define MAX_PGTABLE_INDEX_SIZE   0
> +/*
> + * Functions that deal with pagetables that could be at any level of
> + * the table need to be passed an "index_size" so they know how to
> + * handle allocation.  For PTE pages (which are linked to a struct
> + * page for now, and drawn from the main get_free_pages() pool), the
> + * allocation size will be (2^index_size * sizeof(pointer)) and
> + * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
> + *
> + * The maximum index size needs to be big enough to allow any
> + * pagetable sizes we need, but small enough to fit in the low bits of
> + * any page table pointer.  In other words all pagetables, even tiny
> + * ones, must be aligned to allow at least enough low 0 bits to
> + * contain this value.  This value is also used as a mask, so it must
> + * be one less than a power of two.
> + */
> +#define MAX_PGTABLE_INDEX_SIZE   0xf
>  
>  extern void __bad_pte(pmd_t *pmd);
>  
> -extern pgd_t *pgd_alloc(struct mm_struct *mm);
> -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
> +extern struct kmem_cache *pgtable_cache[];
> +#define PGT_CACHE(shift) ({  \
> + BUG_ON(!(shift));   \
> + pgtable_cache[(shift) - 1]; \
> + })
> +
> +static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> +{
> + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
> +}
> +
> +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
> +{
> + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
> +}
>  
>  /*
>   * We don't have any real pmd's, and this code never triggers because
> @@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, 
> pgtable_t ptepage)
>  
>  static inline void pgtable_free(void *table, unsigned index_size)
>  {
> - BUG_ON(index_size); /* 32-bit doesn't use this */
> - free_page((unsigned long)table);
> + if (!index_size) {
> + free_page((unsigned long)table);
> + } else {
> + BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
> + kmem_cache_free(PGT_CACHE(index_size), table);
> + }
>  }
>  
>  #define check_pgt_cache()do { } while (0)
> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
> b/arch/powerpc/include/asm/book3s/32/pgtable.h
> index 6b8b2d5..f887499 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
> @@ -8,6 +8,26 @@
>  /* And here we include common definitions */
>  #include 
>  
> +#define 

Re: [PATCH][RFC] Implement arch primitives for busywait loops

2016-09-18 Thread Aneesh Kumar K.V

> diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
> index 3b5bafc..695743c 100644
> --- a/include/linux/bit_spinlock.h
> +++ b/include/linux/bit_spinlock.h
> @@ -25,9 +25,8 @@ static inline void bit_spin_lock(int bitnum, unsigned long 
> *addr)
>  #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
>   while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
>   preempt_enable();
> - do {
> - cpu_relax();
> - } while (test_bit(bitnum, addr));
> + spin_do {
> + } spin_while (test_bit(bitnum, addr));
>   preempt_disable();
>   }
>  #endif

That usage is strange, with nothing in the do { } while loop. Maybe add a
macro for usages like this?
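
A minimal sketch of such a macro (name invented here, building on the
spin_do/spin_while primitives from the RFC):

/* hypothetical: spin, with the arch busywait hints, while cond stays true */
#define spin_while_cond(cond)			\
do {						\
	spin_do {				\
	} spin_while (cond);			\
} while (0)

The bit_spin_lock() hunk above would then read
spin_while_cond(test_bit(bitnum, addr));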

-aneesh



Re: [PATCH 04/13] powerpc: Use soft_enabled_set api to update paca->soft_enabled

2016-09-18 Thread Madhavan Srinivasan



On Friday 16 September 2016 05:13 PM, David Laight wrote:

From: Nicholas Piggin

Sent: 16 September 2016 10:53
On Thu, 15 Sep 2016 18:31:54 +0530
Madhavan Srinivasan  wrote:


Force use of the soft_enabled_set() wrapper to update paca->soft_enabled
wherever possible. Also add a new wrapper function, soft_enabled_set_return(),
to force the paca->soft_enabled updates.

...

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index 8fad8c24760b..f828b8f8df02 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -53,6 +53,20 @@ static inline notrace void soft_enabled_set(unsigned long 
enable)
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
  }

+static inline notrace unsigned long soft_enabled_set_return(unsigned long 
enable)
+{
+   unsigned long flags;
+
+   asm volatile(
+   "lbz %0,%1(13); stb %2,%1(13)"
+   : "=r" (flags)
+   : "i" (offsetof(struct paca_struct, soft_enabled)),\
+ "r" (enable)
+   : "memory");
+
+   return flags;
+}

Why do you have the "memory" clobber here while soft_enabled_set() does not?

I wondered about the missing memory clobber earlier.

Any 'clobber' ought to be restricted to the referenced memory area.
If the structure is only referenced by r13 through 'asm volatile' it isn't 
needed.
OTOH why not allocate a global register variable to r13 and access through that?


I do see this in asm/paca.h "register struct paca_struct *local_paca 
asm("r13"); "

and __check_irq_replay() in kernel/irq.c does update "irq_happened" as
mentioned. But the existing helpers in hw_irq.h update soft_enabled via
asm volatile, and I did the same.

Maddy


David





Re: [PATCH 2/2] powerpc: tm: Enable transactional memory (TM) lazily for userspace

2016-09-18 Thread Simon Guo
On Wed, Sep 14, 2016 at 06:02:16PM +1000, Cyril Bur wrote:
> @@ -954,8 +963,16 @@ static inline void __switch_to_tm(struct task_struct 
> *prev,
>   struct task_struct *new)
>  {
>   if (cpu_has_feature(CPU_FTR_TM)) {
> - tm_enable();
> - tm_reclaim_task(prev);
> + if (tm_enabled(prev) || tm_enabled(new))
> + tm_enable();
> +
> + if (tm_enabled(prev)) {
> + prev->thread.load_tm++;
> + tm_reclaim_task(prev);
> + if (!MSR_TM_ACTIVE(prev->thread.regs->msr) && 
> prev->thread.load_tm == 0)
> + prev->thread.regs->msr &= ~MSR_TM;
> + }
Hi Cyril,

If MSR_TM_ACTIVE(), is it better to reset load_tm to 0?
Other looks good to me.

Thanks,
- Simon


Re: [PATCH 04/13] powerpc: Use soft_enabled_set api to update paca->soft_enabled

2016-09-18 Thread Madhavan Srinivasan



On Friday 16 September 2016 03:23 PM, Nicholas Piggin wrote:

On Thu, 15 Sep 2016 18:31:54 +0530
Madhavan Srinivasan  wrote:


Force use of the soft_enabled_set() wrapper to update paca->soft_enabled
wherever possible. Also add a new wrapper function, soft_enabled_set_return(),
to force the paca->soft_enabled updates.

Signed-off-by: Madhavan Srinivasan 
---
  arch/powerpc/include/asm/hw_irq.h  | 14 ++
  arch/powerpc/include/asm/kvm_ppc.h |  2 +-
  arch/powerpc/kernel/irq.c  |  2 +-
  arch/powerpc/kernel/setup_64.c |  4 ++--
  arch/powerpc/kernel/time.c |  6 +++---
  5 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index 8fad8c24760b..f828b8f8df02 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -53,6 +53,20 @@ static inline notrace void soft_enabled_set(unsigned long 
enable)
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
  }
  
+static inline notrace unsigned long soft_enabled_set_return(unsigned long enable)

+{
+   unsigned long flags;
+
+   asm volatile(
+   "lbz %0,%1(13); stb %2,%1(13)"
+   : "=r" (flags)
+   : "i" (offsetof(struct paca_struct, soft_enabled)),\
+ "r" (enable)
+   : "memory");
+
+   return flags;
+}

Why do you have the "memory" clobber here while soft_enabled_set() does not?


I did change the function to include a local variable
and update soft_enabled. But, my bad, it was in the next patch;
I should make the change here. Yes, we don't need a "memory" clobber here,
which is right. But this change is not complete and I will correct it.

Maddy



Thanks,
Nick





Re: [PATCH 03/13] powerpc: move set_soft_enabled() and rename

2016-09-18 Thread Madhavan Srinivasan



On Friday 16 September 2016 03:20 PM, Nicholas Piggin wrote:

On Thu, 15 Sep 2016 18:31:53 +0530
Madhavan Srinivasan  wrote:


Move set_soft_enabled() from powerpc/kernel/irq.c to
asm/hw_irq.h and rename it soft_enabled_set().
This way paca->soft_enabled updates can be forced.

Could you just tidy up the changelog a little?

You are renaming it I assume because you are going to introduce more
soft_enabled_x() functions, and that the namespace works better as a
prefix than a postfix.

You are moving it so all paca->soft_enabled updates can be done via
these access functions rather than open coded.

Did I get that right?


Yes. That is right. My bad. Will fix the commit message and changelog.



Reviewed-by: Nicholas Piggin 





Re: [PATCH 02/13] powerpc: Cleanup to use IRQ_DISABLE_MASK_* macros for paca->soft_enabled update

2016-09-18 Thread Madhavan Srinivasan



On Friday 16 September 2016 03:17 PM, Nicholas Piggin wrote:

On Thu, 15 Sep 2016 18:31:52 +0530
Madhavan Srinivasan  wrote:


Replace the hardcoded values used when updating
paca->soft_enabled with the IRQ_DISABLE_MASK_* #defines.
No logic change.

This could be folded with patch 1.


Sure. Will do it.


Reviewed-by: Nicholas Piggin 





Re: [PATCH 04/13] powerpc: Use soft_enabled_set api to update paca->soft_enabled

2016-09-18 Thread Nicholas Piggin
On Fri, 16 Sep 2016 13:22:24 +
David Laight  wrote:

> From: Nicholas Piggin
> > Sent: 16 September 2016 12:59
> > On Fri, 16 Sep 2016 11:43:13 +
> > David Laight  wrote:
> >   
> > > From: Nicholas Piggin  
> > > > Sent: 16 September 2016 10:53
> > > > On Thu, 15 Sep 2016 18:31:54 +0530
> > > > Madhavan Srinivasan  wrote:
> > > >  
> > > > > Force use of the soft_enabled_set() wrapper to update paca->soft_enabled
> > > > > wherever possible. Also add a new wrapper function,
> > > > > soft_enabled_set_return(), to force the paca->soft_enabled updates.
> > > ...  
> > > > > diff --git a/arch/powerpc/include/asm/hw_irq.h 
> > > > > b/arch/powerpc/include/asm/hw_irq.h
> > > > > index 8fad8c24760b..f828b8f8df02 100644
> > > > > --- a/arch/powerpc/include/asm/hw_irq.h
> > > > > +++ b/arch/powerpc/include/asm/hw_irq.h
> > > > > @@ -53,6 +53,20 @@ static inline notrace void 
> > > > > soft_enabled_set(unsigned long enable)
> > > > >   : : "r" (enable), "i" (offsetof(struct paca_struct, 
> > > > > soft_enabled)));
> > > > >  }
> > > > >
> > > > > +static inline notrace unsigned long soft_enabled_set_return(unsigned 
> > > > > long enable)
> > > > > +{
> > > > > + unsigned long flags;
> > > > > +
> > > > > + asm volatile(
> > > > > + "lbz %0,%1(13); stb %2,%1(13)"
> > > > > + : "=r" (flags)
> > > > > + : "i" (offsetof(struct paca_struct, soft_enabled)),\
> > > > > +   "r" (enable)
> > > > > + : "memory");
> > > > > +
> > > > > + return flags;
> > > > > +}  
> > > >
> > > > Why do you have the "memory" clobber here while soft_enabled_set() does 
> > > > not?  
> > >
> > > I wondered about the missing memory clobber earlier.
> > >
> > > Any 'clobber' ought to be restricted to the referenced memory area.
> > > If the structure is only referenced by r13 through 'asm volatile' it 
> > > isn't needed.  
> > 
> > Well a clobber (compiler barrier) at some point is needed in irq_disable and
> > irq_enable paths, so we correctly open and close the critical section vs 
> > interrupts.
> > I just wonder about these helpers. It might be better to take the clobbers 
> > out of
> > there and add barrier(); in callers, which would make it more obvious.  
> 
> If the memory clobber is needed to synchronise with the rest of the code
> rather than just ensuring the compiler doesn't reorder accesses via r13
> then I'd add an explicit barrier() somewhere - even if in these helpers.
> 
> Potentially the helper wants a memory clobber for the (r13) area
> and a separate barrier() to ensure the interrupts are masked for the
> right code.
> Even if both are together in the same helper.

Good point. Some of the existing modification helpers don't seem to have
clobbers for modifying the r13->soft_enabled memory itself, but they do
have the memory clobber where a critical section barrier is required.

The former may not be a problem if the helpers are used very carefully,
but probably should be commented at best, if not fixed. So after Madhi's
patches, we should make all accesses go via the helper functions, so a
clobber for the soft_enabled modification may not be required (this should
be commented). I think it may be cleaner to specify the location in the
constraints, but maybe that doesn't generate the best code -- something to
investigate.

Then, I'd like to see barrier()s for interrupt critical sections placed in
the callers of these helpers, which will make the code more obvious.
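
A sketch of that shape (illustrative only; it reuses the helper names from
the series, and the second function name is invented here):

static inline unsigned long arch_local_irq_disable(void)
{
	unsigned long flags;

	flags = soft_enabled_set_return(IRQ_DISABLE_MASK_LINUX);
	barrier();	/* open the soft-masked critical section in the caller */
	return flags;
}

static inline void soft_irq_unmask_sketch(unsigned long mask)
{
	barrier();	/* close the critical section before unmasking */
	soft_enabled_set(mask);
}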

Thanks,
Nick


Re: Linux 4.8: Reported regressions as of Sunday, 2016-09-18

2016-09-18 Thread Dave Chinner
On Sun, Sep 18, 2016 at 03:20:53PM +0200, Thorsten Leemhuis wrote:
> Hi! Here is my fourth regression report for Linux 4.8. It lists 14
> regressions I'm aware of. 5 of them are new; 1 mentioned in last 
> weeks report got fixed.
> 
> As always: Are you aware of any other regressions? Then please let me
> know (simply CC regressi...@leemhuis.info). And pls tell me if there
> is anything in the report that shouldn't be there.
> 
> Ciao, Thorsten
> 
> == Current regressions ==
> 
> Desc: genirq: Flags mismatch irq 8, 0088 (mmc0) vs. 0080 (rtc0). 
> mmc0: Failed to request irq 8: -16
> Repo: 2016-08-01 https://bugzilla.kernel.org/show_bug.cgi?id=150881
> Stat: 2016-09-09 https://bugzilla.kernel.org/show_bug.cgi?id=150881#c34
> Note: stalled; root cause somewhere in the main gpio merge for 4.8, but 
> problematic commit still unknown
> 
> Desc: [lkp] [xfs] 68a9f5e700: aim7.jobs-per-min -13.6% regression
> Repo: 2016-08-09 http://www.spinics.net/lists/kernel/msg2317052.html
> Stat: 2016-09-09 https://marc.info/?t=14734151953=1=2
> Note: looks like post-4.8 material at this point: Mel working on it in his 
> spare time, but "The progression of this series has been unsatisfactory."

Actually, what Mel was working on (mapping lock contention) was not
related to the reported XFS regression. The regression was an XFS
sub-page write issue introduced by the new iomap infrastructure,
and nobody has been able to reproduce it exactly
outside of the reaim benchmark. We've reproduced other, similar
issues, and the fixes for those are queued for the 4.9 window.

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com


Re: [PATCH V2 1/7] dt-bindings: Update QorIQ TMU thermal bindings

2016-09-18 Thread Zhang Rui
On Wed, 2016-09-14 at 11:40 -0500, Leo Li wrote:
> On Wed, Jun 8, 2016 at 2:52 PM, Rob Herring  wrote:
> > 
> > On Tue, Jun 07, 2016 at 11:27:34AM +0800, Jia Hongtao wrote:
> > > 
> > > For different types of SoC the sensor id and endianness may vary.
> > > "#thermal-sensor-cells" is used to provide sensor id information.
> > > "little-endian" property is to tell the endianness of TMU.
> > > 
> > > Signed-off-by: Jia Hongtao 
> > > ---
> > > Changes for V2:
> > > * Remove formatting chnages.
> > > 
> > >  Documentation/devicetree/bindings/thermal/qoriq-thermal.txt | 7
> > > +++
> > >  1 file changed, 7 insertions(+)
> > Acked-by: Rob Herring 
> Hi Zhang Rui,
> 
> Since you have applied the driver patch, can you also apply the
> binding patch?  The binding is supposed to go with the driver.
> 

Do you mean I should take both patch 1/7 and 7/7? I can not see the
other patches in this patch set.

thanks,
rui



Re: [PATCH v14 13/15] selftests/powerpc: Add ptrace tests for TM SPR registers

2016-09-18 Thread Simon Guo
On Wed, Sep 14, 2016 at 03:04:12PM +1000, Cyril Bur wrote:
> On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote:
> > From: Anshuman Khandual 
> > 
> > This patch adds ptrace interface test for TM SPR registers. This
> > also adds ptrace interface based helper functions related to TM
> > SPR registers access.
> > 
> 
> I'm seeing this one fail a lot, it does occasionally succeed but fails
> a lot on my test setup.
> 
> I use qemu on a power8 for most of my testing:
> qemu-system-ppc64 --enable-kvm -machine pseries,accel=kvm,usb=off -m
> 4096 -realtime mlock=off -smp 4,sockets=1,cores=2,threads=2 -nographic
> -vga none
Hi Cyril,

Sorry for the late response. I am just back from a vacation.
Strangely, the case always succeeds on my machine. I will try to
reproduce it with your configuration.

> > +   if ((regs->tm_texasr != TM_SCHED) && (regs->tm_texasr !=
> > TM_KVM_SCHED))
> > +   return TEST_FAIL;
> 
> The above condition fails, should this test try again if this condition
> is true, rather than fail?
If you have the value of "regs->tm_texasr" in hand, please let
me know.
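
For what it's worth, a drop-in tweak to the check quoted above would dump the
raw value before failing, so the abort cause can be decoded from the TEXASR
failure-cause bits (just a sketch against the test as quoted; the printf
formatting is illustrative):

if ((regs->tm_texasr != TM_SCHED) && (regs->tm_texasr != TM_KVM_SCHED)) {
	/* Report the unexpected failure code instead of failing silently. */
	printf("unexpected TEXASR: 0x%016llx\n",
	       (unsigned long long)regs->tm_texasr);
	return TEST_FAIL;
}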

Thanks for the detailed comments in the other mails; I will work on that.

BR,
- Simon


[PATCH] PCI: Add parameter @mmio_force_on to pci_update_resource()

2016-09-18 Thread Gavin Shan
In pci_update_resource(), the PCI device's memory decoding (bit 0x2 in
PCI_COMMAND) is disabled while a 64-bit memory BAR is updated, unless the
device asked for its memory space to be kept enabled via
@pdev->mmio_always_on. As a result, the PF's memory decoding might be
disabled while its IOV BARs are updated in the following path. It shouldn't
be disabled in this scenario, because the PF has already started providing
services:

   sriov_numvfs_store
   pdev->driver->sriov_configure
   mlx5_core_sriov_configure
   pci_enable_sriov
   sriov_enable
   pcibios_sriov_enable
   pnv_pci_sriov_enable
   pnv_pci_vf_resource_shift
   pci_update_resource

Avoid that by introducing an additional parameter (@mmio_force_on) to
pci_update_resource(), so the PF's memory decoding is left untouched in
this path.

Reported-by: Carol Soto 
Signed-off-by: Gavin Shan 
Tested-by: Carol Soto 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 2 +-
 drivers/pci/iov.c | 2 +-
 drivers/pci/pci.c | 2 +-
 drivers/pci/setup-res.c   | 9 +
 include/linux/pci.h   | 2 +-
 5 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index bc0c91e..2d6a2b7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -999,7 +999,7 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, 
int offset)
	dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
		 i, &res2, res, (offset > 0) ? "En" : "Dis",
 num_vfs, offset);
-   pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+   pci_update_resource(dev, i + PCI_IOV_RESOURCES, true);
}
return 0;
 }
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 2194b44..117aae6 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -511,7 +511,7 @@ static void sriov_restore_state(struct pci_dev *dev)
return;
 
for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
-   pci_update_resource(dev, i);
+   pci_update_resource(dev, i, false);
 
pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
pci_iov_set_numvfs(dev, iov->num_VFs);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index aab9d51..87a33c0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -545,7 +545,7 @@ static void pci_restore_bars(struct pci_dev *dev)
return;
 
for (i = 0; i < PCI_BRIDGE_RESOURCES; i++)
-   pci_update_resource(dev, i);
+   pci_update_resource(dev, i, false);
 }
 
 static const struct pci_platform_pm_ops *pci_platform_pm;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 66c4d8f..e8a50ff 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -26,7 +26,7 @@
 #include "pci.h"
 
 
-void pci_update_resource(struct pci_dev *dev, int resno)
+void pci_update_resource(struct pci_dev *dev, int resno, bool mmio_force_on)
 {
struct pci_bus_region region;
bool disable;
@@ -81,7 +81,8 @@ void pci_update_resource(struct pci_dev *dev, int resno)
 * disable decoding so that a half-updated BAR won't conflict
 * with another device.
 */
-   disable = (res->flags & IORESOURCE_MEM_64) && !dev->mmio_always_on;
+   disable = (res->flags & IORESOURCE_MEM_64) &&
+ !mmio_force_on && !dev->mmio_always_on;
if (disable) {
pci_read_config_word(dev, PCI_COMMAND, &cmd);
pci_write_config_word(dev, PCI_COMMAND,
@@ -310,7 +311,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
res->flags &= ~IORESOURCE_STARTALIGN;
dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res);
if (resno < PCI_BRIDGE_RESOURCES)
-   pci_update_resource(dev, resno);
+   pci_update_resource(dev, resno, false);
 
return 0;
 }
@@ -350,7 +351,7 @@ int pci_reassign_resource(struct pci_dev *dev, int resno, 
resource_size_t addsiz
dev_info(&dev->dev, "BAR %d: reassigned %pR (expanded by %#llx)\n",
 resno, res, (unsigned long long) addsize);
if (resno < PCI_BRIDGE_RESOURCES)
-   pci_update_resource(dev, resno);
+   pci_update_resource(dev, resno, false);
 
return 0;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0ab8359..99231d1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1039,7 +1039,7 @@ int pci_try_reset_bus(struct pci_bus *bus);
 void pci_reset_secondary_bus(struct pci_dev *dev);
 void pcibios_reset_secondary_bus(struct pci_dev *dev);
 void pci_reset_bridge_secondary_bus(struct pci_dev *dev);
-void pci_update_resource(struct pci_dev *dev, int resno);
+void pci_update_resource(struct pci_dev *dev, int resno, bool mmio_force_on);

Re: [PATHC v2 0/9] ima: carry the measurement list across kexec

2016-09-18 Thread Thiago Jung Bauermann
Am Samstag, 17 September 2016, 00:17:37 schrieb Eric W. Biederman:
> Thiago Jung Bauermann  writes:
> > Hello Eric,
> > 
> > On Friday, 16 September 2016, 14:47:13, Eric W. Biederman wrote:
> >> I can see tracking to see if the list has changed at some
> >> point and causing a reboot(LINUX_REBOOT_CMD_KEXEC) to fail.
> > 
> > Yes, that is an interesting feature that I can add using the checksum-
> > verifying part of my code. I can submit a patch for that if there's
> > interest, adding a reboot notifier that verifies the checksum and causes
> > a regular reboot instead of a kexec reboot if the checksum fails.
> 
> I was thinking an early failure instead of getting all of the way down
> into a kernel and discovering the tpm/ima subsystem would not be
> initialized.  But where that falls in the reboot pathway I don't expect
> there is much value in it.

I'm not sure I understand. What I described doesn't involve the tpm or ima. 
I'm suggesting that if I take the parts of patch 4/5 in the kexec hand-over 
buffer series that verify the image checksum, I can submit a separate patch 
that checks the integrity of the kexec image early in kernel_kexec() and 
reverts to a regular reboot if the check fails. This would be orthogonal to 
ima carrying its measurement list across kexec.
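
For illustration only, a rough sketch of that check (kexec_image_verify_checksum()
is a hypothetical name here for re-running the checksum walk from patch 4/5 over
the loaded segments; the real patch may well look different):

/* Hedged sketch, not code from the series. */
static int kexec_check_image_integrity(struct kimage *image)
{
	if (kexec_image_verify_checksum(image))
		return 0;

	pr_err("kexec: loaded image is corrupted, falling back to a regular reboot\n");
	return -EINVAL;	/* caller skips machine_kexec() and reboots normally */
}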

I think there is value in that, because if the kexec image is corrupted the
machine will just get stuck in the purgatory, without even an error message
explaining what is going on (unless it's a platform where the purgatory can
print to the console). Whereas if we notice the corruption before jumping
into the purgatory, we can switch to a regular reboot and the machine will
boot successfully.

To have an early failure, when would the checksum verification be done?
What I can think of is to have kexec_file_load accept a new flag 
KEXEC_FILE_VERIFY_IMAGE, which userspace could use to request an integrity 
check when it's about to start the reboot procedure. Then it can decide to 
either reload the kernel or use a regular reboot if the image is corrupted.

Is this what you had in mind?

> >> At least the common bootloader cases that I know of using kexec are
> >> very
> >> minimal distributions that live in a ramdisk and as such it should be
> >> very straight forward to measure what is needed at or before
> >> sys_kexec_load.  But that was completely dismissed as unrealistic so I
> >> don't have a clue what actual problem you are trying to solve.
> > 
> > We are interested in solving the problem in a general way because it
> > will be useful to us in the future for the case of an arbitrary number
> > of kexecs (and thus not only a bootloader but also multiple full-blown
> > distros may be involved in the chain).
> > 
> > But you are right that for the use case for which we currently need this
> > feature it's feasible to measure everything upfront. We can cross the
> > other bridge when we get there.
> 
> Then let's start there.  Passing the measurement list is something that
> should not be controversial.

Great!

> >> If there is anyway we can start small and not with this big scary
> >> infrastructure change I would very much prefer it.
> > 
> > Sounds good. If we pre-measure everything then the following patches
> > from my buffer hand-over series are enough:
> > 
> > [PATCH v5 2/5] kexec_file: Add buffer hand-over support for the next
> > kernel [PATCH v5 3/5] powerpc: kexec_file: Add buffer hand-over support
> > for the next kernel
> > 
> > Would you consider including those two?
> > 
> > And like I mentioned in the cover letter, patch 1/5 is an interesting
> > improvement that is worth considering.
> 
> So from 10,000 feet I think that is correct.
> 
> I am not quite certain why a new mechanism is being invented.  We have
> other information that is already passed (much of it architecture
> specific) like the flattened device tree.  If you remove the need to
> update the information can you just append this information to the
> flattened device tree without a new special mechanism to pass the data?
> 
> I am just reluctant to invent a new mechanism when there is an existing
> mechanism that looks like it should work without problems.

Michael Ellerman suggested putting the buffer contents inside the device 
tree itself, but the s390 people are also planning to implement this 
feature. That architecture doesn't use device trees, so a solution that 
depends on DTs won't help them.

With this mechanism each architecture will still need its own way of 
communicating to the next kernel where the buffer is, but I think it's 
easier to pass a base address and length than to pass a whole buffer.

I suppose we could piggyback the ima measurements buffer at the end of one
of the other segments, such as the kernel or, in the case of powerpc, the
dtb, but that looks hackish to me. I think it's cleaner to put it in its own
segment.
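
Purely as an illustration of how small that contract is (none of these names
come from the series), the hand-over really only needs a physical (base, size)
pair; how that pair travels (a device tree property on powerpc, a parameter
block on s390) stays architecture specific:

struct kexec_handover_buffer {
	phys_addr_t	base;	/* physical address of the preserved segment */
	size_t		size;	/* length of the segment in bytes */
};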

-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center



Linux 4.8: Reported regressions as of Sunday, 2016-09-18

2016-09-18 Thread Thorsten Leemhuis
Hi! Here is my fourth regression report for Linux 4.8. It lists 14
regressions I'm aware of. 5 of them are new; 1 mentioned in last
week's report got fixed.

As always: Are you aware of any other regressions? Then please let me
know (simply CC regressi...@leemhuis.info). And pls tell me if there
is anything in the report that shouldn't be there.

Ciao, Thorsten

== Current regressions ==

Desc: genirq: Flags mismatch irq 8, 0088 (mmc0) vs. 0080 (rtc0). mmc0: 
Failed to request irq 8: -16
Repo: 2016-08-01 https://bugzilla.kernel.org/show_bug.cgi?id=150881
Stat: 2016-09-09 https://bugzilla.kernel.org/show_bug.cgi?id=150881#c34
Note: stalled; root cause somewhere in the main gpio merge for 4.8, but 
problematic commit still unknown

Desc: [lkp] [xfs] 68a9f5e700: aim7.jobs-per-min -13.6% regression
Repo: 2016-08-09 http://www.spinics.net/lists/kernel/msg2317052.html
Stat: 2016-09-09 https://marc.info/?t=14734151953=1=2
Note: looks like post-4.8 material at this point: Mel working on it in his 
spare time, but "The progression of this series has been unsatisfactory."

Desc: scsi host6: runtime PM trying to activate child device host6 but parent 
(2-2:1.0) is not active
Repo: 2016-08-15 https://bugzilla.kernel.org/show_bug.cgi?id=153171
Stat: 2016-09-14 https://bugzilla.kernel.org/show_bug.cgi?id=153171#c5
Note: patch available; mkp "I would like the change to get a bit of soak time 
before we backport it."

Desc: DT/OCTEON driver probing broken
Repo: 2016-08-16 http://www.spinics.net/lists/devicetree/msg138990.html
Stat: 2016-08-30 http://www.spinics.net/lists/devicetree/msg140682.html
Note: stalled, poked Rob a week ago, got no reply

Desc: gpio-leds broken on OCTEON
Repo: 2016-08-23 http://www.spinics.net/lists/devicetree/msg139863.html
Stat: 2016-09-12 http://www.spinics.net/lists/devicetree/msg140179.html
Note: patch in Linux MIPS patchwork 
https://patchwork.linux-mips.org/patch/14091/

Desc: Skylake graphics regression: projector failure with 4.8-rc3
Repo: 2016-08-26 http://www.spinics.net/lists/intel-gfx/msg105478.html
Stat: 2016-09-01 https://lkml.org/lkml/2016/8/31/946
Note: stalled, poked lists

Desc: lk 4.8 + !CONFIG_SHMEM + shmat() = oops
Repo: 2016-08-30 http://www.spinics.net/lists/linux-mm/msg112920.html
Stat: 2016-09-07 http://www.spinics.net/lists/linux-mm/msg113177.html
Note: just like last week: patch "ipc/shm: fix crash if CONFIG_SHMEM is not 
set" is going to fix this, but has not hit mainline yet

Desc: regression in re-read operation by iozone ~10%
Repo: 2016-09-02 https://bugzilla.kernel.org/show_bug.cgi?id=155821
Stat: n/a 
Note: stalled; told reporter he might be better off posting about the issue to
some mailing list

Desc: brcmfmac is preventing suspend (Dell XPS 13 9350 / Skylake)
Repo: 2016-09-13 https://bugzilla.kernel.org/show_bug.cgi?id=156631
Stat: n/a 
Note: used to be https://bugzilla.kernel.org/show_bug.cgi?id=156361 in last 
weeks report

Desc: CPU speed set very low
Repo: 2016-09-09 https://lkml.org/lkml/2016/9/9/608
Stat: 2016-09-14 https://lkml.org/lkml/2016/9/14/588
Note: Larry: "Testing continues."

Desc: pinctrl: qcom: Clear all function selection bits introduced a regression 
by not properly masking the calculated value.
Repo: 2016-09-12 
http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1229055.html
Stat: n/a 
Note: Patch available in initial report

Desc: WARN in vb2_dma_sg_alloc when makes dvbstream fail with with AverMedia 
Hybrid+FM
Repo: 2016-09-13 https://bugzilla.kernel.org/show_bug.cgi?id=156751
Stat: n/a 
Note: told reporter he might be better off posting the issue to linux-media

Desc: it is now common that machine needs re-run of xrandr after resume
Repo: 2016-09-13 
http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1230782.html
Stat: 2016-09-15 
http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1232834.html 
http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1231104.html
Note: Seems Martin isn't seeing the problem anymore; Pavel's issue might be a 
different problem anyway; and he now has a userland-problem that complicates 
testing

Desc: usb: gadget: udc: atmel: fix endpoint name
Repo: 2016-09-15 
http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1232309.html 
http://www.mail-archive.com/linux-usb@vger.kernel.org/msg80645.html 
Stat: n/a 
Note: Patch available: "Felipe, Greg, It is clearly a regression and material
for 4.8-fixes."

== Fixed since last report ==

Desc: ath9k: bring back direction setting in ath9k_{start_stop}
Repo: 2016-09-01 https://marc.info/?l=linux-wireless=147292415030585=2
Fix:  https://git.kernel.org/torvalds/c/e34f2ff40e0339f6a379e1ecf49e8f2759056453


[PATCH] Removed dependency on IDE_GD_ATA if ADB_PMU_LED_DISK is selected.

2016-09-18 Thread Elimar Riesebieter
We can use the front LED of PowerBooks/iBooks as a disk activity indicator
without the deprecated IDE_GD_ATA.

Signed-off-by: Elimar Riesebieter 
---
 drivers/macintosh/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index d28690f..5d80810 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -102,7 +102,6 @@ config ADB_PMU_LED_DISK
bool "Use front LED as DISK LED by default"
depends on ADB_PMU_LED
depends on LEDS_CLASS
-   depends on IDE_GD_ATA
select LEDS_TRIGGERS
select LEDS_TRIGGER_DISK
help
-- 
2.9.3


-- 
  "Talking much about oneself can also
   be a means to conceal oneself."
 -Friedrich Nietzsche