[RFC PATCH] powerpc/mm: Use big endian page table for book3s 64

2016-02-25 Thread Aneesh Kumar K.V
This enables us to share the same page table code for
both radix and hash. Radix use a hardware defined big endian
page table

Asm -> C conversion makes it simpler to build code for both little
and big endian page table.

Signed-off-by: Aneesh Kumar K.V 
---
Note:
Any suggestion on how we can do that pte update better so that we can build
a LE and BE page table kernel will be helpful.

 arch/powerpc/include/asm/book3s/64/hash.h   |  75 
 arch/powerpc/include/asm/kvm_book3s_64.h|  12 ++--
 arch/powerpc/include/asm/page.h |   4 ++
 arch/powerpc/include/asm/pgtable-be-types.h | 104 
 arch/powerpc/mm/hash64_4k.c |   6 +-
 arch/powerpc/mm/hash64_64k.c|  11 +--
 arch/powerpc/mm/hugepage-hash64.c   |   5 +-
 arch/powerpc/mm/hugetlbpage-hash64.c|   5 +-
 arch/powerpc/mm/pgtable-hash64.c|  42 +--
 9 files changed, 197 insertions(+), 67 deletions(-)
 create mode 100644 arch/powerpc/include/asm/pgtable-be-types.h

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 9b451cb8294a..9153bda5f395 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -1,6 +1,9 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_HASH_H
 #define _ASM_POWERPC_BOOK3S_64_HASH_H
 #ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#include 
+#endif
 
 /*
  * Common bits between 4K and 64K pages in a linux-style PTE.
@@ -249,27 +252,35 @@ static inline unsigned long pte_update(struct mm_struct 
*mm,
   unsigned long set,
   int huge)
 {
-   unsigned long old, tmp;
-
-   __asm__ __volatile__(
-   "1: ldarx   %0,0,%3 # pte_update\n\
-   andi.   %1,%0,%6\n\
-   bne-1b \n\
-   andc%1,%0,%4 \n\
-   or  %1,%1,%7\n\
-   stdcx.  %1,0,%3 \n\
-   bne-1b"
-   : "=" (old), "=" (tmp), "=m" (*ptep)
-   : "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
-   : "cc" );
+   pte_t pte;
+   unsigned long old_pte, new_pte;
+
+   do {
+reload:
+   pte = READ_ONCE(*ptep);
+   old_pte = pte_val(pte);
+
+   /* If PTE busy, retry */
+   if (unlikely(old_pte & _PAGE_BUSY))
+   goto reload;
+   /*
+* PTE is not busy: compute the new PTE value with the
+* requested bits set and cleared.
+*/
+   new_pte = (old_pte | set) & ~clr;
+
+   } while (cpu_to_be64(old_pte) != __cmpxchg_u64((unsigned long *)ptep,
+  cpu_to_be64(old_pte),
+  cpu_to_be64(new_pte)));
/* huge pages use the old page table lock */
if (!huge)
assert_pte_locked(mm, addr);
 
-   if (old & _PAGE_HASHPTE)
-   hpte_need_flush(mm, addr, ptep, old, huge);
+   if (old_pte & _PAGE_HASHPTE)
+   hpte_need_flush(mm, addr, ptep, old_pte, huge);
 
-   return old;
+   return old_pte;
 }
 
 /*
@@ -317,22 +328,30 @@ static inline void huge_ptep_set_wrprotect(struct 
mm_struct *mm,
  */
 static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 {
+   pte_t pte;
+   unsigned long old_pte, new_pte;
unsigned long bits = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC |
 _PAGE_SOFT_DIRTY);
 
-   unsigned long old, tmp;
-
-   __asm__ __volatile__(
-   "1: ldarx   %0,0,%4\n\
-   andi.   %1,%0,%6\n\
-   bne-1b \n\
-   or  %0,%3,%0\n\
-   stdcx.  %0,0,%4\n\
-   bne-1b"
-   :"=" (old), "=" (tmp), "=m" (*ptep)
-   :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
-   :"cc");
+   do {
+reload:
+   pte = READ_ONCE(*ptep);
+   old_pte = pte_val(pte);
+
+   /* If PTE busy, retry */
+   if (unlikely(old_pte & _PAGE_BUSY))
+   goto reload;
+   /*
+* PTE is not busy: set the access/dirty bits requested
+* by the caller.
+*/
+   new_pte = old_pte | bits;
+
+   } while (cpu_to_be64(old_pte) != __cmpxchg_u64((unsigned long *)ptep,
+  cpu_to_be64(old_pte),
+  cpu_to_be64(new_pte)));
 }
 
 static inline int pgd_bad(pgd_t pgd)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 

[RFC PATCH 2/2] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIV

2016-02-25 Thread Aneesh Kumar K.V
_PAGE_PRIV means the page can be accessed only by kernel. This is done
to keep pte bits similar to PowerISA 3.0 radix PTE format. User
pages are now marked by clearing _PAGE_PRIV bit.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/hash.h | 23 ++-
 arch/powerpc/mm/hash64_4k.c   |  5 +
 arch/powerpc/mm/hash64_64k.c  | 10 ++
 arch/powerpc/mm/hash_utils_64.c   | 15 +++
 arch/powerpc/mm/hugepage-hash64.c |  5 +
 arch/powerpc/mm/hugetlbpage-hash64.c  |  5 +
 arch/powerpc/mm/hugetlbpage.c |  2 +-
 arch/powerpc/mm/pgtable-hash64.c  | 12 
 arch/powerpc/mm/pgtable_32.c  |  2 +-
 arch/powerpc/mm/pgtable_64.c  |  2 +-
 arch/powerpc/perf/callchain.c |  2 +-
 11 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 244f2c322c43..2a38883c9187 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -22,7 +22,7 @@
 #define _PAGE_RW   0x2 /* read & write access allowed */
 #define _PAGE_READ 0x4 /* read access allowed */
 #define _PAGE_RWX  (_PAGE_READ | _PAGE_RW | _PAGE_EXEC)
-#define _PAGE_USER 0x8 /* page may be accessed by userspace */
+#define _PAGE_PRIV 0x8 /* page can only be accessed by kernel */
 #define _PAGE_GUARDED  0x00010 /* G: guarded (side-effect) page */
 /* M (memory coherence) is always set in the HPTE, so we don't need it here */
 #define _PAGE_COHERENT 0x0
@@ -117,9 +117,9 @@
 #endif /* CONFIG_PPC_MM_SLICES */
 
 /* No separate kernel read-only */
-#define _PAGE_KERNEL_RW(_PAGE_RW | _PAGE_DIRTY) /* user access 
blocked by key */
+#define _PAGE_KERNEL_RW(_PAGE_PRIV | _PAGE_RW | _PAGE_DIRTY) 
/* user access blocked by key */
 #define _PAGE_KERNEL_RO _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RWX   (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX   (_PAGE_PRIV | _PAGE_DIRTY | _PAGE_RW | 
_PAGE_EXEC)
 
 /* Strong Access Ordering */
 #define _PAGE_SAO  (_PAGE_WRITETHRU | _PAGE_NO_CACHE | 
_PAGE_COHERENT)
@@ -151,7 +151,7 @@
  */
 #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
-_PAGE_USER | _PAGE_ACCESSED |  \
+_PAGE_PRIV| _PAGE_ACCESSED |  \
 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC | \
 _PAGE_SOFT_DIRTY)
 /*
@@ -174,15 +174,12 @@
  * Note due to the way vm flags are laid out, the bits are XWR
  */
 #define PAGE_NONE  __pgprot(_PAGE_BASE)
-#define PAGE_SHARED__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
-#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
-_PAGE_EXEC)
-#define PAGE_COPY  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ )
-#define PAGE_COPY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
-_PAGE_EXEC)
-#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
-#define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
-_PAGE_EXEC)
+#define PAGE_SHARED__pgprot(_PAGE_BASE | _PAGE_RW)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY  __pgprot(_PAGE_BASE | _PAGE_READ )
+#define PAGE_COPY_X__pgprot(_PAGE_BASE | _PAGE_READ| _PAGE_EXEC)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_READ| _PAGE_EXEC)
 
 #define __P000 PAGE_NONE
 #define __P001 PAGE_READONLY
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 1a862eb6fef1..8c83be3f67ef 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -40,6 +40,11 @@ int __hash_page_4K(unsigned long ea, unsigned long access, 
unsigned long vsid,
if (unlikely(access & ~old_pte))
return 1;
/*
+* access from user, but pte in _PAGE_PRIV
+*/
+   if (unlikely((access & _PAGE_PRIV) != (old_pte & _PAGE_PRIV)))
+   return 1;
+   /*
 * Try to lock the PTE, add ACCESSED and DIRTY if it was
 * a write access. Since this is 4K insert of 64K page size
 * also add _PAGE_COMBO
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 976eb5f6e492..3465b5d44223 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -72,6 +72,11 @@ int __hash_page_4K(unsigned long ea, unsigned long access, 
unsigned long vsid,
   

[RFC PATCH 1/2] powerpc/mm: Update prot_none implementation using _PAGE_READ

2016-02-25 Thread Aneesh Kumar K.V
Now that we have _PAGE_READ use that to implement prot none. With this
prot_none is _PAGE_PRESENT with none of the access bits set. While
hashing we map that to PP bit 00.

With this implementation, we will now take a prot fault for prot none
ptes, whereas before, we never inserted such a pte to hash. Hence we
always got nohpte fault before.

This is in preparation to remove _PAGE_USER from book3s 64

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/hash.h | 18 --
 arch/powerpc/mm/hash_utils_64.c   | 15 +++
 arch/powerpc/mm/hugetlbpage.c |  2 +-
 arch/powerpc/mm/pgtable-hash64.c  |  6 ++
 4 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 9153bda5f395..244f2c322c43 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -21,6 +21,7 @@
 #define _PAGE_EXEC 0x1 /* execute permission */
 #define _PAGE_RW   0x2 /* read & write access allowed */
 #define _PAGE_READ 0x4 /* read access allowed */
+#define _PAGE_RWX  (_PAGE_READ | _PAGE_RW | _PAGE_EXEC)
 #define _PAGE_USER 0x8 /* page may be accessed by userspace */
 #define _PAGE_GUARDED  0x00010 /* G: guarded (side-effect) page */
 /* M (memory coherence) is always set in the HPTE, so we don't need it here */
@@ -176,10 +177,12 @@
 #define PAGE_SHARED__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
 #define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
 _PAGE_EXEC)
-#define PAGE_COPY  __pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_COPY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
-#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_COPY  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ )
+#define PAGE_COPY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
+_PAGE_EXEC)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
+#define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
+_PAGE_EXEC)
 
 #define __P000 PAGE_NONE
 #define __P001 PAGE_READONLY
@@ -392,15 +395,10 @@ static inline pte_t pte_clear_soft_dirty(pte_t pte)
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 #ifdef CONFIG_NUMA_BALANCING
-/*
- * These work without NUMA balancing but the kernel does not care. See the
- * comment in include/asm-generic/pgtable.h . On powerpc, this will only
- * work for user pages and always return true for kernel pages.
- */
 static inline int pte_protnone(pte_t pte)
 {
return (pte_val(pte) &
-   (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+   (_PAGE_PRESENT | _PAGE_RWX)) == _PAGE_PRESENT;
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 96b52bd3da8f..79b81cd0d254 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -173,9 +173,11 @@ unsigned long htab_convert_pte_flags(unsigned long 
pteflags)
 * and there is no kernel RO (_PAGE_KERNEL_RO).
 * User area is mapped with PP=0x2 for read/write
 * or PP=0x3 for read-only (including writeable but clean pages).
+* We also map user prot none with PP=0x0.
 */
if (pteflags & _PAGE_USER) {
-   rflags |= 0x2;
+   if ((pteflags & _PAGE_READ) || (pteflags & _PAGE_RW))
+   rflags |= 0x2;
if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY)))
rflags |= 0x1;
}
@@ -933,7 +935,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long 
addr)
  * Userspace sets the subpage permissions using the subpage_prot system call.
  *
  * Result is 0: full permissions, _PAGE_RW: read-only,
- * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
+ * _PAGE_RWX: no access.
  */
 static int subpage_protection(struct mm_struct *mm, unsigned long ea)
 {
@@ -959,8 +961,13 @@ static int subpage_protection(struct mm_struct *mm, 
unsigned long ea)
/* extract 2-bit bitfield for this 4k subpage */
spp >>= 30 - 2 * ((ea >> 12) & 0xf);
 
-   /* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
-   spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
+   /*
+* 0 -> full permission
+* 1 -> Read only
+* 2 -> no access.
+* We return the flag that need to be cleared.
+*/
+   spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_RW : 0);
return spp;
 }
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 196e69a3c472..17ca4827dd87 100644
--- 

Re: [PATCH V4 08/18] powerpc/mm: Copy pgalloc (part 1)

2016-02-25 Thread Aneesh Kumar K.V
Paul Mackerras  writes:

> On Tue, Feb 23, 2016 at 10:18:10AM +0530, Aneesh Kumar K.V wrote:
>> This patch make a copy of pgalloc routines for book3s. The idea is to
>> enable a hash64 copy of these pgalloc routines which can be later
>> updated to have a radix conditional. Radix introduce a new page table
>> format with different page table size.
>> 
>> This mostly does:
>> 
>> cp pgalloc-32.h book3s/32/pgalloc.h
>> cp pgalloc-64.h book3s/64/pgalloc.h
>
> What is the motivation for copying over the 32-bit header?

That was moved, because I was looking to consolidate all book3s headers
under book3s/ and wanted to move pgalloc-64.h to book3s.

>
> Regarding the 64-bit header, I don't see anything in it that will need
> to be changed for radix other than making P{G,U,M}D_INDEX_SIZE be
> variables rather than constants (and possibly have a lowercase name
> instead of uppercase).  What other changes are you expecting to make?
>

I was taking the approach of conditional call rather than variables. Hence
moved them to 64/pgalloc.h so that we can add a static inline for those.

-aneesh

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V4 07/18] powerpc/mm: Update masked bits for linux page table

2016-02-25 Thread Aneesh Kumar K.V
Paul Mackerras  writes:

> On Tue, Feb 23, 2016 at 10:18:09AM +0530, Aneesh Kumar K.V wrote:
>> We now use physical address in upper page table tree levels. Even though
>> they are aligned to their size, for the masked bits we use the
>> overloaded bit positions as per PowerISA 3.0. We keep the bad bits check
>> as it is, and will use conditional there when adding radix. Bad bits
>> check also check for reserved bits and we oveload some of the reserved
>> fields of radix in hash config.
>> 
>> Signed-off-by: Aneesh Kumar K.V 
>> ---
>>  arch/powerpc/include/asm/book3s/64/hash-64k.h | 15 ++-
>>  1 file changed, 6 insertions(+), 9 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
>> b/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> index f0f5f91d7909..60c2c912c3a7 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
>> @@ -60,15 +60,12 @@
>>  #define PTE_FRAG_SIZE_SHIFT  12
>>  #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
>>  
>> -/*
>> - * Bits to mask out from a PMD to get to the PTE page
>> - * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
>> - */
>> -#define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1)
>> -/* Bits to mask out from a PGD/PUD to get to the PMD page */
>> -#define PUD_MASKED_BITS 0x1ff
>> -/* FIXME!! Will be fixed in next patch */
>> -#define PGD_MASKED_BITS 0
>> +/* Bits to mask out from a PMD to get to the PTE page */
>> +#define PMD_MASKED_BITS 0xc0ffUL
>> +/* Bits to mask out from a PUD to get to the PMD page */
>> +#define PUD_MASKED_BITS 0xc0ffUL
>> +/* Bits to mask out from a PGD to get to the PUD page */
>> +#define PGD_MASKED_BITS 0xc0ffUL
>
> Why not fold this into the previous patch?  (and include this patch's
> commentary in the previous patch's commentary, of course)
>

Ok will do that. I was trying to make sure the change is called out
separately.

-aneesh

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V4 06/18] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table

2016-02-25 Thread Aneesh Kumar K.V
Paul Mackerras  writes:

> On Tue, Feb 23, 2016 at 10:18:08AM +0530, Aneesh Kumar K.V wrote:
>> This is needed so that we can support both hash and radix page table
>> using single kernel. Radix kernel uses a 4 level table.
>> 
>> Signed-off-by: Aneesh Kumar K.V 
>> ---
>>  arch/powerpc/Kconfig  |  1 +
>>  arch/powerpc/include/asm/book3s/64/hash-4k.h  | 33 
>> +--
>>  arch/powerpc/include/asm/book3s/64/hash-64k.h | 20 +---
>>  arch/powerpc/include/asm/book3s/64/hash.h | 11 +
>>  arch/powerpc/include/asm/book3s/64/pgtable.h  | 25 +++-
>>  arch/powerpc/include/asm/pgalloc-64.h | 28 ---
>>  arch/powerpc/include/asm/pgtable-types.h  | 13 +++
>>  arch/powerpc/mm/init_64.c | 21 -
>>  8 files changed, 97 insertions(+), 55 deletions(-)
>> 
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index 9faa18c4f3f7..599329332613 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -303,6 +303,7 @@ config ZONE_DMA32
>>  config PGTABLE_LEVELS
>>  int
>>  default 2 if !PPC64
>> +default 4 if PPC_BOOK3S_64
>>  default 3 if PPC_64K_PAGES
>>  default 4
>
> Why not just "default 4"?  Why do we still need the if PPC_BOOK3S_64
> line at all?

You are suggesting remove that PPC_64K_PAGES line right ? I was not sure
about other platforms that use 64K pages.


>
> [...]
>
>> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
>> b/arch/powerpc/include/asm/book3s/64/hash.h
>> index ef9bd68f7e6d..d0ee6fcef823 100644
>> --- a/arch/powerpc/include/asm/book3s/64/hash.h
>> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
>> @@ -235,6 +235,7 @@
>>  #define __pgtable_ptr_val(ptr)  __pa(ptr)
>>  
>>  #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 
>> 1))
>> +#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
>>  #define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
>>  #define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 
>> 1))
>>  
>> @@ -363,8 +364,18 @@ static inline void __ptep_set_access_flags(pte_t *ptep, 
>> pte_t entry)
>>  :"cc");
>>  }
>>  
>> +static inline int pgd_bad(pgd_t pgd)
>> +{
>> +return (pgd_val(pgd) == 0);
>> +}
>> +
>>  #define __HAVE_ARCH_PTE_SAME
>>  #define pte_same(A,B)   (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) 
>> == 0)
>> +static inline unsigned long pgd_page_vaddr(pgd_t pgd)
>> +{
>> +return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS);
>> +}
>> +
>>  
>>  /* Generic accessors to PTE bits */
>>  static inline int pte_write(pte_t pte)  { return 
>> !!(pte_val(pte) & _PAGE_RW);}
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
>> b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 7482f69117b6..77d3ce05798e 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -106,6 +106,26 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long 
>> val)
>>  *pgdp = __pgd(val);
>>  }
>>  
>> +static inline void pgd_clear(pgd_t *pgdp)
>> +{
>> +*pgdp = __pgd(0);
>> +}
>> +
>> +#define pgd_none(pgd)   (!pgd_val(pgd))
>> +#define pgd_present(pgd)(!pgd_none(pgd))
>> +
>> +static inline pte_t pgd_pte(pgd_t pgd)
>> +{
>> +return __pte(pgd_val(pgd));
>> +}
>> +
>> +static inline pgd_t pte_pgd(pte_t pte)
>> +{
>> +return __pgd(pte_val(pte));
>> +}
>> +
>> +extern struct page *pgd_page(pgd_t pgd);
>
> Why did you put pgd_bad() and pgd_page_vaddr() in hash.h, but
> pgd_clear(), pgd_none, pgd_present etc. in pgtable.h?  Why split them
> between two headers rather than putting them all in the same header?
>

Any page table operation that involved PTE bit position I am moving to
hash.h with the expectation that we will have to put a conditional call
in there. Functions like pgd_none and pgd_clear don't use bit
positions.

-aneesh

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/2] powerpc/mm: Remove duplicated check in do_page_fault()

2016-02-25 Thread Gavin Shan
When the page fault happens in user space, we need to check whether
it's caused by a stack frame pointer update instruction and update
the local variable @flags with FAULT_FLAG_USER. Currently, the code
has two separate checks for the same condition. That's unnecessary.

This removes one of the duplicated checks. No functional changes
introduced.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/mm/fault.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a67c6d7..935f386 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -294,11 +294,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, 
unsigned long address,
 * can result in fault, which will cause a deadlock when called with
 * mmap_sem held
 */
-   if (user_mode(regs))
-   store_update_sp = store_updates_sp(regs);
-
-   if (user_mode(regs))
+   if (user_mode(regs)) {
flags |= FAULT_FLAG_USER;
+   store_update_sp = store_updates_sp(regs);
+   }
 
/* When running in the kernel we expect faults to occur only to
 * addresses in user space.  All other faults represent errors in the
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/2] powerpc/mm: Improve readability of update_mmu_cache()

2016-02-25 Thread Gavin Shan
The function is used to update the MMU with software PTE. It can
be called by data access exception handler (0x300) or instruction
access exception handler (0x400). If the function is called by
the 0x400 handler, the local variable @access is set to _PAGE_EXEC
to indicate the software PTE should have that flag set. When the
function is called by 0x300 handler, @access is set to zero.

This improves the readability of the function by replacing if
statements with switch. No logical changes introduced.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/mm/mem.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d0f0a51..58b9b31 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -492,7 +492,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned 
long address,
 * We don't need to worry about _PAGE_PRESENT here because we are
 * called with either mm->page_table_lock held or ptl lock held
 */
-   unsigned long access = 0, trap;
+   unsigned long access, trap;
 
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(*ptep) || address >= TASK_SIZE)
@@ -505,13 +505,18 @@ void update_mmu_cache(struct vm_area_struct *vma, 
unsigned long address,
 *
 * We also avoid filling the hash if not coming from a fault
 */
-   if (current->thread.regs == NULL)
-   return;
-   trap = TRAP(current->thread.regs);
-   if (trap == 0x400)
-   access |= _PAGE_EXEC;
-   else if (trap != 0x300)
+   trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
+   switch (trap) {
+   case 0x300:
+   access = 0UL;
+   break;
+   case 0x400:
+   access = _PAGE_EXEC;
+   break;
+   default:
return;
+   }
+
hash_preload(vma->vm_mm, address, access, trap);
 #endif /* CONFIG_PPC_STD_MMU */
 #if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc/eeh: Remove duplicated check in eeh_dump_pe_log()

2016-02-25 Thread Gavin Shan
On Tue, Feb 16, 2016 at 10:58:20AM +1100, Andrew Donnellan wrote:
>On 16/02/16 10:30, Gavin Shan wrote:
>>Thanks for review. Do you want to see revised patch to include your
>>comments?
>
>Not particularly - the comments were just detailing what I went through as I
>reviewed it. Feel free to include it if you feel it makes the description
>clearer, but I don't really care.
>

I think it's always worthy to have better commit log, v2 will be sent shortly
to have improved commit log, thanks for review.

Thanks,
Gavin

>-- 
>Andrew Donnellan  Software Engineer, OzLabs
>andrew.donnel...@au1.ibm.com  Australia Development Lab, Canberra
>+61 2 6201 8874 (work)IBM Australia Limited

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2] powerpc/eeh: Remove duplicated check in eeh_dump_pe_log()

2016-02-25 Thread Gavin Shan
Since eeh_dump_pe_log() is only called by eeh_slot_error_detail(),
which already checks that the PE isn't in the PCI config blocked
state, we don't need the duplicated check in eeh_dump_pe_log().

This removes the duplicated check in eeh_dump_pe_log(). No logical
changes introduced.

Signed-off-by: Gavin Shan 
Reviewed-by: Andrew Donnellan 
---
v2: Improved commit log
---
 arch/powerpc/kernel/eeh.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 8c6005c..46b41be 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -268,13 +268,6 @@ static void *eeh_dump_pe_log(void *data, void *flag)
struct eeh_dev *edev, *tmp;
size_t *plen = flag;
 
-   /* If the PE's config space is blocked, 0xFF's will be
-* returned. It's pointless to collect the log in this
-* case.
-*/
-   if (pe->state & EEH_PE_CFG_BLOCKED)
-   return NULL;
-
eeh_pe_for_each_dev(pe, edev, tmp)
*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
  EEH_PCI_REGS_LOG_LEN - *plen);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/3] powerpc/eeh: Synchronize recovery in host/guest

2016-02-25 Thread Gavin Shan
When passing through SRIOV VFs to guest, we possibly encounter EEH
error on PF. In this case, the VF PEs are put into frozen state.
The error could be reported to guest before it's captured by the
host. That means the guest could attempt to recover errors on VFs
before host gets chance to recover errors on PFs. The VFs won't be
recovered successfully.

This enforces the recovery order for above case: the recovery on
child PE in guest is hold until the recovery on parent PE in host
is completed.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kernel/eeh.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index fd9c782..42bd546 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1541,6 +1541,17 @@ int eeh_pe_get_state(struct eeh_pe *pe)
if (!eeh_ops || !eeh_ops->get_state)
return -ENOENT;
 
+   /*
+* If the parent PE, which is owned by host kernel, is experiencing
+* error recovery. We should return temporarily unavailable PE state
+* so that the recovery on guest side is suspended until the error
+* recovery is completed on host side.
+*/
+   if (pe->parent &&
+   !(pe->state & EEH_PE_REMOVED) &&
+   (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+   return EEH_PE_STATE_UNAVAIL;
+
result = eeh_ops->get_state(pe, NULL);
rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
dma_en = !!(result & EEH_STATE_DMA_ENABLED);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/3] powerpc/eeh: Don't propagate error to guest

2016-02-25 Thread Gavin Shan
When EEH error happened to the parent PE of those PEs that have
been passed through to guest, the error is propagated to guest
domain and the VFIO driver's error handlers are called. It's not
correct as the error in the host domain shouldn't be propagated
to guests and affect them.

This adds one more limitation when calling EEH error handlers.
If the PE has been passed through to guest, the error handlers
won't be called.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kernel/eeh_driver.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index c0fe7a6..6c59de8 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -195,7 +195,7 @@ static void *eeh_report_error(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
 
-   if (!dev || eeh_dev_removed(edev))
+   if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_frozen;
 
@@ -237,7 +237,7 @@ static void *eeh_report_mmio_enabled(void *data, void 
*userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
 
-   if (!dev || eeh_dev_removed(edev))
+   if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
 
driver = eeh_pcid_get(dev);
@@ -277,7 +277,7 @@ static void *eeh_report_reset(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
 
-   if (!dev || eeh_dev_removed(edev))
+   if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_normal;
 
@@ -336,7 +336,7 @@ static void *eeh_report_resume(void *data, void *userdata)
bool was_in_error;
struct pci_driver *driver;
 
-   if (!dev || eeh_dev_removed(edev))
+   if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_normal;
 
@@ -375,7 +375,7 @@ static void *eeh_report_failure(void *data, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver;
 
-   if (!dev || eeh_dev_removed(edev))
+   if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_perm_failure;
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 0/3] powerpc/eeh: Enhancement to EEH for VF

2016-02-25 Thread Gavin Shan
Those patches are based on the series of patches supporting EEH for VF,
which is pending for merging: https://patchwork.ozlabs.org/patch/581315/

This series of patches fixes couple of issue that resides in previous
patchset:

   * The error handlers provided by vfio-pci driver shouldn't be called.
 Otherwise, the guest is simply killed.
   * When we have partially hoplug in error recovery, we shouldn't remove
 those passed-through devices. Otherwise, the guest will be brought
 to undefined situation.
   * When we have errors detected on PF PE, hold VF PE that has been passed
 through to guest until the recovery on PF PE is done

Gavin Shan (3):
  powerpc/eeh: Don't propagate error to guest
  powerpc/eeh: Don't remove passed VFs
  powerpc/eeh: Synchronize recovery in host/guest

 arch/powerpc/kernel/eeh.c| 11 +++
 arch/powerpc/kernel/eeh_driver.c | 13 -
 2 files changed, 19 insertions(+), 5 deletions(-)

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] powerpc/eeh: Don't remove passed VFs

2016-02-25 Thread Gavin Shan
When we have partial hotplug as part of the error recovery on PF,
the VFs that are bound with vfio-pci driver will experience hotplug.
That's not allowed.

This checks whether the VF PE has been passed through to a guest.
If it has, we leave the VF in place without removing it.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kernel/eeh_driver.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 6c59de8..fb6207d 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -455,6 +455,9 @@ static void *eeh_rmv_device(void *data, void *userdata)
if (driver) {
eeh_pcid_put(dev);
if (removed &&
+   eeh_pe_passed(edev->pe))
+   return NULL;
+   if (removed &&
driver->err_handler &&
driver->err_handler->error_detected &&
driver->err_handler->slot_reset)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 0/2] clk: imx6: add kpp clock for i.MX6UL

2016-02-25 Thread Stephen Boyd
On 01/12, Lothar Waßmann wrote:
> This patchset adds the clock which is necessary to operate the KPP
> unit on i.MX6UL.
> The first patch removes bogus whitespace before TABs in indentation.
> The second patch adds the clock definition.
> 

Both look fine. Shawn?

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC][PATCH] Enable livepatching for powerpc

2016-02-25 Thread Kamalesh Babulal
* Balbir Singh  [2016-02-25 23:11:45]:

> This applies on top of the patches posted by Michael today
> Enable livepatching. This takes patch 6/8 and 7/8 of v8 as the base.
> Removes the extra strict check in gcc-profile-kernel-notrace.sh
> and adds logic for checking offsets in livepatch. The patch
> for HAVE_C_RECORDMCOUNT is not required and not used here.
> 
> Depending on whether or not a TOC is generated, the offset
> for _mcount can be +16 or +8. The changes are such that the
> offset checks are specific to powerpc.
> 
> Comments? Testing? I tested the sample in the livepatch
> directory
> 
> References
> 
> 1. https://patchwork.ozlabs.org/patch/581521/
> 2. https://patchwork.ozlabs.org/patch/587464/
> 
> Signed-off-by: Torsten Duwe 
> Signed-off-by: Balbir Singh 

I was able to test livepatch-sample module with this patch + Michael patch set.

Tested-by: Kamalesh Babulal 


Regards,
Kamalesh.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC][PATCH] Enable livepatching for powerpc

2016-02-25 Thread Balbir Singh
This applies on top of the patches posted by Michael today
Enable livepatching. This takes patch 6/8 and 7/8 of v8 as the base.
Removes the extra strict check in gcc-profile-kernel-notrace.sh
and adds logic for checking offsets in livepatch. The patch
for HAVE_C_RECORDMCOUNT is not required and not used here.

Depending on whether or not a TOC is generated, the offset
for _mcount can be +16 or +8. The changes are such that the
offset checks are specific to powerpc.

Comments? Testing? I tested the sample in the livepatch
directory

References

1. https://patchwork.ozlabs.org/patch/581521/
2. https://patchwork.ozlabs.org/patch/587464/

Signed-off-by: Torsten Duwe 
Signed-off-by: Balbir Singh 
---
 arch/powerpc/Kconfig|  3 ++
 arch/powerpc/gcc-mprofile-kernel-notrace.sh |  7 
 arch/powerpc/include/asm/livepatch.h| 61 +
 arch/powerpc/kernel/Makefile|  1 +
 arch/powerpc/kernel/entry_64.S  | 46 ++
 arch/powerpc/kernel/livepatch.c | 38 ++
 include/linux/livepatch.h   | 13 ++
 kernel/livepatch/core.c |  4 +-
 8 files changed, 164 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9f72565..72e46b0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -160,6 +160,7 @@ config PPC
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+   select HAVE_LIVEPATCH if PPC64 && CPU_LITTLE_ENDIAN
 
 config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
@@ -1093,3 +1094,5 @@ config PPC_LIB_RHEAP
bool
 
 source "arch/powerpc/kvm/Kconfig"
+
+source "kernel/livepatch/Kconfig"
diff --git a/arch/powerpc/gcc-mprofile-kernel-notrace.sh 
b/arch/powerpc/gcc-mprofile-kernel-notrace.sh
index 68d6482..6dafff6 100755
--- a/arch/powerpc/gcc-mprofile-kernel-notrace.sh
+++ b/arch/powerpc/gcc-mprofile-kernel-notrace.sh
@@ -12,12 +12,6 @@ echo "int func() { return 0; }" | \
 
 trace_result=$?
 
-echo "int func() { return 0; }" | \
-$* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
-sed -n -e '/func:/,/bl _mcount/p' | grep -q TOC
-
-leaf_toc_result=$?
-
 /bin/echo -e "#include \nnotrace int func() { return 0; }" | 
\
 $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
 grep -q "mcount"
@@ -25,7 +19,6 @@ leaf_toc_result=$?
 notrace_result=$?
 
 if [ "$trace_result" -eq "0" -a \
-   "$leaf_toc_result" -eq "0" -a \
"$notrace_result" -eq "1" ]; then
echo y
 else
diff --git a/arch/powerpc/include/asm/livepatch.h 
b/arch/powerpc/include/asm/livepatch.h
new file mode 100644
index 000..6abb69c
--- /dev/null
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -0,0 +1,61 @@
+/*
+ * livepatch.h - powerpc-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2015 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ */
+#ifndef _ASM_POWERPC64_LIVEPATCH_H
+#define _ASM_POWERPC64_LIVEPATCH_H
+
+#include 
+#include 
+
+#ifdef CONFIG_LIVEPATCH
+static inline int klp_check_compiler_support(void)
+{
+#if !defined(_CALL_ELF) || _CALL_ELF != 2 || !defined(CC_USING_MPROFILE_KERNEL)
+   return 1;
+#endif
+   return 0;
+}
+
+#define ARCH_HAVE_KLP_MATCHADDR
+static inline int klp_matchaddr(struct ftrace_ops *ops, unsigned long ip,
+   int remove, int reset)
+{
+   int offsets[] = {8, 16};
+   int i;
+   int ret = 1;
+
+   for (i = 0; i < ARRAY_SIZE(offsets); i++) {
+   ret = ftrace_set_filter_ip(ops, ip+offsets[i], remove, reset);
+   if (!ret)
+   break;
+   }
+   return ret;
+}
+
+extern int klp_write_module_reloc(struct module *mod, unsigned long type,
+  unsigned long loc, unsigned long value);
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+   regs->nip = ip;
+}
+#else
+#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#endif
+
+#endif /* _ASM_POWERPC64_LIVEPATCH_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 44667fd..405efce 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -119,6 +119,7 @@ 

Re: [PATCH v5 1/9] selftests/powerpc: Test the preservation of FPU and VMX regs across syscall

2016-02-25 Thread Cyril Bur
On Thu, 25 Feb 2016 11:52:05 +0530
"Naveen N. Rao"  wrote:

> On 2016/02/25 10:44AM, Cyril Bur wrote:
> > On Wed, 24 Feb 2016 19:57:38 +0530
> > "Naveen N. Rao"  wrote:
> >   
> > > On 2016/02/23 02:38PM, Cyril Bur wrote:  
> > > > Test that the non volatile floating point and Altivec registers get
> > > > correctly preserved across the fork() syscall.
> > > > 
> > > > fork() works nicely for this purpose, the registers should be the same 
> > > > for
> > > > both parent and child
> > > > 
> > > > Signed-off-by: Cyril Bur 
> > > > ---  
> 
> 
> 
> > > > diff --git a/tools/testing/selftests/powerpc/basic_asm.h 
> > > > b/tools/testing/selftests/powerpc/basic_asm.h
> > > > new file mode 100644
> > > > index 000..f56482f
> > > > --- /dev/null
> > > > +++ b/tools/testing/selftests/powerpc/basic_asm.h
> > > > @@ -0,0 +1,62 @@
> > > > +#include 
> > > > +#include 
> > > > +
> > > > +#if defined(_CALL_ELF) && _CALL_ELF == 2
> > > > +#define STACK_FRAME_MIN_SIZE 32
> > > > +#define STACK_FRAME_TOC_POS  24
> > > > +#define __STACK_FRAME_PARAM(_param)  (32 + ((_param)*8))
> > > > +#define __STACK_FRAME_LOCAL(_num_params,_var_num)  
> > > > ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8))
> > > > +#else
> > > > +#define STACK_FRAME_MIN_SIZE 112
> > > > +#define STACK_FRAME_TOC_POS  40
> > > > +#define __STACK_FRAME_PARAM(i)  (48 + ((i)*8))
> > > > +/*
> > > > + * Caveat: if a function passed more than 8 params, the caller will 
> > > > have
> > > > + * made more space... this should be reflected by this C code.
> > > > + * if (_num_params > 8)
> > > > + * total = 112 + ((_num_params - 8) * 8)
> > > > + *
> > > > + * And substitute the '112' for 'total' in the macro. Doable in 
> > > > preprocessor for ASM?
> > > > + */
> > > 
> > > Per my understanding, the parameter save area is only for parameters to 
> > > functions that *we* call. And since we control that, I don't think we 
> > > need to worry about having more than 8 parameters.
> > >   
> > 
> > Yes, I just thought I'd put that there to prevent anyone blindly reusing 
> > this
> > macro somewhere and getting stung. But you're correct, we don't need to 
> > worry
> > about this caveat for this code.  
> 
> Agreed, so probably a simpler warning will suffice. Also, it is worth 
> noting that this is not necessarily 8 parameters, but 8 doublewords - we 
> may have less number of parameters, but may still need more than 8 
> doublewords.
> 

True, happy to send s/params/doublewords/ if it's that important.

> >   
> > > > +#define __STACK_FRAME_LOCAL(_num_params,_var_num)  (112 + 
> > > > ((_var_num)*8))
> > > > +#endif
> > > > +/* Parameter x saved to the stack */
> > > > +#define STACK_FRAME_PARAM(var)__STACK_FRAME_PARAM(var)
> > > > +/* Local variable x saved to the stack after x parameters */
> > > > +#define STACK_FRAME_LOCAL(num_params,var)
> > > > __STACK_FRAME_LOCAL(num_params,var)
> > > 
> > > So this works, but I'm wondering if this is really worth the code 
> > > complexity - every use needs to determine appropriate extra stack space, 
> > > the number of parameters to save and so on.  This is after all for 
> > > selftests and so, we probably don't need to be precise in stack space 
> > > usage. We can get away using a larger fixed size stack.  That will 
> > > simplify a lot of the code further on and future tests won't need to 
> > > bother with all the details.
> > >   
> > 
> > So I agree that we don't need to be precise about stack space at all (I'm 
> > sure
> > you noticed all my stack pushes and pops are very overestimated in size).
> > 
> > I should probably go back to basics and explain how these macros started. In
> > writing all this I got fed up of typing the PUSH_BASIC_STACK and
> > POP_BASIC_STACK macro out at the start of each function. I wasn't trying to
> > macro out the entire calling convention and honestly I don't think it is a 
> > good
> > idea. The more easy to use/abstracted the macros get the less flexible they 
> >  
> 
> The simplification here is largely in terms of the stack size and the 
> fpu/gpr/vmx save areas. I don't think most tests would need more control 
> there. For the few tests that may need it, they can always hand code or 
> introduce different macros.
> 

It seems like your goal here is to make it so easy to write asm with these
macros that the programmer shouldn't need to open the abi because it's all done
for them. Let's just stick to the avoiding pointless repetition/avoiding silly
mistakes feature of macros.

If these macros all of a sudden take off and every selftest programmer
complains they aren't simple enough, perhaps we can revisit.

> > become. These being selftests, I expect that people might want to do
> > funky/questionable/hacky things, flexibility in the macros might help with 
> > that.  
> 
> Sure, but having every test deal with the intricacies of the stack is 
> not good. It's 

Re: Problems with swapping in v4.5-rc on POWER

2016-02-25 Thread Hugh Dickins via Linuxppc-dev
On Wed, 24 Feb 2016, Hugh Dickins wrote:
> On Thu, 25 Feb 2016, Aneesh Kumar K.V wrote:
> > 
> > Can you test the impact of the merge listed below? (i.e., revert the merge 
> > and see if
> > we can reproduce and also verify with merge applied). This will give us a
> > set of commits to look closer. We had quite a lot of page table
> > related changes going in this merge window. 
> > 
> > f689b742f217b2ffe7 ("Pull powerpc updates from Michael Ellerman:")
> > 
> > That is the merge commit that added _PAGE_PTE. 
> 
> Another experiment running on it at the moment, I'd like to give that
> a few more hours, and then will try the revert you suggest.  But does
> that merge revert cleanly, did you try?  I'm afraid of interactions,
> whether obvious or subtle, with the THP refcounting rework.  Oh, since
> I don't have THP configured on, maybe I can ignore any issues from that.

That revert worked painlessly, only a very few and simple conflicts,
I ran that under load for 12 hours, no problem seen.

I've now checked out an f689b742 tree and started on that, just to
confirm that it fails fairly quickly I hope; and will then proceed
to git bisect, giving that as bad and 37cea93b as good.

Given the uncertainty of whether 12 hours is really long enough to be
sure, and perhaps difficulties along the way, I don't rate my chances
of a reliable bisection higher than 60%, but we'll see.

Hugh
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 14/18] cxl: Support to flash a new image on the adapter from a guest

2016-02-25 Thread Manoj Kumar

Fred: Comments below.

On 2/25/2016 7:11 AM, Frederic Barrat wrote:



Le 24/02/2016 21:03, Manoj Kumar a écrit :


From: Christophe Lombard 

+#define CXL_DEV_MINORS 13   /* 1 control + 4 AFUs * 3
(dedicated/master/shared) */


Where does this limit of 4 AFUs come from?
Is this related to CXL_MAX_SLICES?
Should this be a computed value, in case the number of AFUs/slices
is increased at a future date?


The architecture document (CAIA) limits the number of AFUs to 4, though
I don't think anybody as tried with more than 1 so far.
So yes, we could have reused CXL_MAX_SLICES. Since we were just moving
the definition from another file and this is not likely to vary until a
major revision of the architecture, I don't intend to address it in this
patchset, but I've added it to my list of ideas for future cleanup (you
had already mentioned something about hard-coded constants in the
previous series).


Since it was a carryover from existing code, it is fine to address
later.


memset(0) after kzalloc() is redundant.


yep! Will fix.


If this is resolved in v6, you may add

Reviewed-by: Manoj Kumar 

---
Manoj Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 15/18] cxl: Parse device tree and create cxl device(s) at boot

2016-02-25 Thread Manoj Kumar

Fred: Thanks for the clarification.

Reviewed-by: Manoj Kumar 

---
Manoj Kumar


On 2/25/2016 7:19 AM, Frederic Barrat wrote:



Le 24/02/2016 21:15, Manoj Kumar a écrit :

On 2/23/2016 10:21 AM, Frederic Barrat wrote:

+module_init(cxl_base_init);


Is this a remnant from when there were two modules?
Do you really need two module_init() calls (can't one be called from the
other)?
What is the tear-down portion of this (module_exit)?


No, this is not a left-over from the previous 2-module implementation of
the cxl driver.
The file base.c is not part of the "normal" cxl driver. It is either
part of the kernel if the cxl driver is a module or configured in the
kernel. Or it is discarded if cxl is not even a module. So code in that
file is either in the kernel or it's not even compiled. That was already
the case on bare-metal.
Code in module_init() is executed when the kernel boots and it is not
going away.

   Fred


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-25 Thread Steve Capper
On 25 February 2016 at 16:01, Kirill A. Shutemov  wrote:
> On Thu, Feb 25, 2016 at 03:49:33PM +, Steve Capper wrote:
>> On 23 February 2016 at 18:47, Will Deacon  wrote:
>> > [adding Steve, since he worked on THP for 32-bit ARM]
>>
>> Apologies for my late reply...
>>
>> >
>> > On Tue, Feb 23, 2016 at 07:19:07PM +0100, Gerald Schaefer wrote:
>> >> On Tue, 23 Feb 2016 13:32:21 +0300
>> >> "Kirill A. Shutemov"  wrote:
>> >> > The theory is that the splitting bit effetely masked bogus 
>> >> > pmd_present():
>> >> > we had pmd_trans_splitting() in all code path and that prevented mm from
>> >> > touching the pmd. Once pmd_trans_splitting() has gone, mm proceed with 
>> >> > the
>> >> > pmd where it shouldn't and here's a boom.
>> >>
>> >> Well, I don't think pmd_present() == true is bogus for a trans_huge pmd 
>> >> under
>> >> splitting, after all there is a page behind the pmd. Also, if it was
>> >> bogus, and it would need to be false, why should it be marked 
>> >> !pmd_present()
>> >> only at the pmdp_invalidate() step before the pmd_populate()? It clearly
>> >> is pmd_present() before that, on all architectures, and if there was any
>> >> problem/race with that, setting it to !pmd_present() at this stage would
>> >> only (marginally) reduce the race window.
>> >>
>> >> BTW, PowerPC and Sparc seem to do the same thing in pmdp_invalidate(),
>> >> i.e. they do not set pmd_present() == false, only mark it so that it would
>> >> not generate a new TLB entry, just like on s390. After all, the function
>> >> is called pmdp_invalidate(), and I think the comment in mm/huge_memory.c
>> >> before that call is just a little ambiguous in its wording. When it says
>> >> "mark the pmd notpresent" it probably means "mark it so that it will not
>> >> generate a new TLB entry", which is also what the comment is really about:
>> >> prevent huge and small entries in the TLB for the same page at the same
>> >> time.
>> >>
>> >> FWIW, and since the ARM arch-list is already on cc, I think there is
>> >> an issue with pmdp_invalidate() on ARM, since it also seems to clear
>> >> the trans_huge (and formerly trans_splitting) bit, which actually makes
>> >> the pmd !pmd_present(), but it violates the other requirement from the
>> >> comment:
>> >> "the pmd_trans_huge and pmd_trans_splitting must remain set at all times
>> >> on the pmd until the split is complete for this pmd"
>> >
>> > I've only been testing this for arm64 (where I'm yet to see a problem),
>> > but we use the generic pmdp_invalidate implementation from
>> > mm/pgtable-generic.c there. On arm64, pmd_trans_huge will return true
>> > after pmd_mknotpresent. On arm, it does look to be buggy, since it nukes
>> > the entire entry... Steve?
>>
>> pmd_mknotpresent on arm looks inconsistent with the other
>> architectures and can be changed.
>>
>> Having had a look at the usage, I can't see it causing an immediate
>> problem (that needs to be addressed by an emergency patch).
>> We don't have a notion of splitting pmds (so there is no splitting
>> information to lose), and the only usage I could see of
>> pmd_mknotpresent was:
>>
>> pmdp_invalidate(vma, haddr, pmd);
>> pmd_populate(mm, pmd, pgtable);
>>
>> In mm/huge_memory.c, around line 3588.
>>
>> So we invalidate the entry (which puts down a faulting entry from
>> pmd_mknotpresent and invalidates tlb), then immediately put down a
>> table entry with pmd_populate.
>>
>> I have run a 32-bit ARM test kernel and exacerbated THP splits (that's
>> what took me time), and I didn't notice any problems with 4.5-rc5.
>
> If I read code correctly, your pmd_mknotpresent() makes the pmd
> pmd_none(), right? If yes, it's a problem.
>
> It introduces race I've described here:
>
> https://marc.info/?l=linux-mm=144723658100512=4
>
> Basically, if zap_pmd_range() would see pmd_none() between
> pmdp_mknotpresent() and pmd_populate(), we're screwed.
>
> The race window is small, but it's there.

A, okay, thank you Kirill.
I agree, I'll get a patch out.

Cheers,
--
Steve
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-25 Thread Kirill A. Shutemov
On Thu, Feb 25, 2016 at 03:49:33PM +, Steve Capper wrote:
> On 23 February 2016 at 18:47, Will Deacon  wrote:
> > [adding Steve, since he worked on THP for 32-bit ARM]
> 
> Apologies for my late reply...
> 
> >
> > On Tue, Feb 23, 2016 at 07:19:07PM +0100, Gerald Schaefer wrote:
> >> On Tue, 23 Feb 2016 13:32:21 +0300
> >> "Kirill A. Shutemov"  wrote:
> >> > The theory is that the splitting bit effectively masked bogus pmd_present():
> >> > we had pmd_trans_splitting() in all code path and that prevented mm from
> >> > touching the pmd. Once pmd_trans_splitting() has gone, mm proceed with 
> >> > the
> >> > pmd where it shouldn't and here's a boom.
> >>
> >> Well, I don't think pmd_present() == true is bogus for a trans_huge pmd 
> >> under
> >> splitting, after all there is a page behind the pmd. Also, if it was
> >> bogus, and it would need to be false, why should it be marked 
> >> !pmd_present()
> >> only at the pmdp_invalidate() step before the pmd_populate()? It clearly
> >> is pmd_present() before that, on all architectures, and if there was any
> >> problem/race with that, setting it to !pmd_present() at this stage would
> >> only (marginally) reduce the race window.
> >>
> >> BTW, PowerPC and Sparc seem to do the same thing in pmdp_invalidate(),
> >> i.e. they do not set pmd_present() == false, only mark it so that it would
> >> not generate a new TLB entry, just like on s390. After all, the function
> >> is called pmdp_invalidate(), and I think the comment in mm/huge_memory.c
> >> before that call is just a little ambiguous in its wording. When it says
> >> "mark the pmd notpresent" it probably means "mark it so that it will not
> >> generate a new TLB entry", which is also what the comment is really about:
> >> prevent huge and small entries in the TLB for the same page at the same
> >> time.
> >>
> >> FWIW, and since the ARM arch-list is already on cc, I think there is
> >> an issue with pmdp_invalidate() on ARM, since it also seems to clear
> >> the trans_huge (and formerly trans_splitting) bit, which actually makes
> >> the pmd !pmd_present(), but it violates the other requirement from the
> >> comment:
> >> "the pmd_trans_huge and pmd_trans_splitting must remain set at all times
> >> on the pmd until the split is complete for this pmd"
> >
> > I've only been testing this for arm64 (where I'm yet to see a problem),
> > but we use the generic pmdp_invalidate implementation from
> > mm/pgtable-generic.c there. On arm64, pmd_trans_huge will return true
> > after pmd_mknotpresent. On arm, it does look to be buggy, since it nukes
> > the entire entry... Steve?
> 
> pmd_mknotpresent on arm looks inconsistent with the other
> architectures and can be changed.
> 
> Having had a look at the usage, I can't see it causing an immediate
> problem (that needs to be addressed by an emergency patch).
> We don't have a notion of splitting pmds (so there is no splitting
> information to lose), and the only usage I could see of
> pmd_mknotpresent was:
> 
> pmdp_invalidate(vma, haddr, pmd);
> pmd_populate(mm, pmd, pgtable);
> 
> In mm/huge_memory.c, around line 3588.
> 
> So we invalidate the entry (which puts down a faulting entry from
> pmd_mknotpresent and invalidates tlb), then immediately put down a
> table entry with pmd_populate.
> 
> I have run a 32-bit ARM test kernel and exacerbated THP splits (that's
> what took me time), and I didn't notice any problems with 4.5-rc5.

If I read code correctly, your pmd_mknotpresent() makes the pmd
pmd_none(), right? If yes, it's a problem.

It introduces race I've described here:

https://marc.info/?l=linux-mm=144723658100512=4

Basically, if zap_pmd_range() would see pmd_none() between
pmdp_mknotpresent() and pmd_populate(), we're screwed.

The race window is small, but it's there.

-- 
 Kirill A. Shutemov
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [BUG] random kernel crashes after THP rework on s390 (maybe also on PowerPC and ARM)

2016-02-25 Thread Steve Capper
On 23 February 2016 at 18:47, Will Deacon  wrote:
> [adding Steve, since he worked on THP for 32-bit ARM]

Apologies for my late reply...

>
> On Tue, Feb 23, 2016 at 07:19:07PM +0100, Gerald Schaefer wrote:
>> On Tue, 23 Feb 2016 13:32:21 +0300
>> "Kirill A. Shutemov"  wrote:
>> > The theory is that the splitting bit effectively masked bogus pmd_present():
>> > we had pmd_trans_splitting() in all code path and that prevented mm from
>> > touching the pmd. Once pmd_trans_splitting() has gone, mm proceed with the
>> > pmd where it shouldn't and here's a boom.
>>
>> Well, I don't think pmd_present() == true is bogus for a trans_huge pmd under
>> splitting, after all there is a page behind the pmd. Also, if it was
>> bogus, and it would need to be false, why should it be marked !pmd_present()
>> only at the pmdp_invalidate() step before the pmd_populate()? It clearly
>> is pmd_present() before that, on all architectures, and if there was any
>> problem/race with that, setting it to !pmd_present() at this stage would
>> only (marginally) reduce the race window.
>>
>> BTW, PowerPC and Sparc seem to do the same thing in pmdp_invalidate(),
>> i.e. they do not set pmd_present() == false, only mark it so that it would
>> not generate a new TLB entry, just like on s390. After all, the function
>> is called pmdp_invalidate(), and I think the comment in mm/huge_memory.c
>> before that call is just a little ambiguous in its wording. When it says
>> "mark the pmd notpresent" it probably means "mark it so that it will not
>> generate a new TLB entry", which is also what the comment is really about:
>> prevent huge and small entries in the TLB for the same page at the same
>> time.
>>
>> FWIW, and since the ARM arch-list is already on cc, I think there is
>> an issue with pmdp_invalidate() on ARM, since it also seems to clear
>> the trans_huge (and formerly trans_splitting) bit, which actually makes
>> the pmd !pmd_present(), but it violates the other requirement from the
>> comment:
>> "the pmd_trans_huge and pmd_trans_splitting must remain set at all times
>> on the pmd until the split is complete for this pmd"
>
> I've only been testing this for arm64 (where I'm yet to see a problem),
> but we use the generic pmdp_invalidate implementation from
> mm/pgtable-generic.c there. On arm64, pmd_trans_huge will return true
> after pmd_mknotpresent. On arm, it does look to be buggy, since it nukes
> the entire entry... Steve?

pmd_mknotpresent on arm looks inconsistent with the other
architectures and can be changed.

Having had a look at the usage, I can't see it causing an immediate
problem (that needs to be addressed by an emergency patch).
We don't have a notion of splitting pmds (so there is no splitting
information to lose), and the only usage I could see of
pmd_mknotpresent was:

pmdp_invalidate(vma, haddr, pmd);
pmd_populate(mm, pmd, pgtable);

In mm/huge_memory.c, around line 3588.

So we invalidate the entry (which puts down a faulting entry from
pmd_mknotpresent and invalidates tlb), then immediately put down a
table entry with pmd_populate.

I have run a 32-bit ARM test kernel and exacerbated THP splits (that's
what took me time), and I didn't notice any problems with 4.5-rc5.

Cheers,
-- 
Steve

>
> Will
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: mailto:"d...@kvack.org;> em...@kvack.org 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 07/12] powerpc/ftrace: FTRACE_WITH_REGS implementation for ppc64le

2016-02-25 Thread Torsten Duwe
On Thu, Feb 25, 2016 at 11:48:59AM +1100, Balbir Singh wrote:
> > @@ -608,6 +621,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
> > return -ENOENT;
> > if (!restore_r2((u32 *)location + 1, me))
> > return -ENOEXEC;
> > +   /* Squash the TOC saver for profiler calls */
> > +   if (!strcmp("_mcount", strtab+sym->st_name))
> > +   SQUASH_TOC_SAVE_INSN(value);
> I don't think we need this anymore, do we?

I'm not sure. Once a module is loaded, are all the "bl _mcount"s NOPed out
before any of its functions are run? If not, the _mcount trampoline will
be used, and it must not save R2!

Torsten

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 01/12] powerpc/module: Only try to generate the ftrace_caller() stub once

2016-02-25 Thread Kamalesh Babulal
* Michael Ellerman  [2016-02-25 01:28:24]:

> Currently we generate the module stub for ftrace_caller() at the bottom
> of apply_relocate_add(). However apply_relocate_add() is potentially
> called more than once per module, which means we will try to generate
> the ftrace_caller() stub multiple times.
> 
> Although the current code deals with that correctly, ie. it only
> generates a stub the first time, it would be clearer to only try to
> generate the stub once.
> 
> Note also on first reading it may appear that we generate a different
> stub for each section that requires relocation, but that is not the
> case. The code in stub_for_addr() that searches for an existing stub
> uses sechdrs[me->arch.stubs_section], ie. the single stub section for
> this module.
> 
> A cleaner approach is to only generate the ftrace_caller() stub once,
> from module_finalize(). An additional benefit is we can clean the ifdefs
> up a little.
> 
> Finally we must propagate the const'ness of some of the pointers passed
> to module_finalize(), but that is also an improvement.
> 
> Signed-off-by: Michael Ellerman 

For all of the patches in the series.

Tested-by: Kamalesh Babulal 


Regards,
Kamalesh.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 10/12] powerpc/ftrace: FTRACE_WITH_REGS configuration variables

2016-02-25 Thread Torsten Duwe
On Thu, Feb 25, 2016 at 12:11:33PM +1100, Balbir Singh wrote:
> On 25/02/16 01:28, Michael Ellerman wrote:
> >
> > diff --git a/arch/powerpc/gcc-mprofile-kernel-notrace.sh 
> > b/arch/powerpc/gcc-mprofile-kernel-notrace.sh
> > new file mode 100755
> > index ..68d6482d56ab
> > --- /dev/null
> > +++ b/arch/powerpc/gcc-mprofile-kernel-notrace.sh
> > @@ -0,0 +1,33 @@
> > +#!/bin/sh
> > +# Test whether the compile option -mprofile-kernel
> > +# generates profiling code ( = a call to mcount), and
> > +# whether a function without any global references sets
> > +# the TOC pointer properly at the beginning, and

Remove the above two lines, for completeness,

> > +# whether the "notrace" function attribute successfully
> > +# suppresses the _mcount call.
> > +
> > +echo "int func() { return 0; }" | \
> > +$* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
> > +grep -q "mcount"
> > +
> > +trace_result=$?
> > +
> > +echo "int func() { return 0; }" | \
> > +$* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
> > +sed -n -e '/func:/,/bl _mcount/p' | grep -q TOC
> > +
> > +leaf_toc_result=$?
> > +
> We should remove this bit, we don't need a TOC for leaf procedures anymore

Exactly. I thought it was a bug when I wrote this test, Michael insisted
it was a feature :-)

> > +/bin/echo -e "#include \nnotrace int func() { return 0; 
> > }" | \
> > +$* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
> > +grep -q "mcount"
> > +
> > +notrace_result=$?
> > +
> > +if [ "$trace_result" -eq "0" -a \
> > +   "$leaf_toc_result" -eq "0" -a \

In particular, remove this ^ line.

> > +   "$notrace_result" -eq "1" ]; then
> > +   echo y
> > +else
> > +   echo n
> > +fi

That version would have made it into my v9.

Signed-off-by: Torsten Duwe 

Torsten

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 08/12] powerpc/ftrace: Rework ftrace_caller()

2016-02-25 Thread Torsten Duwe
On Thu, Feb 25, 2016 at 01:28:31AM +1100, Michael Ellerman wrote:
> The main change is to just use paca->kernel_toc, rather than a branch to
> +4 and mflr etc. That makes the code simpler and should also perform
> better.

Indeed.

> There was also a sequence after ftrace_call() where we load from
> pt_regs->nip, move to LR, then a few instructions later load from LRSAVE
> and move to LR. Instead I think we want to put pt_regs->nip into CTR and
> branch to it later.

Yes, I did some of this cleanup in the livepatch implementation.

> We also rework some of the SPR loads to hopefully speed them up a bit.
> Also comment the asm much more, to hopefully make it clearer.
> 
> Signed-off-by: Michael Ellerman 

Reviewed-by: Torsten Duwe 

Torsten

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 06/12] powerpc/module: Rework is_early_mcount_callsite()

2016-02-25 Thread Torsten Duwe
On Thu, Feb 25, 2016 at 09:28:32PM +1100, Michael Ellerman wrote:
> On Thu, 2016-02-25 at 10:39 +1100, Balbir Singh wrote:
> > On 25/02/16 01:28, Michael Ellerman wrote:
> > > is_early_mcount_callsite() needs to detect either the two instruction or
> > > the three instruction versions of the _mcount() sequence.
> > > 
> > > But if we're running a kernel with the two instruction sequence, we need
> > > to be careful not to read instruction - 2, otherwise we might fall off
> > > the front of a page and cause an oops.
> > > 
> > > While we're here convert to bool to make the return semantics clear.
> > > 
> > > Signed-off-by: Michael Ellerman 

I wouldn't mind if you had folded this into the previous patch, see comments 
there.

> > > 
> > Do we even need to do this anymore?
> 
> Yes. Otherwise the code in apply_relocate_add() will see a far call with no 
> nop
> slot after it to do the toc restore, and it considers that a bug (which it
> usually is, except mcount is special).
> 
> As we discussed today I'm hoping we can clean this code up a bit more in the
> medium term, but this works for now.

Agreed.
Reviewed-by: Torsten Duwe 

Torsten

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 05/12] powerpc/ftrace: ftrace_graph_caller() needs to save/restore toc

2016-02-25 Thread Torsten Duwe
On Thu, Feb 25, 2016 at 11:30:38AM +1100, Balbir Singh wrote:
> On 25/02/16 01:28, Michael Ellerman wrote:
> > Signed-off-by: Michael Ellerman 
> > ---
> >  arch/powerpc/kernel/entry_64.S | 8 
> >  1 file changed, 8 insertions(+)
> >

Ah, -mprofile-kernel, DYNAMIC_FTRACE but without REGS.
Hadn't considered that, thanks!

> >
> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > index 2a7313cfbc7d..9e77a2c8f218 100644
> > --- a/arch/powerpc/kernel/entry_64.S
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -1237,6 +1237,11 @@ _GLOBAL(ftrace_graph_caller)
> > std r5, 64(r1)
> > std r4, 56(r1)
> > std r3, 48(r1)
> > +
> > +   /* Save callee's TOC in the ABI compliant location */
> > +   std r2, 24(r1)
> R2_STACK_OFFSET for readability?

I have encountered LRSAVE vs. PPC_LR_STKOFF and
STK_GOT vs. R2_STACK_OFFSET, some usable in assembler source, some in C.

> > +   ld  r2, PACATOC(r13)/* get kernel TOC in r2 */
> > +
> > mfctr   r4  /* ftrace_caller has moved local addr here */
> > std r4, 40(r1)
> > mflrr3  /* ftrace_caller has restored LR from stack */
> > @@ -1262,6 +1267,9 @@ _GLOBAL(ftrace_graph_caller)
> > ld  r4, 56(r1)
> > ld  r3, 48(r1)
> >  
> > +   /* Restore callee's TOC */
> > +   ld  r2, 24(r1)
> > +
> > addir1, r1, 112
> > mflrr0
> > std r0, LRSAVE(r1)
> 
> Reviewed-by: Balbir Singh 

Reviewed-by: Torsten Duwe 

Torsten


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 04/12] powerpc/ftrace: Prepare for -mprofile-kernel

2016-02-25 Thread Torsten Duwe
On Thu, Feb 25, 2016 at 01:28:27AM +1100, Michael Ellerman wrote:
> @@ -450,17 +448,44 @@ static unsigned long stub_for_addr(const Elf64_Shdr 
> *sechdrs,
>   return (unsigned long)&stubs[i];
>  }
>  
> +#ifdef CC_USING_MPROFILE_KERNEL
> +static int is_early_mcount_callsite(u32 *instruction)
> +{
> + /* -mprofile-kernel sequence starting with
> +  * mflr r0 and maybe std r0, LRSAVE(r1).
> +  */
> + if ((instruction[-3] == PPC_INST_MFLR &&
> +  instruction[-2] == PPC_INST_STD_LR) ||
> + instruction[-2] == PPC_INST_MFLR) {
> + /* Nothing to be done here, it's an _mcount
> +  * call location and r2 will have to be
> +  * restored in the _mcount function.
> +  */
> + return 1;
> + }
> + return 0;
> +}
> +#else

*You* said this might page fault :)

Did we agree yet whether we insist on a streamlined compiler?
(GCC commit e95d0248dace required)?

If not:
if (instruction[-2] == PPC_INST_STD_LR)
  {
if (instruction[-3] == PPC_INST_MFLR)
  return 1;
  }
else if (instruction[-2] == PPC_INST_MFLR)
return 1;
return 0;

leaves less freedom for the compiler to "optimise".

Signed-off-by: Torsten Duwe 

Torsten

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 03/12] powerpc/module: Create a special stub for ftrace_caller()

2016-02-25 Thread Torsten Duwe
On Thu, Feb 25, 2016 at 11:08:54AM +1100, Balbir Singh wrote:
> How about some comments on r2
> r2 is still pointing to the module's toc, will be saved by ftrace_caller and 
> restored by the instruction following bl ftrace_caller (after patching 
> _mcount/nop)

To be precise: ftrace_caller needs to save _and_ restore r2 in case of 
-mprofile-kernel.

> > +   /* Stub uses address relative to kernel_toc */
> > +   reladdr = (unsigned long)ftrace_caller - get_paca()->kernel_toc;

kernel_toc is a compile time constant; do you really want to look it up in
memory at runtime each time? It's a bit tricky to get the +- 0x8000 right
OTOH...

I wrote:
extern unsigned long __toc_start;
reladdr = addr - ((unsigned long)(&__toc_start) + 0x8000UL);

looks a bit odd, but evaluates to a constant for ftrace_caller.

Either way is fine with me:

Signed-off-by: Torsten Duwe 
Reviewed-by: Torsten Duwe 

> Reviewed-by: Balbir Singh 

Torsten

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 15/18] cxl: Parse device tree and create cxl device(s) at boot

2016-02-25 Thread Frederic Barrat



Le 24/02/2016 21:15, Manoj Kumar a écrit :

On 2/23/2016 10:21 AM, Frederic Barrat wrote:

+module_init(cxl_base_init);


Is this a remnant from when there were two modules?
Do you really need two module_init() calls (can't one be called from the
other)?
What is the tear-down portion of this (module_exit)?


No, this is not a left-over from the previous 2-module implementation of 
the cxl driver.
The file base.c is not part of the "normal" cxl driver. It is either 
part of the kernel if the cxl driver is a module or configured in the 
kernel. Or it is discarded if cxl is not even a module. So code in that 
file is either in the kernel or it's not even compiled. That was already 
the case on bare-metal.
Code in module_init() is executed when the kernel boots and it is not 
going away.


  Fred

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 02/12] powerpc/module: Mark module stubs with a magic value

2016-02-25 Thread Torsten Duwe
On Thu, Feb 25, 2016 at 01:28:25AM +1100, Michael Ellerman wrote:
> 
> We can make that process easier by marking the generated stubs with a
> magic value, and then looking for that magic value. Although this is not
> as rigorous as the current method, I believe it is sufficient in
> practice.

The actual magic value is sort of debatable; it should be "improbable"
enough. But this can be changed easily, for each kernel compile, even.

> Signed-off-by: Michael Ellerman 
Reviewed-by: Torsten Duwe 

[for reference:]
>  
> +#define STUB_MAGIC 0x73747562 /* stub */
> +

Torsten

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 14/18] cxl: Support to flash a new image on the adapter from a guest

2016-02-25 Thread Frederic Barrat



Le 24/02/2016 21:03, Manoj Kumar a écrit :


From: Christophe Lombard 

+#define CXL_DEV_MINORS 13   /* 1 control + 4 AFUs * 3
(dedicated/master/shared) */


Where does this limit of 4 AFUs come from?
Is this related to CXL_MAX_SLICES?
Should this be a computed value, in case the number of AFUs/slices
is increased at a future date?


The architecture document (CAIA) limits the number of AFUs to 4, though 
I don't think anybody has tried with more than 1 so far.
So yes, we could have reused CXL_MAX_SLICES. Since we were just moving 
the definition from another file and this is not likely to vary until a 
major revision of the architecture, I don't intend to address it in this 
patchset, but I've added it to my list of ideas for future cleanup (you 
had already mentioned something about hard-coded constants in the 
previous series).




memset(0) after kzalloc() is redundant.


yep! Will fix.

  Fred

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 01/12] powerpc/module: Only try to generate the ftrace_caller() stub once

2016-02-25 Thread Torsten Duwe
On Thu, Feb 25, 2016 at 01:28:24AM +1100, Michael Ellerman wrote:
> Currently we generate the module stub for ftrace_caller() at the bottom
> of apply_relocate_add(). However apply_relocate_add() is potentially
> called more than once per module, which means we will try to generate
> the ftrace_caller() stub multiple times.
> 
> Although the current code deals with that correctly, ie. it only
> generates a stub the first time, it would be clearer to only try to
> generate the stub once.
> 
> Note also on first reading it may appear that we generate a different
> stub for each section that requires relocation, but that is not the
> case. The code in stub_for_addr() that searches for an existing stub
> uses sechdrs[me->arch.stubs_section], ie. the single stub section for
> this module.
> 
> A cleaner approach is to only generate the ftrace_caller() stub once,
> from module_finalize(). An additional benefit is we can clean the ifdefs
> up a little.
> 
> Finally we must propagate the const'ness of some of the pointers passed
> to module_finalize(), but that is also an improvement.
> 
> Signed-off-by: Michael Ellerman 

Reviewed-by: Torsten Duwe 

Torsten

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 3/3] powernv: Fix MCE handler to avoid trashing CR0/CR1 registers.

2016-02-25 Thread Mahesh Jagannath Salgaonkar
On 02/25/2016 12:35 PM, Shreyas B Prabhu wrote:
> 
> 
> On 02/25/2016 10:25 AM, Mahesh J Salgaonkar wrote:
>> From: Mahesh Salgaonkar 
>>
>> The current implementation of MCE early handling modifies CR0/1 registers
>> without saving its old values. Fix this by moving early check for
>> powersaving mode to machine_check_handle_early().
>>
>> The power architecture 2.06 or later allows the possibility of getting
>> machine check while in nap/sleep/winkle. The last bit of HSPRG0 is set
>> to 1, if thread is woken up from winkle. Hence, clear the last bit of
>> HSPRG0 (r13) before MCE handler starts using it as paca pointer.
>>
>> Also, the current code always puts the thread into nap state irrespective
>> of whatever idle state it woke up from. Fix that by looking at
>> paca->thread_idle_state and put the thread back into same state where it
>> came from.
>>
>> Reported-by: Paul Mackerras 
>> Signed-off-by: Mahesh Salgaonkar 
>> ---
>>  arch/powerpc/kernel/exceptions-64s.S |   66 
>> --
>>  1 file changed, 39 insertions(+), 27 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/exceptions-64s.S 
>> b/arch/powerpc/kernel/exceptions-64s.S
>> index d4c99f0..7fa71e7 100644
>> --- a/arch/powerpc/kernel/exceptions-64s.S
>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>> @@ -164,29 +164,14 @@ machine_check_pSeries_1:
>>   * vector
>>   */
>>  SET_SCRATCH0(r13)   /* save r13 */
>> -#ifdef CONFIG_PPC_P7_NAP
>> -BEGIN_FTR_SECTION
>> -/* Running native on arch 2.06 or later, check if we are
>> - * waking up from nap. We only handle no state loss and
>> - * supervisor state loss. We do -not- handle hypervisor
>> - * state loss at this time.
>> +/*
>> + * Running native on arch 2.06 or later, we may wakeup from winkle
>> + * inside machine check. If yes, then last bit of HSPGR0 would be set
>> + * to 1. Hence clear it unconditionally.
>>   */
>> -mfspr   r13,SPRN_SRR1
>> -rlwinm. r13,r13,47-31,30,31
>> -OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
>> -beq 9f
>> -
>> -mfspr   r13,SPRN_SRR1
>> -rlwinm. r13,r13,47-31,30,31
>> -/* waking up from powersave (nap) state */
>> -cmpwi   cr1,r13,2
>> -/* Total loss of HV state is fatal. let's just stay stuck here */
>> -OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
>> -bgt cr1,.
>> -9:
>> -OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
>> -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
>> -#endif /* CONFIG_PPC_P7_NAP */
>> +GET_PACA(r13)
>> +clrrdi  r13,r13,1
>> +SET_PACA(r13)
>>  EXCEPTION_PROLOG_0(PACA_EXMC)
>>  BEGIN_FTR_SECTION
>>  b   machine_check_powernv_early
>> @@ -1362,25 +1347,52 @@ machine_check_handle_early:
>>   * Check if thread was in power saving mode. We come here when any
>>   * of the following is true:
>>   * a. thread wasn't in power saving mode
>> - * b. thread was in power saving mode with no state loss or
>> - *supervisor state loss
>> + * b. thread was in power saving mode with no state loss,
>> + *supervisor state loss or hypervisor state loss.
>>   *
>> - * Go back to nap again if (b) is true.
>> + * Go back to nap/sleep/winkle mode again if (b) is true.
>>   */
>>  rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
>>  beq 4f  /* No, it wasn;t */
>>  /* Thread was in power saving mode. Go back to nap again. */
>>  cmpwi   r11,2
>> -bne 3f
>> -/* Supervisor state loss */
>> +blt 3f
>> +/* Supervisor/Hypervisor state loss loss */
> 
> loss repeated twice in comment
>>  li  r0,1
>>  stb r0,PACA_NAPSTATELOST(r13)
>>  3:  bl  machine_check_queue_event
>>  MACHINE_CHECK_HANDLER_WINDUP
>>  GET_PACA(r13)
>> +/*
>> + * Check what idle state this CPU was in and go back to same mode
>> + * again.
>> + */
>> +lbz r3,PACA_THREAD_IDLE_STATE(r13)
>> +cmpwi   r3,PNV_THREAD_NAP
>> +bgt 1f
>>  ld  r1,PACAR1(r13)
>>  li  r3,PNV_THREAD_NAP
>>  b   power7_enter_nap_mode
> 
> You could call IDLE_STATE_ENTER_SEQ(PPC_NAP) here to keep it consistent
> with what you do for sleep and winkle below. power7_enter_nap_mode is
> only setting couple of PACA flags which are anyway set in your case.

Yup I could do that. Will fix it in v2.

> 
> Also what is the MSR at this point? I don't foresee any issue as long as
> we are in real mode. That said, ISA says SF, HV and ME bits should be 1,
> RI can be 0,1 and rest have to be 0 before entering low power mode.

Machine check interrupts are taken in real mode and by the time we come
here ME bit is already set to 1. So we are good.

>> +/* No return */
>> +1:
>> +cmpwi   r3,PNV_THREAD_SLEEP
>> +bgt 2f
>> +IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
>> +/* No return */
>> +
>> 

[GIT PULL] Please pull powerpc/linux.git powerpc-4.5-4 tag

2016-02-25 Thread Michael Ellerman
Hi Linus,

Please pull a couple more powerpc fixes for 4.5:

The following changes since commit 6ecad912a0073c768db1491c27ca55ad2d0ee68f:

  powerpc/ioda: Set "read" permission when "write" is set (2016-02-17 23:52:17 
+1100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
tags/powerpc-4.5-4

for you to fetch changes up to 9ab3ac233a8b4ffcc27c8475b83dee49fc46bc76:

  powerpc/mm/hash: Clear the invalid slot information correctly (2016-02-22 
19:27:39 +1100)


powerpc fixes for 4.5 #3

 - eeh: Fix partial hotplug criterion from Gavin Shan
 - mm: Clear the invalid slot information correctly from Aneesh Kumar K.V


Aneesh Kumar K.V (1):
  powerpc/mm/hash: Clear the invalid slot information correctly

Gavin Shan (1):
  powerpc/eeh: Fix partial hotplug criterion

 arch/powerpc/kernel/eeh_driver.c  |  3 +--
 arch/powerpc/mm/hash64_64k.c  |  8 +++-
 arch/powerpc/mm/hugepage-hash64.c | 12 +++-
 3 files changed, 19 insertions(+), 4 deletions(-)


signature.asc
Description: This is a digitally signed message part
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC PATCH v1 2/2] powerpc/86xx: Introduce and use common dtsi

2016-02-25 Thread Alessio Igor Bogani
Signed-off-by: Alessio Igor Bogani 
---
 arch/powerpc/boot/dts/fsl/gef_ppc9a.dts| 284 +++---
 arch/powerpc/boot/dts/fsl/gef_sbc310.dts   | 276 +++--
 arch/powerpc/boot/dts/fsl/gef_sbc610.dts   | 284 +++---
 arch/powerpc/boot/dts/fsl/mpc8641_hpcn.dts | 318 -
 arch/powerpc/boot/dts/fsl/mpc8641_hpcn_36b.dts | 317 
 arch/powerpc/boot/dts/fsl/mpc8641si-post.dtsi  | 124 ++
 arch/powerpc/boot/dts/fsl/mpc8641si-pre.dtsi   |  59 +
 arch/powerpc/boot/dts/fsl/sbc8641d.dts | 314 +++-
 8 files changed, 400 insertions(+), 1576 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/fsl/mpc8641si-post.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/mpc8641si-pre.dtsi

diff --git a/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts 
b/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts
index 83eb0fd..0c11623 100644
--- a/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts
+++ b/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts
@@ -18,62 +18,19 @@
  * Compiled with dtc -I dts -O dtb -o gef_ppc9a.dtb gef_ppc9a.dts
  */
 
-/dts-v1/;
+/include/ "mpc8641si-pre.dtsi"
 
 / {
model = "GEF_PPC9A";
compatible = "gef,ppc9a";
-   #address-cells = <1>;
-   #size-cells = <1>;
-
-   aliases {
-   ethernet0 = &enet0;
-   ethernet1 = &enet1;
-   serial0 = &serial0;
-   serial1 = &serial1;
-   pci0 = &pci0;
-   };
-
-   cpus {
-   #address-cells = <1>;
-   #size-cells = <0>;
-
-   PowerPC,8641@0 {
-   device_type = "cpu";
-   reg = <0>;
-   d-cache-line-size = <32>;   // 32 bytes
-   i-cache-line-size = <32>;   // 32 bytes
-   d-cache-size = <32768>; // L1, 32K
-   i-cache-size = <32768>; // L1, 32K
-   timebase-frequency = <0>;   // From uboot
-   bus-frequency = <0>;// From uboot
-   clock-frequency = <0>;  // From uboot
-   };
-   PowerPC,8641@1 {
-   device_type = "cpu";
-   reg = <1>;
-   d-cache-line-size = <32>;   // 32 bytes
-   i-cache-line-size = <32>;   // 32 bytes
-   d-cache-size = <32768>; // L1, 32K
-   i-cache-size = <32768>; // L1, 32K
-   timebase-frequency = <0>;   // From uboot
-   bus-frequency = <0>;// From uboot
-   clock-frequency = <0>;  // From uboot
-   };
-   };
 
memory {
device_type = "memory";
reg = <0x0 0x40000000>; // set by uboot
};
 
-   localbus@fef05000 {
-   #address-cells = <2>;
-   #size-cells = <1>;
-   compatible = "fsl,mpc8641-localbus", "simple-bus";
+   lbc: localbus@fef05000 {
reg = <0xfef05000 0x1000>;
-   interrupts = <19 2>;
-   interrupt-parent = <&mpic>;
 
ranges = <0 0 0xff00 0x0100 // 16MB Boot flash
  1 0 0xe800 0x0800 // Paged Flash 0
@@ -133,7 +90,7 @@
compatible = "gef,ppc9a-fpga-wdt", "gef,fpga-wdt-1.00",
"gef,fpga-wdt";
reg = <0x4 0x2000 0x8>;
-   interrupts = <0x1a 0x4>;
+   interrupts = <0x1a 0x4 0 0>;
interrupt-parent = <&gef_pic>;
};
/* Second watchdog available, driver currently supports one.
@@ -141,7 +98,7 @@
compatible = "gef,ppc9a-fpga-wdt", "gef,fpga-wdt-1.00",
"gef,fpga-wdt";
reg = <0x4 0x2010 0x8>;
-   interrupts = <0x1b 0x4>;
+   interrupts = <0x1b 0x4 0 0>;
interrupt-parent = <&gef_pic>;
};
*/
@@ -150,10 +107,7 @@
interrupt-controller;
compatible = "gef,ppc9a-fpga-pic", "gef,fpga-pic-1.00";
reg = <0x4 0x4000 0x20>;
-   interrupts = <0x8
- 0x9>;
-   interrupt-parent = <&mpic>;
-
+   interrupts = <0x8 0x9 0 0>;
};
gef_gpio: gpio@7,14000 {
#gpio-cells = <2>;
@@ -163,37 +117,10 @@
};
};
 
-   soc@fef0 {
-   #address-cells = <1>;
-   #size-cells = <1>;
-   #interrupt-cells = <2>;
-   device_type = "soc";
-   compatible = 

[RFC PATCH v1 1/2] powerpc/86xx: Move dts files to fsl directory

2016-02-25 Thread Alessio Igor Bogani
Signed-off-by: Alessio Igor Bogani 
---
 arch/powerpc/boot/dts/{ => fsl}/gef_ppc9a.dts| 0
 arch/powerpc/boot/dts/{ => fsl}/gef_sbc310.dts   | 0
 arch/powerpc/boot/dts/{ => fsl}/gef_sbc610.dts   | 0
 arch/powerpc/boot/dts/{ => fsl}/mpc8641_hpcn.dts | 0
 arch/powerpc/boot/dts/{ => fsl}/mpc8641_hpcn_36b.dts | 0
 arch/powerpc/boot/dts/{ => fsl}/sbc8641d.dts | 0
 6 files changed, 0 insertions(+), 0 deletions(-)
 rename arch/powerpc/boot/dts/{ => fsl}/gef_ppc9a.dts (100%)
 rename arch/powerpc/boot/dts/{ => fsl}/gef_sbc310.dts (100%)
 rename arch/powerpc/boot/dts/{ => fsl}/gef_sbc610.dts (100%)
 rename arch/powerpc/boot/dts/{ => fsl}/mpc8641_hpcn.dts (100%)
 rename arch/powerpc/boot/dts/{ => fsl}/mpc8641_hpcn_36b.dts (100%)
 rename arch/powerpc/boot/dts/{ => fsl}/sbc8641d.dts (100%)

diff --git a/arch/powerpc/boot/dts/gef_ppc9a.dts 
b/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts
similarity index 100%
rename from arch/powerpc/boot/dts/gef_ppc9a.dts
rename to arch/powerpc/boot/dts/fsl/gef_ppc9a.dts
diff --git a/arch/powerpc/boot/dts/gef_sbc310.dts 
b/arch/powerpc/boot/dts/fsl/gef_sbc310.dts
similarity index 100%
rename from arch/powerpc/boot/dts/gef_sbc310.dts
rename to arch/powerpc/boot/dts/fsl/gef_sbc310.dts
diff --git a/arch/powerpc/boot/dts/gef_sbc610.dts 
b/arch/powerpc/boot/dts/fsl/gef_sbc610.dts
similarity index 100%
rename from arch/powerpc/boot/dts/gef_sbc610.dts
rename to arch/powerpc/boot/dts/fsl/gef_sbc610.dts
diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts 
b/arch/powerpc/boot/dts/fsl/mpc8641_hpcn.dts
similarity index 100%
rename from arch/powerpc/boot/dts/mpc8641_hpcn.dts
rename to arch/powerpc/boot/dts/fsl/mpc8641_hpcn.dts
diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn_36b.dts 
b/arch/powerpc/boot/dts/fsl/mpc8641_hpcn_36b.dts
similarity index 100%
rename from arch/powerpc/boot/dts/mpc8641_hpcn_36b.dts
rename to arch/powerpc/boot/dts/fsl/mpc8641_hpcn_36b.dts
diff --git a/arch/powerpc/boot/dts/sbc8641d.dts 
b/arch/powerpc/boot/dts/fsl/sbc8641d.dts
similarity index 100%
rename from arch/powerpc/boot/dts/sbc8641d.dts
rename to arch/powerpc/boot/dts/fsl/sbc8641d.dts
-- 
2.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 03/12] powerpc/module: Create a special stub for ftrace_caller()

2016-02-25 Thread Michael Ellerman
On Thu, 2016-02-25 at 11:08 +1100, Balbir Singh wrote:
> 
> On 25/02/16 01:28, Michael Ellerman wrote:
> > In order to support the new -mprofile-kernel ABI, we need to be able to
> > call from the module back to ftrace_caller() (in the kernel) without
> > using the module's r2. That is because the function in this module which
> > is calling ftrace_caller() may not have setup r2, if it doesn't
> > otherwise need it (ie. it accesses no globals).
> > 
> > To make that work we add a new stub which is used for calling
> > ftrace_caller(), which uses the kernel toc instead of the module toc.
> > 
> > diff --git a/arch/powerpc/kernel/module_64.c 
> > b/arch/powerpc/kernel/module_64.c
> > index 9629966e614b..e711d40a3b8f 100644
> > --- a/arch/powerpc/kernel/module_64.c
> > +++ b/arch/powerpc/kernel/module_64.c
> > @@ -671,10 +671,56 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
> >  }
> >  
> >  #ifdef CONFIG_DYNAMIC_FTRACE
> > +
> > +#define PACATOC offsetof(struct paca_struct, kernel_toc)
> > +
> > +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct 
> > module *me)
> > +{
> > +   struct ppc64_stub_entry *entry;
> > +   unsigned int i, num_stubs;

> How about some comments on r2
> r2 is still pointing to the module's toc, will be saved by ftrace_caller and
> restored by the instruction following bl ftrace_caller (after patching
> _mcount/nop)

Yeah I'll add some commentary.

I think the change log describes it fairly well but a comment is also good.

cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 05/12] powerpc/ftrace: ftrace_graph_caller() needs to save/restore toc

2016-02-25 Thread Michael Ellerman
On Thu, 2016-02-25 at 11:30 +1100, Balbir Singh wrote:
> 
> On 25/02/16 01:28, Michael Ellerman wrote:
> > Signed-off-by: Michael Ellerman 
> > 
> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > index 2a7313cfbc7d..9e77a2c8f218 100644
> > --- a/arch/powerpc/kernel/entry_64.S
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -1237,6 +1237,11 @@ _GLOBAL(ftrace_graph_caller)
> > std r5, 64(r1)
> > std r4, 56(r1)
> > std r3, 48(r1)
> > +
> > +   /* Save callee's TOC in the ABI compliant location */
> > +   std r2, 24(r1)

> R2_STACK_OFFSET for readability?

Hmm, maybe. Personally when I see "24(r1)" what my brain reads is "stack TOC
save slot", but maybe I've been spending too much time with powerpc assembly.

R2_STACK_OFFSET is actually new, pulled out from the module code by Torsten.
Other code uses STK_GOT to mean the same thing. I don't really like either
name, so I'll probably leave do a clean up once this is in.

cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 04/12] powerpc/ftrace: Prepare for -mprofile-kernel

2016-02-25 Thread Michael Ellerman
On Thu, 2016-02-25 at 11:28 +1100, Balbir Singh wrote:
> On 25/02/16 01:28, Michael Ellerman wrote:
> > @@ -300,8 +298,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned 
> > long addr)
> >  * The load offset is different depending on the ABI. For simplicity
> >  * just mask it out when doing the compare.
> >  */
> > -   if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
> > -   pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
> > +   if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000))
> > +   return 0;
> > +   return 1;
> > +}
> > +#else
> > +static int
> > +expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
> > +{
> > +   /* look for patched "NOP" on ppc64 with -mprofile-kernel */
> > +   if (op0 != PPC_INST_NOP)
> > +   return 0;
> > +   return 1;

> With the magic changes, do we care for this? I think it's a bit of an overkill

I don't particularly like it either. However this code doesn't actually use the
magic, it's the reverse case of turning a nop into a call to the stub. So the
magic in the stub doesn't actually make that any safer.

I think we do at least want to check there's a nop there. But without
mprofile-kernel it's not a nop, so we need some check and it does need to be
different between the profiling ABIs. So I think for now this is the
conservative approach.

cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 06/12] powerpc/module: Rework is_early_mcount_callsite()

2016-02-25 Thread Michael Ellerman
On Thu, 2016-02-25 at 10:39 +1100, Balbir Singh wrote:
> On 25/02/16 01:28, Michael Ellerman wrote:
> > is_early_mcount_callsite() needs to detect either the two instruction or
> > the three instruction versions of the _mcount() sequence.
> > 
> > But if we're running a kernel with the two instruction sequence, we need
> > to be careful not to read instruction - 2, otherwise we might fall off
> > the front of a page and cause an oops.
> > 
> > While we're here convert to bool to make the return semantics clear.
> > 
> > Signed-off-by: Michael Ellerman 
> > 
> Do we even need to do this anymore?

Yes. Otherwise the code in apply_relocate_add() will see a far call with no nop
slot after it to do the toc restore, and it considers that a bug (which it
usually is, except mcount is special).

As we discussed today I'm hoping we can clean this code up a bit more in the
medium term, but this works for now.

cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev