Re: [PATCH 3/9] powerpc/mm/book3s-64: Use physical addresses in upper page table tree levels

2016-02-21 Thread Aneesh Kumar K.V
Paul Mackerras  writes:

> From: Paul Mackerras 
>
> This changes the Linux page tables to store physical addresses
> rather than kernel virtual addresses in the upper levels of the
> tree (pgd, pud and pmd) for 64-bit Book 3S machines.
>
> This frees up some high order bits, and will be needed with
> PowerISA v3.0 machines which read the page table tree in hardware
> in radix mode.
>

Radix marks the top two bits at the upper levels of the page table tree.

ie,


static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
pud_set(pud, __pgtable_ptr_val(pmd));
}

static inline void rpud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
*pud = __pud(__pa(pmd) | RPUD_VAL_BITS);
}


I guess we will do the same with hash to keep them the same?

-aneesh

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/pagetable: Add option to dump kernel pagetable

2016-02-21 Thread Rashmica Gupta
Useful to be able to dump the kernel page tables to check permissions and
memory types - derived from arm64's implementation.

Add a debugfs file to check the page tables. To use this the PPC_PTDUMP
config option must be selected.

Tested on 64BE and 64LE with both 4K and 64K page sizes.
---
 arch/powerpc/Kconfig.debug |  12 ++
 arch/powerpc/mm/Makefile   |   1 +
 arch/powerpc/mm/dump.c | 364 +
 3 files changed, 377 insertions(+)
 create mode 100644 arch/powerpc/mm/dump.c

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 638f9ce740f5..e4883880abe3 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -344,4 +344,16 @@ config FAIL_IOMMU
 
  If you are unsure, say N.
 
+config PPC_PTDUMP
+bool "Export kernel pagetable layout to userspace via debugfs"
+depends on DEBUG_KERNEL
+select DEBUG_FS
+help
+  This options dumps the state of the kernel pagetables in a debugfs
+  file. This is only useful for kernel developers who are working in
+  architecture specific areas of the kernel - probably not a good idea 
to
+  enable this feature in a production kernel.
+
+  If you are unsure, say N.
+
 endmenu
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 1ffeda85c086..16f84bdd7597 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -40,3 +40,4 @@ obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_HIGHMEM)  += highmem.o
 obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
 obj-$(CONFIG_SPAPR_TCE_IOMMU)  += mmu_context_iommu.o
+obj-$(CONFIG_PPC_PTDUMP)   += dump.o
diff --git a/arch/powerpc/mm/dump.c b/arch/powerpc/mm/dump.c
new file mode 100644
index ..937b10fc40cc
--- /dev/null
+++ b/arch/powerpc/mm/dump.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ * 
+ * Debug helper to dump the current kernel pagetables of the system
+ * so that we can see what the various memory ranges are set to.
+ * 
+ * Derived from the arm64 implementation:
+ * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
+ * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PUD_TYPE_MASK   (_AT(u64, 3) << 0)
+#define PUD_TYPE_SECT   (_AT(u64, 1) << 0)
+#define PMD_TYPE_MASK   (_AT(u64, 3) << 0)
+#define PMD_TYPE_SECT   (_AT(u64, 1) << 0)
+
+ 
+#if CONFIG_PGTABLE_LEVELS == 2
+#include 
+#elif CONFIG_PGTABLE_LEVELS == 3
+#include 
+#endif
+ 
+#define pmd_sect(pmd)  ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_SECT)
+#ifdef CONFIG_PPC_64K_PAGES
+#define pud_sect(pud)   (0)
+#else
+#define pud_sect(pud)   ((pud_val(pud) & PUD_TYPE_MASK) == \
+   PUD_TYPE_SECT)
+#endif
+
+
+struct addr_marker {
+   unsigned long start_address;
+   const char *name;
+};
+
+enum address_markers_idx {
+   VMALLOC_START_NR = 0,
+   VMALLOC_END_NR,
+   ISA_IO_START_NR,
+   ISA_IO_END_NR,
+   PHB_IO_START_NR,
+   PHB_IO_END_NR,
+   IOREMAP_START_NR,
+   IOREMP_END_NR,
+};
+
+static struct addr_marker address_markers[] = {
+   { VMALLOC_START,"vmalloc() Area" },
+   { VMALLOC_END,  "vmalloc() End" },
+   { ISA_IO_BASE,  "isa I/O start" },
+   { ISA_IO_END,   "isa I/O end" },
+   { PHB_IO_BASE,  "phb I/O start" },
+   { PHB_IO_END,   "phb I/O end" },
+   { IOREMAP_BASE, "I/O remap start" },
+   { IOREMAP_END,  "I/O remap end" },
+   { -1,   NULL },
+};
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct pg_state {
+   struct seq_file *seq;
+   const struct addr_marker *marker;
+   unsigned long start_address;
+   unsigned level;
+   u64 current_prot;
+};
+
+struct prot_bits {
+   u64 mask;
+   u64 val;
+   const char  *set;
+   const char  *clear;
+};
+
+static const struct prot_bits pte_bits[] = {
+   {
+   .mask   = _PAGE_USER,
+   .val= _PAGE_USER,
+   .set= "user",
+   .clear  = "",
+   }, {
+   .mask   = _PAGE_RW,
+   .val= _PAGE_RW,
+   .set= "rw",
+   .clear  = "ro",
+   }, {
+   

[PATCH 1/1] powerpc: Detect broken or mismatched toolchains

2016-02-21 Thread Sam Bobroff
It can currently be difficult to diagnose a build that fails due to
the compiler, linker or other parts of the toolchain being unable to
build binaries of the type required by the kernel config. For example
using a little endian toolchain to build a big endian kernel may
produce:

as: unrecognized option '-maltivec'

This patch adds a basic compile test and error message to
arch/powerpc/Makefile so that the above error becomes:

*** Sorry, your toolchain seems to be broken or incorrect. ***
Make sure it supports your kernel configuration (ppc64).

Signed-off-by: Sam Bobroff 
---

 arch/powerpc/Makefile | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 96efd82..0041cd2 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -355,6 +355,13 @@ TOUT   := .tmp_gas_check
 # - Require gcc 4.0 or above on 64-bit
 # - gcc-4.2.0 has issues compiling modules on 64-bit
 checkbin:
+   @if test "$(call try-run,echo 'int _start(void) { return 0; }' > 
\"$$TMP\"; \
+   $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -x c -nostdlib \"$$TMP\" \
+   -o /dev/null,ok,broken)" = "broken" ; then \
+   echo "*** Sorry, your toolchain seems to be broken or 
incorrect. ***" ; \
+   echo "Make sure it supports your kernel configuration 
($(UTS_MACHINE))." ; \
+   false; \
+   fi
@if test "$(cc-name)" != "clang" \
&& test "$(cc-version)" = "0304" ; then \
if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) 
>/dev/null 2>&1 ; then \
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 3/9] powerpc/mm/book3s-64: Use physical addresses in upper page table tree levels

2016-02-21 Thread Aneesh Kumar K.V
Paul Mackerras  writes:

> From: Paul Mackerras 
>
> This changes the Linux page tables to store physical addresses
> rather than kernel virtual addresses in the upper levels of the
> tree (pgd, pud and pmd) for 64-bit Book 3S machines.
>
> This frees up some high order bits, and will be needed with
> PowerISA v3.0 machines which read the page table tree in hardware
> in radix mode.

How about the hugepd pointer with 4k Linux page size?

-aneesh

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 9/9] powerpc/mm/book3s-64: Expand the real page number field of the Linux PTE

2016-02-21 Thread Paul Mackerras
From: Paul Mackerras 

Now that other PTE fields have been moved out of the way, we can
expand the RPN field of the PTE on 64-bit Book 3S systems and align
it with the RPN field in the radix PTE format used by PowerISA v3.0
CPUs in radix mode.  For 64k page size, this means we need to move
the _PAGE_COMBO and _PAGE_4K_PFN bits.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  4 ++--
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 12 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h 
b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 0425d3e..7f60f7e 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -52,8 +52,8 @@
 _PAGE_F_SECOND | _PAGE_F_GIX)
 
 /* shift to put page number into pte */
-#define PTE_RPN_SHIFT  (18)
-#define PTE_RPN_SIZE   (39)/* gives 51-bit real addresses */
+#define PTE_RPN_SHIFT  (12)
+#define PTE_RPN_SIZE   (45)/* gives 57-bit real addresses */
 
 #define _PAGE_4K_PFN   0
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index ed390e1..8bb0325 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -25,8 +25,8 @@
 #define PGDIR_SIZE (1UL << PGDIR_SHIFT)
 #define PGDIR_MASK (~(PGDIR_SIZE-1))
 
-#define _PAGE_COMBO0x0004 /* this is a combo 4k page */
-#define _PAGE_4K_PFN   0x0008 /* PFN is for a single 4k page */
+#define _PAGE_COMBO0x1000 /* this is a combo 4k page */
+#define _PAGE_4K_PFN   0x2000 /* PFN is for a single 4k page */
 /*
  * Used to track subpage group valid if _PAGE_COMBO is set
  * This overloads _PAGE_F_GIX and _PAGE_F_SECOND
@@ -39,11 +39,11 @@
 
 /* Shift to put page number into pte.
  *
- * That gives us a max RPN of 37 bits, which means a max of 53 bits
- * of addressable physical space, or 49 bits for the special 4k PFNs.
+ * That gives us a max RPN of 41 bits, which means a max of 57 bits
+ * of addressable physical space, or 53 bits for the special 4k PFNs.
  */
-#define PTE_RPN_SHIFT  (20)
-#define PTE_RPN_SIZE   (37)
+#define PTE_RPN_SHIFT  (16)
+#define PTE_RPN_SIZE   (41)
 
 /*
  * we support 16 fragments per PTE page of 64K size.
-- 
2.6.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 8/9] powerpc/mm/book3s-64: Move software-used bits in PTE

2016-02-21 Thread Paul Mackerras
From: Paul Mackerras 

This moves the _PAGE_SPECIAL and _PAGE_SOFT_DIRTY bits in the Linux
PTE on 64-bit Book 3S systems to bit positions which are designated
for software use in the radix PTE format used by PowerISA v3.0 CPUs
in radix mode.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/book3s/64/hash.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index cd4bf95..ef9bd68 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -26,13 +26,13 @@
 #define _PAGE_WRITETHRU0x00040 /* W: cache write-through */
 #define _PAGE_DIRTY0x00080 /* C: page changed */
 #define _PAGE_ACCESSED 0x00100 /* R: page referenced */
+#define _PAGE_SPECIAL  0x00400 /* software: special page */
 #define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
-#define _PAGE_SPECIAL  0x1 /* software: special page */
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY   0x2 /* software: software dirty tracking */
+#define _PAGE_SOFT_DIRTY   0x200 /* software: software dirty tracking */
 #else
-#define _PAGE_SOFT_DIRTY   0x0
+#define _PAGE_SOFT_DIRTY   0x000
 #endif
 
 #define _PAGE_F_GIX_SHIFT  57
-- 
2.6.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 7/9] powerpc/mm/book3s-64: Shuffle read, write, execute and user bits in PTE

2016-02-21 Thread Paul Mackerras
From: Paul Mackerras 

This moves the _PAGE_EXEC, _PAGE_RW and _PAGE_USER bits around in
the Linux PTE on 64-bit Book 3S systems to correspond with the bit
positions used in radix mode by PowerISA v3.0 CPUs.  This also adds
a _PAGE_READ bit corresponding to the read permission bit in the
radix PTE.  _PAGE_READ is currently unused but could possibly be used
in future to improve pte_protnone().

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/book3s/64/hash.h | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index a59cfae..cd4bf95 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -13,9 +13,12 @@
  * We could create separate kernel read-only if we used the 3 PP bits
  * combinations that newer processors provide but we currently don't.
  */
-#define _PAGE_BIT_SWAP_TYPE2
-#define _PAGE_USER 0x4 /* page may be accessed by userspace */
-#define _PAGE_EXEC 0x8 /* execute permission */
+#define _PAGE_BIT_SWAP_TYPE0
+
+#define _PAGE_EXEC 0x1 /* execute permission */
+#define _PAGE_RW   0x2 /* read & write access allowed */
+#define _PAGE_READ 0x4 /* read access allowed */
+#define _PAGE_USER 0x8 /* page may be accessed by userspace */
 #define _PAGE_GUARDED  0x00010 /* G: guarded (side-effect) page */
 /* M (memory coherence) is always set in the HPTE, so we don't need it here */
 #define _PAGE_COHERENT 0x0
@@ -23,7 +26,6 @@
 #define _PAGE_WRITETHRU0x00040 /* W: cache write-through */
 #define _PAGE_DIRTY0x00080 /* C: page changed */
 #define _PAGE_ACCESSED 0x00100 /* R: page referenced */
-#define _PAGE_RW   0x00200 /* software: user write access allowed 
*/
 #define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
 #define _PAGE_SPECIAL  0x1 /* software: special page */
 
-- 
2.6.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 6/9] powerpc/mm/book3s-64: Move HPTE-related bits in PTE to upper end

2016-02-21 Thread Paul Mackerras
From: Paul Mackerras 

This moves the _PAGE_HASHPTE, _PAGE_F_GIX and _PAGE_F_SECOND fields in
the Linux PTE on 64-bit Book 3S systems to the most significant byte.
Of the 5 bits, one is a software-use bit and the other four are
reserved bit positions in the PowerISA v3.0 radix PTE format.
Using these bits is OK because these bits are all to do with tracking
the HPTE(s) associated with the Linux PTE, and therefore won't be
needed in radix mode.  This frees up bit positions in the lower two
bytes.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/book3s/64/hash.h | 8 
 arch/powerpc/mm/hugetlbpage-hash64.c  | 5 +++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 14cfd49..a59cfae 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -24,11 +24,7 @@
 #define _PAGE_DIRTY0x00080 /* C: page changed */
 #define _PAGE_ACCESSED 0x00100 /* R: page referenced */
 #define _PAGE_RW   0x00200 /* software: user write access allowed 
*/
-#define _PAGE_HASHPTE  0x00400 /* software: pte has an associated HPTE 
*/
 #define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
-#define _PAGE_F_GIX0x07000 /* full page: hidx bits */
-#define _PAGE_F_GIX_SHIFT  12
-#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not 
*/
 #define _PAGE_SPECIAL  0x1 /* software: special page */
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
@@ -37,6 +33,10 @@
 #define _PAGE_SOFT_DIRTY   0x0
 #endif
 
+#define _PAGE_F_GIX_SHIFT  57
+#define _PAGE_F_GIX(7ul << 57) /* HPTE index within HPTEG */
+#define _PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */
+#define _PAGE_HASHPTE  (1ul << 61) /* PTE has associated HPTE */
 #define _PAGE_PTE  (1ul << 62) /* distinguishes PTEs from 
pointers */
 #define _PAGE_PRESENT  (1ul << 63) /* pte contains a translation */
 
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c 
b/arch/powerpc/mm/hugetlbpage-hash64.c
index e2138c7..8555fce 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -76,7 +76,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, 
unsigned long vsid,
if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-   slot += (old_pte & _PAGE_F_GIX) >> 12;
+   slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
 
if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
 mmu_psize, ssize, flags) == -1)
@@ -105,7 +105,8 @@ int __hash_page_huge(unsigned long ea, unsigned long 
access, unsigned long vsid,
return -1;
}
 
-   new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+   new_pte |= (slot << _PAGE_F_GIX_SHIFT) &
+   (_PAGE_F_SECOND | _PAGE_F_GIX);
}
 
/*
-- 
2.6.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 5/9] powerpc/mm/book3s-64: Move _PAGE_PTE to 2nd most significant bit

2016-02-21 Thread Paul Mackerras
From: Paul Mackerras 

This changes _PAGE_PTE for 64-bit Book 3S processors from 0x1 to
0x4000_0000_0000_0000, because that bit is used as the L (leaf)
bit by PowerISA v3.0 CPUs in radix mode.  The "leaf" bit indicates
that the PTE points to a page directly rather than another radix
level, which is what the _PAGE_PTE bit means.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/book3s/64/hash.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 36ff107..14cfd49 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -13,7 +13,6 @@
  * We could create separate kernel read-only if we used the 3 PP bits
  * combinations that newer processors provide but we currently don't.
  */
-#define _PAGE_PTE  0x1 /* distinguishes PTEs from pointers */
 #define _PAGE_BIT_SWAP_TYPE2
 #define _PAGE_USER 0x4 /* page may be accessed by userspace */
 #define _PAGE_EXEC 0x8 /* execute permission */
@@ -38,6 +37,7 @@
 #define _PAGE_SOFT_DIRTY   0x0
 #endif
 
+#define _PAGE_PTE  (1ul << 62) /* distinguishes PTEs from 
pointers */
 #define _PAGE_PRESENT  (1ul << 63) /* pte contains a translation */
 
 /*
-- 
2.6.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 4/9] powerpc/mm/book3s-64: Move _PAGE_PRESENT to the most significant bit

2016-02-21 Thread Paul Mackerras
From: Paul Mackerras 

This changes _PAGE_PRESENT for 64-bit Book 3S processors from 0x2 to
0x8000_0000_0000_0000, because that is where PowerISA v3.0 CPUs in
radix mode will expect to find it.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 10 +-
 arch/powerpc/include/asm/book3s/64/hash.h |  5 +++--
 arch/powerpc/mm/mmu_decl.h|  3 ++-
 arch/powerpc/mm/pgtable_64.c  |  2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index a8c4c2a..ed390e1 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -210,30 +210,30 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp)
 /*
  * The linux hugepage PMD now include the pmd entries followed by the address
  * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per
  * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
  * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
  *
- * The last three bits are intentionally left to zero. This memory location
+ * The top three bits are intentionally left as zero. This memory location
  * are also used as normal page PTE pointers. So if we have any pointers
  * left around while we collapse a hugepage, we need to make sure
  * _PAGE_PRESENT bit of that is zero when we look at them
  */
 static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int 
index)
 {
-   return (hpte_slot_array[index] >> 3) & 0x1;
+   return hpte_slot_array[index] & 0x1;
 }
 
 static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
   int index)
 {
-   return hpte_slot_array[index] >> 4;
+   return hpte_slot_array[index] >> 1;
 }
 
 static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
unsigned int index, unsigned int hidx)
 {
-   hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+   hpte_slot_array[index] = (hidx << 1) | 0x1;
 }
 
 /*
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 5b8ba60..36ff107 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -14,7 +14,6 @@
  * combinations that newer processors provide but we currently don't.
  */
 #define _PAGE_PTE  0x1 /* distinguishes PTEs from pointers */
-#define _PAGE_PRESENT  0x2 /* software: pte contains a translation 
*/
 #define _PAGE_BIT_SWAP_TYPE2
 #define _PAGE_USER 0x4 /* page may be accessed by userspace */
 #define _PAGE_EXEC 0x8 /* execute permission */
@@ -39,6 +38,8 @@
 #define _PAGE_SOFT_DIRTY   0x0
 #endif
 
+#define _PAGE_PRESENT  (1ul << 63) /* pte contains a translation */
+
 /*
  * We need to differentiate between explicit huge page and THP huge
  * page, since THP huge page also need to track real subpage details
@@ -402,7 +403,7 @@ static inline int pte_protnone(pte_t pte)
 
 static inline int pte_present(pte_t pte)
 {
-   return pte_val(pte) & _PAGE_PRESENT;
+   return !!(pte_val(pte) & _PAGE_PRESENT);
 }
 
 /* Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9f58ff4..898d633 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -110,7 +110,8 @@ extern unsigned long Hash_size, Hash_mask;
 #endif /* CONFIG_PPC32 */
 
 #ifdef CONFIG_PPC64
-extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags);
+extern int map_kernel_page(unsigned long ea, unsigned long pa,
+  unsigned long flags);
 #endif /* CONFIG_PPC64 */
 
 extern unsigned long ioremap_bot;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index a1bbdfd..af304e6 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -88,7 +88,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
  * map_kernel_page adds an entry to the ioremap page table
  * and adds an entry to the HPT, possibly bolting it
  */
-int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
+int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
 {
pgd_t *pgdp;
pud_t *pudp;
-- 
2.6.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/9] powerpc/mm/book3s-64: Use physical addresses in upper page table tree levels

2016-02-21 Thread Paul Mackerras
From: Paul Mackerras 

This changes the Linux page tables to store physical addresses
rather than kernel virtual addresses in the upper levels of the
tree (pgd, pud and pmd) for 64-bit Book 3S machines.

This frees up some high order bits, and will be needed with
PowerISA v3.0 machines which read the page table tree in hardware
in radix mode.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h |  2 +-
 arch/powerpc/include/asm/book3s/64/hash.h| 13 +++--
 arch/powerpc/include/asm/nohash/64/pgtable.h |  3 +++
 arch/powerpc/include/asm/pgalloc-64.h| 16 
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h 
b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index bee3643..0425d3e 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -64,7 +64,7 @@
 #define pgd_none(pgd)  (!pgd_val(pgd))
 #define pgd_bad(pgd)   (pgd_val(pgd) == 0)
 #define pgd_present(pgd)   (pgd_val(pgd) != 0)
-#define pgd_page_vaddr(pgd)(pgd_val(pgd) & ~PGD_MASKED_BITS)
+#define pgd_page_vaddr(pgd)__va(pgd_val(pgd) & ~PGD_MASKED_BITS)
 
 static inline void pgd_clear(pgd_t *pgdp)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 64eff40..5b8ba60 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -222,13 +222,14 @@
 #define PUD_BAD_BITS   (PMD_TABLE_SIZE-1)
 
 #ifndef __ASSEMBLY__
-#definepmd_bad(pmd)(!is_kernel_addr(pmd_val(pmd)) \
-|| (pmd_val(pmd) & PMD_BAD_BITS))
-#define pmd_page_vaddr(pmd)(pmd_val(pmd) & ~PMD_MASKED_BITS)
+#definepmd_bad(pmd)(pmd_val(pmd) & PMD_BAD_BITS)
+#define pmd_page_vaddr(pmd)__va(pmd_val(pmd) & ~PMD_MASKED_BITS)
 
-#definepud_bad(pud)(!is_kernel_addr(pud_val(pud)) \
-|| (pud_val(pud) & PUD_BAD_BITS))
-#define pud_page_vaddr(pud)(pud_val(pud) & ~PUD_MASKED_BITS)
+#definepud_bad(pud)(pud_val(pud) & PUD_BAD_BITS)
+#define pud_page_vaddr(pud)__va(pud_val(pud) & ~PUD_MASKED_BITS)
+
+/* Pointers in the page table tree are physical addresses */
+#define __pgtable_ptr_val(ptr) __pa(ptr)
 
 #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
 #define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index b9f734d..10debb9 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -108,6 +108,9 @@
 #ifndef __ASSEMBLY__
 /* pte_clear moved to later in this file */
 
+/* Pointers in the page table tree are virtual addresses */
+#define __pgtable_ptr_val(ptr) ((unsigned long)(ptr))
+
 #define PMD_BAD_BITS   (PTE_TABLE_SIZE-1)
 #define PUD_BAD_BITS   (PMD_TABLE_SIZE-1)
 
diff --git a/arch/powerpc/include/asm/pgalloc-64.h 
b/arch/powerpc/include/asm/pgalloc-64.h
index 69ef28a..7ac59a3 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -53,7 +53,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #ifndef CONFIG_PPC_64K_PAGES
 
-#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD)
+#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, __pgtable_ptr_val(PUD))
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -68,19 +68,19 @@ static inline void pud_free(struct mm_struct *mm, pud_t 
*pud)
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-   pud_set(pud, (unsigned long)pmd);
+   pud_set(pud, __pgtable_ptr_val(pmd));
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
   pte_t *pte)
 {
-   pmd_set(pmd, (unsigned long)pte);
+   pmd_set(pmd, __pgtable_ptr_val(pte));
 }
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
 {
-   pmd_set(pmd, (unsigned long)page_address(pte_page));
+   pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page)));
 }
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
@@ -171,23 +171,23 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void 
*table, int shift);
 extern void __tlb_remove_table(void *_table);
 #endif
 
-#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
+#define pud_populate(mm, pud, pmd) pud_set(pud, __pgtable_ptr_val(pmd))
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
   pte_t *pte)
 {
-   pmd_set(pmd, (unsigned long)pte);
+   pmd_set(pmd, __pgtable_ptr_val(pte));
 }
 
 static inline void pmd_populate(stru

[PATCH 2/9] powerpc/mm/book3s-64: Free up 7 high-order bits in the Linux PTE

2016-02-21 Thread Paul Mackerras
From: Paul Mackerras 

This frees up bits 57-63 in the Linux PTE on 64-bit Book 3S machines.
In the 4k page case, this is done just by reducing the size of the
RPN field to 39 bits, giving 51-bit real addresses.  In the 64k page
case, we had 10 unused bits in the middle of the PTE, so this moves
the RPN field down 10 bits to make use of those unused bits.  This
means the RPN field is now 3 bits larger at 37 bits, giving 53-bit
real addresses in the normal case, or 49-bit real addresses for the
special 4k PFN case.

We are doing this in order to be able to move some other PTE bits
into the positions where PowerISA V3.0 processors will expect to
find them in radix-tree mode.  Ultimately we will be able to move
the RPN field to lower bit positions and make it larger.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  1 +
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 10 ++
 arch/powerpc/include/asm/book3s/64/hash.h |  6 +++---
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  6 +++---
 arch/powerpc/mm/pgtable_64.c  |  2 +-
 5 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h 
b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index ea0414d..bee3643 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -53,6 +53,7 @@
 
 /* shift to put page number into pte */
 #define PTE_RPN_SHIFT  (18)
+#define PTE_RPN_SIZE   (39)/* gives 51-bit real addresses */
 
 #define _PAGE_4K_PFN   0
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h 
b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 849bbec..a8c4c2a 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -39,10 +39,12 @@
 
 /* Shift to put page number into pte.
  *
- * That gives us a max RPN of 34 bits, which means a max of 50 bits
- * of addressable physical space, or 46 bits for the special 4k PFNs.
+ * That gives us a max RPN of 37 bits, which means a max of 53 bits
+ * of addressable physical space, or 49 bits for the special 4k PFNs.
  */
-#define PTE_RPN_SHIFT  (30)
+#define PTE_RPN_SHIFT  (20)
+#define PTE_RPN_SIZE   (37)
+
 /*
  * we support 16 fragments per PTE page of 64K size.
  */
@@ -120,7 +122,7 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long 
index);
(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
 
 #define remap_4k_pfn(vma, addr, pfn, prot) \
-   (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT ? -EINVAL :  \
+   (WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL :  \
remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,\
__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
 
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 9a0a4ef..64eff40 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -131,7 +131,7 @@
  * The mask convered by the RPN must be a ULL on 32-bit platforms with
  * 64-bit PTEs
  */
-#define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
+#define PTE_RPN_MASK   (((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT)
 /*
  * _PAGE_CHG_MASK masks of bits that are to be preserved across
  * pgprot changes
@@ -412,13 +412,13 @@ static inline int pte_present(pte_t pte)
  */
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
 {
-   return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+   return __pte((((pte_basic_t)(pfn) << PTE_RPN_SHIFT) & PTE_RPN_MASK) |
 pgprot_val(pgprot));
 }
 
 static inline unsigned long pte_pfn(pte_t pte)
 {
-   return pte_val(pte) >> PTE_RPN_SHIFT;
+   return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
 }
 
 /* Generic modifiers for PTE bits */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index ac07a30..c8240b7 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -154,10 +154,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 #define SWP_TYPE_BITS 5
 #define __swp_type(x)  (((x).val >> _PAGE_BIT_SWAP_TYPE) \
& ((1UL << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x)((x).val >> PTE_RPN_SHIFT)
+#define __swp_offset(x)(((x).val & PTE_RPN_MASK) >> 
PTE_RPN_SHIFT)
 #define __swp_entry(type, offset)  ((swp_entry_t) { \
-   ((type) << _PAGE_BIT_SWAP_TYPE) \
-   | ((offset) << PTE_RPN_SHIFT) })
+   ((type) << _PAGE_BIT_SWAP_TYPE) \
+   | (((offset) << PTE_RPN_SHIFT) & PTE_RPN_MASK)})
 /*
 * swp_entry_t must be independent of pte bits. We build a swp_entry_t from

[PATCH 0/9] powerpc/mm: Restructure Linux PTE on Book3S/64 to radix format

2016-02-21 Thread Paul Mackerras
This patch series modifies the Linux PTE format used on 64-bit Book3S
processors (i.e. POWER server processors) to make the bits line up
with the PTE format used in the radix trees defined in PowerISA v3.0.
This will reduce the amount of further change required to make a
kernel that can run with either a radix MMU or a hashed page table
(HPT) MMU.

This also changes the upper levels of the tree to use real addresses
rather than kernel virtual addresses - that is, we no longer have the
0xc000... at the top of each PGD/PUD/PMD entry.  Unlike the previous
version of these patches, these patches do not change the format for
the 64-bit embedded processors, only for 64-bit server processors.

The patch series is against v4.5-rc5.

I have compiled this for all the defconfigs in the tree, without
error.  I have tested this, with Aneesh's "powerpc/mm/hash: Clear the
invalid slot information correctly" patch on top, both running
bare-metal on a POWER8 and in a KVM guest on that POWER8 system.  In
the guest I tested both 4k and 64k configs, with THP enabled; in the
host I tested with 64k page size and THP enabled.  All these tests ran
fine, including running a KVM guest on the bare-metal system.  So far
I have done kernel compiles in a loop as the test, but I plan to run
LTP and possibly some other tests.

Paul.

 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  5 ++-
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 24 +--
 arch/powerpc/include/asm/book3s/64/hash.h | 57 ++-
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  6 +--
 arch/powerpc/include/asm/nohash/64/pgtable.h  |  3 ++
 arch/powerpc/include/asm/pgalloc-64.h | 16 
 arch/powerpc/mm/hash64_64k.c  |  3 +-
 arch/powerpc/mm/hash_utils_64.c   | 10 ++---
 arch/powerpc/mm/hugetlbpage-hash64.c  |  5 ++-
 arch/powerpc/mm/mmu_decl.h|  3 +-
 arch/powerpc/mm/pgtable_64.c  |  4 +-
 11 files changed, 73 insertions(+), 63 deletions(-)
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/9] powerpc/mm/book3s-64: Clean up some obsolete or misleading comments

2016-02-21 Thread Paul Mackerras
From: Paul Mackerras 

No code changes.

Signed-off-by: Paul Mackerras 
---
 arch/powerpc/include/asm/book3s/64/hash.h | 13 ++---
 arch/powerpc/mm/hash64_64k.c  |  3 +--
 arch/powerpc/mm/hash_utils_64.c   | 10 +-
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index 8d1c816..9a0a4ef 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -4,8 +4,7 @@
 
 /*
  * Common bits between 4K and 64K pages in a linux-style PTE.
- * These match the bits in the (hardware-defined) PowerPC PTE as closely
- * as possible. Additional bits may be defined in pgtable-hash64-*.h
+ * Additional bits may be defined in pgtable-hash64-*.h
  *
  * Note: We only support user read/write permissions. Supervisor always
  * have full read/write to pages above PAGE_OFFSET (pages below that
@@ -14,13 +13,13 @@
  * We could create separate kernel read-only if we used the 3 PP bits
  * combinations that newer processors provide but we currently don't.
  */
-#define _PAGE_PTE  0x1
+#define _PAGE_PTE  0x1 /* distinguishes PTEs from pointers */
 #define _PAGE_PRESENT  0x2 /* software: pte contains a translation 
*/
 #define _PAGE_BIT_SWAP_TYPE2
-#define _PAGE_USER 0x4 /* matches one of the PP bits */
-#define _PAGE_EXEC 0x8 /* No execute on POWER4 and newer (we 
invert) */
-#define _PAGE_GUARDED  0x00010
-/* We can derive Memory coherence from _PAGE_NO_CACHE */
+#define _PAGE_USER 0x4 /* page may be accessed by userspace */
+#define _PAGE_EXEC 0x8 /* execute permission */
+#define _PAGE_GUARDED  0x00010 /* G: guarded (side-effect) page */
+/* M (memory coherence) is always set in the HPTE, so we don't need it here */
 #define _PAGE_COHERENT 0x0
 #define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */
 #define _PAGE_WRITETHRU0x00040 /* W: cache write-through */
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 0762c1e..a026c6db 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -243,8 +243,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
return 0;
/*
 * Try to lock the PTE, add ACCESSED and DIRTY if it was
-* a write access. Since this is 4K insert of 64K page size
-* also add _PAGE_COMBO
+* a write access.
 */
new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
if (access & _PAGE_RW)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ba59d59..47a0bc1 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -168,11 +168,11 @@ unsigned long htab_convert_pte_flags(unsigned long 
pteflags)
rflags |= HPTE_R_N;
/*
 * PP bits:
-* Linux use slb key 0 for kernel and 1 for user.
-* kernel areas are mapped by PP bits 00
-* and and there is no kernel RO (_PAGE_KERNEL_RO).
-* User area mapped by 0x2 and read only use by
-* 0x3.
+* Linux uses slb key 0 for kernel and 1 for user.
+* kernel areas are mapped with PP=00
+* and there is no kernel RO (_PAGE_KERNEL_RO).
+* User area is mapped with PP=0x2 for read/write
+* or PP=0x3 for read-only (including writeable but clean pages).
 */
if (pteflags & _PAGE_USER) {
rflags |= 0x2;
-- 
2.6.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Fwd: [PATCH v4 12/18] cxl: Add guest-specific code

2016-02-21 Thread Manoj Kumar

Christophe, Fred:

Is getting the code checkpatch clean not a requirement for
this component?

total: 458 errors, 995 warnings, 1602 lines checked

NOTE: Whitespace errors detected.
  You may wish to use scripts/cleanpatch or scripts/cleanfile


I am stopping my review at this point.
Will pick it back up after you resubmit.

--
Manoj Kumar


Subject: [PATCH v4 12/18] cxl: Add guest-specific code
Date: Tue, 16 Feb 2016 22:39:05 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

From: Christophe Lombard 

The new of.c file contains code to parse the device tree to find out
about CAPI adapters and AFUs.

guest.c implements the guest-specific callbacks for the backend API.

The process element ID is not known until the context is attached, so
we have to separate the context ID assigned by the cxl driver from the
process element ID visible to the user applications. In bare-metal,
the 2 IDs match.

Co-authored-by: Frederic Barrat 
Signed-off-by: Frederic Barrat 
Signed-off-by: Christophe Lombard 
---
  drivers/misc/cxl/Makefile  |   1 +
  drivers/misc/cxl/api.c |   2 +-
  drivers/misc/cxl/context.c |   6 +-
  drivers/misc/cxl/cxl.h |  37 +-
  drivers/misc/cxl/file.c|   2 +-
  drivers/misc/cxl/guest.c   | 950
+
  drivers/misc/cxl/main.c|  18 +-
  drivers/misc/cxl/of.c  | 513 
  8 files changed, 1519 insertions(+), 10 deletions(-)
  create mode 100644 drivers/misc/cxl/guest.c
  create mode 100644 drivers/misc/cxl/of.c

diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
index be2ac5c..a3d4bef 100644
--- a/drivers/misc/cxl/Makefile
+++ b/drivers/misc/cxl/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_WERROR)+= -Werror
  cxl-y+= main.o file.o irq.o fault.o native.o
  cxl-y+= context.o sysfs.o debugfs.o pci.o trace.o
  cxl-y+= vphb.o api.o
+cxl-y+= guest.o of.o hcalls.o
  obj-$(CONFIG_CXL)+= cxl.o
  obj-$(CONFIG_CXL_BASE)+= base.o

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 31eb842..325f957 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -191,7 +191,7 @@ EXPORT_SYMBOL_GPL(cxl_start_context);

  int cxl_process_element(struct cxl_context *ctx)
  {
-return ctx->pe;
+return ctx->external_pe;
  }
  EXPORT_SYMBOL_GPL(cxl_process_element);

diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 200837f..180c85a 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -95,8 +95,12 @@ int cxl_context_init(struct cxl_context *ctx, struct
cxl_afu *afu, bool master,
  return i;

  ctx->pe = i;
-if (cpu_has_feature(CPU_FTR_HVMODE))
+if (cpu_has_feature(CPU_FTR_HVMODE)) {
  ctx->elem = &ctx->afu->native->spa[i];
+ctx->external_pe = ctx->pe;
+} else {
+ctx->external_pe = -1; /* assigned when attaching */
+}
  ctx->pe_inserted = false;

  /*
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 3a1fabd..4372a87 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -433,6 +433,12 @@ struct cxl_irq_name {
  char *name;
  };

+struct irq_avail {
+irq_hw_number_t offset;
+irq_hw_number_t range;
+unsigned long   *bitmap;
+};
+
  /*
   * This is a cxl context.  If the PSL is in dedicated mode, there will
be one
   * of these per AFU.  If in AFU directed there can be lots of these.
@@ -488,7 +494,19 @@ struct cxl_context {

  struct cxl_process_element *elem;

-int pe; /* process element handle */
+/*
+ * pe is the process element handle, assigned by this driver when the
+ * context is initialized.
+ *
+ * external_pe is the PE shown outside of cxl.
+ * On bare-metal, pe=external_pe, because we decide what the handle
is.
+ * In a guest, we only find out about the pe used by pHyp when the
+ * context is attached, and that's the value we want to report outside
+ * of cxl.
+ */
+int pe;
+int external_pe;
+
  u32 irq_count;
  bool pe_inserted;
  bool master;
@@ -782,6 +800,7 @@ void cxl_pci_vphb_reconfigure(struct cxl_afu *afu);
  void cxl_pci_vphb_remove(struct cxl_afu *afu);

  extern struct pci_driver cxl_pci_driver;
+extern struct platform_driver cxl_of_driver;
  int afu_allocate_irqs(struct cxl_context *ctx, u32 count);

  int afu_open(struct inode *inode, struct file *file);
@@ -792,6 +811,21 @@ unsigned int afu_poll(struct file *file, struct
poll_table_struct *poll);
  ssize_t afu_read(struct file *file, char __user *buf, size_t count,
loff_t *off);
  extern const struct file_operations afu_fops;

+struct cxl *cxl_guest_init_adapter(struct device_node *np, struct
platform_device *dev);
+void cxl_guest_remove_adapter(struct cxl *adapter);
+int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node

Re: Fwd: [PATCH v4 11/18] cxl: Separate bare-metal fields in adapter and AFU data structures

2016-02-21 Thread Manoj Kumar

Christophe, Fred: Perhaps none of these comments below are specific
to your patch, but clarification would help the next reviewer.

--
Manoj Kumar


Subject: [PATCH v4 11/18] cxl: Separate bare-metal fields in adapter and




-WARN_ON(afu->spa_size > 0x10); /* Max size supported by the
hardware */
+WARN_ON(afu->native->spa_size > 0x10); /* Max size supported by
the hardware */


Would prefer to see a MACRO defined, instead of the literal 0x100




  cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x);


Same as above.



  p1n_base = p1_base(dev) + 0x1 + (afu->slice * p1n_size);


Same as above.



@@ -621,7 +622,7 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu)
  afu->pp_size = AFUD_PPPSA_LEN(val) * 4096;


Both val and pp_size are 64bit quantities. Not clear how the overflow
during multiplication is going to be handled.



  afu->crs_len = AFUD_CR_LEN(val) * 256;


What do the 4096 and 256 represent?



  /* Convert everything to bytes, because there is NO WAY I'd look
at the
   * code a month later and forget what units these are in ;-) */
-adapter->ps_off = ps_off * 64 * 1024;
+adapter->native->ps_off = ps_off * 64 * 1024;
  adapter->ps_size = ps_size * 64 * 1024;
-adapter->afu_desc_off = afu_desc_off * 64 * 1024;
-adapter->afu_desc_size = afu_desc_size *64 * 1024;
+adapter->native->afu_desc_off = afu_desc_off * 64 * 1024;
+adapter->native->afu_desc_size = afu_desc_size * 64 * 1024;


Is this (64k) page size related?


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH 0/9] powerpc/mm: Restructure Linux PTE on Book3S/64 to radix format

2016-02-21 Thread Michael Ellerman
On Sat, 2016-02-20 at 20:10 +0530, Aneesh Kumar K.V wrote:
> Paul Mackerras  writes:
> 
> > This patch series modifies the Linux PTE format used on 64-bit Book3S
> > processors (i.e. POWER server processors) to make the bits line up
> > with the PTE format used in the radix trees defined in PowerISA v3.0.
> > This will reduce the amount of further change required to make a
> > kernel that can run with either a radix MMU or a hashed page table
> > (HPT) MMU.
> > 
> > This also changes the upper levels of the tree to use real addresses
> > rather than kernel virtual addresses - that is, we no longer have the
> > 0xc000... at the top of each PGD/PUD/PMD entry.  I made this change
> > for all 64-bit machines, both embedded and server.
> > 
> > The patch series is against v4.5-rc4 plus Aneesh's "powerpc/mm/hash:
> > Clear the invalid slot information correctly" patch.
> > 
> > I have compiled this for all the defconfigs in the tree, without
> > error.  I have tested this, with the fixes branch of the powerpc tree
> > merged in, both running bare-metal on a POWER8 and in a KVM guest on
> > that POWER8 system.  In the guest I tested both 4k and 64k configs,
> > with THP enabled; in the host I tested with 64k page size and THP
> > enabled.  All these tests ran fine, including running a KVM guest on
> > the bare-metal system.  So far I have done kernel compiles in a loop
> > as the test, but I plan to run LTP and possibly some other tests.
> > 
> > Comments welcome.
> 
> I was expecting some complex changes in asm and other part of the code. That
> is one of the reason I was holding of a series like this till I get the
> radix merged.

Yeah, but you actually rewrote most/all of that code in C as part of your
earlier refactoring :)

> Now how do we want to go with this series ?. If we are taking this
> series before the books3 hash linux abstraction series, I will have to
> redo that series now on top of this.

I'd prefer to merge this first.

I know you'll have to redo your series, but hopefully some of your series can
just go away, because we don't need to abstract the PTE bits anymore.

cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH 7/9] powerpc/mm/book3s-64: Shuffle read, write, execute and user bits in PTE

2016-02-21 Thread Michael Ellerman
On Mon, 2016-02-22 at 09:36 +1100, Paul Mackerras wrote:
> On Sun, Feb 21, 2016 at 01:00:54PM +0530, Aneesh Kumar K.V wrote:
> > Paul Mackerras  writes:
> >
> > Also can you use constants like
> > #define _PAGE_USER PPC_BIT(60)
>
> I'd really rather not - that is harder for the casual reader to parse,
> because they then have to go off and find out what exactly PPC_BIT
> does.  The only time that using PPC_BIT would help is when checking
> that the bit definitions match the Power ISA, and that's presumably
> done by intelligent people that can handle backwards bit numbering in
> their heads. :)

Yep agree 100%.

Using PPC_BIT() means every time someone sees that defintion they need to think
about what the conversion is and whether it's right, ie. for the entire future
history of this code.

On the other hand not using PPC_BIT() means the person who writes the
definition needs to think about it and do the correct conversion, but only
once.

cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Fwd: [PATCH v4 10/18] cxl: New hcalls to support CAPI adapters

2016-02-21 Thread Manoj Kumar

Fred: See comments below.

The most egregious checkpatch violations so far in this series,
for going beyond 80 columns.

total: 65 warnings, 924 lines checked


On 2/21/2016 3:31 PM, Uma Krishnan wrote:

From: Christophe Lombard 

The hypervisor calls provide an interface with a coherent plaform


plaform->platform



+/**
+ * cxl_h_download_adapter_image - Download the base image in the coherent
+ *platoform facility.



platoform->platform


+
+/**
+ * cxl_h_validate_adapter_image - Validate the base image in the coherent
+ *platoform facility.


platoform->platform

Irreverent to the Socratic amongst us.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Fwd: [PATCH v4 09/18] cxl: New possible return value from hcall

2016-02-21 Thread Manoj Kumar

Reviewed-by: Manoj Kumar 

---
Manoj Kumar



Subject: [PATCH v4 09/18] cxl: New possible return value from hcall
Date: Tue, 16 Feb 2016 22:39:02 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

From: Christophe Lombard 

The hcalls introduced for CAPI use a possible new value:
H_STATE (invalid state).


Same suggestion as earlier. Try to avoid references to
the term CAPI externally.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH 7/9] powerpc/mm/book3s-64: Shuffle read, write, execute and user bits in PTE

2016-02-21 Thread Paul Mackerras
On Sun, Feb 21, 2016 at 01:00:54PM +0530, Aneesh Kumar K.V wrote:
> Paul Mackerras  writes:
> 
> > This moves the _PAGE_EXEC, _PAGE_RW and _PAGE_USER bits around in
> > the Linux PTE on 64-bit Book 3S systems to correspond with the bit
> > positions used in radix mode by PowerISA v3.0 CPUs.  This also adds
> > a _PAGE_READ bit corresponding to the read permission bit in the
> > radix PTE.  _PAGE_READ is currently unused but could possibly be used
> > in future to improve pte_protnone().
> >
> > Signed-off-by: Paul Mackerras 
> > ---
> >  arch/powerpc/include/asm/book3s/64/hash.h | 10 ++
> >  1 file changed, 6 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
> > b/arch/powerpc/include/asm/book3s/64/hash.h
> > index c8eba0e..0fc750c 100644
> > --- a/arch/powerpc/include/asm/book3s/64/hash.h
> > +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> > @@ -13,9 +13,12 @@
> >   * We could create separate kernel read-only if we used the 3 PP bits
> >   * combinations that newer processors provide but we currently don't.
> >   */
> > -#define _PAGE_BIT_SWAP_TYPE2
> > -#define _PAGE_USER 0x4 /* page may be accessed by userspace */
> > -#define _PAGE_EXEC 0x8 /* execute permission */
> > +#define _PAGE_BIT_SWAP_TYPE0
> > +
> > +#define _PAGE_EXEC 0x1 /* execute permission */
> > +#define _PAGE_RW   0x2 /* read & write access allowed */
> > +#define _PAGE_READ 0x4 /* read access allowed */
> > +#define _PAGE_USER 0x8 /* page may be accessed by userspace */
> >  #define _PAGE_GUARDED  0x00010 /* G: guarded (side-effect) 
> > page */
> >  /* M (memory coherence) is always set in the HPTE, so we don't need it 
> > here */
> >  #define _PAGE_COHERENT 0x0
> > @@ -23,7 +26,6 @@
> >  #define _PAGE_WRITETHRU0x00040 /* W: cache write-through */
> >  #define _PAGE_DIRTY0x00080 /* C: page changed */
> >  #define _PAGE_ACCESSED 0x00100 /* R: page referenced */
> > -#define _PAGE_RW   0x00200 /* software: user write access allowed 
> > */
> >  #define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
> >  #define _PAGE_SPECIAL  0x1 /* software: special page */
> >
> 
> 
> For radix, _PAGE_USER is the inverse of this right ?. Ie, we set that
> bit position to 1 to indicate privileged access only.

Right, we'll need a follow-on patch that changes this to a _PAGE_PRIV
bit and changes the logic that uses _PAGE_USER either that or a
_PAGE_PRIV bit.  But at least we have the bit position reserved now.

> Also can you use constants like
> #define _PAGE_USER PPC_BIT(60)

I'd really rather not - that is harder for the casual reader to parse,
because they then have to go off and find out what exactly PPC_BIT
does.  The only time that using PPC_BIT would help is when checking
that the bit definitions match the Power ISA, and that's presumably
done by intelligent people that can handle backwards bit numbering in
their heads. :)

Paul.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH 3/9] powerpc/mm/64: Use physical addresses in upper page table tree levels

2016-02-21 Thread Paul Mackerras
On Sat, Feb 20, 2016 at 10:05:58PM +0530, Aneesh Kumar K.V wrote:
> Paul Mackerras  writes:
> 
> > This changes the Linux page tables to store physical addresses
> > rather than kernel virtual addresses in the upper levels of the
> > tree (pgd, pud and pmd) for all 64-bit machines.
> >
> > This frees up some high order bits, and will be needed with book3s
> > PowerISA v3.0 machines which read the page table tree in hardware
> > in radix mode.
> 
> 
> Should we not update pmd_pgtable ?

Not sure what you mean by this - the patch does update pmd_pgtable
for the 64k page case (the 4k case is already fine).

> I have the below patch in my series.
> 
> http://mid.gmane.org/1455814254-10226-13-git-send-email-aneesh.ku...@linux.vnet.ibm.com

That looks like a reasonable thing to do.

Paul.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH 0/9] powerpc/mm: Restructure Linux PTE on Book3S/64 to radix format

2016-02-21 Thread Paul Mackerras
On Sun, Feb 21, 2016 at 01:11:17PM +0530, Aneesh Kumar K.V wrote:
> "Aneesh Kumar K.V"  writes:
> 
> > Paul Mackerras  writes:
> >
> >> This patch series modifies the Linux PTE format used on 64-bit Book3S
> >> processors (i.e. POWER server processors) to make the bits line up
> >> with the PTE format used in the radix trees defined in PowerISA v3.0.
> >> This will reduce the amount of further change required to make a
> >> kernel that can run with either a radix MMU or a hashed page table
> >> (HPT) MMU.
> >>
> >> This also changes the upper levels of the tree to use real addresses
> >> rather than kernel virtual addresses - that is, we no longer have the
> >> 0xc000... at the top of each PGD/PUD/PMD entry.  I made this change
> >> for all 64-bit machines, both embedded and server.
> >>
> >> The patch series is against v4.5-rc4 plus Aneesh's "powerpc/mm/hash:
> >> Clear the invalid slot information correctly" patch.
> >>
> >> I have compiled this for all the defconfigs in the tree, without
> >> error.  I have tested this, with the fixes branch of the powerpc tree
> >> merged in, both running bare-metal on a POWER8 and in a KVM guest on
> >> that POWER8 system.  In the guest I tested both 4k and 64k configs,
> >> with THP enabled; in the host I tested with 64k page size and THP
> >> enabled.  All these tests ran fine, including running a KVM guest on
> >> the bare-metal system.  So far I have done kernel compiles in a loop
> >> as the test, but I plan to run LTP and possibly some other tests.
> >>
> >> Comments welcome.
> >
> > I was expecting some complex changes in asm and other part of the code. That
> > is one of the reason I was holding of a series like this till I get the
> > radix merged. I should have really tried the radix/hash linux page table 
> > consolidation to see the impact.
> 
> One of the details that i hit last time with _PAGE_PTE was the usage of
> @h symbol in asm code. I did a quick look and I guess we are ok. But it
> will be good to double check. pmdp_splitting_flush (which got removed)
> had usages like %4@h etc

I have done some pretty thorough grepping in arch/powerpc.  There is
no assembly code left that manipulates Linux PTEs (on 64-bit Book 3S,
that is), because you converted it all to C code. :)  There are a
couple of bits of inline asm, but the only bit that is used as an
immediate value is _PAGE_BUSY, which goes from 0x800 to 0x200, and
could actually stay at 0x800 in fact.

Paul.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH 4/9] powerpc/mm/book3s-64: Move _PAGE_PRESENT to the most significant bit

2016-02-21 Thread Paul Mackerras
On Sat, Feb 20, 2016 at 10:11:14PM +0530, Aneesh Kumar K.V wrote:
> Paul Mackerras  writes:
> 
> > This changes _PAGE_PRESENT for 64-bit Book 3S processors from 0x2 to
> > 0x8000___, because that is where PowerISA v3.0 CPUs in
> > radix mode will expect to find it.
> 
> All the changes in this patch related to _PAGE_PRESENT movement or are
> they cleanup that got added to this patch. I am looking at the hpte slot
> array changes and wondering how that is related to _PAGE_PRESENT.

I was preserving the property mentioned in this comment:

> >  /*
> >   * The linux hugepage PMD now include the pmd entries followed by the 
> > address
> >   * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
> > - * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
> > + * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per
> >   * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
> >   * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
> >   *
> > - * The last three bits are intentionally left to zero. This memory location
> > + * The top three bits are intentionally left as zero. This memory location
> >   * are also used as normal page PTE pointers. So if we have any pointers
> >   * left around while we collapse a hugepage, we need to make sure
> >   * _PAGE_PRESENT bit of that is zero when we look at them

I don't know if this comment still applies, but now that _PAGE_PRESENT
is the top bit of a byte rather than one of the low bits, then to avoid
having _PAGE_PRESENT overlap these HPTE location bits in a byte, we
need to move the location bits.

It seems pretty bogus to me that we might interpret an array of these
bytes as a PTE, and if you're sure we never do that, we can drop this
change (and the now-misleading comment).

Paul.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH 2/9] powerpc/mm/book3s-64: Free up 7 high-order bits in the Linux PTE

2016-02-21 Thread Paul Mackerras
On Sat, Feb 20, 2016 at 09:46:19PM +0530, Aneesh Kumar K.V wrote:
> Paul Mackerras  writes:
> 
> > This frees up bits 57-63 in the Linux PTE on 64-bit Book 3S machines.
> > In the 4k page case, this is done just by reducing the size of the
> > RPN field to 39 bits, giving 51-bit real addresses.  In the 64k page
> > case, we had 10 unused bits in the middle of the PTE, so this moves
> > the RPN field down 10 bits to make use of those unused bits.  This
> > means the RPN field is now 3 bits larger at 37 bits, giving 53-bit
> > real addresses in the normal case, or 49-bit real addresses for the
> > special 4k PFN case.
> 
> 
> Is it ok to have different real address bits between 4k and 64k linux
> config ?

Do you mean a different number of address bits, or a different
location?  In fact both were different before this patch anyway.

Paul.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Fwd: [PATCH v4 08/18] cxl: IRQ allocation for guests

2016-02-21 Thread Manoj Kumar

Fred: See comment below.

---
Manoj Kumar


Subject: [PATCH v4 08/18] cxl: IRQ allocation for guests
Date: Tue, 16 Feb 2016 22:39:01 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

The PSL interrupt is not going to be multiplexed in a guest, so an
interrupt will be allocated for it for each context.


Not clear why this is the case. Why cannot the CXL later still
multiplex this in a guest? Is this a design choice, an
architectural issue, or the complexity of implementation did
not warrant this? From an API perspective it would have been
preferable to not cascade this change down to all consumers,
and have consumers aware whether they are working in a
bare-metal or a guest environment.

It will still be

the first interrupt found in the first interrupt range, but is treated
almost like any other AFU interrupt when creating/deleting the
context. Only the handler is different. Rework the code so that the
range 0 is treated like the other ranges.

Co-authored-by: Christophe Lombard 
Signed-off-by: Frederic Barrat 
Signed-off-by: Christophe Lombard 
---
  drivers/misc/cxl/irq.c | 78
+-
  1 file changed, 64 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 5033869..3c04c14 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -19,6 +19,13 @@
  #include "cxl.h"
  #include "trace.h"

+static int afu_irq_range_start(void)
+{
+if (cpu_has_feature(CPU_FTR_HVMODE))
+return 1;
+return 0;
+}
+
  static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64
dsisr, u64 dar)
  {
  ctx->dsisr = dsisr;
@@ -117,11 +124,23 @@ static irqreturn_t cxl_irq_afu(int irq, void *data)
  {
  struct cxl_context *ctx = data;
  irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq));
-int irq_off, afu_irq = 1;
+int irq_off, afu_irq = 0;
  __u16 range;
  int r;

-for (r = 1; r < CXL_IRQ_RANGES; r++) {
+/*
+ * Look for the interrupt number.
+ * On bare-metal, we know range 0 only contains the PSL
+ * interrupt so we could start counting at range 1 and initialize
+ * afu_irq at 1.
+ * In a guest, range 0 also contains AFU interrupts, so it must
+ * be counted for. Therefore we initialize afu_irq at 0 to take into
+ * account the PSL interrupt.
+ *
+ * For code-readability, it just seems easier to go over all
+ * the ranges on bare-metal and guest. The end result is the same.
+ */
+for (r = 0; r < CXL_IRQ_RANGES; r++) {
  irq_off = hwirq - ctx->irqs.offset[r];
  range = ctx->irqs.range[r];
  if (irq_off >= 0 && irq_off < range) {
@@ -131,7 +150,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data)
  afu_irq += range;
  }
  if (unlikely(r >= CXL_IRQ_RANGES)) {
-WARN(1, "Recieved AFU IRQ out of range for pe %i (virq %i hwirq
%lx)\n",
+WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq
%lx)\n",
   ctx->pe, irq, hwirq);
  return IRQ_HANDLED;
  }
@@ -141,7 +160,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data)
 afu_irq, ctx->pe, irq, hwirq);

  if (unlikely(!ctx->irq_bitmap)) {
-WARN(1, "Recieved AFU IRQ for context with no IRQ bitmap\n");
+WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n");
  return IRQ_HANDLED;
  }
  spin_lock(&ctx->lock);
@@ -227,17 +246,33 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32
count)
  {
  int rc, r, i, j = 1;
  struct cxl_irq_name *irq_name;
+int alloc_count;
+
+/*
+ * In native mode, range 0 is reserved for the multiplexed
+ * PSL interrupt. It has been allocated when the AFU was initialized.
+ *
+ * In a guest, the PSL interrupt is not mutliplexed, but per-context,
+ * and is the first interrupt from range 0. It still needs to be
+ * allocated, so bump the count by one.
+ */
+if (cpu_has_feature(CPU_FTR_HVMODE))
+alloc_count = count;
+else
+alloc_count = count + 1;

  /* Initialize the list head to hold irq names */
  INIT_LIST_HEAD(&ctx->irq_names);

  if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter,
-count)))
+alloc_count)))
  return rc;

-/* Multiplexed PSL Interrupt */
-ctx->irqs.offset[0] = ctx->afu->psl_hwirq;
-ctx->irqs.range[0] = 1;
+if (cpu_has_feature(CPU_FTR_HVMODE)) {
+/* Multiplexed PSL Interrupt */
+ctx->irqs.offset[0] = ctx->afu->psl_hwirq;
+ctx->irqs.range[0] = 1;
+}

  ctx->irq_count = count;
  ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count),
@@ -249,7 +284,7 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32
count)
   * Allocate names first.  If any fail, bail out before allocating
   * actual hardware IRQs.
 

Re: Fwd: [PATCH v4 07/18] cxl: Update cxl_irq() prototype

2016-02-21 Thread Manoj Kumar


Reviewed-by: Manoj Kumar 

---
Manoj Kumar


Subject: [PATCH v4 07/18] cxl: Update cxl_irq() prototype
Date: Tue, 16 Feb 2016 22:39:00 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

The context parameter when calling cxl_irq() should be strongly typed.

Co-authored-by: Christophe Lombard 
Signed-off-by: Frederic Barrat 
Signed-off-by: Christophe Lombard 
Acked-by: Ian Munsie 
---
  drivers/misc/cxl/cxl.h | 2 +-
  drivers/misc/cxl/irq.c | 3 +--
  2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 40f6783..c7ed265 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -714,7 +714,7 @@ struct cxl_irq_info {
  };

  void cxl_assign_psn_space(struct cxl_context *ctx);
-irqreturn_t cxl_irq(int irq, void *ctx, struct cxl_irq_info *irq_info);
+irqreturn_t cxl_irq(int irq, struct cxl_context *ctx, struct
cxl_irq_info *irq_info);
  int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler,
  void *cookie, irq_hw_number_t *dest_hwirq,
  unsigned int *dest_virq, const char *name);
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 56ad301..5033869 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -27,9 +27,8 @@ static irqreturn_t schedule_cxl_fault(struct
cxl_context *ctx, u64 dsisr, u64 da
  return IRQ_HANDLED;
  }

-irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
+irqreturn_t cxl_irq(int irq, struct cxl_context *ctx, struct
cxl_irq_info *irq_info)
  {
-struct cxl_context *ctx = data;
  u64 dsisr, dar;

  dsisr = irq_info->dsisr;


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Fwd: [PATCH v4 06/18] cxl: Isolate a few bare-metal-specific calls

2016-02-21 Thread Manoj Kumar

Reviewed-by: Manoj Kumar 

---
Manoj Kumar



Subject: [PATCH v4 06/18] cxl: Isolate a few bare-metal-specific calls
Date: Tue, 16 Feb 2016 22:38:59 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

A few functions are mostly common between bare-metal and guest and
just need minor tuning. To avoid crowding the backend API, introduce a
few 'if' based on the CPU being in HV mode.

Co-authored-by: Christophe Lombard 
Signed-off-by: Frederic Barrat 
Signed-off-by: Christophe Lombard 
Acked-by: Ian Munsie 
---
  drivers/misc/cxl/context.c |  3 ++-
  drivers/misc/cxl/cxl.h |  7 +--
  drivers/misc/cxl/debugfs.c |  4 
  drivers/misc/cxl/fault.c   | 19 +++
  4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index aa65262..46f9844 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -95,7 +95,8 @@ int cxl_context_init(struct cxl_context *ctx, struct
cxl_afu *afu, bool master,
  return i;

  ctx->pe = i;
-ctx->elem = &ctx->afu->spa[i];
+if (cpu_has_feature(CPU_FTR_HVMODE))
+ctx->elem = &ctx->afu->spa[i];
  ctx->pe_inserted = false;

  /*
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 02065b4..40f6783 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -560,8 +560,11 @@ static inline bool cxl_adapter_link_ok(struct cxl
*cxl)
  {
  struct pci_dev *pdev;

-pdev = to_pci_dev(cxl->dev.parent);
-return !pci_channel_offline(pdev);
+if (cpu_has_feature(CPU_FTR_HVMODE)) {
+pdev = to_pci_dev(cxl->dev.parent);
+return !pci_channel_offline(pdev);
+}
+return true;
  }

  static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t
reg)
diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c
index 18df6f4..5751899 100644
--- a/drivers/misc/cxl/debugfs.c
+++ b/drivers/misc/cxl/debugfs.c
@@ -118,6 +118,10 @@ void cxl_debugfs_afu_remove(struct cxl_afu *afu)
  int __init cxl_debugfs_init(void)
  {
  struct dentry *ent;
+
+if (!cpu_has_feature(CPU_FTR_HVMODE))
+return 0;
+
  ent = debugfs_create_dir("cxl", NULL);
  if (IS_ERR(ent))
  return PTR_ERR(ent);
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index ab740a1..9a8650b 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -254,14 +254,17 @@ void cxl_handle_fault(struct work_struct *fault_work)
  u64 dar = ctx->dar;
  struct mm_struct *mm = NULL;

-if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
-cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
-cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
-/* Most likely explanation is harmless - a dedicated process
- * has detached and these were cleared by the PSL purge, but
- * warn about it just in case */
-dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault
regs changed\n");
-return;
+if (cpu_has_feature(CPU_FTR_HVMODE)) {
+if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
+cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
+cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
+/* Most likely explanation is harmless - a dedicated
+ * process has detached and these were cleared by the
+ * PSL purge, but warn about it just in case
+ */
+dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation
fault regs changed\n");
+return;
+}
  }

  /* Early return if the context is being / has been detached */


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Fwd: [PATCH v4 05/18] cxl: Rename some bare-metal specific functions

2016-02-21 Thread Manoj Kumar

Reviewed-by: Manoj Kumar 

---
Manoj Kumar




Subject: [PATCH v4 05/18] cxl: Rename some bare-metal specific functions
Date: Tue, 16 Feb 2016 22:38:58 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

Rename a few functions, changing the 'cxl_' prefix to either
'cxl_pci_' or 'cxl_native_', to make clear that the implementation is
bare-metal specific.

Those functions will have an equivalent implementation for a guest in
a later patch.

Co-authored-by: Christophe Lombard 
Signed-off-by: Frederic Barrat 
Signed-off-by: Christophe Lombard 
---
  drivers/misc/cxl/cxl.h| 28 +++---
  drivers/misc/cxl/native.c | 98
---
  drivers/misc/cxl/pci.c| 78 +++--
  3 files changed, 104 insertions(+), 100 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 8233af3..02065b4 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -519,14 +519,14 @@ struct cxl {
  bool perst_same_image;
  };

-int cxl_alloc_one_irq(struct cxl *adapter);
-void cxl_release_one_irq(struct cxl *adapter, int hwirq);
-int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl
*adapter, unsigned int num);
-void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl
*adapter);
-int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int
virq);
+int cxl_pci_alloc_one_irq(struct cxl *adapter);
+void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq);
+int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl
*adapter, unsigned int num);
+void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl
*adapter);
+int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned
int virq);
  int cxl_update_image_control(struct cxl *adapter);
-int cxl_reset(struct cxl *adapter);
-void cxl_release_afu(struct device *dev);
+int cxl_pci_reset(struct cxl *adapter);
+void cxl_pci_release_afu(struct device *dev);

  /* common == phyp + powernv */
  struct cxl_process_element_common {
@@ -623,7 +623,7 @@ static inline u64 cxl_p2n_read(struct cxl_afu *afu,
cxl_p2n_reg_t reg)
  return ~0ULL;
  }

-ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
  loff_t off, size_t count);


@@ -663,12 +663,12 @@ struct cxl *cxl_alloc_adapter(void);
  struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice);
  int cxl_afu_select_best_mode(struct cxl_afu *afu);

-int cxl_register_psl_irq(struct cxl_afu *afu);
-void cxl_release_psl_irq(struct cxl_afu *afu);
-int cxl_register_psl_err_irq(struct cxl *adapter);
-void cxl_release_psl_err_irq(struct cxl *adapter);
-int cxl_register_serr_irq(struct cxl_afu *afu);
-void cxl_release_serr_irq(struct cxl_afu *afu);
+int cxl_native_register_psl_irq(struct cxl_afu *afu);
+void cxl_native_release_psl_irq(struct cxl_afu *afu);
+int cxl_native_register_psl_err_irq(struct cxl *adapter);
+void cxl_native_release_psl_err_irq(struct cxl *adapter);
+int cxl_native_register_serr_irq(struct cxl_afu *afu);
+void cxl_native_release_serr_irq(struct cxl_afu *afu);
  int afu_register_irqs(struct cxl_context *ctx, u32 count);
  void afu_release_irqs(struct cxl_context *ctx, void *cookie);
  void afu_irq_name_free(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 16d3b1a..b8a6ad5 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -80,7 +80,7 @@ int cxl_afu_disable(struct cxl_afu *afu)
  }

  /* This will disable as well as reset */
-static int __cxl_afu_reset(struct cxl_afu *afu)
+static int native_afu_reset(struct cxl_afu *afu)
  {
  pr_devel("AFU reset request\n");

@@ -90,7 +90,7 @@ static int __cxl_afu_reset(struct cxl_afu *afu)
 false);
  }

-static int cxl_afu_check_and_enable(struct cxl_afu *afu)
+static int native_afu_check_and_enable(struct cxl_afu *afu)
  {
  if (!cxl_ops->link_ok(afu->adapter)) {
  WARN(1, "Refusing to enable afu while link down!\n");
@@ -631,7 +631,7 @@ static int deactivate_dedicated_process(struct
cxl_afu *afu)
  return 0;
  }

-static int cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode)
+static int native_afu_deactivate_mode(struct cxl_afu *afu, int mode)
  {
  if (mode == CXL_MODE_DIRECTED)
  return deactivate_afu_directed(afu);
@@ -640,7 +640,7 @@ static int cxl_afu_deactivate_mode(struct cxl_afu
*afu, int mode)
  return 0;
  }

-static int cxl_afu_activate_mode(struct cxl_afu *afu, int mode)
+static int native_afu_activate_mode(struct cxl_afu *afu, int mode)
  {
  if (!mode)
  return 0;
@@ -660,7 +660,8 @@ static int cxl_afu_activate_mode(struct cxl_afu
*afu, int mode)
  return -EINVAL;
  }

-static int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64
wed, u64 amr)
+static int native_at

Re: Fwd: [PATCH v4 04/18] cxl: Introduce implementation-specific API

2016-02-21 Thread Manoj Kumar


Reviewed-by: Manoj Kumar 

---
Manoj Kumar


Subject: [PATCH v4 04/18] cxl: Introduce implementation-specific API
Date: Tue, 16 Feb 2016 22:38:57 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

The backend API (in cxl.h) lists some low-level functions whose
implementation is different on bare-metal and in a guest. Each
environment implements its own functions, and the common code uses
them through function pointers, defined in cxl_backend_ops

Co-authored-by: Christophe Lombard 
Signed-off-by: Frederic Barrat 
Signed-off-by: Christophe Lombard 
Acked-by: Ian Munsie 
---
  drivers/misc/cxl/api.c |   8 +--
  drivers/misc/cxl/context.c |   4 +-
  drivers/misc/cxl/cxl.h |  53 +++---
  drivers/misc/cxl/fault.c   |   6 +-
  drivers/misc/cxl/file.c|  15 ++---
  drivers/misc/cxl/irq.c |  19 ---
  drivers/misc/cxl/main.c|  11 ++--
  drivers/misc/cxl/native.c  | 135
-
  drivers/misc/cxl/pci.c |  16 +++---
  drivers/misc/cxl/sysfs.c   |  32 +++
  drivers/misc/cxl/vphb.c|   6 +-
  11 files changed, 185 insertions(+), 120 deletions(-)

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index b45d857..31eb842 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
  void cxl_free_afu_irqs(struct cxl_context *ctx)
  {
  afu_irq_name_free(ctx);
-cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
  }
  EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);

@@ -176,7 +176,7 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,

  cxl_ctx_get();

-if ((rc = cxl_attach_process(ctx, kernel, wed , 0))) {
+if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
  put_pid(ctx->pid);
  cxl_ctx_put();
  goto out;
@@ -342,11 +342,11 @@ int cxl_afu_reset(struct cxl_context *ctx)
  struct cxl_afu *afu = ctx->afu;
  int rc;

-rc = __cxl_afu_reset(afu);
+rc = cxl_ops->afu_reset(afu);
  if (rc)
  return rc;

-return cxl_afu_check_and_enable(afu);
+return cxl_ops->afu_check_and_enable(afu);
  }
  EXPORT_SYMBOL_GPL(cxl_afu_reset);

diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 262b88e..aa65262 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -214,8 +214,8 @@ int __detach_context(struct cxl_context *ctx)
  /* Only warn if we detached while the link was OK.
   * If detach fails when hw is down, we don't care.
   */
-WARN_ON(cxl_detach_process(ctx) &&
-cxl_adapter_link_ok(ctx->afu->adapter));
+WARN_ON(cxl_ops->detach_process(ctx) &&
+cxl_ops->link_ok(ctx->afu->adapter));
  flush_work(&ctx->fault_work); /* Only needed for dedicated process */

  /* release the reference to the group leader and mm handling pid */
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 3b824e3..8233af3 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -623,11 +623,6 @@ static inline u64 cxl_p2n_read(struct cxl_afu *afu,
cxl_p2n_reg_t reg)
  return ~0ULL;
  }

-u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off);
-u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off);
-u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off);
-u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off);
-
  ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
  loff_t off, size_t count);

@@ -666,10 +661,6 @@ void cxl_sysfs_afu_m_remove(struct cxl_afu *afu);

  struct cxl *cxl_alloc_adapter(void);
  struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice);
-
-int cxl_afu_activate_mode(struct cxl_afu *afu, int mode);
-int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode);
-int cxl_afu_deactivate_mode(struct cxl_afu *afu);
  int cxl_afu_select_best_mode(struct cxl_afu *afu);

  int cxl_register_psl_irq(struct cxl_afu *afu);
@@ -681,8 +672,6 @@ void cxl_release_serr_irq(struct cxl_afu *afu);
  int afu_register_irqs(struct cxl_context *ctx, u32 count);
  void afu_release_irqs(struct cxl_context *ctx, void *cookie);
  void afu_irq_name_free(struct cxl_context *ctx);
-irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr,
-u64 errstat);

  int cxl_debugfs_init(void);
  void cxl_debugfs_exit(void);
@@ -727,18 +716,10 @@ int cxl_register_one_irq(struct cxl *adapter,
irq_handler_t handler,
  void *cookie, irq_hw_number_t *dest_hwirq,
  unsigned int *dest_virq, const char *name);

-int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed,
-u64 amr);
-int cxl_detach_process(struct cxl_context *ctx);
-
-int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask);
-
  int cxl_check_error(struct cxl_afu *afu);
  int

Re: Fwd: [PATCH v4 03/18] cxl: Define process problem state area at attach time only

2016-02-21 Thread Manoj Kumar


Reviewed-by: Manoj Kumar 

---
Manoj Kumar



Subject: [PATCH v4 03/18] cxl: Define process problem state area at
attach time only
Date: Tue, 16 Feb 2016 22:38:56 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

Cxl kernel API was defining the process problem state area during


Suggestion:
Cxl -> CXL



context initialization, making it possible to map the problem state
area before attaching the context. This won't work on a PowerVM
guest. So force the logical behavior, like in userspace: attach first,
then map the problem state area.
Remove calls to cxl_assign_psn_space during init. The function is
already called on the attach paths.

Co-authored-by: Christophe Lombard 
Signed-off-by: Frederic Barrat 
Signed-off-by: Christophe Lombard 
---
  drivers/misc/cxl/api.c | 11 ++-
  1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index ea3eeb7..b45d857 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -51,8 +51,6 @@ struct cxl_context *cxl_dev_context_init(struct
pci_dev *dev)
  if (rc)
  goto err_mapping;

-cxl_assign_psn_space(ctx);
-
  return ctx;

  err_mapping:
@@ -207,7 +205,6 @@ EXPORT_SYMBOL_GPL(cxl_stop_context);
  void cxl_set_master(struct cxl_context *ctx)
  {
  ctx->master = true;
-cxl_assign_psn_space(ctx);
  }
  EXPORT_SYMBOL_GPL(cxl_set_master);

@@ -325,15 +322,11 @@ EXPORT_SYMBOL_GPL(cxl_start_work);

  void __iomem *cxl_psa_map(struct cxl_context *ctx)
  {
-struct cxl_afu *afu = ctx->afu;
-int rc;
-
-rc = cxl_afu_check_and_enable(afu);
-if (rc)
+if (ctx->status != STARTED)
  return NULL;

  pr_devel("%s: psn_phys%llx size:%llx\n",
- __func__, afu->psn_phys, afu->adapter->ps_size);
+__func__, ctx->psn_phys, ctx->psn_size);
  return ioremap(ctx->psn_phys, ctx->psn_size);
  }
  EXPORT_SYMBOL_GPL(cxl_psa_map);


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Fwd: [PATCH v4 02/18] cxl: Move bare-metal specific code to specialized files

2016-02-21 Thread Manoj Kumar

Reviewed-by: Manoj Kumar 

---
Manoj Kumar


--

Subject: [PATCH v4 02/18] cxl: Move bare-metal specific code to
specialized files
Date: Tue, 16 Feb 2016 22:38:55 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

Move a few functions around to better separate code specific to
bare-metal environment from code which will be commonly used between
guest and bare-metal.

Code specific to bare-metal is meant to be in native.c or pci.c
only. It's basically anything which touches the capi p1 registers,


I thought we were going to avoid using the CAPI term externally.
Please update if submitting a v4 of this patch series.


some p2 registers not needed from a guest and the PCI interface.

Co-authored-by: Christophe Lombard 
Signed-off-by: Frederic Barrat 
Signed-off-by: Christophe Lombard 
Acked-by: Ian Munsie 
---
  drivers/misc/cxl/cxl.h|  24 +
  drivers/misc/cxl/irq.c| 205 +--
  drivers/misc/cxl/main.c   |   2 +-
  drivers/misc/cxl/native.c | 240
+-
  drivers/misc/cxl/pci.c|  18 
  5 files changed, 245 insertions(+), 244 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 3f88140..3b824e3 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -623,23 +623,8 @@ static inline u64 cxl_p2n_read(struct cxl_afu *afu,
cxl_p2n_reg_t reg)
  return ~0ULL;
  }

-static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off)
-{
-if (likely(cxl_adapter_link_ok(afu->adapter)))
-return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset +
-   ((cr) * (afu)->crs_len) + (off));
-else
-return ~0ULL;
-}
-
-static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off)
-{
-if (likely(cxl_adapter_link_ok(afu->adapter)))
-return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset +
-   ((cr) * (afu)->crs_len) + (off));
-else
-return 0x;
-}
+u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off);
+u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off);
  u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off);
  u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off);

@@ -654,7 +639,6 @@ struct cxl_calls {
  int register_cxl_calls(struct cxl_calls *calls);
  void unregister_cxl_calls(struct cxl_calls *calls);

-int cxl_alloc_adapter_nr(struct cxl *adapter);
  void cxl_remove_adapter_nr(struct cxl *adapter);

  int cxl_alloc_spa(struct cxl_afu *afu);
@@ -697,7 +681,8 @@ void cxl_release_serr_irq(struct cxl_afu *afu);
  int afu_register_irqs(struct cxl_context *ctx, u32 count);
  void afu_release_irqs(struct cxl_context *ctx, void *cookie);
  void afu_irq_name_free(struct cxl_context *ctx);
-irqreturn_t cxl_slice_irq_err(int irq, void *data);
+irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr,
+u64 errstat);

  int cxl_debugfs_init(void);
  void cxl_debugfs_exit(void);
@@ -746,7 +731,6 @@ int cxl_attach_process(struct cxl_context *ctx, bool
kernel, u64 wed,
  u64 amr);
  int cxl_detach_process(struct cxl_context *ctx);

-int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info);
  int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask);

  int cxl_check_error(struct cxl_afu *afu);
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index e468e6c..16fd67f 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -19,72 +19,6 @@
  #include "cxl.h"
  #include "trace.h"

-/* XXX: This is implementation specific */
-static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64
dsisr, u64 errstat)
-{
-u64 fir1, fir2, fir_slice, serr, afu_debug;
-
-fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1);
-fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2);
-fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An);
-serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An);
-afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An);
-
-dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat);
-dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1);
-dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2);
-dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
-dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
-dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n",
afu_debug);
-
-dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n");
-cxl_stop_trace(ctx->afu->adapter);
-
-return cxl_ack_irq(ctx, 0, errstat);
-}
-
-irqreturn_t cxl_slice_irq_err(int irq, void *data)
-{
-struct cxl_afu *afu = data;
-u64 fir_slice, errstat, serr, afu_debug;
-
-WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq);
-
-serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
-fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An);
-errsta

Re: Fwd: [PATCH v4 01/18] cxl: Move common code away from bare-metal-specific files

2016-02-21 Thread Manoj Kumar


Would have been nice to have the code and the commit messages
consistent in the terminology.

Commit comments: bare-metal and guest
Code: powernv and phyp

Reviewed-by: Manoj Kumar 

---
Manoj Kumar


 Forwarded Message 
Subject: [PATCH v4 01/18] cxl: Move common code away from
bare-metal-specific files
Date: Tue, 16 Feb 2016 22:38:54 +0100
From: Frederic Barrat 
To: imun...@au1.ibm.com, michael.neul...@au1.ibm.com,
m...@ellerman.id.au, linuxppc-dev@lists.ozlabs.org

From: Christophe Lombard 

Move around some functions which will be accessed from the bare-metal
and guest environments.
Code in native.c and pci.c is meant to be bare-metal specific.
Other files contain code which may be shared with guests.

Co-authored-by: Frederic Barrat 
Signed-off-by: Frederic Barrat 
Signed-off-by: Christophe Lombard 
Acked-by: Ian Munsie 
---
  drivers/misc/cxl/cxl.h|  9 +++
  drivers/misc/cxl/irq.c| 14 +-
  drivers/misc/cxl/main.c   | 67
+++
  drivers/misc/cxl/native.c | 21 ---
  drivers/misc/cxl/pci.c| 48 +
  5 files changed, 84 insertions(+), 75 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a521bc7..3f88140 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -526,6 +526,7 @@ void cxl_release_irq_ranges(struct cxl_irq_ranges
*irqs, struct cxl *adapter);
  int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned
int virq);
  int cxl_update_image_control(struct cxl *adapter);
  int cxl_reset(struct cxl *adapter);
+void cxl_release_afu(struct device *dev);

  /* common == phyp + powernv */
  struct cxl_process_element_common {
@@ -679,6 +680,9 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu);
  int cxl_sysfs_afu_m_add(struct cxl_afu *afu);
  void cxl_sysfs_afu_m_remove(struct cxl_afu *afu);

+struct cxl *cxl_alloc_adapter(void);
+struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice);
+
  int cxl_afu_activate_mode(struct cxl_afu *afu, int mode);
  int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode);
  int cxl_afu_deactivate_mode(struct cxl_afu *afu);
@@ -733,6 +737,11 @@ struct cxl_irq_info {
  };

  void cxl_assign_psn_space(struct cxl_context *ctx);
+irqreturn_t cxl_irq(int irq, void *ctx, struct cxl_irq_info *irq_info);
+int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler,
+void *cookie, irq_hw_number_t *dest_hwirq,
+unsigned int *dest_virq, const char *name);
+
  int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed,
  u64 amr);
  int cxl_detach_process(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 09a4060..e468e6c 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -93,7 +93,7 @@ static irqreturn_t schedule_cxl_fault(struct
cxl_context *ctx, u64 dsisr, u64 da
  return IRQ_HANDLED;
  }

-static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info
*irq_info)
+irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
  {
  struct cxl_context *ctx = data;
  u64 dsisr, dar;
@@ -291,12 +291,12 @@ void cxl_unmap_irq(unsigned int virq, void *cookie)
  irq_dispose_mapping(virq);
  }

-static int cxl_register_one_irq(struct cxl *adapter,
-irq_handler_t handler,
-void *cookie,
-irq_hw_number_t *dest_hwirq,
-unsigned int *dest_virq,
-const char *name)
+int cxl_register_one_irq(struct cxl *adapter,
+irq_handler_t handler,
+void *cookie,
+irq_hw_number_t *dest_hwirq,
+unsigned int *dest_virq,
+const char *name)
  {
  int hwirq, virq;

diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 9fde75e..7ef5b43 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -32,6 +32,27 @@ uint cxl_verbose;
  module_param_named(verbose, cxl_verbose, uint, 0600);
  MODULE_PARM_DESC(verbose, "Enable verbose dmesg output");

+int cxl_afu_slbia(struct cxl_afu *afu)
+{
+unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+
+pr_devel("cxl_afu_slbia issuing SLBIA command\n");
+cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL);
+while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) {
+if (time_after_eq(jiffies, timeout)) {
+dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n");
+return -EBUSY;
+}
+/* If the adapter has gone down, we can assume that we
+ * will PERST it and that will invalidate everything.
+ */
+if (!cxl_adapter_link_ok(afu->adapter))
+return -EIO;
+cpu_relax();
+}
+return 0;
+}
+
  static inline void _cxl_slbia(struct cxl_context *ctx, struct
mm_struct *mm)
  {
  struct task_struct *task;
@@ -174,6 +195,52 @@ void cxl_remove_adapter_nr(struct cxl *adapter)
  i