[PATCH] powerpc: mm: Limit rma_size to 1TB when running without HV mode

2019-07-09 Thread Suraj Jitindar Singh
The virtual real mode addressing (VRMA) mechanism is used when a
partition is using HPT (Hash Page Table) translation and performs
real mode accesses (MSR[IR|DR] = 0) in non-hypervisor mode. In this
mode effective address bits 0:23 are treated as zero (i.e. the access
is aliased to 0) and the access is performed using an implicit 1TB SLB
entry.

The size of the RMA (Real Memory Area) is communicated to the guest as
the size of the first memory region in the device tree and, because of
the mechanism described above, can be expected not to exceed 1TB. In the
event that the host erroneously represents the RMA as being larger than
1TB, guest accesses in real mode to memory addresses above 1TB will be
aliased down to below 1TB. This means that a memory access performed in
real mode may differ from one performed in virtual mode for the same
memory address, which would likely have unintended consequences.

To avoid this outcome, have the guest explicitly limit the size of the
RMA to the current maximum, which is 1TB. This means that even if the
first memory block is larger than 1TB, only the first 1TB should be
accessed in real mode.
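(Illustration only, not part of the patch: with effective address bits 0:23
forced to zero, a real-mode access above 1TB simply wraps modulo 1TB.)

/* Illustrative sketch, not kernel code: VRMA aliasing of a real-mode
 * effective address.  Only the low 40 bits (1TB, i.e. SID_SHIFT_1T) survive.
 */
#define VRMA_LIMIT	(1UL << 40)	/* 1TB */

static unsigned long vrma_alias(unsigned long ea)
{
	return ea & (VRMA_LIMIT - 1);	/* e.g. 1TB + 0x1000 aliases to 0x1000 */
}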

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/mm/book3s64/hash_utils.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/mm/book3s64/hash_utils.c 
b/arch/powerpc/mm/book3s64/hash_utils.c
index 28ced26f2a00..4d0e2cce9cd5 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1901,11 +1901,19 @@ void hash__setup_initial_memory_limit(phys_addr_t 
first_memblock_base,
 *
 * For guests on platforms before POWER9, we clamp the RMA limit to 1G
 * to avoid some funky things such as RTAS bugs etc...
+* On POWER9 we limit to 1TB in case the host erroneously told us that
+* the RMA was >1TB. Effective address bits 0:23 are treated as zero
+* (meaning the access is aliased to zero i.e. addr = addr % 1TB)
+* for virtual real mode addressing and so it doesn't make sense to
+* have an area larger than 1TB as it can't be addressed.
 */
if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
ppc64_rma_size = first_memblock_size;
if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
+   else
+   ppc64_rma_size = min_t(u64, ppc64_rma_size,
+  1UL << SID_SHIFT_1T);
 
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
-- 
2.13.6



[PATCH v3 3/3] Powerpc64/Watchpoint: Rewrite ptrace-hwbreak.c selftest

2019-07-09 Thread Ravi Bangoria
The ptrace-hwbreak.c selftest is logically broken. On powerpc, when a
watchpoint is created with ptrace, the signal is generated before the
instruction is executed, and the user has to manually single-step the
instruction with the watchpoint disabled. The selftest never does this
and thus keeps getting the signal at the same instruction. If we fix
that, the selftest fails because the logical connection between the
tracer (parent) and the tracee (child) is also broken. Rewrite the
selftest and add new tests for unaligned access.
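For reference, a minimal sketch of the tracer-side handling described above
(not the actual selftest code; it assumes the PTRACE_SET_DEBUGREG usage
already visible in the old test, with error and signal checks omitted):

/*
 * Hypothetical sketch of the required tracer loop.  On powerpc the
 * watchpoint SIGTRAP arrives *before* the instruction executes, so the
 * tracer must step over it with the watchpoint disabled, otherwise the
 * child traps on the same instruction forever.
 */
static void step_over_watchpoint_hit(pid_t child, void *watch_addr)
{
	int status;

	waitpid(child, &status, 0);			/* child stopped on the hit */
	ptrace(PTRACE_SET_DEBUGREG, child, 0, NULL);	/* disable the watchpoint */
	ptrace(PTRACE_SINGLESTEP, child, NULL, NULL);	/* execute the instruction */
	waitpid(child, &status, 0);
	ptrace(PTRACE_SET_DEBUGREG, child, 0, watch_addr); /* re-arm it */
	ptrace(PTRACE_CONT, child, NULL, NULL);
}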

With patch:
  $ ./tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak
  test: ptrace-hwbreak
  tags: git_version:v5.2-rc2-33-ga247a75f90a9-dirty
  PTRACE_SET_DEBUGREG, WO, len: 1: Ok
  PTRACE_SET_DEBUGREG, WO, len: 2: Ok
  PTRACE_SET_DEBUGREG, WO, len: 4: Ok
  PTRACE_SET_DEBUGREG, WO, len: 8: Ok
  PTRACE_SET_DEBUGREG, RO, len: 1: Ok
  PTRACE_SET_DEBUGREG, RO, len: 2: Ok
  PTRACE_SET_DEBUGREG, RO, len: 4: Ok
  PTRACE_SET_DEBUGREG, RO, len: 8: Ok
  PTRACE_SET_DEBUGREG, RW, len: 1: Ok
  PTRACE_SET_DEBUGREG, RW, len: 2: Ok
  PTRACE_SET_DEBUGREG, RW, len: 4: Ok
  PTRACE_SET_DEBUGREG, RW, len: 8: Ok
  PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO, len: 1: Ok
  PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO, len: 1: Ok
  PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW, len: 1: Ok
  PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO, len: 6: Ok
  PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO, len: 6: Ok
  PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW, len: 6: Ok
  PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO, len: 6: Ok
  PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO, len: 6: Ok
  PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW, len: 6: Ok
  PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN, RW, len: 512: Ok
  success: ptrace-hwbreak

Signed-off-by: Ravi Bangoria 
---
 .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 535 +++---
 1 file changed, 325 insertions(+), 210 deletions(-)

diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c 
b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index 3066d310f32b..fb1e05d7f77c 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -22,318 +22,433 @@
 #include 
 #include "ptrace.h"
 
-/* Breakpoint access modes */
-enum {
-   BP_X = 1,
-   BP_RW = 2,
-   BP_W = 4,
-};
-
-static pid_t child_pid;
-static struct ppc_debug_info dbginfo;
-
-static void get_dbginfo(void)
-{
-   int ret;
-
-   ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
-   if (ret) {
-   perror("Can't get breakpoint info\n");
-   exit(-1);
-   }
-}
-
-static bool hwbreak_present(void)
-{
-   return (dbginfo.num_data_bps != 0);
-}
+/*
+ * Use volatile on all global var so that compiler doesn't
+ * optimise their load/stores. Otherwise selftest can fail.
+ */
+static volatile __u64 glvar;
 
-static bool dawr_present(void)
-{
-   return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
-}
+#define DAWR_MAX_LEN 512
+static volatile __u8 big_var[DAWR_MAX_LEN] __attribute__((aligned(512)));
 
-static void set_breakpoint_addr(void *addr)
-{
-   int ret;
+#define A_LEN 6
+#define B_LEN 6
+struct gstruct {
+   __u8 a[A_LEN]; /* double word aligned */
+   __u8 b[B_LEN]; /* double word unaligned */
+};
+static volatile struct gstruct gstruct __attribute__((aligned(512)));
 
-   ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
-   if (ret) {
-   perror("Can't set breakpoint addr\n");
-   exit(-1);
-   }
-}
 
-static int set_hwbreakpoint_addr(void *addr, int range)
+static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
 {
-   int ret;
-
-   struct ppc_hw_breakpoint info;
-
-   info.version = 1;
-   info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
-   info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
-   if (range > 0)
-   info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
-   info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
-   info.addr = (__u64)addr;
-   info.addr2 = (__u64)addr + range;
-   info.condition_value = 0;
-
-   ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
-   if (ret < 0) {
-   perror("Can't set breakpoint\n");
+   if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) {
+   perror("Can't get breakpoint info");
exit(-1);
}
-   return ret;
 }
 
-static int del_hwbreakpoint_addr(int watchpoint_handle)
+static bool dawr_present(struct ppc_debug_info *dbginfo)
 {
-   int ret;
-
-   ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
-   if (ret < 0) {
-   perror("Can't delete hw breakpoint\n");
-   exit(-1);
-   }
-   return ret;
+   return !!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
 }
 
-#define DAWR_LENGTH_MAX 512
-
-/* Dummy variables to test read/write accesses */

[PATCH v3 2/3] Powerpc64/Watchpoint: Don't ignore extraneous exceptions

2019-07-09 Thread Ravi Bangoria
On Powerpc64, the watchpoint match range is doubleword granular. On
a watchpoint hit, DAR is set to the first byte of overlap between the
actual access and the watched range, and thus it's quite possible that
DAR does not point inside the user-specified range. For example, say the
user creates a watchpoint with address range 0x1004 to 0x1007, so the hw
is configured to watch from 0x1000 to 0x1007. If there is a 4-byte
access from 0x1002 to 0x1005, DAR will point to 0x1002 and the
interrupt handler considers the exception extraneous, but it actually
isn't, because part of the access falls within what the user asked for.
So let the kernel pass it on to the user and let the user decide what
to do with it instead of silently ignoring it. The drawback is that it
can generate false-positive events.
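Worked form of the example above (values taken from the text; the condition
is the one this patch stops applying on non-8xx):

/* Illustrative fragment: why the old filter wrongly drops this hit. */
unsigned long bp_addr = 0x1004, bp_len = 4;	/* user asked for 0x1004..0x1007 */
unsigned long dar = 0x1002;			/* 4-byte access at 0x1002..0x1005 */

/*
 * Old check: DAR must lie inside the user range.  0x1002 < 0x1004, so
 * 'extraneous' is true and the event is dropped, even though bytes
 * 0x1004..0x1005 of the access do overlap the watched range.
 */
bool extraneous = !(bp_addr <= dar && dar - bp_addr < bp_len);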

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/kernel/hw_breakpoint.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 5c876e986c18..c457d52778e3 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -204,9 +204,10 @@ int hw_breakpoint_handler(struct die_args *args)
 #ifndef CONFIG_PPC_8xx
int stepped = 1;
unsigned int instr;
+#else
+   unsigned long dar = regs->dar;
 #endif
struct arch_hw_breakpoint *info;
-   unsigned long dar = regs->dar;
 
/* Disable breakpoints during exception handling */
hw_breakpoint_disable();
@@ -240,14 +241,14 @@ int hw_breakpoint_handler(struct die_args *args)
 
/*
 * Verify if dar lies within the address range occupied by the symbol
-* being watched to filter extraneous exceptions.  If it doesn't,
-* we still need to single-step the instruction, but we don't
-* generate an event.
+* being watched to filter extraneous exceptions.
 */
info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
+#ifdef CONFIG_PPC_8xx
if (!((bp->attr.bp_addr <= dar) &&
  (dar - bp->attr.bp_addr < bp->attr.bp_len)))
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+#endif
 
 #ifndef CONFIG_PPC_8xx
/* Do not emulate user-space instructions, instead single-step them */
-- 
2.20.1



[PATCH v3 0/3] Powerpc64/Watchpoint: Few important fixes

2019-07-09 Thread Ravi Bangoria
v2: https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-July/192967.html

v2->v3:
 - Rebase to powerpc/next
 - PATCH 2/3 is new

Ravi Bangoria (3):
  Powerpc64/Watchpoint: Fix length calculation for unaligned target
  Powerpc64/Watchpoint: Don't ignore extraneous exceptions
  Powerpc64/Watchpoint: Rewrite ptrace-hwbreak.c selftest

 arch/powerpc/include/asm/debug.h  |   1 +
 arch/powerpc/include/asm/hw_breakpoint.h  |   9 +-
 arch/powerpc/kernel/dawr.c|   6 +-
 arch/powerpc/kernel/hw_breakpoint.c   |  33 +-
 arch/powerpc/kernel/process.c |  46 ++
 arch/powerpc/kernel/ptrace.c  |  37 +-
 arch/powerpc/xmon/xmon.c  |   3 +-
 .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 535 +++---
 8 files changed, 413 insertions(+), 257 deletions(-)

-- 
2.20.1



[PATCH v3 1/3] Powerpc64/Watchpoint: Fix length calculation for unaligned target

2019-07-09 Thread Ravi Bangoria
The watchpoint match range is always doubleword (8 bytes) aligned on
powerpc. If the given range crosses a doubleword boundary, we need to
increase the length so that the next doubleword also gets covered. For
example:

                     address
                           |<--- len = 6 bytes --->|
   |---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
   |   |   |   |   |   |   | x | x | x | x | x | x |   |   |   |   |
   |---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
   |<------- doubleword 1 -------->|<------- doubleword 2 -------->|

In such a case, the current code configures the hardware as:
  start_addr = address & ~HW_BREAKPOINT_ALIGN
  len = 8 bytes

and thus reads/writes in the last 4 bytes of the given range are ignored.
Fix this by including the next doubleword in the length. Also fix the
ptrace code, which was messing up address/len.
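A sketch of the fixed length calculation described above (variable names are
illustrative, not the exact kernel code):

/* Round the start down and the end up to doubleword boundaries, so the
 * 6-byte range above is covered with hw_len = 16 instead of 8.
 */
unsigned long start  = addr & ~(unsigned long)HW_BREAKPOINT_ALIGN;
unsigned long end    = (addr + len + HW_BREAKPOINT_ALIGN) &
		       ~(unsigned long)HW_BREAKPOINT_ALIGN;
unsigned long hw_len = end - start;	/* programmed into DABR/DAWR */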

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/debug.h |  1 +
 arch/powerpc/include/asm/hw_breakpoint.h |  9 +++--
 arch/powerpc/kernel/dawr.c   |  6 ++--
 arch/powerpc/kernel/hw_breakpoint.c  | 24 +++--
 arch/powerpc/kernel/process.c| 46 
 arch/powerpc/kernel/ptrace.c | 37 ++-
 arch/powerpc/xmon/xmon.c |  3 +-
 7 files changed, 83 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index 7756026b95ca..9c1b4aaa374b 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -45,6 +45,7 @@ static inline int debugger_break_match(struct pt_regs *regs) 
{ return 0; }
 static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
+int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw);
 void __set_breakpoint(struct arch_hw_breakpoint *brk);
 bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 41abdae6d079..7e1ccf85908d 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -28,6 +28,7 @@ struct arch_hw_breakpoint {
unsigned long   address;
u16 type;
u16 len; /* length of the target data symbol */
+   u16 hw_len; /* length programmed in hw */
 };
 
 /* Note: Don't change the the first 6 bits below as they are in the same order
@@ -47,6 +48,11 @@ struct arch_hw_breakpoint {
 #define HW_BRK_TYPE_PRIV_ALL   (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
 HW_BRK_TYPE_HYP)
 
+#define HW_BREAKPOINT_ALIGN 0x7
+
+#define DABR_MAX_LEN   8
+#define DAWR_MAX_LEN   512
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include 
 #include 
@@ -58,8 +64,6 @@ struct pmu;
 struct perf_sample_data;
 struct task_struct;
 
-#define HW_BREAKPOINT_ALIGN 0x7
-
 extern int hw_breakpoint_slots(int type);
 extern int arch_bp_generic_fields(int type, int *gen_bp_type);
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
@@ -84,6 +88,7 @@ static inline void hw_breakpoint_disable(void)
brk.address = 0;
brk.type = 0;
brk.len = 0;
+   brk.hw_len = 0;
if (ppc_breakpoint_available())
		__set_breakpoint(&brk);
 }
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 5f66b95b6858..8531623aa9b2 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -30,10 +30,10 @@ int set_dawr(struct arch_hw_breakpoint *brk)
 * DAWR length is stored in field MDR bits 48:53.  Matches range in
 * doublewords (64 bits) biased by -1 eg. 0b000000=1DW and
 * 0b111111=64DW.
-* brk->len is in bytes.
+* brk->hw_len is in bytes.
 * This aligns up to double word size, shifts and does the bias.
 */
-   mrd = ((brk->len + 7) >> 3) - 1;
+   mrd = ((brk->hw_len + 7) >> 3) - 1;
dawrx |= (mrd & 0x3f) << (63 - 53);
 
if (ppc_md.set_dawr)
@@ -54,7 +54,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
const char __user *user_buf,
size_t count, loff_t *ppos)
 {
-   struct arch_hw_breakpoint null_brk = {0, 0, 0};
+   struct arch_hw_breakpoint null_brk = {0, 0, 0, 0};
size_t rc;
 
	/* Send error to user if the hypervisor won't allow us to write DAWR */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 95605a9c9a1e..5c876e986c18 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -147,9 +147,9 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
 const struct perf_event_attr *attr,
 struct arch_hw_breakpoint *hw)
 {
-   int ret = -EINVAL, length_max;
+   int ret = -EINVAL;
 
-   if (!bp)
+   if (!bp || !attr->bp_len)
return ret;
 
hw->type = HW_BRK_TYPE_TRANSLATE;
@@ -169,26 +169,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
hw->address = 

Re: [PATCH v4 0/6] Fixes related namespace alignment/page size/big endian

2019-07-09 Thread Aneesh Kumar K.V


Hi Dan,

Can you merge this to your tree?

-aneesh
"Aneesh Kumar K.V"  writes:

> This series handles configs where hugepage support is not enabled by default.
> Also, we update some of the informational messages to make sure we use
> PAGE_SIZE instead of SZ_4K. We now store the page size and struct page size
> in pfn_sb and do an extra check before enabling the namespace. There is also
> an endianness fix.
>
> The patch series is on top of subsection v10 patchset
>
> http://lore.kernel.org/linux-mm/156092349300.979959.17603710711957735135.st...@dwillia2-desk3.amr.corp.intel.com
>
> Changes from V3:
> * Dropped the change related PFN_MIN_VERSION
> * for pfn_sb minor version < 4, we default page_size to PAGE_SIZE instead of 
> SZ_4k.
>
> Aneesh Kumar K.V (6):
>   nvdimm: Consider probe return -EOPNOTSUPP as success
>   mm/nvdimm: Add page size and struct page size to pfn superblock
>   mm/nvdimm: Use correct #defines instead of open coding
>   mm/nvdimm: Pick the right alignment default when creating dax devices
>   mm/nvdimm: Use correct alignment when looking at first pfn from a
> region
>   mm/nvdimm: Fix endian conversion issues 
>
>  arch/powerpc/include/asm/libnvdimm.h |  9 
>  arch/powerpc/mm/Makefile |  1 +
>  arch/powerpc/mm/nvdimm.c | 34 +++
>  arch/x86/include/asm/libnvdimm.h | 19 +
>  drivers/nvdimm/btt.c |  8 ++--
>  drivers/nvdimm/bus.c |  4 +-
>  drivers/nvdimm/label.c   |  2 +-
>  drivers/nvdimm/namespace_devs.c  | 13 +++---
>  drivers/nvdimm/nd-core.h |  3 +-
>  drivers/nvdimm/nd.h  |  6 ---
>  drivers/nvdimm/pfn.h |  5 ++-
>  drivers/nvdimm/pfn_devs.c| 62 ++--
>  drivers/nvdimm/pmem.c| 26 ++--
>  drivers/nvdimm/region_devs.c | 27 
>  include/linux/huge_mm.h  |  7 +++-
>  kernel/memremap.c|  8 ++--
>  16 files changed, 194 insertions(+), 40 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/libnvdimm.h
>  create mode 100644 arch/powerpc/mm/nvdimm.c
>  create mode 100644 arch/x86/include/asm/libnvdimm.h
>
> -- 
> 2.21.0



[PATCH] sound: ppc: snd_ps3: Remove Unneeded variable: "ret"

2019-07-09 Thread Hariprasad Kelam
This patch fixes the below issue reported by coccicheck:
sound/ppc/snd_ps3.c:631:5-8: Unneeded variable: "ret". Return "0" on
line 668

We cannot change the return type of snd_ps3_pcm_trigger() as it is registered
as snd_pcm_ops->trigger.

Signed-off-by: Hariprasad Kelam 
---
 sound/ppc/snd_ps3.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c
index 71b7fd3..c213eb7 100644
--- a/sound/ppc/snd_ps3.c
+++ b/sound/ppc/snd_ps3.c
@@ -628,7 +628,6 @@ static int snd_ps3_pcm_trigger(struct snd_pcm_substream 
*substream,
   int cmd)
 {
struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
-   int ret = 0;
 
switch (cmd) {
case SNDRV_PCM_TRIGGER_START:
@@ -665,7 +664,7 @@ static int snd_ps3_pcm_trigger(struct snd_pcm_substream 
*substream,
 
}
 
-   return ret;
+   return 0;
 };
 
 /*
-- 
2.7.4



Re: [PATCH 3/5] x86: Kconfig: Remove CONFIG_NODES_SPAN_OTHER_NODES

2019-07-09 Thread Hoan Tran OS
Hi Thomas,

Thanks for your comments.

On 6/25/19 3:45 PM, Thomas Gleixner wrote:
> Hoan,
> 
> On Tue, 25 Jun 2019, Hoan Tran OS wrote:
> 
> Please use 'x86/Kconfig: ' as prefix.
> 
>> This patch removes CONFIG_NODES_SPAN_OTHER_NODES as it's
>> enabled by default with NUMA.
> 
> Please do not use 'This patch' in changelogs. It's pointless because we
> already know that this is a patch.
> 
> See also Documentation/process/submitting-patches.rst and search for 'This
> patch'
> 
> Simply say:
> 
>Remove CONFIG_NODES_SPAN_OTHER_NODES as it's enabled by default with
>NUMA.
> 

Got it, will fix

> But .
> 
>> @@ -1567,15 +1567,6 @@ config X86_64_ACPI_NUMA
>>  ---help---
>>Enable ACPI SRAT based node topology detection.
>>   
>> -# Some NUMA nodes have memory ranges that span
>> -# other nodes.  Even though a pfn is valid and
>> -# between a node's start and end pfns, it may not
>> -# reside on that node.  See memmap_init_zone()
>> -# for details.
>> -config NODES_SPAN_OTHER_NODES
>> -def_bool y
>> -depends on X86_64_ACPI_NUMA
> 
> the changelog does not mention that this lifts the dependency on
> X86_64_ACPI_NUMA and therefore enables that functionality for anything
> which has NUMA enabled including 32bit.
> 

I think this config is used for a NUMA layout in which NUMA node addresses
span other nodes. I think 32-bit NUMA also has the same issue with that
layout. Please correct me if I'm wrong.

> The core mm change gives no helpful information either. You just copied the
> above comment text from some random Kconfig.

Yes, as it's a correct comment and is used at multiple places.

Thanks
Hoan

> 
> This needs a bit more data in the changelogs and the cover letter:
> 
>   - Why is it useful to enable it unconditionally
> 
>   - Why is it safe to do so, even if the architecture had constraints on
> it
> 
>   - What's the potential impact
> 
> Thanks,
> 
>   tglx
> 


Re: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function

2019-07-09 Thread Max Gurtovoy



On 7/9/2019 5:40 PM, Christoph Hellwig wrote:

On Tue, Jul 09, 2019 at 05:37:18PM +0300, Max Gurtovoy wrote:

On 7/9/2019 5:32 PM, Christoph Hellwig wrote:

On Tue, Jul 09, 2019 at 05:31:37PM +0300, Max Gurtovoy wrote:

Are we ok with working on a solution during kernel-5.3 cycle ?

You can start working on it any time, no need to ask for permission.

I just want to make sure we don't remove it from the kernel before we send
a general API solution.

The code is gone in this merge window.


Ok, so we must fix it to kernel-5.3 to make sure we're covered.

Understood.




This way we'll make sure that all the kernel versions have this
functionality...

Again, we do not provide functionality for out-of-tree modules.  We've
had the p2p API for about a year now; it's not like you didn't have
plenty of time.


I didn't know about the intention to remove this code...

Also this code was merged before the p2p API for p2pmem.



Re: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function

2019-07-09 Thread Max Gurtovoy



On 7/9/2019 5:32 PM, Christoph Hellwig wrote:

On Tue, Jul 09, 2019 at 05:31:37PM +0300, Max Gurtovoy wrote:

Are we ok with working on a solution during kernel-5.3 cycle ?

You can start working on it any time, no need to ask for permission.


I just want to make sure we don't remove it from the kernel before we 
send a general API solution.


This way we'll make sure that all the kernel versions have this
functionality...




Re: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function

2019-07-09 Thread Max Gurtovoy



On 7/9/2019 4:59 PM, Christoph Hellwig wrote:

On Tue, Jul 09, 2019 at 01:49:04PM +, Max Gurtovoy wrote:

Hi Greg/Christoph,
Can we leave it in for now, until we find a general solution (for the upcoming
kernel)?
I guess we can somehow generalize the P2P initialization process for PPC and 
leave it empty for now for other archs.
Or maybe we can find some other solution (sysfs/configfs/module param), but it 
will take time since we'll need to work closely with the IBM pci guys that 
wrote this code.

We do not keep code without in-tree users around, especially not if
we have a better API with in-tree users.

AFAICS the only thing you'll need is to wire up the enable/disable
calls.


I guess you're right, but we still need to know the time frame we have 
here since this should be tested carefully on the P9 hardware.


Are we ok with working on a solution during kernel-5.3 cycle ?



RE: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function

2019-07-09 Thread Max Gurtovoy
Hi Greg/Christoph,
Can we leave it in for now, until we find a general solution (for the upcoming
kernel)?
I guess we can somehow generalize the P2P initialization process for PPC and 
leave it empty for now for other archs.
Or maybe we can find some other solution (sysfs/configfs/module param), but it 
will take time since we'll need to work closely with the IBM pci guys that 
wrote this code.

-Max.


-Original Message-
From: Christoph Hellwig  
Sent: Thursday, May 23, 2019 10:53 AM
To: Frederic Barrat 
Cc: Christoph Hellwig ; Benjamin Herrenschmidt 
; Paul Mackerras ; Michael Ellerman 
; linuxppc-dev@lists.ozlabs.org; Max Gurtovoy 

Subject: Re: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p 
function

On Mon, May 06, 2019 at 10:46:11AM +0200, Frederic Barrat wrote:
> Hi,
>
> The PCI p2p and tunnel code is used by the Mellanox CX5 driver, at 
> least their latest, out of tree version, which is used for CORAL. My 
> understanding is that they'll upstream it at some point, though I 
> don't know what their schedule is like.

FYI, Max, who wrote (at least large parts of) that code and is on Cc, agreed
that all P2P code should go through the kernel P2P infrastructure and might be
able to spend some cycles on it.

Which still doesn't change anything about the fact that we [1] generally don't
add infrastructure for anything that is not in the tree.

[1] well, powernv seems to have handled this a little oddly, and is now on my
special watchlist.


Re: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function

2019-07-09 Thread 'gre...@linuxfoundation.org'
On Tue, Jul 09, 2019 at 06:06:54PM +0300, Max Gurtovoy wrote:
> 
> On 7/9/2019 5:40 PM, Christoph Hellwig wrote:
> > On Tue, Jul 09, 2019 at 05:37:18PM +0300, Max Gurtovoy wrote:
> > > On 7/9/2019 5:32 PM, Christoph Hellwig wrote:
> > > > On Tue, Jul 09, 2019 at 05:31:37PM +0300, Max Gurtovoy wrote:
> > > > > Are we ok with working on a solution during kernel-5.3 cycle ?
> > > > You can start working on it any time, no need to ask for permission.
> > > I just want to make sure we don't remove it from the kernel before we send
> > > a general API solution.
> > The code is gone in this merge window.
> 
> Ok, so we must fix it to kernel-5.3 to make sure we're covered.
> 
> Understood.
> 
> > 
> > > This way we'll make sure that all the kernel versions has this
> > > functionality...
> > Again, we do not provide functionality for out of tree modules.  We've
> > had the p2p API for about a year now, its not like you didn't have
> > plenty of time.
> 
> I didn't know about the intention to remove this code...

The original email you responded to in this thread was received by you
back in May.  It is now July; 5.3 will not be out for another 8-9 weeks.
There has been plenty of time here...

greg k-h


Re: [RFC PATCH v5 5/7] kvmppc: Radix changes for secure guest

2019-07-09 Thread janani

On 2019-07-09 05:25, Bharata B Rao wrote:

- After the guest becomes secure, when we handle a page fault for a page
  belonging to the SVM in HV, send that page to UV via UV_PAGE_IN.
- Whenever a page is unmapped on the HV side, inform UV via UV_PAGE_INVAL.

- Ensure that all routines which walk the secondary page tables of
  the guest don't do so for a secure VM. For a secure guest, the
  active secondary page tables are in secure memory, and the secondary
  page tables in HV are freed when the guest becomes secure.

Signed-off-by: Bharata B Rao 

 Reviewed-by: Janani Janakiraman 

---
 arch/powerpc/include/asm/kvm_host.h   | 12 
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  7 +++
 arch/powerpc/kvm/book3s_64_mmu_radix.c| 22 ++
 arch/powerpc/kvm/book3s_hv_hmm.c  | 20 
 5 files changed, 62 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h
b/arch/powerpc/include/asm/kvm_host.h
index 0c49c3401c63..dcbf7480cb10 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -865,6 +865,8 @@ static inline void
kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 #ifdef CONFIG_PPC_UV
 extern int kvmppc_hmm_init(void);
 extern void kvmppc_hmm_free(void);
+extern bool kvmppc_is_guest_secure(struct kvm *kvm);
+extern int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gpa);
 #else
 static inline int kvmppc_hmm_init(void)
 {
@@ -872,6 +874,16 @@ static inline int kvmppc_hmm_init(void)
 }

 static inline void kvmppc_hmm_free(void) {}
+
+static inline bool kvmppc_is_guest_secure(struct kvm *kvm)
+{
+   return false;
+}
+
+static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned 
long gpa)

+{
+   return -EFAULT;
+}
 #endif /* CONFIG_PPC_UV */

 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h
b/arch/powerpc/include/asm/ultravisor-api.h
index d6d6eb2e6e6b..9f5510b55892 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -24,5 +24,6 @@
 #define UV_UNREGISTER_MEM_SLOT 0xF124
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
+#define UV_PAGE_INVAL  0xF138

 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h
b/arch/powerpc/include/asm/ultravisor.h
index fe45be9ee63b..f4f674794b35 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -77,6 +77,13 @@ static inline int uv_unregister_mem_slot(u64 lpid,
u64 slotid)

return ucall(UV_UNREGISTER_MEM_SLOT, retbuf, lpid, slotid);
 }
+
+static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_PAGE_INVAL, retbuf, lpid, gpa, page_shift);
+}
 #endif /* !__ASSEMBLY__ */

 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index f55ef071883f..c454600c454f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 /*
  * Supported radix tree geometry.
@@ -923,6 +925,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run
*run, struct kvm_vcpu *vcpu,
if (!(dsisr & DSISR_PRTABLE_FAULT))
gpa |= ea & 0xfff;

+   if (kvmppc_is_guest_secure(kvm))
+   return kvmppc_send_page_to_uv(kvm, gpa & PAGE_MASK);
+
/* Get the corresponding memslot */
memslot = gfn_to_memslot(kvm, gfn);

@@ -980,6 +985,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct
kvm_memory_slot *memslot,
unsigned long gpa = gfn << PAGE_SHIFT;
unsigned int shift;

+   if (kvmppc_is_guest_secure(kvm)) {
+   uv_page_inval(kvm->arch.lpid, gpa, PAGE_SIZE);
+   return 0;
+   }
+
	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
@@ -997,6 +1007,9 @@ int kvm_age_radix(struct kvm *kvm, struct
kvm_memory_slot *memslot,
int ref = 0;
unsigned long old, *rmapp;

+   if (kvmppc_is_guest_secure(kvm))
+   return ref;
+
	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
@@ -1021,6 +1034,9 @@ int kvm_test_age_radix(struct kvm *kvm, struct
kvm_memory_slot *memslot,
unsigned int shift;
int ref = 0;

+   if (kvmppc_is_guest_secure(kvm))
+   return ref;
+
	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
@@ -1038,6 

[PATCH v2 3/7] kexec_elf: remove parsing of section headers

2019-07-09 Thread Sven Schnelle
We're not using them, so we can drop the parsing.

Signed-off-by: Sven Schnelle 
---
 include/linux/kexec.h |   1 -
 kernel/kexec_elf.c| 137 --
 2 files changed, 138 deletions(-)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index da2a6b1d69e7..f0b809258ed3 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -226,7 +226,6 @@ struct kexec_elf_info {
 
const struct elfhdr *ehdr;
const struct elf_phdr *proghdrs;
-   struct elf_shdr *sechdrs;
 };
 
 int kexec_build_elf_info(const char *buf, size_t len, struct elfhdr *ehdr,
diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 76e7df64d715..effe9dc0b055 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -244,134 +244,6 @@ static int elf_read_phdrs(const char *buf, size_t len,
return 0;
 }
 
-/**
- * elf_is_shdr_sane - check that it is safe to use the section header
- * @buf_len:   size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
-{
-   bool size_ok;
-
-   /* SHT_NULL headers have undefined values, so we can't check them. */
-   if (shdr->sh_type == SHT_NULL)
-   return true;
-
-   /* Now verify sh_entsize */
-   switch (shdr->sh_type) {
-   case SHT_SYMTAB:
-   size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
-   break;
-   case SHT_RELA:
-   size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
-   break;
-   case SHT_DYNAMIC:
-   size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
-   break;
-   case SHT_REL:
-   size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
-   break;
-   case SHT_NOTE:
-   case SHT_PROGBITS:
-   case SHT_HASH:
-   case SHT_NOBITS:
-   default:
-   /*
-* This is a section whose entsize requirements
-* I don't care about.  If I don't know about
-* the section I can't care about it's entsize
-* requirements.
-*/
-   size_ok = true;
-   break;
-   }
-
-   if (!size_ok) {
-   pr_debug("ELF section with wrong entry size.\n");
-   return false;
-   } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
-   pr_debug("ELF section address wraps around.\n");
-   return false;
-   }
-
-   if (shdr->sh_type != SHT_NOBITS) {
-   if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
-   pr_debug("ELF section location wraps around.\n");
-   return false;
-   } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
-   pr_debug("ELF section not in file.\n");
-   return false;
-   }
-   }
-
-   return true;
-}
-
-static int elf_read_shdr(const char *buf, size_t len,
-struct kexec_elf_info *elf_info,
-int idx)
-{
-   struct elf_shdr *shdr = &elf_info->sechdrs[idx];
-   const struct elfhdr *ehdr = elf_info->ehdr;
-   const char *sbuf;
-   struct elf_shdr *buf_shdr;
-
-   sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
-   buf_shdr = (struct elf_shdr *) sbuf;
-
-   shdr->sh_name  = elf32_to_cpu(ehdr, buf_shdr->sh_name);
-   shdr->sh_type  = elf32_to_cpu(ehdr, buf_shdr->sh_type);
-   shdr->sh_addr  = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
-   shdr->sh_offset= elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
-   shdr->sh_link  = elf32_to_cpu(ehdr, buf_shdr->sh_link);
-   shdr->sh_info  = elf32_to_cpu(ehdr, buf_shdr->sh_info);
-
-   /*
-* The following fields have a type equivalent to Elf_Addr
-* both in 32 bit and 64 bit ELF.
-*/
-   shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
-   shdr->sh_size  = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
-   shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
-   shdr->sh_entsize   = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
-
-   return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_shdrs - read the section headers from the buffer
- *
- * This function assumes that the section header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_shdrs(const char *buf, size_t len,
- struct kexec_elf_info *elf_info)
-{
-   size_t shdr_size, i;
-
-   /*
-* e_shnum is at most 65536 so calculating
-* the size of the section header cannot overflow.
-*/
-   shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
-
-   elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
-   if (!elf_info->sechdrs)
-   return -ENOMEM;
-
-   for (i = 0; 

[PATCH v2 6/7] kexec_elf: remove Elf_Rel macro

2019-07-09 Thread Sven Schnelle
It wasn't used anywhere, so let's drop it.

Signed-off-by: Sven Schnelle 
---
 kernel/kexec_elf.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 99e6d63b5dfc..b7e47ddd7cad 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -8,10 +8,6 @@
 #include 
 #include 
 
-#ifndef Elf_Rel
-#define Elf_Rel Elf64_Rel
-#endif /* Elf_Rel */
-
 static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
 {
return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-- 
2.20.1



[PATCH v2 2/7] kexec_elf: change order of elf_*_to_cpu() functions

2019-07-09 Thread Sven Schnelle
Change the order of the functions to 64/32/16; no functional change.

Signed-off-by: Sven Schnelle 
---
 kernel/kexec_elf.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 6e9f52171ede..76e7df64d715 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -31,22 +31,22 @@ static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, 
uint64_t value)
return value;
 }
 
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
+static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
 {
if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le16_to_cpu(value);
+   value = le32_to_cpu(value);
else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be16_to_cpu(value);
+   value = be32_to_cpu(value);
 
return value;
 }
 
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
+static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
 {
if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le32_to_cpu(value);
+   value = le16_to_cpu(value);
else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be32_to_cpu(value);
+   value = be16_to_cpu(value);
 
return value;
 }
-- 
2.20.1



[PATCH v2 1/7] kexec: add KEXEC_ELF

2019-07-09 Thread Sven Schnelle
Right now powerpc provides an implementation to read ELF files
for the kexec_file_load() syscall. Make that available as a public
kexec interface so it can be re-used on other architectures.
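As a rough sketch of the intended reuse, another architecture's kexec_file
loader could then do something like the following (function names are from
this series, but the exact argument lists are assumptions, not a settled API):

/* Hypothetical arch-side user of the generic ELF helpers. */
static int my_arch_elf_kexec_load(struct kimage *image, char *buf,
				  unsigned long len)
{
	struct elfhdr ehdr;
	struct kexec_elf_info elf_info;
	struct kexec_buf kbuf = { .image = image, .buf_max = -1UL };
	unsigned long lowest_load_addr;
	int ret;

	/* Parse and sanity-check the ELF and program headers. */
	ret = kexec_build_elf_info(buf, len, &ehdr, &elf_info);
	if (ret)
		return ret;

	/* Queue each PT_LOAD segment via kexec_add_buffer(). */
	ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &lowest_load_addr);

	/* ... arch-specific setup (purgatory, device tree, ...) would follow. */
	return ret;
}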

Signed-off-by: Sven Schnelle 
---
 arch/Kconfig   |   3 +
 arch/powerpc/Kconfig   |   1 +
 arch/powerpc/kernel/kexec_elf_64.c | 551 +
 include/linux/kexec.h  |  24 ++
 kernel/Makefile|   1 +
 kernel/kexec_elf.c | 537 
 6 files changed, 576 insertions(+), 541 deletions(-)
 create mode 100644 kernel/kexec_elf.c

diff --git a/arch/Kconfig b/arch/Kconfig
index c47b328eada0..30694aca4316 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -18,6 +18,9 @@ config KEXEC_CORE
select CRASH_CORE
bool
 
+config KEXEC_ELF
+   bool
+
 config HAVE_IMA_KEXEC
bool
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 12cee37f15c4..addc2dad78e0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -510,6 +510,7 @@ config KEXEC_FILE
select KEXEC_CORE
select HAVE_IMA_KEXEC
select BUILD_BIN2C
+   select KEXEC_ELF
depends on PPC64
depends on CRYPTO=y
depends on CRYPTO_SHA256=y
diff --git a/arch/powerpc/kernel/kexec_elf_64.c 
b/arch/powerpc/kernel/kexec_elf_64.c
index ba4f18a43ee8..30bd57a93c17 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Load ELF vmlinux file for the kexec_file_load syscall.
  *
@@ -10,15 +11,6 @@
  * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
  * Heavily modified for the kernel by
  * Thiago Jung Bauermann .
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation (version 2 of the License).
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #define pr_fmt(fmt)"kexec_elf: " fmt
@@ -39,532 +31,6 @@
 #define Elf_Rel Elf64_Rel
 #endif /* Elf_Rel */
 
-struct elf_info {
-   /*
-* Where the ELF binary contents are kept.
-* Memory managed by the user of the struct.
-*/
-   const char *buffer;
-
-   const struct elfhdr *ehdr;
-   const struct elf_phdr *proghdrs;
-   struct elf_shdr *sechdrs;
-};
-
-static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
-{
-   return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-}
-
-static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
-{
-   if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le64_to_cpu(value);
-   else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be64_to_cpu(value);
-
-   return value;
-}
-
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
-{
-   if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le16_to_cpu(value);
-   else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be16_to_cpu(value);
-
-   return value;
-}
-
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
-{
-   if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-   value = le32_to_cpu(value);
-   else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-   value = be32_to_cpu(value);
-
-   return value;
-}
-
-/**
- * elf_is_ehdr_sane - check that it is safe to use the ELF header
- * @buf_len:   size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
-{
-   if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
-   pr_debug("Bad program header size.\n");
-   return false;
-   } else if (ehdr->e_shnum > 0 &&
-  ehdr->e_shentsize != sizeof(struct elf_shdr)) {
-   pr_debug("Bad section header size.\n");
-   return false;
-   } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
-  ehdr->e_version != EV_CURRENT) {
-   pr_debug("Unknown ELF version.\n");
-   return false;
-   }
-
-   if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
-   size_t phdr_size;
-
-   /*
-* e_phnum is at most 65535 so calculating the size of the
-* program header cannot overflow.
-*/
-   phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
-   /* Sanity check the program header table location. */
-   if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
-   pr_debug("Program headers at 

[PATCH v2 4/7] kexec_elf: remove PURGATORY_STACK_SIZE

2019-07-09 Thread Sven Schnelle
It's not used anywhere so just drop it.

Signed-off-by: Sven Schnelle 
---
 kernel/kexec_elf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index effe9dc0b055..70d31b8feeae 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -8,8 +8,6 @@
 #include 
 #include 
 
-#define PURGATORY_STACK_SIZE   (16 * 1024)
-
 #define elf_addr_to_cpu elf64_to_cpu
 
 #ifndef Elf_Rel
-- 
2.20.1



[PATCH v2 5/7] kexec_elf: remove elf_addr_to_cpu macro

2019-07-09 Thread Sven Schnelle
It had only one definition, so just use the function directly.

Signed-off-by: Sven Schnelle 
---
 kernel/kexec_elf.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 70d31b8feeae..99e6d63b5dfc 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -8,8 +8,6 @@
 #include 
 #include 
 
-#define elf_addr_to_cpu elf64_to_cpu
-
 #ifndef Elf_Rel
 #define Elf_Rel Elf64_Rel
 #endif /* Elf_Rel */
@@ -143,9 +141,9 @@ static int elf_read_ehdr(const char *buf, size_t len, 
struct elfhdr *ehdr)
ehdr->e_type  = elf16_to_cpu(ehdr, buf_ehdr->e_type);
ehdr->e_machine   = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
ehdr->e_version   = elf32_to_cpu(ehdr, buf_ehdr->e_version);
-   ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
-   ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
-   ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
+   ehdr->e_entry = elf64_to_cpu(ehdr, buf_ehdr->e_entry);
+   ehdr->e_phoff = elf64_to_cpu(ehdr, buf_ehdr->e_phoff);
+   ehdr->e_shoff = elf64_to_cpu(ehdr, buf_ehdr->e_shoff);
ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
@@ -190,18 +188,18 @@ static int elf_read_phdr(const char *buf, size_t len,
buf_phdr = (struct elf_phdr *) pbuf;
 
phdr->p_type   = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
-   phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
-   phdr->p_paddr  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
-   phdr->p_vaddr  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
+   phdr->p_offset = elf64_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
+   phdr->p_paddr  = elf64_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
+   phdr->p_vaddr  = elf64_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
phdr->p_flags  = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
 
/*
 * The following fields have a type equivalent to Elf_Addr
 * both in 32 bit and 64 bit ELF.
 */
-   phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
-   phdr->p_memsz  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
-   phdr->p_align  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
+   phdr->p_filesz = elf64_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
+   phdr->p_memsz  = elf64_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
+   phdr->p_align  = elf64_to_cpu(elf_info->ehdr, buf_phdr->p_align);
 
return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
 }
-- 
2.20.1



[PATCH v2 0/7] kexec: add generic support for elf kernel images

2019-07-09 Thread Sven Schnelle
Hi List,

I've split up the patch a bit more. The first one moves the generic ELF code
out of arch/powerpc to kernel/kexec_elf.c and prefixes the exposed functions
with kexec_. The other patches remove stuff that is not used, as proposed in
the review.

Changes to v1:
 - split up patch into smaller pieces
 - rebase onto powerpc/next
 - remove unused variable in kexec_elf_load()

Changes to RFC version:
 - remove unused Elf_Rel macro
 - remove section header parsing
 - remove PURGATORY_STACK_SIZE
 - change order of elf_*_to_cpu() functions
 - remove elf_addr_to_cpu macro
 
Sven Schnelle (7):
  kexec: add KEXEC_ELF
  kexec_elf: change order of elf_*_to_cpu() functions
  kexec_elf: remove parsing of section headers
  kexec_elf: remove PURGATORY_STACK_SIZE
  kexec_elf: remove elf_addr_to_cpu macro
  kexec_elf: remove Elf_Rel macro
  kexec_elf: remove unused variable in kexec_elf_load()

 arch/Kconfig   |   3 +
 arch/powerpc/Kconfig   |   1 +
 arch/powerpc/kernel/kexec_elf_64.c | 551 +
 include/linux/kexec.h  |  23 ++
 kernel/Makefile|   1 +
 kernel/kexec_elf.c | 389 
 6 files changed, 427 insertions(+), 541 deletions(-)
 create mode 100644 kernel/kexec_elf.c

-- 
2.20.1



[PATCH v2 7/7] kexec_elf: remove unused variable in kexec_elf_load()

2019-07-09 Thread Sven Schnelle
base is never assigned a non-zero value, so we can remove it.

Signed-off-by: Sven Schnelle 
---
 kernel/kexec_elf.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index b7e47ddd7cad..a56ec5481e71 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -348,7 +348,7 @@ int kexec_elf_load(struct kimage *image, struct elfhdr 
*ehdr,
 struct kexec_buf *kbuf,
 unsigned long *lowest_load_addr)
 {
-   unsigned long base = 0, lowest_addr = UINT_MAX;
+   unsigned long lowest_addr = UINT_MAX;
int ret;
size_t i;
 
@@ -370,7 +370,7 @@ int kexec_elf_load(struct kimage *image, struct elfhdr 
*ehdr,
kbuf->bufsz = size;
kbuf->memsz = phdr->p_memsz;
kbuf->buf_align = phdr->p_align;
-   kbuf->buf_min = phdr->p_paddr + base;
+   kbuf->buf_min = phdr->p_paddr;
ret = kexec_add_buffer(kbuf);
if (ret)
goto out;
@@ -380,9 +380,6 @@ int kexec_elf_load(struct kimage *image, struct elfhdr 
*ehdr,
lowest_addr = load_addr;
}
 
-   /* Update entry point to reflect new load address. */
-   ehdr->e_entry += base;
-
*lowest_load_addr = lowest_addr;
ret = 0;
  out:
-- 
2.20.1



Re: [PATCH v5 4/7] kvmppc: Handle memory plug/unplug to secure VM

2019-07-09 Thread janani

On 2019-07-09 05:25, Bharata B Rao wrote:

Register the new memslot with UV during plug and unregister
the memslot during unplug.

Signed-off-by: Bharata B Rao 
Acked-by: Paul Mackerras 

 Reviewed-by: Janani Janakiraman 

---
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  7 +++
 arch/powerpc/kvm/book3s_hv.c  | 19 +++
 3 files changed, 27 insertions(+)

diff --git a/arch/powerpc/include/asm/ultravisor-api.h
b/arch/powerpc/include/asm/ultravisor-api.h
index 07b7d638e7af..d6d6eb2e6e6b 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -21,6 +21,7 @@
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
 #define UV_REGISTER_MEM_SLOT   0xF120
+#define UV_UNREGISTER_MEM_SLOT 0xF124
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C

diff --git a/arch/powerpc/include/asm/ultravisor.h
b/arch/powerpc/include/asm/ultravisor.h
index b46042f1aa8f..fe45be9ee63b 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -70,6 +70,13 @@ static inline int uv_register_mem_slot(u64 lpid,
u64 start_gpa, u64 size,
return ucall(UV_REGISTER_MEM_SLOT, retbuf, lpid, start_gpa,
 size, flags, slotid);
 }
+
+static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_UNREGISTER_MEM_SLOT, retbuf, lpid, slotid);
+}
 #endif /* !__ASSEMBLY__ */

 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c 
b/arch/powerpc/kvm/book3s_hv.c

index b8f801d00ad4..7cbb5edaed01 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -77,6 +77,7 @@
 #include 
 #include 
 #include 
+#include 

 #include "book3s.h"

@@ -4504,6 +4505,24 @@ static void
kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
kvmppc_radix_flush_memslot(kvm, old);
+   /*
+* If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
+*/
+   if (!kvm->arch.secure_guest)
+   return;
+
+   /*
+* TODO: Handle KVM_MR_MOVE
+*/
+   if (change == KVM_MR_CREATE) {
+   uv_register_mem_slot(kvm->arch.lpid,
+  new->base_gfn << PAGE_SHIFT,
+  new->npages * PAGE_SIZE,
+  0,
+  new->id);
+   } else if (change == KVM_MR_DELETE) {
+   uv_unregister_mem_slot(kvm->arch.lpid, old->id);
+   }
 }

 /*




Re: [PATCH v5 3/7] kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls

2019-07-09 Thread janani

On 2019-07-09 05:25, Bharata B Rao wrote:

H_SVM_INIT_START: Initiate securing a VM
H_SVM_INIT_DONE: Conclude securing a VM

As part of H_SVM_INIT_START, register all existing memslots with
the UV. The H_SVM_INIT_DONE call by UV informs HV that the transition of
the guest to secure mode is complete.

These two states (transition to secure mode STARTED and transition
to secure mode COMPLETED) are recorded in kvm->arch.secure_guest.
Setting these states will cause the assembly code that enters the
guest to call the UV_RETURN ucall instead of trying to enter the
guest directly.
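A minimal sketch (illustrative helpers, not part of the patch) of how the two
flag bits combine; the flag values themselves are the ones added below:

static inline bool kvmppc_svm_transition_started(struct kvm *kvm)
{
	return kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START;
}

static inline bool kvmppc_svm_fully_secure(struct kvm *kvm)
{
	return kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE;
}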

Signed-off-by: Bharata B Rao 
Acked-by: Paul Mackerras 

 Reviewed-by: Janani Janakiraman 

---
 arch/powerpc/include/asm/hvcall.h |  2 ++
 arch/powerpc/include/asm/kvm_book3s_hmm.h | 12 
 arch/powerpc/include/asm/kvm_host.h   |  4 +++
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  9 ++
 arch/powerpc/kvm/book3s_hv.c  |  7 +
 arch/powerpc/kvm/book3s_hv_hmm.c  | 34 +++
 7 files changed, 69 insertions(+)

diff --git a/arch/powerpc/include/asm/hvcall.h
b/arch/powerpc/include/asm/hvcall.h
index 05b8536f6653..fa7695928e30 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -343,6 +343,8 @@
 /* Platform-specific hcalls used by the Ultravisor */
 #define H_SVM_PAGE_IN  0xEF00
 #define H_SVM_PAGE_OUT 0xEF04
+#define H_SVM_INIT_START   0xEF08
+#define H_SVM_INIT_DONE0xEF0C

 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR  1
diff --git a/arch/powerpc/include/asm/kvm_book3s_hmm.h
b/arch/powerpc/include/asm/kvm_book3s_hmm.h
index 21f3de5f2acb..8c7aacabb2e0 100644
--- a/arch/powerpc/include/asm/kvm_book3s_hmm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_hmm.h
@@ -11,6 +11,8 @@ extern unsigned long kvmppc_h_svm_page_out(struct kvm 
*kvm,

  unsigned long gra,
  unsigned long flags,
  unsigned long page_shift);
+extern unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
+extern unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
 #else
 static inline unsigned long
 kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
@@ -25,5 +27,15 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long 
gra,

 {
return H_UNSUPPORTED;
 }
+
+static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+   return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+   return H_UNSUPPORTED;
+}
 #endif /* CONFIG_PPC_UV */
 #endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h
b/arch/powerpc/include/asm/kvm_host.h
index ac1a101beb07..0c49c3401c63 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -272,6 +272,10 @@ struct kvm_hpt_info {

 struct kvm_resize_hpt;

+/* Flag values for kvm_arch.secure_guest */
+#define KVMPPC_SECURE_INIT_START	0x1 /* H_SVM_INIT_START has been called */
+#define KVMPPC_SECURE_INIT_DONE	0x2 /* H_SVM_INIT_DONE completed */
+
 struct kvm_arch {
unsigned int lpid;
unsigned int smt_mode;  /* # vcpus per virtual core */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h
b/arch/powerpc/include/asm/ultravisor-api.h
index f1c5800ac705..07b7d638e7af 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -20,6 +20,7 @@
 /* opcodes */
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
+#define UV_REGISTER_MEM_SLOT   0xF120
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C

diff --git a/arch/powerpc/include/asm/ultravisor.h
b/arch/powerpc/include/asm/ultravisor.h
index 16f8e0e8ec3f..b46042f1aa8f 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -61,6 +61,15 @@ static inline int uv_page_out(u64 lpid, u64 dst_ra,
u64 src_gpa, u64 flags,
return ucall(UV_PAGE_OUT, retbuf, lpid, dst_ra, src_gpa, flags,
 page_shift);
 }
+
+static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
+  u64 flags, u64 slotid)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_REGISTER_MEM_SLOT, retbuf, lpid, start_gpa,
+size, flags, slotid);
+}
 #endif /* !__ASSEMBLY__ */

 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c 
b/arch/powerpc/kvm/book3s_hv.c

index 8ee66aa0da58..b8f801d00ad4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1097,6 +1097,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu 
*vcpu)

kvmppc_get_gpr(vcpu, 

Re: [PATCH v5 2/7] kvmppc: Shared pages support for secure guests

2019-07-09 Thread janani

On 2019-07-09 05:25, Bharata B Rao wrote:

A secure guest will share some of its pages with the hypervisor (e.g. virtio
bounce buffers etc). Support shared pages in the HMM driver.

Once a secure page is converted to a shared page, the HMM driver will stop
tracking that page.

Signed-off-by: Bharata B Rao 

 Reviewed-by: Janani Janakiraman 

---
 arch/powerpc/include/asm/hvcall.h |  3 ++
 arch/powerpc/kvm/book3s_hv_hmm.c  | 66 +--
 2 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h
b/arch/powerpc/include/asm/hvcall.h
index 2f6b952deb0f..05b8536f6653 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -337,6 +337,9 @@
 #define H_TLB_INVALIDATE   0xF808
 #define H_COPY_TOFROM_GUEST0xF80C

+/* Flags for H_SVM_PAGE_IN */
+#define H_PAGE_IN_SHARED0x1
+
 /* Platform-specific hcalls used by the Ultravisor */
 #define H_SVM_PAGE_IN  0xEF00
 #define H_SVM_PAGE_OUT 0xEF04
diff --git a/arch/powerpc/kvm/book3s_hv_hmm.c 
b/arch/powerpc/kvm/book3s_hv_hmm.c

index cd34323888b6..36562b382e70 100644
--- a/arch/powerpc/kvm/book3s_hv_hmm.c
+++ b/arch/powerpc/kvm/book3s_hv_hmm.c
@@ -52,6 +52,7 @@ struct kvmppc_hmm_page_pvt {
unsigned long *rmap;
unsigned int lpid;
unsigned long gpa;
+   bool skip_page_out;
 };

 struct kvmppc_hmm_migrate_args {
@@ -215,6 +216,53 @@ static const struct migrate_vma_ops
kvmppc_hmm_migrate_ops = {
.finalize_and_map = kvmppc_hmm_migrate_finalize_and_map,
 };

+/*
+ * Shares the page with HV, thus making it a normal page.
+ *
+ * - If the page is already secure, then provision a new page and 
share

+ * - If the page is a normal page, share the existing page
+ *
+ * In the former case, uses the HMM fault handler to release the HMM 
page.

+ */
+static unsigned long
+kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long 
page_shift)

+{
+
+   int ret;
+   struct page *hmm_page;
+   struct kvmppc_hmm_page_pvt *pvt;
+   unsigned long pfn;
+   unsigned long *rmap;
+   struct kvm_memory_slot *slot;
+   unsigned long gfn = gpa >> page_shift;
+   int srcu_idx;
+
+   srcu_idx = srcu_read_lock(&kvm->srcu);
+   slot = gfn_to_memslot(kvm, gfn);
+   if (!slot) {
+   srcu_read_unlock(&kvm->srcu, srcu_idx);
+   return H_PARAMETER;
+   }
+   rmap = &kvm->arch.rmap[gfn - slot->base_gfn];
+   srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+   if (kvmppc_is_hmm_pfn(*rmap)) {
+   hmm_page = pfn_to_page(*rmap & ~KVMPPC_PFN_HMM);
+   pvt = (struct kvmppc_hmm_page_pvt *)
+   hmm_devmem_page_get_drvdata(hmm_page);
+   pvt->skip_page_out = true;
+   }
+
+   pfn = gfn_to_pfn(kvm, gpa >> page_shift);
+   if (is_error_noslot_pfn(pfn))
+   return H_PARAMETER;
+
+	ret = uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, 
page_shift);

+   kvm_release_pfn_clean(pfn);
+
+   return (ret == U_SUCCESS) ? H_SUCCESS : H_PARAMETER;
+}
+
 /*
  * Move page from normal memory to secure memory.
  */
@@ -235,9 +283,12 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned 
long gpa,

if (page_shift != PAGE_SHIFT)
return H_P3;

-   if (flags)
+   if (flags & ~H_PAGE_IN_SHARED)
return H_P2;

+   if (flags & H_PAGE_IN_SHARED)
+   return kvmppc_share_page(kvm, gpa, page_shift);
+
	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
slot = gfn_to_memslot(kvm, gfn);
@@ -299,8 +350,17 @@ kvmppc_hmm_fault_migrate_alloc_and_copy(struct
vm_area_struct *vma,
   hmm_devmem_page_get_drvdata(spage);

pfn = page_to_pfn(dpage);
-   ret = uv_page_out(pvt->lpid, pfn << PAGE_SHIFT,
- pvt->gpa, 0, PAGE_SHIFT);
+
+   /*
+* This same alloc_and_copy() callback is used in two cases:
+* - When HV touches a secure page, for which we do page-out
+* - When a secure page is converted to shared page, we touch
+*   the page to essentially discard the HMM page. In this case we
+*   skip page-out.
+*/
+   if (!pvt->skip_page_out)
+   ret = uv_page_out(pvt->lpid, pfn << PAGE_SHIFT,
+ pvt->gpa, 0, PAGE_SHIFT);
if (ret == U_SUCCESS)
*dst_pfn = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
 }


Re: [PATCH v5 1/7] kvmppc: HMM backend driver to manage pages of secure guest

2019-07-09 Thread janani

On 2019-07-09 05:25, Bharata B Rao wrote:

HMM driver for KVM PPC to manage page transitions of
secure guest via H_SVM_PAGE_IN and H_SVM_PAGE_OUT hcalls.

H_SVM_PAGE_IN: Move the content of a normal page to secure page
H_SVM_PAGE_OUT: Move the content of a secure page to normal page

Private ZONE_DEVICE memory equal to the amount of secure memory
available in the platform for running secure guests is created
via a HMM device. The movement of pages between normal and secure
memory is done by ->alloc_and_copy() callback routine of migrate_vma().

Signed-off-by: Bharata B Rao 

 Reviewed-by: Janani Janakiraman 

---
 arch/powerpc/include/asm/hvcall.h |   4 +
 arch/powerpc/include/asm/kvm_book3s_hmm.h |  29 ++
 arch/powerpc/include/asm/kvm_host.h   |  12 +
 arch/powerpc/include/asm/ultravisor-api.h |   2 +
 arch/powerpc/include/asm/ultravisor.h |  17 +
 arch/powerpc/kvm/Makefile |   3 +
 arch/powerpc/kvm/book3s_hv.c  |  19 +
 arch/powerpc/kvm/book3s_hv_hmm.c  | 482 ++
 8 files changed, 568 insertions(+)
 create mode 100644 arch/powerpc/include/asm/kvm_book3s_hmm.h
 create mode 100644 arch/powerpc/kvm/book3s_hv_hmm.c

diff --git a/arch/powerpc/include/asm/hvcall.h
b/arch/powerpc/include/asm/hvcall.h
index 463c63a9fcf1..2f6b952deb0f 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -337,6 +337,10 @@
 #define H_TLB_INVALIDATE   0xF808
 #define H_COPY_TOFROM_GUEST0xF80C

+/* Platform-specific hcalls used by the Ultravisor */
+#define H_SVM_PAGE_IN  0xEF00
+#define H_SVM_PAGE_OUT 0xEF04
+
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR  1
 #define H_SET_MODE_RESOURCE_SET_DAWR   2
diff --git a/arch/powerpc/include/asm/kvm_book3s_hmm.h
b/arch/powerpc/include/asm/kvm_book3s_hmm.h
new file mode 100644
index ..21f3de5f2acb
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_hmm.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __POWERPC_KVM_PPC_HMM_H__
+#define __POWERPC_KVM_PPC_HMM_H__
+
+#ifdef CONFIG_PPC_UV
+extern unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+#else
+static inline unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
+unsigned long flags, unsigned long page_shift)
+{
+   return H_UNSUPPORTED;
+}
+
+static inline unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
+ unsigned long flags, unsigned long page_shift)
+{
+   return H_UNSUPPORTED;
+}
+#endif /* CONFIG_PPC_UV */
+#endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h
b/arch/powerpc/include/asm/kvm_host.h
index 184becb62ea4..ac1a101beb07 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -858,4 +858,16 @@ static inline void kvm_arch_vcpu_blocking(struct
kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) 
{}


+#ifdef CONFIG_PPC_UV
+extern int kvmppc_hmm_init(void);
+extern void kvmppc_hmm_free(void);
+#else
+static inline int kvmppc_hmm_init(void)
+{
+   return 0;
+}
+
+static inline void kvmppc_hmm_free(void) {}
+#endif /* CONFIG_PPC_UV */
+
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h
b/arch/powerpc/include/asm/ultravisor-api.h
index 7c4d0b4ced12..f1c5800ac705 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -20,5 +20,7 @@
 /* opcodes */
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
+#define UV_PAGE_IN 0xF128
+#define UV_PAGE_OUT0xF12C

 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h
b/arch/powerpc/include/asm/ultravisor.h
index 996c1efd6c6d..16f8e0e8ec3f 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -44,6 +44,23 @@ static inline int uv_register_pate(u64 lpid, u64
dw0, u64 dw1)
return ucall(UV_WRITE_PATE, retbuf, lpid, dw0, dw1);
 }

+static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
+u64 page_shift)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_PAGE_IN, retbuf, lpid, src_ra, dst_gpa, flags,
+page_shift);
+}
+

Re: [PATCH 4/4] powerpc/64: reuse PPC32 static inline flush_dcache_range()

2019-07-09 Thread Segher Boessenkool
On Tue, Jul 09, 2019 at 08:21:54AM +0530, Aneesh Kumar K.V wrote:
> On 7/9/19 7:50 AM, Oliver O'Halloran wrote:
> >I don't think it's that, there's some magic in flush_icache_range() to
> >handle dropping prefetched instructions on 970.
> >
> >>So overall wondering why we need that extra barriers there.
> >
> >I think the isync is needed there because the architecture only
> >requires sync to provide ordering. A sync alone doesn't guarantee the
> >dcbfs have actually completed so the isync is necessary to ensure the
> >flushed cache lines are back in memory. That said, as far as I know
> >all the IBM book3s chips from power4 onwards will wait for pending
> >dcbfs when they hit a sync, but that might change in the future.
> >
> 
> ISA doesn't list that as the sequence. The only place where isync was
> mentioned was w.r.t. icbi, where we want to discard the prefetch.

You need an isync to guarantee all icbi insns before the isync have been
performed before any code after the isync is fetched.  Killing the
prefetch is just part of it.
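
For readers following the thread: the conventional sequence for exposing newly
written instructions on these parts is sketched below (illustrative
pseudo-assembly of what a flush_icache_range()-style routine is expected to do,
not a quote of the current kernel code).

```
	# for each cache line in the range
	dcbst	0,rN		# push the modified data line out to memory
	sync			# wait for the dcbst stores to be performed
	# for each cache line in the range
	icbi	0,rN		# invalidate the stale instruction cache line
	sync			# order the icbi before the context synchronisation
	isync			# discard prefetched instructions and refetch
```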

> >If it's a problem we could add a cpu-feature section around the isync
> >to no-op it in the common case. However, when I had a look with perf
> >it always showed that the sync was the hotspot so I don't think it'll
> >help much.
> 
> What about the preceding barriers (sync; isync;) before dcbf? Why are 
> they needed?

This isn't very generic code.  The code seems to be trying to do
coherency in software.  Like you needed to do for DART on U3/U4, or for
some of the PMU/SMU communication -- both are through main memory, but
both are not cache coherent.  Which means all rules go out of the
window.

To do this properly you need some platform-specific code, for example
to kill hardware and software prefetch streams.  Or hope^Wguarantee
those never touch your communication buffers.


I recommend you keep the original function, maybe with a more specific
name, for the DART etc. code; and have all normal(*) dcbf users use a
new more normal function, with just a single sync instruction.
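
A rough sketch of that simpler variant (the function name and the use of
L1_CACHE_BYTES here are illustrative, not an existing interface):

```
/* Sketch only: write back every line in [start, stop) and order the
 * dcbf's with a single sync.  Intended for cache-coherent users; the
 * software-coherency (DART-style) callers would keep the heavier
 * sequence under a more specific name.
 */
static inline void flush_dcache_range_simple(unsigned long start,
					     unsigned long stop)
{
	unsigned long addr;

	for (addr = start & ~(L1_CACHE_BYTES - 1); addr < stop;
	     addr += L1_CACHE_BYTES)
		asm volatile("dcbf 0, %0" : : "r" (addr) : "memory");
	asm volatile("sync" : : : "memory");
}
```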


Segher


(*) As far as anything using dcbf can be called "normal"!


Re: [PATCH v2] tpm: tpm_ibm_vtpm: Fix unallocated banks

2019-07-09 Thread Jarkko Sakkinen
On Mon, Jul 08, 2019 at 03:43:04PM -0700, Christoph Hellwig wrote:
> On Mon, Jul 08, 2019 at 06:24:04PM -0400, Mimi Zohar wrote:
> > > static int tpm_get_pcr_allocation(struct tpm_chip *chip)
> > > {
> > >   int rc;
> > > 
> > >   rc = (chip->flags & TPM_CHIP_FLAG_TPM2) ?
> > >tpm2_get_pcr_allocation(chip) :
> > >tpm1_get_pcr_allocation(chip);
> > 
> > > 
> > >   return rc > 0 ? -ENODEV : rc;
> > > }
> > > 
> > > This addresses the issue that Stefan also pointed out. You have to
> > > deal with the TPM error codes.
> > 
> > Hm, in the past I was told by Christoph not to use the ternary
> > operator.  Have things changed?  Other than removing the comment, the
> > only other difference is the return.
> 
> In the end it is a matter of personal preference, but I find the
> quoted version above using the ternary horribly obfuscated.

I fully agree that the return statement is an obfuscated mess and
not a good place at all for using the ternary operator.

/Jarkko


Re: [PATCH v2] tpm: tpm_ibm_vtpm: Fix unallocated banks

2019-07-09 Thread Jarkko Sakkinen
On Mon, Jul 08, 2019 at 06:24:04PM -0400, Mimi Zohar wrote:
> > static int tpm_get_pcr_allocation(struct tpm_chip *chip)
> > {
> > int rc;
> > 
> > rc = (chip->flags & TPM_CHIP_FLAG_TPM2) ?
> >  tpm2_get_pcr_allocation(chip) :
> >  tpm1_get_pcr_allocation(chip);
> 
> > 
> > return rc > 0 ? -ENODEV : rc;
> > }
> > 
> > This addresses the issue that Stefan also pointed out. You have to
> > deal with the TPM error codes.
> 
> Hm, in the past I was told by Christoph not to use the ternary
> operator.  Have things changed?  Other than removing the comment, the
> only other difference is the return.

Let's purge the snippet:

rc = (chip->flags & TPM_CHIP_FLAG_TPM2) ?
 tpm2_get_pcr_allocation(chip) :
 tpm1_get_pcr_allocation(chip);

In this statement the ternary operator does make sense because it is the
most readable way to express what is going on. We assign one of two
options as the value of rc based on a condition.

It is a natural fit for a ternary operation and less messy than two
assignment statements.

On the other hand:

return rc > 0 ? -ENODEV : rc;

Here a better form would definitely be:

if (rc > 0)
return -ENODEV;

return rc;

It is just too hard to grasp the logic when the ternary operator is used.

Total ban of any language construct would be just utterly stupid. I
would instead use common sense here.

/Jarkko


Re: [linux-next][P9]Build error at drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h:69 error: field mirror has incomplete type

2019-07-09 Thread Nathan Chancellor
On Tue, Jul 09, 2019 at 09:56:37PM +0530, Abdul Haleem wrote:
> Greeting's
> 
> linux-next failed to build on Power 9 Box with below error
> 
> In file included from drivers/gpu/drm/amd/amdgpu/amdgpu.h:72:0,
>  from drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:39:
> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h:69:20: error: field ‘mirror’
> has incomplete type
>   struct hmm_mirror mirror;
> ^
> make[5]: *** [drivers/gpu/drm/amd/amdgpu/amdgpu_drv.o] Error 1
> make[4]: *** [drivers/gpu/drm/amd/amdgpu] Error 2
> make[3]: *** [drivers/gpu/drm] Error 2
> make[2]: *** [drivers/gpu] Error 2
> 
> Kernel version: 5.2.0-next-20190708
> Machine: Power 9 
> Kernel config attached
> 
> -- 
> Regard's
> 
> Abdul Haleem
> IBM Linux Technology Centre
> 

This should be fixed on next-20190709:

https://git.kernel.org/next/linux-next/c/e5eaa7cc0c0359cfe17b0027a6ac5eda7a9635db

Cheers,
Nathan


Re: [PATCH 0/2] fix use-after-free in mpc831x_usb_cfg() and do some cleanups

2019-07-09 Thread Markus Elfring
> According to Markus's suggestion, split it into two small patches:

> https://lkml.org/lkml/2019/7/8/520

Thanks for picking up adjustment possibilities from my feedback.
https://lore.kernel.org/lkml/99840e11-e0e6-b3f4-e35b-56ef4ec39...@web.de/


Now I wonder why you omitted message recipients from the cover letter.
Please keep the recipient lists complete here as well for subsequent
patch series that improve the same source file.


Can a subject like “[PATCH 0/2] Fix mpc831x_usb_cfg()” be more succinct?


>  powerpc/83xx: fix use-after-free in mpc831x_usb_cfg()


This update variant is generally fine.
I would prefer to avoid the addition of function calls at two places
when the corresponding exception handling should be specified only once
at the end of such a function implementation.


>  powerpc/83xx: cleanup error paths in mpc831x_usb_cfg()

I would find it clearer to fix the error handling in the first update
step completely.
I guess that a renaming of the label “out” into “out_unmap” (or “unmap_io”?)
would be an auxiliary change for the second update step.


I am curious if different preferences for change combinations will trigger
further collateral evolution.

Regards,
Markus


Re: [PATCH v2 03/35] powerpc: Use kmemdup rather than duplicating its implementation

2019-07-09 Thread Nathan Lynch
Fuqian Huang  writes:
> kmemdup is introduced to duplicate a region of memory in a neat way.
> Rather than kmalloc/kzalloc + memcpy, with which the programmer needs to
> write the size twice (sometimes leading to mistakes), kmemdup improves
> readability, leads to smaller code and also reduces the chances of mistakes.
> Suggestion to use kmemdup rather than kmalloc/kzalloc + memcpy.
>
> Signed-off-by: Fuqian Huang 
> ---
> Changes in v2:
>   - Fix a typo in commit message (memset -> memcpy)

Thanks, but this and the unchecked kmalloc result (and incorrect gfp
flags) have already been addressed in commit
348ea30f51fc63ce3c7fd7dba6043e8e3ee0ef34 ("powerpc/pseries: avoid
blocking in irq when queuing hotplug events"):

https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?h=next=348ea30f51fc63ce3c7fd7dba6043e8e3ee0ef34
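
For context, the conversion such patches propose has this general shape
(illustrative snippet only, not the actual pseries hunk):

```
/* before: the size is spelled twice and can get out of sync */
ptr = kmalloc(size, GFP_KERNEL);
if (ptr)
	memcpy(ptr, src, size);

/* after: one call, one size */
ptr = kmemdup(src, size, GFP_KERNEL);
```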


Re: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function

2019-07-09 Thread Christoph Hellwig
On Tue, Jul 09, 2019 at 06:06:54PM +0300, Max Gurtovoy wrote:
> Also this code was merged before the p2p API for p2pmem.

Yes, without a single user or intention to submit a user, and without
covering the most useful use case (PCIe switches).  While at the same
time the people involved completely ignored the PCIe P2P discussions
that have been going on the PCI list for a long time.

This is a textbook example of why code needs to be upstream first,
with a broad discussion, instead of slipping some crap in for
out-of-tree drivers.


Re: ["RFC PATCH" 1/2] powerpc/mm: Fix node look up with numa=off boot

2019-07-09 Thread Nathan Lynch
Michael Ellerman  writes:
> Nathan Lynch  writes:
>> "Aneesh Kumar K.V"  writes:
 Just checking: do people still need numa=off? Seems like it's a
 maintenance burden :-)
 
>>>
>>> That is used in kdump kernel.
>>
>> I see, thanks.
>
> That doesn't mean it's a good idea :)
>
> Does it actually reduce memory usage much? Last time I dug into the
> kdump kernel's usage of weird command line flags none of them really did
> anything useful.

I think it's intended to work around bugs in numa initialization, e.g.

https://www.suse.com/support/kb/doc/?id=7023399

Hopefully the original bug with numa/kdump interaction has been fixed?



Re: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function

2019-07-09 Thread Christoph Hellwig
On Tue, Jul 09, 2019 at 05:37:18PM +0300, Max Gurtovoy wrote:
>
> On 7/9/2019 5:32 PM, Christoph Hellwig wrote:
>> On Tue, Jul 09, 2019 at 05:31:37PM +0300, Max Gurtovoy wrote:
>>> Are we ok with working on a solution during kernel-5.3 cycle ?
>> You can start working on it any time, no need to ask for permission.
>
> I just want to make sure we don't remove it from the kernel before we send 
> a general API solution.

The code is gone in this merge window.

> This way we'll make sure that all the kernel versions has this 
> functionality...

Again, we do not provide functionality for out-of-tree modules.  We've
had the p2p API for about a year now, it's not like you didn't have
plenty of time.


Re: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function

2019-07-09 Thread Christoph Hellwig
On Tue, Jul 09, 2019 at 05:31:37PM +0300, Max Gurtovoy wrote:
> Are we ok with working on a solution during kernel-5.3 cycle ?

You can start working on it any time, no need to ask for permission.


Re: [PATCH 1/3] powerpc/powernv: remove the unused pnv_pci_set_p2p function

2019-07-09 Thread Christoph Hellwig
On Tue, Jul 09, 2019 at 01:49:04PM +, Max Gurtovoy wrote:
> Hi Greg/Christoph,
> Can we leave it meanwhile till we'll find a general solution (for the 
> upcoming kernel) ?
> I guess we can somehow generalize the P2P initialization process for PPC and 
> leave it empty for now for other archs.
> Or maybe we can find some other solution (sysfs/configfs/module param), but 
> it will take time since we'll need to work closely with the IBM pci guys that 
> wrote this code.

We do not keep code without in-tree users around, especially not if
we have a better API with in-tree users.

AFAICS the only thing you'll need is to wire up the enable/disable
calls.


Re: [PATCH v2] powerpc: slightly improve cache helpers

2019-07-09 Thread Segher Boessenkool
On Tue, Jul 09, 2019 at 07:04:43AM +0200, Christophe Leroy wrote:
> Le 08/07/2019 à 21:14, Nathan Chancellor a écrit :
> >On Mon, Jul 08, 2019 at 11:19:30AM +1000, Michael Ellerman wrote:
> >>On Fri, 2019-05-10 at 09:24:48 UTC, Christophe Leroy wrote:
> >>>Cache instructions (dcbz, dcbi, dcbf and dcbst) take two registers
> >>>that are summed to obtain the target address. Using 'Z' constraint
> >>>and '%y0' argument gives GCC the opportunity to use both registers
> >>>instead of only one with the second being forced to 0.
> >>>
> >>>Suggested-by: Segher Boessenkool 
> >>>Signed-off-by: Christophe Leroy 
> >>
> >>Applied to powerpc next, thanks.
> >>
> >>https://git.kernel.org/powerpc/c/6c5875843b87c3adea2beade9d1b8b3d4523900a
> >>
> >>cheers
> >
> >This patch causes a regression with clang:
> 
> Is that a Clang bug ?

I would think so, but cannot tell from the given information.

> Do you have a disassembly of the code both with and without this patch 
> in order to compare ?

That's what we need to start debugging this, yup.

> Segher, any idea ?

There is nothing I recognise, no.


Segher
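
For reference, the change under discussion boils down to constraints of this
shape (a minimal sketch, not the exact kernel hunk):

```
/* "Z" plus the %y0 operand modifier lets GCC emit the indexed (RA|0,RB)
 * form of the cache instruction instead of forcing one register to 0.
 */
static inline void dcbf(const void *addr)
{
	__asm__ __volatile__ ("dcbf %y0" : : "Z" (*(const char *)addr) : "memory");
}
```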


[v5 5/6] powerpc/mce: Handle UE event for memcpy_mcsafe

2019-07-09 Thread Santosh Sivaraj
If we take a UE on one of the instructions with a fixup entry, set nip
to continue execution at the fixup entry. Neither process the event
further nor print it.

Based-on-patch-by: Reza Arbab 
Cc: Reza Arbab 
Cc: Mahesh Salgaonkar 
Signed-off-by: Santosh Sivaraj 
---
 arch/powerpc/include/asm/mce.h  |  4 +++-
 arch/powerpc/kernel/mce.c   | 12 +++-
 arch/powerpc/kernel/mce_power.c | 15 +--
 3 files changed, 27 insertions(+), 4 deletions(-)

Nick, I didn't add has_fixup_handler in the mce_event structure; if we did, we would
have to access the mce_event from the ue_handler code as well. That is because Mahesh
did not want mce_event to be accessed outside of save_mce_event, get_mce_event
and remove_mce_event; that is why I added ignore_event in mce_err as well.

I have added the comment you mentioned in your reply.

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 94888a7025b3..f74257eb013b 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -122,7 +122,8 @@ struct machine_check_event {
enum MCE_UeErrorType ue_error_type:8;
u8  effective_address_provided;
u8  physical_address_provided;
-   u8  reserved_1[5];
+   u8  ignore_event;
+   u8  reserved_1[4];
u64 effective_address;
u64 physical_address;
u8  reserved_2[8];
@@ -193,6 +194,7 @@ struct mce_error_info {
enum MCE_Initiator  initiator:8;
enum MCE_ErrorClass error_class:8;
boolsync_error;
+   boolignore_event;
 };
 
 #define MAX_MC_EVT 100
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index e78c4f18ea0a..092e6bbc603f 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -144,7 +144,9 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
-   machine_check_ue_event(mce);
+   mce->u.ue_error.ignore_event = mce_err->ignore_event;
+   if (!mce->u.ue_error.ignore_event)
+   machine_check_ue_event(mce);
}
}
return;
@@ -230,6 +232,14 @@ void machine_check_queue_event(void)
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return;
 
+   /*
+* Don't report this machine check because the caller has asked us
+* to ignore the event; it has a fixup handler which will do the
+* appropriate error handling and reporting.
+*/
+   if (evt.error_type == MCE_ERROR_TYPE_UE && evt.u.ue_error.ignore_event)
+   return;
+
index = __this_cpu_inc_return(mce_queue_count) - 1;
/* If queue is full, just return for now. */
if (index >= MAX_MC_EVT) {
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 04666c0b40a8..582a22b1acfb 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -11,6 +11,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -18,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Convert an address related to an mm to a PFN. NOTE: we are in real
@@ -565,9 +567,18 @@ static int mce_handle_derror(struct pt_regs *regs,
return 0;
 }
 
-static long mce_handle_ue_error(struct pt_regs *regs)
+static long mce_handle_ue_error(struct pt_regs *regs,
+   struct mce_error_info *mce_err)
 {
long handled = 0;
+   const struct exception_table_entry *entry;
+
+   entry = search_kernel_exception_table(regs->nip);
+   if (entry) {
+   mce_err->ignore_event = true;
+   regs->nip = extable_fixup(entry);
+   return 1;
+   }
 
/*
 * On specific SCOM read via MMIO we may get a machine check
@@ -600,7 +611,7 @@ static long mce_handle_error(struct pt_regs *regs,
&phys_addr);
 
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
-   handled = mce_handle_ue_error(regs);
+   handled = mce_handle_ue_error(regs, &mce_err);
 
save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
 
-- 
2.20.1



[v5 6/6] powerpc: add machine check safe copy_to_user

2019-07-09 Thread Santosh Sivaraj
Use the memcpy_mcsafe() implementation to define copy_to_user_mcsafe().
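
As a hedged illustration of the calling convention (the function below is made
up for this note; only the return-value semantics come from the patch):

```
/* Like copy_to_user(), copy_to_user_mcsafe() returns the number of bytes
 * NOT copied; a UE in the source therefore shows up as a short copy.
 */
static ssize_t read_out_to_user(void __user *ubuf, const void *kbuf, size_t len)
{
	size_t left = copy_to_user_mcsafe(ubuf, kbuf, len);

	if (left == len)
		return -EIO;		/* nothing was copied */
	return len - left;		/* short (or full) read */
}
```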

Signed-off-by: Santosh Sivaraj 
---
 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/include/asm/uaccess.h | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c1c636308c8..a173b392c272 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -134,6 +134,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && 
!RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE  if PPC64
+   select ARCH_HAS_UACCESS_MCSAFE  if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/powerpc/include/asm/uaccess.h 
b/arch/powerpc/include/asm/uaccess.h
index 76f34346b642..8899864a5552 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -386,6 +386,20 @@ static inline unsigned long raw_copy_to_user(void __user 
*to,
return ret;
 }
 
+static __always_inline unsigned long __must_check
+copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
+{
+   if (likely(check_copy_size(from, n, true))) {
+   if (access_ok(to, n)) {
+   allow_write_to_user(to, n);
+   n = memcpy_mcsafe((void *)to, from, n);
+   prevent_write_to_user(to, n);
+   }
+   }
+
+   return n;
+}
+
 extern unsigned long __clear_user(void __user *addr, unsigned long size);
 
 static inline unsigned long clear_user(void __user *addr, unsigned long size)
-- 
2.20.1



[v5 4/6] extable: Add function to search only kernel exception table

2019-07-09 Thread Santosh Sivaraj
In real mode, search_exception_tables() cannot be called because it
also searches the module exception tables if an entry is not found
in the kernel exception table.

Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Nicholas Piggin 
Signed-off-by: Santosh Sivaraj 
---
 include/linux/extable.h |  2 ++
 kernel/extable.c| 16 +---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/linux/extable.h b/include/linux/extable.h
index 41c5b3a25f67..0c2819ba67f0 100644
--- a/include/linux/extable.h
+++ b/include/linux/extable.h
@@ -19,6 +19,8 @@ void trim_init_extable(struct module *m);
 
 /* Given an address, look for it in the exception tables */
 const struct exception_table_entry *search_exception_tables(unsigned long add);
+const struct
+exception_table_entry *search_kernel_exception_table(unsigned long addr);
 
 #ifdef CONFIG_MODULES
 /* For extable.c to search modules' exception tables. */
diff --git a/kernel/extable.c b/kernel/extable.c
index e23cce6e6092..6d544cb79fff 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -40,13 +40,23 @@ void __init sort_main_extable(void)
}
 }
 
-/* Given an address, look for it in the exception tables. */
+/* For the given address, look for it in the kernel exception table */
+const
+struct exception_table_entry *search_kernel_exception_table(unsigned long addr)
+{
+   return search_extable(__start___ex_table,
+ __stop___ex_table - __start___ex_table, addr);
+}
+
+/*
+ * Given an address, look for it in the kernel and the module exception
+ * tables.
+ */
 const struct exception_table_entry *search_exception_tables(unsigned long addr)
 {
const struct exception_table_entry *e;
 
-   e = search_extable(__start___ex_table,
-  __stop___ex_table - __start___ex_table, addr);
+   e = search_kernel_exception_table(addr);
if (!e)
e = search_module_extables(addr);
return e;
-- 
2.20.1



[v5 3/6] powerpc/memcpy: Add memcpy_mcsafe for pmem

2019-07-09 Thread Santosh Sivaraj
From: Balbir Singh 

The pmem infrastructure uses memcpy_mcsafe in the pmem layer so that a
machine check exception encountered during the memcpy is converted into
a return value on failure. The return value is the number of bytes
remaining to be copied.
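
As a quick illustration of that contract (sketch only; the wrapper and its
error handling are illustrative, not taken from the pmem driver):

```
/* memcpy_mcsafe() returns 0 on success, or the number of bytes left
 * uncopied when a machine check interrupts the copy.
 */
static int copy_from_pmem(void *dst, const void *src, size_t size)
{
	int left = memcpy_mcsafe(dst, src, size);

	return left ? -EIO : 0;	/* surface the UE as -EIO instead of a panic */
}
```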

This patch largely borrows from the copyuser_power7 logic and does not add
the VMX optimizations, largely to keep the patch simple. If needed those
optimizations can be folded in.

Signed-off-by: Balbir Singh 
[ar...@linux.ibm.com: Added symbol export]
[santosh: return remaining bytes instead of -EFAULT]
Signed-off-by: Santosh Sivaraj 
---
 arch/powerpc/include/asm/string.h   |   2 +
 arch/powerpc/lib/Makefile   |   2 +-
 arch/powerpc/lib/memcpy_mcsafe_64.S | 239 
 3 files changed, 242 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/lib/memcpy_mcsafe_64.S

diff --git a/arch/powerpc/include/asm/string.h 
b/arch/powerpc/include/asm/string.h
index 9bf6dffb4090..b72692702f35 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -53,7 +53,9 @@ void *__memmove(void *to, const void *from, __kernel_size_t 
n);
 #ifndef CONFIG_KASAN
 #define __HAVE_ARCH_MEMSET32
 #define __HAVE_ARCH_MEMSET64
+#define __HAVE_ARCH_MEMCPY_MCSAFE
 
+extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
 extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
 extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
 extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index c55f9c27bf79..529d6536eb4a 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o 
copypage_power7.o \
   memcpy_power7.o
 
 obj64-y+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
-  memcpy_64.o pmem.o
+  memcpy_64.o pmem.o memcpy_mcsafe_64.o
 
 obj64-$(CONFIG_SMP)+= locks.o
 obj64-$(CONFIG_ALTIVEC)+= vmx-helper.o
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S 
b/arch/powerpc/lib/memcpy_mcsafe_64.S
new file mode 100644
index ..4d8a3d315992
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_mcsafe_64.S
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard 
+ * Author - Balbir Singh 
+ */
+#include 
+#include 
+#include 
+
+   .macro err1
+100:
+   EX_TABLE(100b,.Ldo_err1)
+   .endm
+
+   .macro err2
+200:
+   EX_TABLE(200b,.Ldo_err2)
+   .endm
+
+.Ldo_err2:
+   ld  r22,STK_REG(R22)(r1)
+   ld  r21,STK_REG(R21)(r1)
+   ld  r20,STK_REG(R20)(r1)
+   ld  r19,STK_REG(R19)(r1)
+   ld  r18,STK_REG(R18)(r1)
+   ld  r17,STK_REG(R17)(r1)
+   ld  r16,STK_REG(R16)(r1)
+   ld  r15,STK_REG(R15)(r1)
+   ld  r14,STK_REG(R14)(r1)
+   addir1,r1,STACKFRAMESIZE
+.Ldo_err1:
+   /* Do a byte by byte copy to get the exact remaining size */
+   mtctr   r7
+100:   EX_TABLE(100b, .Ldone)
+46:
+err1;  lbz r0,0(r4)
+   addir4,r4,1
+err1;  stb r0,0(r3)
+   addir3,r3,1
+   bdnz46b
+   li  r3,0
+   blr
+
+.Ldone:
+   mfctr   r3
+   blr
+
+
+_GLOBAL(memcpy_mcsafe)
+   mr  r7,r5
+   cmpldi  r5,16
+   blt .Lshort_copy
+
+.Lcopy:
+   /* Get the source 8B aligned */
+   neg r6,r4
+   mtocrf  0x01,r6
+   clrldi  r6,r6,(64-3)
+
+   bf  cr7*4+3,1f
+err1;  lbz r0,0(r4)
+   addir4,r4,1
+err1;  stb r0,0(r3)
+   addir3,r3,1
+   subir7,r7,1
+
+1: bf  cr7*4+2,2f
+err1;  lhz r0,0(r4)
+   addir4,r4,2
+err1;  sth r0,0(r3)
+   addir3,r3,2
+   subir7,r7,2
+
+2: bf  cr7*4+1,3f
+err1;  lwz r0,0(r4)
+   addir4,r4,4
+err1;  stw r0,0(r3)
+   addir3,r3,4
+   subir7,r7,4
+
+3: sub r5,r5,r6
+   cmpldi  r5,128
+   blt 5f
+
+   mflrr0
+   stdur1,-STACKFRAMESIZE(r1)
+   std r14,STK_REG(R14)(r1)
+   std r15,STK_REG(R15)(r1)
+   std r16,STK_REG(R16)(r1)
+   std r17,STK_REG(R17)(r1)
+   std r18,STK_REG(R18)(r1)
+   std r19,STK_REG(R19)(r1)
+   std r20,STK_REG(R20)(r1)
+   std r21,STK_REG(R21)(r1)
+   std r22,STK_REG(R22)(r1)
+   std r0,STACKFRAMESIZE+16(r1)
+
+   srdir6,r5,7
+   mtctr   r6
+
+   /* Now do cacheline (128B) sized loads and stores. */
+   .align  5
+4:
+err2;  ld  r0,0(r4)
+err2;  ld  r6,8(r4)
+err2;  ld  r8,16(r4)
+err2;  ld  r9,24(r4)
+err2;  ld  r10,32(r4)
+err2;  ld  r11,40(r4)
+err2;  ld  r12,48(r4)
+err2;  ld  r14,56(r4)
+err2;  ld  r15,64(r4)
+err2;  ld  r16,72(r4)
+err2;  ld  

[v5 2/6] powerpc/mce: Fix MCE handling for huge pages

2019-07-09 Thread Santosh Sivaraj
From: Balbir Singh 

The current code would fail on huge page addresses, since the shift
would be incorrect. Use the correct page shift value returned by
__find_linux_pte() to get the correct pfn. The code is now more generic
and can handle both regular and compound pages.
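
For example (numbers purely illustrative), with a 16MB hugepage the returned
shift is 24 rather than PAGE_SHIFT, and the real address is rebuilt as:

```
unsigned int shift;	/* filled in via __find_linux_pte() */
unsigned long pfn = addr_to_pfn(regs, addr, &shift);

/* keep the low 'shift' bits of the effective address as the offset */
unsigned long real_addr = (pfn << shift) + (addr & ((1UL << shift) - 1));
```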

Fixes: ba41e1e1ccb9 ("powerpc/mce: Hookup derror (load/store) UE errors")

Signed-off-by: Balbir Singh 
[ar...@linux.ibm.com: Fixup pseries_do_memory_failure()]
Signed-off-by: Reza Arbab 
Signed-off-by: Santosh Sivaraj 
---
 arch/powerpc/include/asm/mce.h   |  3 ++-
 arch/powerpc/kernel/mce_power.c  | 26 --
 arch/powerpc/platforms/pseries/ras.c |  6 --
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index a4c6a74ad2fb..94888a7025b3 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -209,7 +209,8 @@ extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
 extern void machine_check_print_event_info(struct machine_check_event *evt,
   bool user_mode, bool in_guest);
-unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr,
+ unsigned int *shift);
 #ifdef CONFIG_PPC_BOOK3S_64
 void flush_and_reload_slb(void);
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index e39536aad30d..04666c0b40a8 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -23,7 +23,8 @@
  * Convert an address related to an mm to a PFN. NOTE: we are in real
  * mode, we could potentially race with page table updates.
  */
-unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr,
+ unsigned int *shift)
 {
pte_t *ptep;
unsigned long flags;
@@ -36,13 +37,15 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned 
long addr)
 
local_irq_save(flags);
if (mm == current->mm)
-   ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
+   ptep = find_current_mm_pte(mm->pgd, addr, NULL, shift);
else
-   ptep = find_init_mm_pte(addr, NULL);
+   ptep = find_init_mm_pte(addr, shift);
local_irq_restore(flags);
if (!ptep || pte_special(*ptep))
return ULONG_MAX;
-   return pte_pfn(*ptep);
+   if (!*shift)
+   *shift = PAGE_SHIFT;
+   return (pte_val(*ptep) & PTE_RPN_MASK) >> *shift;
 }
 
 /* flush SLBs and reload */
@@ -358,15 +361,16 @@ static int mce_find_instr_ea_and_pfn(struct pt_regs 
*regs, uint64_t *addr,
unsigned long pfn, instr_addr;
struct instruction_op op;
struct pt_regs tmp = *regs;
+   unsigned int shift;
 
-   pfn = addr_to_pfn(regs, regs->nip);
+   pfn = addr_to_pfn(regs, regs->nip, &shift);
if (pfn != ULONG_MAX) {
-   instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
+   instr_addr = (pfn << shift) + (regs->nip & ((1 << shift) - 1));
instr = *(unsigned int *)(instr_addr);
if (!analyse_instr(&op, &tmp, instr)) {
-   pfn = addr_to_pfn(regs, op.ea);
+   pfn = addr_to_pfn(regs, op.ea, &shift);
*addr = op.ea;
-   *phys_addr = (pfn << PAGE_SHIFT);
+   *phys_addr = (pfn << shift);
return 0;
}
/*
@@ -442,12 +446,14 @@ static int mce_handle_ierror(struct pt_regs *regs,
if (mce_err->sync_error &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
unsigned long pfn;
+   unsigned int shift;
 
if (get_paca()->in_mce < MAX_MCE_DEPTH) {
-   pfn = addr_to_pfn(regs, regs->nip);
+   pfn = addr_to_pfn(regs, regs->nip,
+ &shift);
if (pfn != ULONG_MAX) {
*phys_addr =
-   (pfn << PAGE_SHIFT);
+   (pfn << shift);
}
}
}
diff --git a/arch/powerpc/platforms/pseries/ras.c 
b/arch/powerpc/platforms/pseries/ras.c
index f16fdd0f71f7..5e43283d3300 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -740,12 +740,14 @@ static void pseries_do_memory_failure(struct pt_regs 
*regs,
paddr = 

[v5 1/6] powerpc/mce: Make machine_check_ue_event() static

2019-07-09 Thread Santosh Sivaraj
From: Reza Arbab 

The function doesn't get used outside this file, so make it static.

Signed-off-by: Reza Arbab 
Signed-off-by: Santosh Sivaraj 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/kernel/mce.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b18df633eae9..e78c4f18ea0a 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -33,7 +33,7 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
mce_ue_event_queue);
 
 static void machine_check_process_queued_event(struct irq_work *work);
-void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_check_ue_event(struct machine_check_event *evt);
 static void machine_process_ue_event(struct work_struct *work);
 
 static struct irq_work mce_event_process_work = {
@@ -203,7 +203,7 @@ void release_mce_event(void)
 /*
  * Queue up the MCE event which then can be handled later.
  */
-void machine_check_ue_event(struct machine_check_event *evt)
+static void machine_check_ue_event(struct machine_check_event *evt)
 {
int index;
 
-- 
2.20.1



[v5 0/6] powerpc: implement machine check safe memcpy

2019-07-09 Thread Santosh Sivaraj
During a memcpy from a pmem device, if a machine check exception is
generated we end up in a panic. In the case of an fsdax read, this should
only result in -EIO. Avoid the panic by implementing memcpy_mcsafe.

Before this patch series:

```
bash-4.4# mount -o dax /dev/pmem0 /mnt/pmem/
[ 7621.714094] Disabling lock debugging due to kernel taint
[ 7621.714099] MCE: CPU0: machine check (Severe) Host UE Load/Store [Not 
recovered]
[ 7621.714104] MCE: CPU0: NIP: [c0088978] memcpy_power7+0x418/0x7e0
[ 7621.714107] MCE: CPU0: Hardware error
[ 7621.714112] opal: Hardware platform error: Unrecoverable Machine Check 
exception
[ 7621.714118] CPU: 0 PID: 1368 Comm: mount Tainted: G   M  
5.2.0-rc5-00239-g241e39004581 #50
[ 7621.714123] NIP:  c0088978 LR: c08e16f8 CTR: 01de
[ 7621.714129] REGS: c000fffbfd70 TRAP: 0200   Tainted: G   M   
(5.2.0-rc5-00239-g241e39004581)
[ 7621.714131] MSR:  92209033   CR: 
24428840  XER: 0004
[ 7621.714160] CFAR: c00889a8 DAR: deadbeefdeadbeef DSISR: 8000 
IRQMASK: 0
[ 7621.714171] GPR00: 0e00 c000f0b8b1e0 c12cf100 
c000ed8e1100 
[ 7621.714186] GPR04: c2001100 0001 0200 
03fff1272000 
[ 7621.714201] GPR08: 8000 0010 0020 
0030 
[ 7621.714216] GPR12: 0040 7fffb8c6d390 0050 
0060 
[ 7621.714232] GPR16: 0070  0001 
c000f0b8b960 
[ 7621.714247] GPR20: 0001 c000f0b8b940 0001 
0001 
[ 7621.714262] GPR24: c1382560 c00c003b6380 c00c003b6380 
0001 
[ 7621.714277] GPR28:  0001 c200 
0001 
[ 7621.714294] NIP [c0088978] memcpy_power7+0x418/0x7e0
[ 7621.714298] LR [c08e16f8] pmem_do_bvec+0xf8/0x430
...  ...
```

After this patch series:

```
bash-4.4# mount -o dax /dev/pmem0 /mnt/pmem/
[25302.883978] Buffer I/O error on dev pmem0, logical block 0, async page read
[25303.020816] EXT4-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your 
own risk
[25303.021236] EXT4-fs (pmem0): Can't read superblock on 2nd try
[25303.152515] EXT4-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your 
own risk
[25303.284031] EXT4-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your 
own risk
[25304.084100] UDF-fs: bad mount option "dax" or missing value
mount: /mnt/pmem: wrong fs type, bad option, bad superblock on /dev/pmem0, 
missing codepage or helper program, or other error.
```

The MCE is injected on a pmem address using mambo. The last patch, which adds a nop,
is only for testing on mambo, where r13 is not restored upon hitting vector 200.

The memcpy code can be optimised by adding VMX optimizations, and GAS macros can
be used to enable code reusability; I will send that as another series.

---
Change-log:

v5:
* Don't use search_exception_tables since it searches for module exception
  tables also [Nicholas]
* Fix commit message for patch 2 [Nicholas]

v4:
* Squash the "return remaining bytes" patch into the memcpy_mcsafe implementation
  patch [christophe]
* access_ok() should be checked for copy_to_user_mcsafe() [christophe]

v3:
* Drop patch which enables DR/IR for external modules
* Drop notifier call chain, we don't want to do that in real mode
* Return remaining bytes from memcpy_mcsafe correctly
* We no longer restore r13 for simulator tests, rather use a nop at 
  vector 0x200 [workaround for simulator; not to be merged]

v2:
* Don't set RI bit explicitly [mahesh]
* Re-ordered series to get r13 workaround as the last patch

---
Balbir Singh (2):
  powerpc/mce: Fix MCE handling for huge pages
  powerpc/memcpy: Add memcpy_mcsafe for pmem

Reza Arbab (1):
  powerpc/mce: Make machine_check_ue_event() static

Santosh Sivaraj (3):
  extable: Add function to search only kernel exception table
  powerpc/mce: Handle UE event for memcpy_mcsafe
  powerpc: add machine check safe copy_to_user

 arch/powerpc/Kconfig |   1 +
 arch/powerpc/include/asm/mce.h   |   7 +-
 arch/powerpc/include/asm/string.h|   2 +
 arch/powerpc/include/asm/uaccess.h   |  14 ++
 arch/powerpc/kernel/mce.c|  16 +-
 arch/powerpc/kernel/mce_power.c  |  41 +++--
 arch/powerpc/lib/Makefile|   2 +-
 arch/powerpc/lib/memcpy_mcsafe_64.S  | 239 +++
 arch/powerpc/platforms/pseries/ras.c |   6 +-
 include/linux/extable.h  |   2 +
 kernel/extable.c |  16 +-
 11 files changed, 323 insertions(+), 23 deletions(-)
 create mode 100644 arch/powerpc/lib/memcpy_mcsafe_64.S

-- 
2.20.1



[PATCH 2/2] powerpc/83xx: cleanup error paths in mpc831x_usb_cfg()

2019-07-09 Thread Wen Yang
Rename the jump labels according to the cleanup they perform,
and move reference handling to simplify cleanup.

Signed-off-by: Wen Yang 
Cc: Scott Wood 
Cc: Kumar Gala 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Markus Elfring 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ker...@vger.kernel.org
---
 arch/powerpc/platforms/83xx/usb.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/83xx/usb.c 
b/arch/powerpc/platforms/83xx/usb.c
index 19dcef5..56b36fa 100644
--- a/arch/powerpc/platforms/83xx/usb.c
+++ b/arch/powerpc/platforms/83xx/usb.c
@@ -160,11 +160,9 @@ int mpc831x_usb_cfg(void)
 
/* Map USB SOC space */
ret = of_address_to_resource(np, 0, &res);
-   if (ret) {
-   of_node_put(immr_node);
-   of_node_put(np);
-   return ret;
-   }
+   if (ret)
+   goto out_put_node;
+
usb_regs = ioremap(res.start, resource_size(&res));
 
/* Using on-chip PHY */
@@ -173,7 +171,7 @@ int mpc831x_usb_cfg(void)
u32 refsel;
 
if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))
-   goto out;
+   goto out_unmap;
 
if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr"))
refsel = CONTROL_REFSEL_24MHZ;
@@ -200,8 +198,9 @@ int mpc831x_usb_cfg(void)
ret = -EINVAL;
}
 
-out:
+out_unmap:
iounmap(usb_regs);
+out_put_node:
of_node_put(immr_node);
of_node_put(np);
return ret;
-- 
2.9.5



[PATCH 1/2] powerpc/83xx: fix use-after-free in mpc831x_usb_cfg()

2019-07-09 Thread Wen Yang
The immr_node variable is still being used after the of_node_put() call,
which may result in use-after-free.
Fix this issue by calling of_node_put() after the last usage.

Fixes: fd066e850351 ("powerpc/mpc8308: fix USB DR controller initialization")
Signed-off-by: Wen Yang 
Cc: Scott Wood 
Cc: Kumar Gala 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Markus Elfring 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ker...@vger.kernel.org
---
 arch/powerpc/platforms/83xx/usb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/83xx/usb.c 
b/arch/powerpc/platforms/83xx/usb.c
index 3d247d7..19dcef5 100644
--- a/arch/powerpc/platforms/83xx/usb.c
+++ b/arch/powerpc/platforms/83xx/usb.c
@@ -158,11 +158,10 @@ int mpc831x_usb_cfg(void)
 
iounmap(immap);
 
-   of_node_put(immr_node);
-
/* Map USB SOC space */
ret = of_address_to_resource(np, 0, &res);
if (ret) {
+   of_node_put(immr_node);
of_node_put(np);
return ret;
}
@@ -203,6 +202,7 @@ int mpc831x_usb_cfg(void)
 
 out:
iounmap(usb_regs);
+   of_node_put(immr_node);
of_node_put(np);
return ret;
 }
-- 
2.9.5



[PATCH v5 7/7] KVM: PPC: Ultravisor: Add PPC_UV config option

2019-07-09 Thread Bharata B Rao
From: Anshuman Khandual 

CONFIG_PPC_UV adds support for ultravisor.

Signed-off-by: Anshuman Khandual 
Signed-off-by: Bharata B Rao 
Signed-off-by: Ram Pai 
[ Update config help and commit message ]
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/Kconfig | 20 
 1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f0e5b38d52e8..20c6c213d2be 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -440,6 +440,26 @@ config PPC_TRANSACTIONAL_MEM
---help---
  Support user-mode Transactional Memory on POWERPC.
 
+config PPC_UV
+   bool "Ultravisor support"
+   depends on KVM_BOOK3S_HV_POSSIBLE
+   select HMM_MIRROR
+   select HMM
+   select ZONE_DEVICE
+   select MIGRATE_VMA_HELPER
+   select DEV_PAGEMAP_OPS
+   select DEVICE_PRIVATE
+   select MEMORY_HOTPLUG
+   select MEMORY_HOTREMOVE
+   default n
+   help
+ This option paravirtualizes the kernel to run on POWER platforms that
+ support the Protected Execution Facility (PEF). On such platforms,
+ the ultravisor firmware runs at a privilege level above the
+ hypervisor.
+
+ If unsure, say "N".
+
 config LD_HEAD_STUB_CATCH
bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if 
EXPERT
depends on PPC64
-- 
2.21.0



[RFC PATCH v5 6/7] kvmppc: Support reset of secure guest

2019-07-09 Thread Bharata B Rao
Add support for reset of a secure guest via a new ioctl, KVM_PPC_SVM_OFF.
This ioctl will be issued by QEMU during reset and includes the
following steps:

- Ask UV to terminate the guest via UV_SVM_TERMINATE ucall
- Unpin the VPA pages so that they can be migrated back to the secure
  side when the guest becomes secure again. This is required because
  pinned pages can't be migrated.
- Reinitialize guest's partitioned scoped page tables. These are
  freed when guest become secure (H_SVM_INIT_DONE)
- Release all HMM pages of the secure guest.

After these steps, guest is ready to issue UV_ESM call once again
to switch to secure mode.
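
A minimal sketch of how a QEMU-like VMM might drive this at reset time (the
wrapper is illustrative; only the ioctl name, its lack of arguments and its
error codes come from this patch):

```
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Called on machine reset; a no-op (returns 0) for normal guests. */
static int kvm_svm_off(int vm_fd)
{
	if (ioctl(vm_fd, KVM_PPC_SVM_OFF) < 0)
		return -errno;	/* -EINVAL / -ENOMEM per the documentation in this patch */
	return 0;
}
```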

Signed-off-by: Bharata B Rao 
Signed-off-by: Sukadev Bhattiprolu 
[Implementation of uv_svm_terminate() and its call from
guest shutdown path]
Signed-off-by: Ram Pai 
[Unpinning of VPA pages]
---
 Documentation/virtual/kvm/api.txt | 19 ++
 arch/powerpc/include/asm/kvm_book3s_hmm.h |  7 +++
 arch/powerpc/include/asm/kvm_ppc.h|  2 +
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  7 +++
 arch/powerpc/kvm/book3s_hv.c  | 70 +++
 arch/powerpc/kvm/book3s_hv_hmm.c  | 60 +++
 arch/powerpc/kvm/powerpc.c| 12 
 include/uapi/linux/kvm.h  |  1 +
 tools/include/uapi/linux/kvm.h|  1 +
 10 files changed, 180 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index ba6c42c576dd..c89c24ad86ed 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4065,6 +4065,25 @@ KVM_ARM_VCPU_FINALIZE call.
 See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization
 using this ioctl.
 
+4.120 KVM_PPC_SVM_OFF
+
+Capability: basic
+Architectures: powerpc
+Type: vm ioctl
+Parameters: none
+Returns: 0 on successful completion,
+Errors:
+  EINVAL:if ultravisor failed to terminate the secure guest
+  ENOMEM:if hypervisor failed to allocate new radix page tables for guest
+
+This ioctl is used to turn off the secure mode of the guest or transition
+the guest from secure mode to normal mode. This is invoked when the guest
+is reset. This has no effect if called for a normal guest.
+
+This ioctl issues an ultravisor call to terminate the secure guest,
+unpins the VPA pages, reinitializes the guest's partition-scoped page
+tables and releases all the HMM pages associated with this guest.
+
 5. The kvm_run structure
 
 
diff --git a/arch/powerpc/include/asm/kvm_book3s_hmm.h 
b/arch/powerpc/include/asm/kvm_book3s_hmm.h
index 8c7aacabb2e0..cd2b0f927766 100644
--- a/arch/powerpc/include/asm/kvm_book3s_hmm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_hmm.h
@@ -13,6 +13,8 @@ extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
  unsigned long page_shift);
 extern unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
 extern unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
+extern void kvmppc_hmm_free_memslot_pfns(struct kvm *kvm,
+   struct kvm_memslots *slots);
 #else
 static inline unsigned long
 kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
@@ -37,5 +39,10 @@ static inline unsigned long kvmppc_h_svm_init_done(struct 
kvm *kvm)
 {
return H_UNSUPPORTED;
 }
+
+static inline void kvmppc_hmm_free_memslot_pfns(struct kvm *kvm,
+   struct kvm_memslots *slots)
+{
+}
 #endif /* CONFIG_PPC_UV */
 #endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index bc892380e6cd..d80ece28d65d 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -188,6 +188,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm 
*kvm,
 extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
 extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
 extern void kvmppc_setup_partition_table(struct kvm *kvm);
+extern int kvmppc_reinit_partition_table(struct kvm *kvm);
 
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
@@ -332,6 +333,7 @@ struct kvmppc_ops {
   int size);
int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
  int size);
+   int (*svm_off)(struct kvm *kvm);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 9f5510b55892..c8180427fa01 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -25,5 +25,6 @@
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
 #define UV_PAGE_INVAL  0xF138
+#define UV_SVM_TERMINATE   0xF13C
 
 #endif /* 

[RFC PATCH v5 5/7] kvmppc: Radix changes for secure guest

2019-07-09 Thread Bharata B Rao
- After the guest becomes secure, when we handle a page fault of a page
  belonging to SVM in HV, send that page to UV via UV_PAGE_IN.
- Whenever a page is unmapped on the HV side, inform UV via UV_PAGE_INVAL.
- Ensure that all routines that walk the secondary page tables of
  the guest don't do so in the case of a secure VM. For a secure guest, the
  active secondary page tables are in secure memory and the secondary
  page tables in HV are freed when the guest becomes secure.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/kvm_host.h   | 12 
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  7 +++
 arch/powerpc/kvm/book3s_64_mmu_radix.c| 22 ++
 arch/powerpc/kvm/book3s_hv_hmm.c  | 20 
 5 files changed, 62 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 0c49c3401c63..dcbf7480cb10 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -865,6 +865,8 @@ static inline void kvm_arch_vcpu_block_finish(struct 
kvm_vcpu *vcpu) {}
 #ifdef CONFIG_PPC_UV
 extern int kvmppc_hmm_init(void);
 extern void kvmppc_hmm_free(void);
+extern bool kvmppc_is_guest_secure(struct kvm *kvm);
+extern int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gpa);
 #else
 static inline int kvmppc_hmm_init(void)
 {
@@ -872,6 +874,16 @@ static inline int kvmppc_hmm_init(void)
 }
 
 static inline void kvmppc_hmm_free(void) {}
+
+static inline bool kvmppc_is_guest_secure(struct kvm *kvm)
+{
+   return false;
+}
+
+static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gpa)
+{
+   return -EFAULT;
+}
 #endif /* CONFIG_PPC_UV */
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index d6d6eb2e6e6b..9f5510b55892 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -24,5 +24,6 @@
 #define UV_UNREGISTER_MEM_SLOT 0xF124
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
+#define UV_PAGE_INVAL  0xF138
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index fe45be9ee63b..f4f674794b35 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -77,6 +77,13 @@ static inline int uv_unregister_mem_slot(u64 lpid, u64 
slotid)
 
return ucall(UV_UNREGISTER_MEM_SLOT, retbuf, lpid, slotid);
 }
+
+static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_PAGE_INVAL, retbuf, lpid, gpa, page_shift);
+}
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index f55ef071883f..c454600c454f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 /*
  * Supported radix tree geometry.
@@ -923,6 +925,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
if (!(dsisr & DSISR_PRTABLE_FAULT))
gpa |= ea & 0xfff;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return kvmppc_send_page_to_uv(kvm, gpa & PAGE_MASK);
+
/* Get the corresponding memslot */
memslot = gfn_to_memslot(kvm, gfn);
 
@@ -980,6 +985,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct 
kvm_memory_slot *memslot,
unsigned long gpa = gfn << PAGE_SHIFT;
unsigned int shift;
 
+   if (kvmppc_is_guest_secure(kvm)) {
+   uv_page_inval(kvm->arch.lpid, gpa, PAGE_SIZE);
+   return 0;
+   }
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
@@ -997,6 +1007,9 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot 
*memslot,
int ref = 0;
unsigned long old, *rmapp;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return ref;
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
@@ -1021,6 +1034,9 @@ int kvm_test_age_radix(struct kvm *kvm, struct 
kvm_memory_slot *memslot,
unsigned int shift;
int ref = 0;
 
+   if (kvmppc_is_guest_secure(kvm))
+   return ref;
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
@@ -1038,6 +1054,9 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,

[PATCH v5 4/7] kvmppc: Handle memory plug/unplug to secure VM

2019-07-09 Thread Bharata B Rao
Register the new memslot with UV during plug and unregister
the memslot during unplug.

Signed-off-by: Bharata B Rao 
Acked-by: Paul Mackerras 
---
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  7 +++
 arch/powerpc/kvm/book3s_hv.c  | 19 +++
 3 files changed, 27 insertions(+)

diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 07b7d638e7af..d6d6eb2e6e6b 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -21,6 +21,7 @@
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
 #define UV_REGISTER_MEM_SLOT   0xF120
+#define UV_UNREGISTER_MEM_SLOT 0xF124
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT0xF12C
 
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index b46042f1aa8f..fe45be9ee63b 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -70,6 +70,13 @@ static inline int uv_register_mem_slot(u64 lpid, u64 
start_gpa, u64 size,
return ucall(UV_REGISTER_MEM_SLOT, retbuf, lpid, start_gpa,
 size, flags, slotid);
 }
+
+static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_UNREGISTER_MEM_SLOT, retbuf, lpid, slotid);
+}
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b8f801d00ad4..7cbb5edaed01 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -77,6 +77,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "book3s.h"
 
@@ -4504,6 +4505,24 @@ static void kvmppc_core_commit_memory_region_hv(struct 
kvm *kvm,
if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
kvmppc_radix_flush_memslot(kvm, old);
+   /*
+* If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
+*/
+   if (!kvm->arch.secure_guest)
+   return;
+
+   /*
+* TODO: Handle KVM_MR_MOVE
+*/
+   if (change == KVM_MR_CREATE) {
+   uv_register_mem_slot(kvm->arch.lpid,
+  new->base_gfn << PAGE_SHIFT,
+  new->npages * PAGE_SIZE,
+  0,
+  new->id);
+   } else if (change == KVM_MR_DELETE) {
+   uv_unregister_mem_slot(kvm->arch.lpid, old->id);
+   }
 }
 
 /*
-- 
2.21.0



[PATCH v5 3/7] kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls

2019-07-09 Thread Bharata B Rao
H_SVM_INIT_START: Initiate securing a VM
H_SVM_INIT_DONE: Conclude securing a VM

As part of H_SVM_INIT_START, register all existing memslots with
the UV. The H_SVM_INIT_DONE call by UV informs HV that the transition of
the guest to secure mode is complete.

These two states (transition to secure mode STARTED and transition
to secure mode COMPLETED) are recorded in kvm->arch.secure_guest.
Setting these states will cause the assembly code that enters the
guest to call the UV_RETURN ucall instead of trying to enter the
guest directly.

Signed-off-by: Bharata B Rao 
Acked-by: Paul Mackerras 
---
 arch/powerpc/include/asm/hvcall.h |  2 ++
 arch/powerpc/include/asm/kvm_book3s_hmm.h | 12 
 arch/powerpc/include/asm/kvm_host.h   |  4 +++
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/include/asm/ultravisor.h |  9 ++
 arch/powerpc/kvm/book3s_hv.c  |  7 +
 arch/powerpc/kvm/book3s_hv_hmm.c  | 34 +++
 7 files changed, 69 insertions(+)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 05b8536f6653..fa7695928e30 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -343,6 +343,8 @@
 /* Platform-specific hcalls used by the Ultravisor */
 #define H_SVM_PAGE_IN  0xEF00
 #define H_SVM_PAGE_OUT 0xEF04
+#define H_SVM_INIT_START   0xEF08
+#define H_SVM_INIT_DONE0xEF0C
 
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR  1
diff --git a/arch/powerpc/include/asm/kvm_book3s_hmm.h 
b/arch/powerpc/include/asm/kvm_book3s_hmm.h
index 21f3de5f2acb..8c7aacabb2e0 100644
--- a/arch/powerpc/include/asm/kvm_book3s_hmm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_hmm.h
@@ -11,6 +11,8 @@ extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
  unsigned long gra,
  unsigned long flags,
  unsigned long page_shift);
+extern unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
+extern unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
 #else
 static inline unsigned long
 kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
@@ -25,5 +27,15 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
 {
return H_UNSUPPORTED;
 }
+
+static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+   return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+   return H_UNSUPPORTED;
+}
 #endif /* CONFIG_PPC_UV */
 #endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index ac1a101beb07..0c49c3401c63 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -272,6 +272,10 @@ struct kvm_hpt_info {
 
 struct kvm_resize_hpt;
 
+/* Flag values for kvm_arch.secure_guest */
+#define KVMPPC_SECURE_INIT_START   0x1 /* H_SVM_INIT_START has been called */
+#define KVMPPC_SECURE_INIT_DONE	   0x2 /* H_SVM_INIT_DONE completed */
+
 struct kvm_arch {
unsigned int lpid;
unsigned int smt_mode;  /* # vcpus per virtual core */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index f1c5800ac705..07b7d638e7af 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -20,6 +20,7 @@
 /* opcodes */
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
+#define UV_REGISTER_MEM_SLOT   0xF120
 #define UV_PAGE_IN 0xF128
 #define UV_PAGE_OUT		0xF12C
 
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 16f8e0e8ec3f..b46042f1aa8f 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -61,6 +61,15 @@ static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 
src_gpa, u64 flags,
return ucall(UV_PAGE_OUT, retbuf, lpid, dst_ra, src_gpa, flags,
 page_shift);
 }
+
+static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
+  u64 flags, u64 slotid)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_REGISTER_MEM_SLOT, retbuf, lpid, start_gpa,
+size, flags, slotid);
+}
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8ee66aa0da58..b8f801d00ad4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1097,6 +1097,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));

[PATCH v5 2/7] kvmppc: Shared pages support for secure guests

2019-07-09 Thread Bharata B Rao
A secure guest will share some of its pages with the hypervisor (e.g.
virtio bounce buffers). Add support for such shared pages in the HMM
driver.

Once a secure page is converted to a shared page, the HMM driver stops
tracking that page.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/hvcall.h |  3 ++
 arch/powerpc/kvm/book3s_hv_hmm.c  | 66 +--
 2 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 2f6b952deb0f..05b8536f6653 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -337,6 +337,9 @@
 #define H_TLB_INVALIDATE   0xF808
 #define H_COPY_TOFROM_GUEST	0xF80C
 
+/* Flags for H_SVM_PAGE_IN */
+#define H_PAGE_IN_SHARED	0x1
+
 /* Platform-specific hcalls used by the Ultravisor */
 #define H_SVM_PAGE_IN  0xEF00
 #define H_SVM_PAGE_OUT 0xEF04
diff --git a/arch/powerpc/kvm/book3s_hv_hmm.c b/arch/powerpc/kvm/book3s_hv_hmm.c
index cd34323888b6..36562b382e70 100644
--- a/arch/powerpc/kvm/book3s_hv_hmm.c
+++ b/arch/powerpc/kvm/book3s_hv_hmm.c
@@ -52,6 +52,7 @@ struct kvmppc_hmm_page_pvt {
unsigned long *rmap;
unsigned int lpid;
unsigned long gpa;
+   bool skip_page_out;
 };
 
 struct kvmppc_hmm_migrate_args {
@@ -215,6 +216,53 @@ static const struct migrate_vma_ops kvmppc_hmm_migrate_ops 
= {
.finalize_and_map = kvmppc_hmm_migrate_finalize_and_map,
 };
 
+/*
+ * Shares the page with HV, thus making it a normal page.
+ *
+ * - If the page is already secure, then provision a new page and share
+ * - If the page is a normal page, share the existing page
+ *
+ * In the former case, uses the HMM fault handler to release the HMM page.
+ */
+static unsigned long
+kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
+{
+
+   int ret;
+   struct page *hmm_page;
+   struct kvmppc_hmm_page_pvt *pvt;
+   unsigned long pfn;
+   unsigned long *rmap;
+   struct kvm_memory_slot *slot;
+   unsigned long gfn = gpa >> page_shift;
+   int srcu_idx;
+
+   srcu_idx = srcu_read_lock(&kvm->srcu);
+   slot = gfn_to_memslot(kvm, gfn);
+   if (!slot) {
+   srcu_read_unlock(&kvm->srcu, srcu_idx);
+   return H_PARAMETER;
+   }
+   rmap = &slot->arch.rmap[gfn - slot->base_gfn];
+   srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+   if (kvmppc_is_hmm_pfn(*rmap)) {
+   hmm_page = pfn_to_page(*rmap & ~KVMPPC_PFN_HMM);
+   pvt = (struct kvmppc_hmm_page_pvt *)
+   hmm_devmem_page_get_drvdata(hmm_page);
+   pvt->skip_page_out = true;
+   }
+
+   pfn = gfn_to_pfn(kvm, gpa >> page_shift);
+   if (is_error_noslot_pfn(pfn))
+   return H_PARAMETER;
+
+   ret = uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift);
+   kvm_release_pfn_clean(pfn);
+
+   return (ret == U_SUCCESS) ? H_SUCCESS : H_PARAMETER;
+}
+
 /*
  * Move page from normal memory to secure memory.
  */
@@ -235,9 +283,12 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
if (page_shift != PAGE_SHIFT)
return H_P3;
 
-   if (flags)
+   if (flags & ~H_PAGE_IN_SHARED)
return H_P2;
 
+   if (flags & H_PAGE_IN_SHARED)
+   return kvmppc_share_page(kvm, gpa, page_shift);
+
	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
slot = gfn_to_memslot(kvm, gfn);
@@ -299,8 +350,17 @@ kvmppc_hmm_fault_migrate_alloc_and_copy(struct 
vm_area_struct *vma,
   hmm_devmem_page_get_drvdata(spage);
 
pfn = page_to_pfn(dpage);
-   ret = uv_page_out(pvt->lpid, pfn << PAGE_SHIFT,
- pvt->gpa, 0, PAGE_SHIFT);
+
+   /*
+* This same alloc_and_copy() callback is used in two cases:
+* - When HV touches a secure page, for which we do page-out
+* - When a secure page is converted to shared page, we touch
+*   the page to essentially discard the HMM page. In this case we
+*   skip page-out.
+*/
+   if (!pvt->skip_page_out)
+   ret = uv_page_out(pvt->lpid, pfn << PAGE_SHIFT,
+ pvt->gpa, 0, PAGE_SHIFT);
if (ret == U_SUCCESS)
*dst_pfn = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
 }
-- 
2.21.0



[PATCH v5 1/7] kvmppc: HMM backend driver to manage pages of secure guest

2019-07-09 Thread Bharata B Rao
HMM driver for KVM PPC to manage page transitions of
secure guest via H_SVM_PAGE_IN and H_SVM_PAGE_OUT hcalls.

H_SVM_PAGE_IN: Move the content of a normal page to secure page
H_SVM_PAGE_OUT: Move the content of a secure page to normal page

Private ZONE_DEVICE memory equal to the amount of secure memory
available in the platform for running secure guests is created
via a HMM device. The movement of pages between normal and secure
memory is done by ->alloc_and_copy() callback routine of migrate_vma().
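
To make the wiring concrete, the callbacks are hooked up roughly as
below (a sketch; the alloc_and_copy callback name is an assumption,
inferred from the finalize_and_map name used in this patch):

static const struct migrate_vma_ops kvmppc_hmm_migrate_ops = {
	.alloc_and_copy   = kvmppc_hmm_migrate_alloc_and_copy,
	.finalize_and_map = kvmppc_hmm_migrate_finalize_and_map,
};

/* Inside kvmppc_h_svm_page_in(), one guest page is then moved with: */
	ret = migrate_vma(&kvmppc_hmm_migrate_ops, vma, addr, end,
			  &src_pfn, &dst_pfn, &args);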

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/include/asm/hvcall.h |   4 +
 arch/powerpc/include/asm/kvm_book3s_hmm.h |  29 ++
 arch/powerpc/include/asm/kvm_host.h   |  12 +
 arch/powerpc/include/asm/ultravisor-api.h |   2 +
 arch/powerpc/include/asm/ultravisor.h |  17 +
 arch/powerpc/kvm/Makefile |   3 +
 arch/powerpc/kvm/book3s_hv.c  |  19 +
 arch/powerpc/kvm/book3s_hv_hmm.c  | 482 ++
 8 files changed, 568 insertions(+)
 create mode 100644 arch/powerpc/include/asm/kvm_book3s_hmm.h
 create mode 100644 arch/powerpc/kvm/book3s_hv_hmm.c

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 463c63a9fcf1..2f6b952deb0f 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -337,6 +337,10 @@
 #define H_TLB_INVALIDATE   0xF808
 #define H_COPY_TOFROM_GUEST	0xF80C
 
+/* Platform-specific hcalls used by the Ultravisor */
+#define H_SVM_PAGE_IN  0xEF00
+#define H_SVM_PAGE_OUT 0xEF04
+
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR  1
 #define H_SET_MODE_RESOURCE_SET_DAWR   2
diff --git a/arch/powerpc/include/asm/kvm_book3s_hmm.h 
b/arch/powerpc/include/asm/kvm_book3s_hmm.h
new file mode 100644
index ..21f3de5f2acb
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_hmm.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __POWERPC_KVM_PPC_HMM_H__
+#define __POWERPC_KVM_PPC_HMM_H__
+
+#ifdef CONFIG_PPC_UV
+extern unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+extern unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+#else
+static inline unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
+unsigned long flags, unsigned long page_shift)
+{
+   return H_UNSUPPORTED;
+}
+
+static inline unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
+ unsigned long flags, unsigned long page_shift)
+{
+   return H_UNSUPPORTED;
+}
+#endif /* CONFIG_PPC_UV */
+#endif /* __POWERPC_KVM_PPC_HMM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 184becb62ea4..ac1a101beb07 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -858,4 +858,16 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+#ifdef CONFIG_PPC_UV
+extern int kvmppc_hmm_init(void);
+extern void kvmppc_hmm_free(void);
+#else
+static inline int kvmppc_hmm_init(void)
+{
+   return 0;
+}
+
+static inline void kvmppc_hmm_free(void) {}
+#endif /* CONFIG_PPC_UV */
+
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h 
b/arch/powerpc/include/asm/ultravisor-api.h
index 7c4d0b4ced12..f1c5800ac705 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -20,5 +20,7 @@
 /* opcodes */
 #define UV_WRITE_PATE  0xF104
 #define UV_RETURN  0xF11C
+#define UV_PAGE_IN 0xF128
+#define UV_PAGE_OUT		0xF12C
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h 
b/arch/powerpc/include/asm/ultravisor.h
index 996c1efd6c6d..16f8e0e8ec3f 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -44,6 +44,23 @@ static inline int uv_register_pate(u64 lpid, u64 dw0, u64 
dw1)
return ucall(UV_WRITE_PATE, retbuf, lpid, dw0, dw1);
 }
 
+static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
+u64 page_shift)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_PAGE_IN, retbuf, lpid, src_ra, dst_gpa, flags,
+page_shift);
+}
+
+static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 

[PATCH v5 0/7] kvmppc: HMM driver to manage pages of secure guest

2019-07-09 Thread Bharata B Rao
Hi,

A pseries guest can be run as a secure guest on Ultravisor-enabled
POWER platforms. On such platforms, this driver will be used to manage
the movement of guest pages between the normal memory managed by
hypervisor (HV) and secure memory managed by Ultravisor (UV).

Private ZONE_DEVICE memory equal to the amount of secure memory
available in the platform for running secure guests is created
via a HMM device. The movement of pages between normal and secure
memory is done by ->alloc_and_copy() callback routine of migrate_vma().

The page-in or page-out requests from UV will come to HV as hcalls and
HV will call back into UV via uvcalls to satisfy these page requests.
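
For context, on the HV side these hcalls are dispatched from
kvmppc_pseries_do_hcall() in book3s_hv.c, roughly as sketched below
(register usage follows the pattern of the other SVM hcalls in this
series):

	case H_SVM_PAGE_IN:
		ret = kvmppc_h_svm_page_in(vcpu->kvm,
					   kvmppc_get_gpr(vcpu, 4),
					   kvmppc_get_gpr(vcpu, 5),
					   kvmppc_get_gpr(vcpu, 6));
		break;
	case H_SVM_PAGE_OUT:
		ret = kvmppc_h_svm_page_out(vcpu->kvm,
					    kvmppc_get_gpr(vcpu, 4),
					    kvmppc_get_gpr(vcpu, 5),
					    kvmppc_get_gpr(vcpu, 6));
		break;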

These patches apply and work on top of the base Ultravisor v4 patches
posted by Claudio Carvalho at:
https://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg152842.html

Changes in v5
=
- Hold kvm->srcu lock until we are done migrating the page.
- Ensure we take heavier lock mmap_sem first before taking kvm->srcu
  lock.
- Code reorgs, comments updates and commit messages updates.
- Ensure we don't lookup HV side partition scoped page tables from
  memslot flush code, this is required for memory unplug to make
  progress.
- Fix reboot of secure SMP guests by unpinning the VPA pages during
  reboot (Ram Pai).
- Added documentation for the new KVM_PPC_SVM_OFF ioctl.
- Using different bit slot to differentiate HMM PFN from other uses
  of rmap entries.
- Remove kvmppc_hmm_release_pfns() as releasing of HMM PFNs will be
  done by unmap_vmas() and its callers during VM shutdown.
- Carrying the patch that adds CONFIG_PPC_UV with this patchset.

v4: https://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg151156.html

Anshuman Khandual (1):
  KVM: PPC: Ultravisor: Add PPC_UV config option

Bharata B Rao (6):
  kvmppc: HMM backend driver to manage pages of secure guest
  kvmppc: Shared pages support for secure guests
  kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls
  kvmppc: Handle memory plug/unplug to secure VM
  kvmppc: Radix changes for secure guest
  kvmppc: Support reset of secure guest

 Documentation/virtual/kvm/api.txt |  19 +
 arch/powerpc/Kconfig  |  20 +
 arch/powerpc/include/asm/hvcall.h |   9 +
 arch/powerpc/include/asm/kvm_book3s_hmm.h |  48 ++
 arch/powerpc/include/asm/kvm_host.h   |  28 +
 arch/powerpc/include/asm/kvm_ppc.h|   2 +
 arch/powerpc/include/asm/ultravisor-api.h |   6 +
 arch/powerpc/include/asm/ultravisor.h |  47 ++
 arch/powerpc/kvm/Makefile |   3 +
 arch/powerpc/kvm/book3s_64_mmu_radix.c|  22 +
 arch/powerpc/kvm/book3s_hv.c  | 115 
 arch/powerpc/kvm/book3s_hv_hmm.c  | 656 ++
 arch/powerpc/kvm/powerpc.c|  12 +
 include/uapi/linux/kvm.h  |   1 +
 tools/include/uapi/linux/kvm.h|   1 +
 15 files changed, 989 insertions(+)
 create mode 100644 arch/powerpc/include/asm/kvm_book3s_hmm.h
 create mode 100644 arch/powerpc/kvm/book3s_hv_hmm.c

-- 
2.21.0



Re: [RFC PATCH v4 6/6] kvmppc: Support reset of secure guest

2019-07-09 Thread Bharata B Rao
On Mon, Jun 17, 2019 at 02:06:32PM +1000, Paul Mackerras wrote:
> On Tue, May 28, 2019 at 12:19:33PM +0530, Bharata B Rao wrote:
> > Add support for reset of secure guest via a new ioctl KVM_PPC_SVM_OFF.
> > This ioctl will be issued by QEMU during reset and in this ioctl,
> > we ask UV to terminate the guest via UV_SVM_TERMINATE ucall,
> > reinitialize guest's partitioned scoped page tables and release all
> > HMM pages of the secure guest.
> > 
> > After these steps, guest is ready to issue UV_ESM call once again
> > to switch to secure mode.
> 
> Since you are adding a new KVM ioctl, you need to add a description of
> it to Documentation/virtual/kvm/api.txt.

Adding in the next version.

Regards,
Bharata.



Re: [PATCH v4 3/6] kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls

2019-07-09 Thread Bharata B Rao
On Tue, Jun 18, 2019 at 08:05:26PM -0300, Thiago Jung Bauermann wrote:
> 
> Hello Bharata,
> 
> Bharata B Rao  writes:
> 
> > diff --git a/arch/powerpc/include/asm/kvm_book3s_hmm.h 
> > b/arch/powerpc/include/asm/kvm_book3s_hmm.h
> > index 21f3de5f2acb..3e13dab7f690 100644
> > --- a/arch/powerpc/include/asm/kvm_book3s_hmm.h
> > +++ b/arch/powerpc/include/asm/kvm_book3s_hmm.h
> > @@ -11,6 +11,8 @@ extern unsigned long kvmppc_h_svm_page_out(struct kvm 
> > *kvm,
> >   unsigned long gra,
> >   unsigned long flags,
> >   unsigned long page_shift);
> > +extern unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
> > +extern unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
> >  #else
> >  static inline unsigned long
> >  kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
> > @@ -25,5 +27,15 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
> >  {
> > return H_UNSUPPORTED;
> >  }
> > +
> > +static inine unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> > +{
> > +   return H_UNSUPPORTED;
> > +}
> > +
> > +static inine unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
> > +{
> > +   return H_UNSUPPORTED;
> > +}
> >  #endif /* CONFIG_PPC_UV */
> >  #endif /* __POWERPC_KVM_PPC_HMM_H__ */
> 
> This patch won't build when CONFIG_PPC_UV isn't set because of two
> typos: "inine" and the ';' at the end of kvmppc_h_svm_init_done()
> function prototype.

Thanks. Fixed this.

Regards,
Bharata.



Re: [PATCH v4 3/6] kvmppc: H_SVM_INIT_START and H_SVM_INIT_DONE hcalls

2019-07-09 Thread Bharata B Rao
On Mon, Jun 17, 2019 at 03:37:56PM +1000, Paul Mackerras wrote:
> On Tue, May 28, 2019 at 12:19:30PM +0530, Bharata B Rao wrote:
> > H_SVM_INIT_START: Initiate securing a VM
> > H_SVM_INIT_DONE: Conclude securing a VM
> > 
> > As part of H_SVM_INIT_START register all existing memslots with the UV.
> > H_SVM_INIT_DONE call by UV informs HV that transition of the guest
> > to secure mode is complete.
> 
> It is worth mentioning here that setting any of the flag bits in
> kvm->arch.secure_guest will cause the assembly code that enters the
> guest to call the UV_RETURN ucall instead of trying to enter the guest
> directly.  That's not necessarily obvious to the reader as this patch
> doesn't touch that assembly code.

Documented this in the commit message.

> 
> Apart from that this patch looks fine.
> 
> > Signed-off-by: Bharata B Rao 
> 
> Acked-by: Paul Mackerras 

Thanks,
Bharata.



Re: [PATCH v4 1/6] kvmppc: HMM backend driver to manage pages of secure guest

2019-07-09 Thread Bharata B Rao
On Mon, Jun 17, 2019 at 03:31:06PM +1000, Paul Mackerras wrote:
> On Tue, May 28, 2019 at 12:19:28PM +0530, Bharata B Rao wrote:
> > HMM driver for KVM PPC to manage page transitions of
> > secure guest via H_SVM_PAGE_IN and H_SVM_PAGE_OUT hcalls.
> > 
> > H_SVM_PAGE_IN: Move the content of a normal page to secure page
> > H_SVM_PAGE_OUT: Move the content of a secure page to normal page
> 
> Comments below...
> 
> > @@ -4421,6 +4435,7 @@ static void kvmppc_core_free_memslot_hv(struct 
> > kvm_memory_slot *free,
> > struct kvm_memory_slot *dont)
> >  {
> > if (!dont || free->arch.rmap != dont->arch.rmap) {
> > +   kvmppc_hmm_release_pfns(free);
> 
> I don't think this is the right place to do this.  The memslot will
> have no pages mapped by this time, because higher levels of code will
> have called kvmppc_core_flush_memslot_hv() before calling this.
> Releasing the pfns should be done in that function.

In fact I can get rid of kvmppc_hmm_release_pfns() entirely, as we don't
have to free the HMM pages explicitly like this. During guest shutdown
all these pages are dropped when unmap_vmas() is called.

> 
> > diff --git a/arch/powerpc/kvm/book3s_hv_hmm.c 
> > b/arch/powerpc/kvm/book3s_hv_hmm.c
> > new file mode 100644
> > index ..713806003da3
> 
> ...
> 
> > +#define KVMPPC_PFN_HMM (0x1ULL << 61)
> > +
> > +static inline bool kvmppc_is_hmm_pfn(unsigned long pfn)
> > +{
> > +   return !!(pfn & KVMPPC_PFN_HMM);
> > +}
> 
> Since you are putting in these values in the rmap entries, you need to
> be careful about overlaps between these values and the other uses of
> rmap entries.  The value you have chosen would be in the middle of the
> LPID field for an rmap entry for a guest that has nested guests, and
> in fact kvmhv_remove_nest_rmap_range() effectively assumes that a
> non-zero rmap entry must be a list of L2 guest mappings.  (This is for
> radix guests; HPT guests use the rmap entry differently, but I am
> assuming that we will enforce that only radix guests can be secure
> guests.)

Worked this out with Suraj for sharing the rmap format; we now have a
well-defined bit slot for HMM PFNs in the rmap entries.
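
In effect the encoding looks like the sketch below (the exact bit
position is the part that was coordinated, so treat the shift here as
illustrative):

/* one reserved bit in the rmap entry marks an HMM-backed guest page */
#define KVMPPC_PFN_HMM		(0x1ULL << 61)	/* illustrative slot */

	/* when a guest page is paged in to secure memory: */
	*rmap = hmm_pfn | KVMPPC_PFN_HMM;

	/* later, e.g. in kvmppc_share_page(): */
	if (kvmppc_is_hmm_pfn(*rmap))
		hmm_page = pfn_to_page(*rmap & ~KVMPPC_PFN_HMM);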

> 
> Maybe it is true that the rmap entry will be non-zero only for those
> guest pages which are not mapped on the host side, that is,
> kvmppc_radix_flush_memslot() will see !pte_present(*ptep) for any page
> of a secure guest where the rmap entry contains a HMM pfn.  If that is
> so and is a deliberate part of the design, then I would like to see it
> written down in comments and commit messages so it's clear to others
> working on the code in future.

Yes, the rmap entry will be non-zero only for those guest pages which
are not mapped on the host side. However, as soon as the guest becomes
secure we free the HV-side partition-scoped page tables, so
kvmppc_radix_flush_memslot() and other routines that look up
kvm->arch.pgtable will no longer touch them.

> 
> Suraj is working on support for nested HPT guests, which will involve
> changing the rmap format to indicate more explicitly what sort of
> entry each rmap entry is.  Please work with him to define a format for
> your rmap entries that is clearly distinguishable from the others.
> 
> I think it is reasonable to say that a secure guest can't have nested
> guests, at least for now, but then we should make sure to kill all
> nested guests when a guest goes secure.

Ok. Yet to figure this part out.

> 
> ...
> 
> > +/*
> > + * Move page from normal memory to secure memory.
> > + */
> > +unsigned long
> > +kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
> > +unsigned long flags, unsigned long page_shift)
> > +{
> > +   unsigned long addr, end;
> > +   unsigned long src_pfn, dst_pfn;
> > +   struct kvmppc_hmm_migrate_args args;
> > +   struct vm_area_struct *vma;
> > +   int srcu_idx;
> > +   unsigned long gfn = gpa >> page_shift;
> > +   struct kvm_memory_slot *slot;
> > +   unsigned long *rmap;
> > +   int ret = H_SUCCESS;
> > +
> > +   if (page_shift != PAGE_SHIFT)
> > +   return H_P3;
> > +
> > +   srcu_idx = srcu_read_lock(&kvm->srcu);
> > +   slot = gfn_to_memslot(kvm, gfn);
> > +   rmap = &slot->arch.rmap[gfn - slot->base_gfn];
> > +   addr = gfn_to_hva(kvm, gpa >> page_shift);
> > +   srcu_read_unlock(&kvm->srcu, srcu_idx);
> 
> Shouldn't we keep the srcu read lock until we have finished working on
> the page?

I wasn't sure, so keeping it locked till the end in the next version.
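
So the v5 ordering is roughly (a sketch; the exact point of the unlock
may still move around as the code settles):

	down_read(&kvm->mm->mmap_sem);		/* heavier lock first */
	srcu_idx = srcu_read_lock(&kvm->srcu);

	/* look up memslot/rmap and migrate the page while both are held */

	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);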

> 
> > +   if (kvm_is_error_hva(addr))
> > +   return H_PARAMETER;
> > +
> > +   end = addr + (1UL << page_shift);
> > +
> > +   if (flags)
> > +   return H_P2;
> > +
> > +   args.rmap = rmap;
> > +   args.lpid = kvm->arch.lpid;
> > +   args.gpa = gpa;
> > +   args.page_shift = page_shift;
> > +
> > +   down_read(&kvm->mm->mmap_sem);
> > +   vma = find_vma_intersection(kvm->mm, addr, end);
> > +   if (!vma || vma->vm_start > addr || vma->vm_end < end) {
> > +   ret = H_PARAMETER;
> > + 

Re: [PATCH v5] cpufreq/pasemi: fix a use-after-free in pas_cpufreq_cpu_init()

2019-07-09 Thread Viresh Kumar
On 09-07-19, 16:04, Wen Yang wrote:
> The cpu variable is still being used in the of_get_property() call
> after the of_node_put() call, which may result in a use-after-free.
> 
> Fixes: a9acc26b75f ("cpufreq/pasemi: fix possible object reference leak")
> Signed-off-by: Wen Yang 
> Cc: "Rafael J. Wysocki" 
> Cc: Viresh Kumar 
> Cc: Michael Ellerman 
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux...@vger.kernel.org
> Cc: linux-ker...@vger.kernel.org
> ---
> v5: put together the code to get, use, and release cpu device_node.
> v4: restore the blank line.
> v3: fix a leaked reference.
> v2: clean up the code according to the advice of viresh.
> 
>  drivers/cpufreq/pasemi-cpufreq.c | 21 +
>  1 file changed, 9 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/cpufreq/pasemi-cpufreq.c 
> b/drivers/cpufreq/pasemi-cpufreq.c
> index 6b1e4ab..1f0beb7 100644
> --- a/drivers/cpufreq/pasemi-cpufreq.c
> +++ b/drivers/cpufreq/pasemi-cpufreq.c
> @@ -131,10 +131,17 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy 
> *policy)
>   int err = -ENODEV;
>  
>   cpu = of_get_cpu_node(policy->cpu, NULL);
> -
> - of_node_put(cpu);
>   if (!cpu)
>   goto out;

I would have loved a blank line here :)

> + max_freqp = of_get_property(cpu, "clock-frequency", NULL);
> + of_node_put(cpu);
> + if (!max_freqp) {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + /* we need the freq in kHz */
> + max_freq = *max_freqp / 1000;
>  
>   dn = of_find_compatible_node(NULL, NULL, "1682m-sdc");
>   if (!dn)
> @@ -171,16 +178,6 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy 
> *policy)
>   }
>  
>   pr_debug("init cpufreq on CPU %d\n", policy->cpu);
> -
> - max_freqp = of_get_property(cpu, "clock-frequency", NULL);
> - if (!max_freqp) {
> - err = -EINVAL;
> - goto out_unmap_sdcpwr;
> - }
> -
> - /* we need the freq in kHz */
> - max_freq = *max_freqp / 1000;
> -
>   pr_debug("max clock-frequency is at %u kHz\n", max_freq);
>   pr_debug("initializing frequency table\n");

Though, enough versions have happened now.

Acked-by: Viresh Kumar 

-- 
viresh


Re: [PATCH v5] powerpc/64s: support nospectre_v2 cmdline option

2019-07-09 Thread Andrew Donnellan

On 5/6/19 5:42 pm, Andrew Donnellan wrote:

On 24/5/19 12:46 pm, Christopher M. Riedl wrote:

Add support for disabling the kernel implemented spectre v2 mitigation
(count cache flush on context switch) via the nospectre_v2 and
mitigations=off cmdline options.

Suggested-by: Michael Ellerman 
Signed-off-by: Christopher M. Riedl 
Reviewed-by: Andrew Donnellan 


snowpatch is whinging about this breaking the build for some reason... 
https://patchwork.ozlabs.org/patch/1104583/


Upon further inspection, it appears this was bogus, and now snowpatch is 
passing it happily.


--
Andrew Donnellan  OzLabs, ADL Canberra
a...@linux.ibm.com IBM Australia Limited



[PATCH v5] cpufreq/pasemi: fix a use-after-free in pas_cpufreq_cpu_init()

2019-07-09 Thread Wen Yang
The cpu variable is still being used in the of_get_property() call
after the of_node_put() call, which may result in a use-after-free.
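
That is, the problematic ordering is (a sketch of the current code):

	cpu = of_get_cpu_node(policy->cpu, NULL);
	of_node_put(cpu);		/* reference dropped here...    */
	...
	max_freqp = of_get_property(cpu, "clock-frequency", NULL);
					/* ...but node still used here  */

The fix below simply moves the of_node_put() after the last use of the
node.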

Fixes: a9acc26b75f ("cpufreq/pasemi: fix possible object reference leak")
Signed-off-by: Wen Yang 
Cc: "Rafael J. Wysocki" 
Cc: Viresh Kumar 
Cc: Michael Ellerman 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
---
v5: put together the code to get, use, and release cpu device_node.
v4: restore the blank line.
v3: fix a leaked reference.
v2: clean up the code according to the advice of viresh.

 drivers/cpufreq/pasemi-cpufreq.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index 6b1e4ab..1f0beb7 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -131,10 +131,17 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy 
*policy)
int err = -ENODEV;
 
cpu = of_get_cpu_node(policy->cpu, NULL);
-
-   of_node_put(cpu);
if (!cpu)
goto out;
+   max_freqp = of_get_property(cpu, "clock-frequency", NULL);
+   of_node_put(cpu);
+   if (!max_freqp) {
+   err = -EINVAL;
+   goto out;
+   }
+
+   /* we need the freq in kHz */
+   max_freq = *max_freqp / 1000;
 
dn = of_find_compatible_node(NULL, NULL, "1682m-sdc");
if (!dn)
@@ -171,16 +178,6 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy 
*policy)
}
 
pr_debug("init cpufreq on CPU %d\n", policy->cpu);
-
-   max_freqp = of_get_property(cpu, "clock-frequency", NULL);
-   if (!max_freqp) {
-   err = -EINVAL;
-   goto out_unmap_sdcpwr;
-   }
-
-   /* we need the freq in kHz */
-   max_freq = *max_freqp / 1000;
-
pr_debug("max clock-frequency is at %u kHz\n", max_freq);
pr_debug("initializing frequency table\n");
 
-- 
2.9.5



Re: [PATCH v2] powerpc: slightly improve cache helpers

2019-07-09 Thread Nathan Chancellor
On Tue, Jul 09, 2019 at 07:04:43AM +0200, Christophe Leroy wrote:
> 
> 
> Le 08/07/2019 à 21:14, Nathan Chancellor a écrit :
> > On Mon, Jul 08, 2019 at 11:19:30AM +1000, Michael Ellerman wrote:
> > > On Fri, 2019-05-10 at 09:24:48 UTC, Christophe Leroy wrote:
> > > > Cache instructions (dcbz, dcbi, dcbf and dcbst) take two registers
> > > > that are summed to obtain the target address. Using 'Z' constraint
> > > > and '%y0' argument gives GCC the opportunity to use both registers
> > > > instead of only one with the second being forced to 0.
> > > > 
> > > > Suggested-by: Segher Boessenkool 
> > > > Signed-off-by: Christophe Leroy 
> > > 
> > > Applied to powerpc next, thanks.
> > > 
> > > https://git.kernel.org/powerpc/c/6c5875843b87c3adea2beade9d1b8b3d4523900a
> > > 
> > > cheers
> > 
> > This patch causes a regression with clang:
> 
> Is that a Clang bug?

No idea; it happens with both clang-8 and clang-9 (pretty sure there
were PowerPC fixes in clang-8, so anything older probably won't work,
but I haven't tried).
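
(For reference, after that commit the helpers use the "Z" memory
constraint with the %y operand modifier, roughly along these lines;
a sketch of the pattern, not the exact file contents:)

static inline void dcbz(void *addr)
{
	__asm__ __volatile__ ("dcbz %y0" : : "Z"(*(u8 *)addr) : "memory");
}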

> 
> Do you have a disassembly of the code both with and without this patch in
> order to compare?

I can give you whatever disassembly you want (or I can upload the raw
files if that is easier).

Cheers,
Nathan

> 
> Segher, any idea ?
> 
> Christophe
> 
> > 
> > https://travis-ci.com/ClangBuiltLinux/continuous-integration/jobs/213944668
> > 
> > I've attached my local bisect/build log.
> > 
> > Cheers,
> > Nathan
> >