Re: [PATCH v2 1/4] powerpc/sstep: support new VSX vector paired storage access instructions

2020-07-15 Thread Ravi Bangoria

Hi Bala,


@@ -2382,6 +2386,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->vsx_flags = VSX_SPLAT;
break;
  
+		case 333:	/* lxvpx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_31))
+				return -1;
+			op->reg = VSX_REGISTER_XTP(rd);
+			op->type = MKOP(LOAD_VSX, 0, 32);
+			op->element_size = 32;
+			op->vsx_flags = VSX_CHECK_VEC;

Why VSX_CHECK_VEC?

Ravi


[PATCH v2 4/4] powerpc sstep: add testcases for vsx load/store instructions

2020-07-15 Thread Balamuruhan S
Add testcases for the new VSX vector paired load/store instructions:
* Load VSX Vector Paired (lxvp)
* Load VSX Vector Paired Indexed (lxvpx)
* Prefixed Load VSX Vector Paired (plxvp)
* Store VSX Vector Paired (stxvp)
* Store VSX Vector Paired Indexed (stxvpx)
* Prefixed Store VSX Vector Paired (pstxvp)

Signed-off-by: Balamuruhan S 
---
 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/lib/test_emulate_step.c  | 273 ++
 2 files changed, 280 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index f7ffbe11624e..aa688d13981a 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -389,6 +389,10 @@
 #define PPC_INST_VCMPEQUD		0x100000c7
 #define PPC_INST_VCMPEQUB		0x10000006
 
+/* Prefixes */
+#define PPC_PREFIX_MLS			0x06000000
+#define PPC_PREFIX_8LS			0x04000000
+
 /* macros to insert fields into opcodes */
 #define ___PPC_RA(a)   (((a) & 0x1f) << 16)
 #define ___PPC_RB(b)   (((b) & 0x1f) << 11)
@@ -420,6 +424,9 @@
 #define __PPC_CT(t)(((t) & 0x0f) << 21)
 #define __PPC_SPR(r)	((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))
 #define __PPC_RC21 (0x1 << 10)
+#define __PPC_PRFX_R(r)(((r) & 0x1) << 20)
+#define __PPC_TP(tp)   (((tp) & 0xf) << 22)
+#define __PPC_TX(tx)   (((tx) & 0x1) << 21)
 
 /*
  * Both low and high 16 bits are added as SIGNED additions, so if low 16 bits
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 46af80279ebc..98ecbc66bef8 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -14,7 +14,13 @@
 #include 
 
 #define IMM_L(i)	((uintptr_t)(i) & 0xffff)
+#define IMM_H(i)	(((uintptr_t)(i) >> 16) & 0x3ffff)
 #define IMM_DS(i)	((uintptr_t)(i) & 0xfffc)
+#define IMM_DQ(i)	(((uintptr_t)(i) & 0xfff) << 4)
+
+#define PLXVP_EX_OP	0xe8000000
+#define PSTXVP_EX_OP	0xf8000000
+
 
 /*
  * Defined with TEST_ prefix so it does not conflict with other
@@ -47,6 +53,21 @@
___PPC_RA(a) | ___PPC_RB(b))
 #define TEST_LXVD2X(s, a, b)	ppc_inst(PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b))
 #define TEST_STXVD2X(s, a, b)	ppc_inst(PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b))
+#define TEST_LXVP(tp, tx, a, i) \
+	(PPC_INST_LXVP | __PPC_TP(tp) | __PPC_TX(tx) | ___PPC_RA(a) | IMM_DQ(i))
+#define TEST_STXVP(sp, sx, a, i) \
+	(PPC_INST_STXVP | __PPC_TP(sp) | __PPC_TX(sx) | ___PPC_RA(a) | IMM_DQ(i) | 0x1)
+#define TEST_LXVPX(tp, tx, a, b) \
+	(PPC_INST_LXVPX | __PPC_TP(tp) | __PPC_TX(tx) | ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STXVPX(sp, sx, a, b) \
+	(PPC_INST_STXVPX | __PPC_TP(sp) | __PPC_TX(sx) | ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_PLXVP(a, i, pr, tp, tx) \
+	((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i)) << 32 | \
+	 (PLXVP_EX_OP | __PPC_TP(tp) | __PPC_TX(tx) | ___PPC_RA(a) | IMM_L(i)))
+#define TEST_PSTXVP(a, i, pr, sp, sx) \
+	((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i)) << 32 | \
+	 (PSTXVP_EX_OP | __PPC_TP(sp) | __PPC_TX(sx) | ___PPC_RA(a) | IMM_L(i)))
+
 #define TEST_ADD(t, a, b)	ppc_inst(PPC_INST_ADD | ___PPC_RT(t) |	\
 					 ___PPC_RA(a) | ___PPC_RB(b))
 #define TEST_ADD_DOT(t, a, b)	ppc_inst(PPC_INST_ADD | ___PPC_RT(t) |	\
@@ -444,6 +465,255 @@ static void __init test_lxvd2x_stxvd2x(void)
 }
 #endif /* CONFIG_VSX */
 
+#ifdef CONFIG_VSX
+static void __init test_lxvp_stxvp(void)
+{
+   struct pt_regs regs;
+   union {
+   vector128 a[2];
+   u32 b[8];
+   } c;
+   u32 cached_b[8];
+   int stepped = -1;
+
+	init_pt_regs(&regs);
+
+   /*** lxvp ***/
+
+   cached_b[0] = c.b[0] = 18233;
+   cached_b[1] = c.b[1] = 34863571;
+   cached_b[2] = c.b[2] = 834;
+   cached_b[3] = c.b[3] = 6138911;
+   cached_b[4] = c.b[4] = 1234;
+   cached_b[5] = c.b[5] = 5678;
+   cached_b[6] = c.b[6] = 91011;
+   cached_b[7] = c.b[7] = 121314;
+
+   regs.gpr[4] = (unsigned long)&c.a;
+
+	/*
+	 * lxvp XTp,DQ(RA)
+	 * XTp = 32 * TX + 2 * Tp
+	 * let TX=1 Tp=1 RA=4 DQ=0
+	 */
+	stepped = emulate_step(&regs, ppc_inst(TEST_LXVP(1, 1, 4, 0)));
+
+   if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
+   show_result("lxvp", "PASS");
+   } else {
+   if (!cpu_has_feature(CPU_FTR_VSX))
+   show_result("lxvp", "PASS (!CPU_FTR_VSX)");
+   else
+   show_result("lxvp", "FAIL");
+   }
+
+   /*** stxvp ***/
+
+   c.b[0] = 21379463;
+   c.b[1] = 87;
+   c.b[2] = 374234;
+   c.b[3] = 4;
+   c.b[4] = 90;
+   c.b[5] = 122;
+   c.b[6] = 555;
+   c
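As a sanity check on the encodings above, here is a standalone sketch that
recomputes the instruction word used by the lxvp test (it mirrors the
TEST_LXVP() macro and field helpers from this patch; the printed value is
what emulate_step() is handed):

#include <stdint.h>
#include <stdio.h>

/* mirrors of the macros used by the test above */
#define PPC_INST_LXVP	0x18000000
#define ___PPC_RA(a)	(((a) & 0x1f) << 16)
#define __PPC_TP(tp)	(((tp) & 0xf) << 22)
#define __PPC_TX(tx)	(((tx) & 0x1) << 21)
#define IMM_DQ(i)	(((uintptr_t)(i) & 0xfff) << 4)

#define TEST_LXVP(tp, tx, a, i) \
	(PPC_INST_LXVP | __PPC_TP(tp) | __PPC_TX(tx) | ___PPC_RA(a) | IMM_DQ(i))

int main(void)
{
	/* TX=1, Tp=1, RA=4, DQ=0 targets VSRs 34-35 (XTp = 32*TX + 2*Tp) */
	printf("lxvp test word: 0x%08lx\n",
	       (unsigned long)TEST_LXVP(1, 1, 4, 0));	/* prints 0x18640000 */
	return 0;
}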

[PATCH v2 3/4] powerpc ppc-opcode: add opcodes for vsx vector paired instructions

2020-07-15 Thread Balamuruhan S
Add instruction opcodes for the new VSX vector paired instructions:
* Load VSX Vector Paired (lxvp)
* Load VSX Vector Paired Indexed (lxvpx)
* Store VSX Vector Paired (stxvp)
* Store VSX Vector Paired Indexed (stxvpx)

Signed-off-by: Balamuruhan S 
---
 arch/powerpc/include/asm/ppc-opcode.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 777d5056a71c..f7ffbe11624e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -210,6 +210,10 @@
 #define PPC_INST_ISEL			0x7c00001e
 #define PPC_INST_ISEL_MASK		0xfc00003e
 #define PPC_INST_LDARX			0x7c0000a8
+#define PPC_INST_LXVP			0x18000000
+#define PPC_INST_LXVPX			0x7c00029a
+#define PPC_INST_STXVP			0x18000001
+#define PPC_INST_STXVPX			0x7c00039a
 #define PPC_INST_STDCX 0x7c0001ad
 #define PPC_INST_LQARX 0x7c000228
 #define PPC_INST_STQCX 0x7c00016d
-- 
2.24.1



[PATCH v2 2/4] powerpc/sstep: support emulation for vsx vector paired storage access instructions

2020-07-15 Thread Balamuruhan S
Add emulate_step() changes to support VSX vector paired storage
access instructions, which provide octword (32-byte) loads/stores
between storage and a set of two Vector-Scalar Registers (VSRs).

Signed-off-by: Balamuruhan S 
---
 arch/powerpc/include/asm/sstep.h |  2 +-
 arch/powerpc/lib/sstep.c | 58 +++-
 2 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 3b01c69a44aa..a6c0b299bcc9 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -126,7 +126,7 @@ union vsx_reg {
unsigned long d[2];
float   fp[4];
double  dp[2];
-   __vector128 v;
+   __vector128 v[2];
 };
 
 /*
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 1af8c1920b36..010ce81aeffb 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -279,6 +279,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
up[1] = tmp;
break;
}
+   case 32: {
+   unsigned long *up = (unsigned long *)ptr;
+   unsigned long tmp;
+
+   tmp = byterev_8(up[0]);
+   up[0] = byterev_8(up[3]);
+   up[3] = tmp;
+   tmp = byterev_8(up[2]);
+   up[2] = byterev_8(up[1]);
+   up[1] = tmp;
+   break;
+   }
+
 #endif
default:
WARN_ON_ONCE(1);
@@ -709,6 +722,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
reg->d[0] = reg->d[1] = 0;
 
switch (op->element_size) {
+   case 32:
+   /* [p]lxvp[x] or [p]stxvp[x] */
case 16:
/* whole vector; lxv[x] or lxvl[l] */
if (size == 0)
@@ -717,7 +732,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
rev = !rev;
if (rev)
-   do_byte_reverse(reg, 16);
+   do_byte_reverse(reg, size);
break;
case 8:
/* scalar loads, lxvd2x, lxvdsx */
@@ -793,6 +808,22 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
size = GETSIZE(op->type);
 
switch (op->element_size) {
+   case 32:
+   /* [p]lxvp[x] or [p]stxvp[x] */
+   if (size == 0)
+   break;
+   if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+   rev = !rev;
+   if (rev) {
+   /* reverse 32 bytes */
+   buf.d[0] = byterev_8(reg->d[3]);
+   buf.d[1] = byterev_8(reg->d[2]);
+   buf.d[2] = byterev_8(reg->d[1]);
+   buf.d[3] = byterev_8(reg->d[0]);
+   reg = &buf;
+   }
+   memcpy(mem, reg, size);
+   break;
case 16:
/* stxv, stxvx, stxvl, stxvll */
if (size == 0)
@@ -861,28 +892,33 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
   bool cross_endian)
 {
int reg = op->reg;
-   u8 mem[16];
+   int i, nr_vsx_regs;
+   u8 mem[32];
union vsx_reg buf;
int size = GETSIZE(op->type);
 
if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
return -EFAULT;
 
+   nr_vsx_regs = size / sizeof(__vector128);
emulate_vsx_load(op, &buf, mem, cross_endian);
preempt_disable();
if (reg < 32) {
/* FP regs + extensions */
if (regs->msr & MSR_FP) {
-   load_vsrn(reg, &buf);
+   for (i = 0; i < nr_vsx_regs; i++)
+   load_vsrn(reg + i, &buf.v[i]);
} else {
current->thread.fp_state.fpr[reg][0] = buf.d[0];
current->thread.fp_state.fpr[reg][1] = buf.d[1];
}
} else {
if (regs->msr & MSR_VEC)
-   load_vsrn(reg, &buf);
+   for (i = 0; i < nr_vsx_regs; i++)
+   load_vsrn(reg + i, &buf.v[i]);
+
else
-   current->thread.vr_state.vr[reg - 32] = buf.v;
+   current->thread.vr_state.vr[reg - 32] = buf.v[0];
}
preempt_enable();
return 0;
@@ -893,27 +929,31 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
bool cross_endian)
 {
int reg = op->reg;
-   u8 mem[16];
+   int i, nr_vsx_regs;
+   u8 mem[32];
union vsx_reg buf;
int size = GETSIZE(op->type);
 
if (!address_ok(regs, ea, size))
return -EFAULT;
 
+
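To see what the new case 32 in do_byte_reverse() above does, here is a
userspace model of the same full 32-byte reversal (byterev_8() stood in by
the GCC builtin; the swap pattern is copied from the hunk):

#include <assert.h>
#include <stdint.h>
#include <string.h>

static uint64_t byterev_8(uint64_t x)
{
	return __builtin_bswap64(x);	/* stand-in for the kernel helper */
}

/* same swap pattern as the new case 32: reverses all 32 bytes in place */
static void byte_reverse_32(void *ptr)
{
	uint64_t *up = ptr, tmp;

	tmp = byterev_8(up[0]);
	up[0] = byterev_8(up[3]);
	up[3] = tmp;
	tmp = byterev_8(up[2]);
	up[2] = byterev_8(up[1]);
	up[1] = tmp;
}

int main(void)
{
	uint8_t buf[32], want[32];

	for (int i = 0; i < 32; i++) {
		buf[i] = i;
		want[i] = 31 - i;
	}
	byte_reverse_32(buf);
	assert(!memcmp(buf, want, 32));	/* bytes 0..31 now read 31..0 */
	return 0;
}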

[PATCH v2 1/4] powerpc/sstep: support new VSX vector paired storage access instructions

2020-07-15 Thread Balamuruhan S
VSX vector paired instructions load/store an octword (32 bytes)
from/to storage into two sequential VSRs. Add `analyse_instr()` support
for these new instructions:
* Load VSX Vector Paired (lxvp)
* Load VSX Vector Paired Indexed (lxvpx)
* Prefixed Load VSX Vector Paired (plxvp)
* Store VSX Vector Paired (stxvp)
* Store VSX Vector Paired Indexed (stxvpx)
* Prefixed Store VSX Vector Paired (pstxvp)

Signed-off-by: Balamuruhan S 
---
 arch/powerpc/lib/sstep.c | 52 
 1 file changed, 52 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 5abe98216dc2..1af8c1920b36 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -31,6 +31,10 @@ extern char system_call_common[];
 #define XER_OV32	0x00080000U
 #define XER_CA32	0x00040000U
 
+#ifdef CONFIG_VSX
+#define VSX_REGISTER_XTP(rd)	((((rd) & 1) << 5) | ((rd) & 0xfe))
+#endif
+
 #ifdef CONFIG_PPC_FPU
 /*
  * Functions in ldstfp.S
@@ -2382,6 +2386,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->vsx_flags = VSX_SPLAT;
break;
 
+   case 333:   /* lxvpx */
+   if (!cpu_has_feature(CPU_FTR_ARCH_31))
+   return -1;
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->type = MKOP(LOAD_VSX, 0, 32);
+   op->element_size = 32;
+   op->vsx_flags = VSX_CHECK_VEC;
+   break;
+
case 364:   /* lxvwsx */
op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2410,6 +2423,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
VSX_CHECK_VEC;
break;
}
+   case 461:   /* stxvpx */
+   if (!cpu_has_feature(CPU_FTR_ARCH_31))
+   return -1;
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->type = MKOP(STORE_VSX, 0, 32);
+   op->element_size = 32;
+   op->vsx_flags = VSX_CHECK_VEC;
+   break;
case 524:   /* lxsspx */
op->reg = rd | ((word & 1) << 5);
op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2651,6 +2672,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 #endif
 
 #ifdef CONFIG_VSX
+   case 6:
+   if (!cpu_has_feature(CPU_FTR_ARCH_31))
+   return -1;
+   op->ea = dqform_ea(word, regs);
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->element_size = 32;
+   op->vsx_flags = VSX_CHECK_VEC;
+   switch (word & 0xf) {
+   case 0: /* lxvp */
+   op->type = MKOP(LOAD_VSX, 0, 32);
+   break;
+   case 1: /* stxvp */
+   op->type = MKOP(STORE_VSX, 0, 32);
+   break;
+   }
+   break;
+
case 61:/* stfdp, lxv, stxsd, stxssp, stxv */
switch (word & 7) {
case 0: /* stfdp with LSB of DS field = 0 */
@@ -2715,6 +2753,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
}
break;
case 1: /* Prefixed instructions */
+   if (!cpu_has_feature(CPU_FTR_ARCH_31))
+   return -1;
prefix_r = word & (1ul << 20);
ra = (suffix >> 16) & 0x1f;
op->update_reg = ra;
@@ -2779,12 +2819,24 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 57:/* pld */
op->type = MKOP(LOAD, PREFIXED, 8);
break;
+   case 58:/* plxvp */
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->type = MKOP(LOAD_VSX, PREFIXED, 32);
+   op->element_size = 32;
+   op->vsx_flags = VSX_CHECK_VEC;
+   break;
case 60:/* stq */
op->type = MKOP(STORE, PREFIXED, 16);
break;
case 61:/* pstd */
op->type = MKOP(STORE, PREFIXED, 8);
break;
+   case 62:/* pstxvp */
+   op->reg = VSX_REGISTER_XTP(rd);
+   op->type = MKOP(STORE_VSX, PREFIXED, 32);
+   op-
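For reference, the register-pair mapping that VSX_REGISTER_XTP() above
implements can be checked in isolation; a small userspace sketch (the macro
is copied from the hunk above, everything else is illustration):

#include <stdio.h>

#define VSX_REGISTER_XTP(rd)	((((rd) & 1) << 5) | ((rd) & 0xfe))

int main(void)
{
	/* rd is the raw 5-bit Tp||TX field; the result is the first VSR
	 * of the pair, i.e. XTp = 32 * TX + 2 * Tp */
	for (int rd = 0; rd < 32; rd++)
		printf("rd=%2d -> VSRs %2d-%2d\n", rd,
		       VSX_REGISTER_XTP(rd), VSX_REGISTER_XTP(rd) + 1);
	return 0;
}

For example rd=1 (Tp=0, TX=1) maps to VSRs 32-33, and rd=3 (Tp=1, TX=1) to
VSRs 34-35, matching the lxvp test case in patch 4.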

[PATCH v2 0/4] VSX 32-byte vector paired load/store instructions

2020-07-15 Thread Balamuruhan S
VSX vector paired instructions operate on an octword (32-byte) operand
for loads and stores between storage and a pair of two sequential Vector-Scalar
Registers (VSRs). There are 4 word instructions and 2 prefixed instructions
that provide this 32-byte storage access - lxvp, lxvpx, stxvp,
stxvpx, plxvp, pstxvp.

The emulation infrastructure doesn't yet support these instructions, i.e.
32-byte storage accesses operating on 2 VSX registers. This patch series
enables the instruction emulation support and adds test cases for them
respectively.

Changes in v2:
--------------
* As suggested by Sandipan, wrap the ISA 3.1 instructions with a
  cpu_has_feature(CPU_FTR_ARCH_31) check.

* Rebase on latest powerpc next branch.

Balamuruhan S (4):
  powerpc/sstep: support new VSX vector paired storage access
instructions
  powerpc/sstep: support emulation for vsx vector paired storage access
instructions
  powerpc ppc-opcode: add opcodes for vsx vector paired instructions
  powerpc sstep: add testcases for vsx load/store instructions

 arch/powerpc/include/asm/ppc-opcode.h |  11 ++
 arch/powerpc/include/asm/sstep.h  |   2 +-
 arch/powerpc/lib/sstep.c  | 110 ++-
 arch/powerpc/lib/test_emulate_step.c  | 273 ++
 4 files changed, 386 insertions(+), 10 deletions(-)


base-commit: b2b46304e9360f3dda49c9d8ba4a1478b9eecf1d
-- 
2.24.1



Re: [RFC PATCH 4/7] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode

2020-07-15 Thread Nicholas Piggin
Excerpts from Andy Lutomirski's message of July 16, 2020 3:18 pm:
> 
> 
>> On Jul 15, 2020, at 9:15 PM, Nicholas Piggin  wrote:
>> 
>> Excerpts from Mathieu Desnoyers's message of July 14, 2020 12:13 am:
>>> - On Jul 13, 2020, at 9:47 AM, Nicholas Piggin npig...@gmail.com wrote:
>>> 
 Excerpts from Nicholas Piggin's message of July 13, 2020 2:45 pm:
> Excerpts from Andy Lutomirski's message of July 11, 2020 3:04 am:
>> Also, as it stands, I can easily see in_irq() ceasing to promise to
>> serialize.  There are older kernels for which it does not promise to
>> serialize.  And I have plans to make it stop serializing in the
>> nearish future.
> 
> You mean x86's return from interrupt? Sounds fun... you'll know where to
> update the membarrier sync code, at least :)
 
 Oh, I should actually say Mathieu recently clarified a return from
 interrupt doesn't fundamentally need to serialize in order to support
 membarrier sync core.
>>> 
>>> Clarification to your statement:
>>> 
>>> Return from interrupt to kernel code does not need to be context serializing
>>> as long as kernel serializes before returning to user-space.
>>> 
>>> However, return from interrupt to user-space needs to be context 
>>> serializing.
>> 
>> Hmm, I'm not sure it's enough even with the sync in the exit_lazy_tlb
>> in the right places.
>> 
>> A kernel thread does a use_mm, then it blocks and the user process with
>> the same mm runs on that CPU, and then it calls into the kernel, blocks,
>> the kernel thread runs again, another CPU issues a membarrier which does
>> not IPI this one because it's running a kthread, and then the kthread
>> switches back to the user process (still without having unused the mm),
>> and then the user process returns from syscall without having done a 
>> core synchronising instruction.
>> 
>> The cause of the problem is you want to avoid IPI'ing kthreads. Why?
>> I'm guessing it really only matters as an optimisation in case of idle
>> threads. Idle thread is easy (well, easier) because it won't use_mm, so 
>> you could check for rq->curr == rq->idle in your loop (in a suitable 
>> sched accessor function).
>> 
>> But... I'm not really liking this subtlety in the scheduler for all this 
>> (the scheduler still needs the barriers when switching out of idle).
>> 
>> Can it be improved somehow? Let me forget x86 core sync problem for now
>> (that _may_ be a bit harder), and step back and look at what we're doing.
>> The memory barrier case would actually suffer from the same problem as
>> core sync, because in the same situation it has no implicit mmdrop in
>> the scheduler switch code either.
>> 
>> So what are we doing with membarrier? We want any activity caused by the 
>> set of CPUs/threads specified that can be observed by this thread before 
>> calling membarrier is appropriately fenced from activity that can be 
>> observed to happen after the call returns.
>> 
>> CPU0                     CPU1
>>                          1. user stuff
>> a. membarrier()          2. enter kernel
>> b. read rq->curr         3. rq->curr switched to kthread
>> c. is kthread, skip IPI  4. switch_to kthread
>> d. return to user        5. rq->curr switched to user thread
>>                          6. switch_to user thread
>>                          7. exit kernel
>>                          8. more user stuff
>> 
>> As far as I can see, the problem is CPU1 might reorder step 5 and step
>> 8, so you have mmdrop of lazy mm be a mb after step 6.
>> 
>> But why? The membarrier call only cares that there is a full barrier
>> between 1 and 8, right? Which it will get from the previous context
>> switch to the kthread.
>> 
>> I must say the memory barrier comments in membarrier could be improved
>> a bit (unless I'm missing where the main comment is). It's fine to know
>> what barriers pair with one another, but we need to know which exact
>> memory accesses it is ordering
>> 
>>   /*
>> * Matches memory barriers around rq->curr modification in
>> * scheduler.
>> */
>> 
>> Sure, but it doesn't say what else is being ordered. I think it's just
>> the user memory accesses, but would be nice to make that a bit more
>> explicit. If we had such comments then we might know this case is safe.
>> 
>> I think the funny powerpc barrier is a similar case of this. If we
>> ever see remote_rq->curr->flags & PF_KTHREAD, then we _know_ that
>> CPU has or will have issued a memory barrier between running user
>> code.
>> 
>> So AFAIKS all this membarrier stuff in kernel/sched/core.c could
>> just go away. Except x86 because thread switch doesn't imply core
>> sync, so CPU1 between 1 and 8 may never issue a core sync instruction
>> the same way a context switch must be a full mb.
>> 
>> Before getting to x86 -- Am I right, or way off track here?
> 
> I find it hard to believe that this is x86 only. Why would thread switch 
> imply core sync on any architecture?  Is x86 unique in having a 

Re: [PATCH v3 04/12] ppc64/kexec_file: avoid stomping memory used by special regions

2020-07-15 Thread Thiago Jung Bauermann


Thiago Jung Bauermann  writes:

> Hari Bathini  writes:
>
>> diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h
>> new file mode 100644
>> index 000..90deb46
>> --- /dev/null
>> +++ b/arch/powerpc/include/asm/crashdump-ppc64.h
>> @@ -0,0 +1,10 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H
>> +#define _ASM_POWERPC_CRASHDUMP_PPC64_H
>> +
>> +/* min & max addresses for kdump load segments */
>> +#define KDUMP_BUF_MIN   (crashk_res.start)
>> +#define KDUMP_BUF_MAX   ((crashk_res.end < ppc64_rma_size) ? \
>> + crashk_res.end : (ppc64_rma_size - 1))
>> +
>> +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */
>> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
>> index 7008ea1..bf47a01 100644
>> --- a/arch/powerpc/include/asm/kexec.h
>> +++ b/arch/powerpc/include/asm/kexec.h
>> @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co
>>  #ifdef CONFIG_KEXEC_FILE
>>  extern const struct kexec_file_ops kexec_elf64_ops;
>>
>> -#ifdef CONFIG_IMA_KEXEC
>>  #define ARCH_HAS_KIMAGE_ARCH
>>
>>  struct kimage_arch {
>> +struct crash_mem *exclude_ranges;
>> +
>> +#ifdef CONFIG_IMA_KEXEC
>>  phys_addr_t ima_buffer_addr;
>>  size_t ima_buffer_size;
>> -};
>>  #endif
>> +};
>>
>>  int setup_purgatory(struct kimage *image, const void *slave_code,
>>  const void *fdt, unsigned long kernel_load_addr,
>> @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
>>  unsigned long initrd_load_addr,
>>  unsigned long initrd_len, const char *cmdline);
>>  #endif /* CONFIG_PPC64 */
>> +
>>  #endif /* CONFIG_KEXEC_FILE */
>>
>>  #else /* !CONFIG_KEXEC_CORE */
>> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
>> index 23ad04c..c695f94 100644
>> --- a/arch/powerpc/kexec/elf_64.c
>> +++ b/arch/powerpc/kexec/elf_64.c
>> @@ -22,6 +22,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include <asm/crashdump-ppc64.h>
>>
>>  static void *elf64_load(struct kimage *image, char *kernel_buf,
>>  unsigned long kernel_len, char *initrd,
>> @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>>  if (ret)
>>  goto out;
>>
>> +if (image->type == KEXEC_TYPE_CRASH) {
>> +/* min & max buffer values for kdump case */
>> +kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN;
>> +kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX;
>
> This is only my personal opinion and an actual maintainer may disagree,
> but just looking at the lines above, I would assume that KDUMP_BUF_MIN
> and KDUMP_BUF_MAX were constants, when in fact they aren't.
>
> I suggest using static inline functions in <asm/crashdump-ppc64.h>, for
> example:
>
> static inline resource_size_t get_kdump_buf_min(void)
> {
>   return crashk_res.start;
> }
>
> static inline resource_size_t get_kdump_buf_max(void)
> {
>   return (crashk_res.end < ppc64_rma_size) ?
>            crashk_res.end : (ppc64_rma_size - 1);
> }

I later noticed that KDUMP_BUF_MIN and KDUMP_BUF_MAX are only used here.
In this case, I think the best option is to avoid the macros and inline
functions and just use the actual expressions in the code.

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [PATCH v3 12/12] ppc64/kexec_file: fix kexec load failure with lack of memory hole

2020-07-15 Thread Thiago Jung Bauermann


Hari Bathini  writes:

> The kexec purgatory has to run in real mode. Only the first memory
> block maybe accessible in real mode. And, unlike the case with panic
> kernel, no memory is set aside for regular kexec load. Another thing
> to note is, the memory for crashkernel is reserved at an offset of
> 128MB. So, when crashkernel memory is reserved, the memory ranges to
> load kexec segments shrink further as the generic code only looks for
> memblock free memory ranges and in all likelihood only a tiny bit of
> memory from 0 to 128MB would be available to load kexec segments.
>
> With kdump being used by default in general, kexec file load is likely
> to fail almost always.

Ah. I wasn't aware of this problem.

> This can be fixed by changing the memory hole
> lookup logic for regular kexec to use the same method as kdump.

Right. It doesn't make that much sense to use memblock to find free
memory areas for the kexec kernel, because memblock tracks which memory
areas are free for the currently running kernel. But that's not what
matters for the kernel that will be kexec'd into. In this case, regions
which may be reserved for the current OS instance may well be free for a
freshly started kernel. The kdump method is better at knowing which
memory regions are actually reserved by the firmware/hardware.

> This
> would mean that most kexec segments will overlap with crashkernel
> memory region. That should still be ok as the pages, whose destination
> address isn't available while loading, are placed in an intermediate
> location till a flush to the actual destination address happens during
> kexec boot sequence.

Yes, since the kdump kernel and the "regular" kexec kernel can't be both
booted at the same time, it's not a problem if both plan to use the same
region of memory.

>
> Signed-off-by: Hari Bathini 
> Tested-by: Pingfan Liu 

Reviewed-by: Thiago Jung Bauermann 

> ---
>
> v2 -> v3:
> * Unchanged. Added Tested-by tag from Pingfan.
>
> v1 -> v2:
> * New patch to fix locating memory hole for kexec_file_load (kexec -s -l)
>   when memory is reserved for crashkernel.
>
>
>  arch/powerpc/kexec/file_load_64.c |   33 ++---
>  1 file changed, 14 insertions(+), 19 deletions(-)

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [RFC PATCH 4/7] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode

2020-07-15 Thread Andy Lutomirski



> On Jul 15, 2020, at 9:15 PM, Nicholas Piggin  wrote:
> 
> Excerpts from Mathieu Desnoyers's message of July 14, 2020 12:13 am:
>> - On Jul 13, 2020, at 9:47 AM, Nicholas Piggin npig...@gmail.com wrote:
>> 
>>> Excerpts from Nicholas Piggin's message of July 13, 2020 2:45 pm:
 Excerpts from Andy Lutomirski's message of July 11, 2020 3:04 am:
> Also, as it stands, I can easily see in_irq() ceasing to promise to
> serialize.  There are older kernels for which it does not promise to
> serialize.  And I have plans to make it stop serializing in the
> nearish future.
 
 You mean x86's return from interrupt? Sounds fun... you'll know where to
 update the membarrier sync code, at least :)
>>> 
>>> Oh, I should actually say Mathieu recently clarified a return from
>>> interrupt doesn't fundamentally need to serialize in order to support
>>> membarrier sync core.
>> 
>> Clarification to your statement:
>> 
>> Return from interrupt to kernel code does not need to be context serializing
>> as long as kernel serializes before returning to user-space.
>> 
>> However, return from interrupt to user-space needs to be context serializing.
> 
> Hmm, I'm not sure it's enough even with the sync in the exit_lazy_tlb
> in the right places.
> 
> A kernel thread does a use_mm, then it blocks and the user process with
> the same mm runs on that CPU, and then it calls into the kernel, blocks,
> the kernel thread runs again, another CPU issues a membarrier which does
> not IPI this one because it's running a kthread, and then the kthread
> switches back to the user process (still without having unused the mm),
> and then the user process returns from syscall without having done a 
> core synchronising instruction.
> 
> The cause of the problem is you want to avoid IPI'ing kthreads. Why?
> I'm guessing it really only matters as an optimisation in case of idle
> threads. Idle thread is easy (well, easier) because it won't use_mm, so 
> you could check for rq->curr == rq->idle in your loop (in a suitable 
> sched accessor function).
> 
> But... I'm not really liking this subtlety in the scheduler for all this 
> (the scheduler still needs the barriers when switching out of idle).
> 
> Can it be improved somehow? Let me forget x86 core sync problem for now
> (that _may_ be a bit harder), and step back and look at what we're doing.
> The memory barrier case would actually suffer from the same problem as
> core sync, because in the same situation it has no implicit mmdrop in
> the scheduler switch code either.
> 
> So what are we doing with membarrier? We want any activity caused by the 
> set of CPUs/threads specified that can be observed by this thread before 
> calling membarrier is appropriately fenced from activity that can be 
> observed to happen after the call returns.
> 
> CPU0                     CPU1
>                          1. user stuff
> a. membarrier()          2. enter kernel
> b. read rq->curr         3. rq->curr switched to kthread
> c. is kthread, skip IPI  4. switch_to kthread
> d. return to user        5. rq->curr switched to user thread
>                          6. switch_to user thread
>                          7. exit kernel
>                          8. more user stuff
> 
> As far as I can see, the problem is CPU1 might reorder step 5 and step
> 8, so you have mmdrop of lazy mm be a mb after step 6.
> 
> But why? The membarrier call only cares that there is a full barrier
> between 1 and 8, right? Which it will get from the previous context
> switch to the kthread.
> 
> I must say the memory barrier comments in membarrier could be improved
> a bit (unless I'm missing where the main comment is). It's fine to know
> what barriers pair with one another, but we need to know which exact
> memory accesses it is ordering
> 
>   /*
> * Matches memory barriers around rq->curr modification in
> * scheduler.
> */
> 
> Sure, but it doesn't say what else is being ordered. I think it's just
> the user memory accesses, but would be nice to make that a bit more
> explicit. If we had such comments then we might know this case is safe.
> 
> I think the funny powerpc barrier is a similar case of this. If we
> ever see remote_rq->curr->flags & PF_KTHREAD, then we _know_ that
> CPU has or will have issued a memory barrier between running user
> code.
> 
> So AFAIKS all this membarrier stuff in kernel/sched/core.c could
> just go away. Except x86 because thread switch doesn't imply core
> sync, so CPU1 between 1 and 8 may never issue a core sync instruction
> the same way a context switch must be a full mb.
> 
> Before getting to x86 -- Am I right, or way off track here?

I find it hard to believe that this is x86 only. Why would thread switch imply 
core sync on any architecture?  Is x86 unique in having a stupid expensive core 
sync that is heavier than smp_mb()?

But I’m wondering if all this deferred sync stuff is wrong. In the brave new 
world of io_uring and such, perhaps kerne

Re: [PATCH v3] powerpc/pseries: detect secure and trusted boot state of the system.

2020-07-15 Thread Michael Ellerman
Daniel Axtens  writes:
> Hi Nayna,
>
> Looks good to me.
>
> Sorry for not noticing this before, but I think
>> +#include <asm/machdep.h>

> is now superfluous (I think it's leftover from the machine_is
> version?). Maybe mpe will take pity on you and remove it when he picks
> up your patch.

Yeah I did that.

cheers


Re: [RFC PATCH 4/7] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode

2020-07-15 Thread Nicholas Piggin
Excerpts from Nicholas Piggin's message of July 16, 2020 2:15 pm:
> Excerpts from Mathieu Desnoyers's message of July 14, 2020 12:13 am:
>> - On Jul 13, 2020, at 9:47 AM, Nicholas Piggin npig...@gmail.com wrote:
>> 
>>> Excerpts from Nicholas Piggin's message of July 13, 2020 2:45 pm:
 Excerpts from Andy Lutomirski's message of July 11, 2020 3:04 am:
> Also, as it stands, I can easily see in_irq() ceasing to promise to
> serialize.  There are older kernels for which it does not promise to
> serialize.  And I have plans to make it stop serializing in the
> nearish future.
 
 You mean x86's return from interrupt? Sounds fun... you'll know where to
 update the membarrier sync code, at least :)
>>> 
>>> Oh, I should actually say Mathieu recently clarified a return from
>>> interrupt doesn't fundamentally need to serialize in order to support
>>> membarrier sync core.
>> 
>> Clarification to your statement:
>> 
>> Return from interrupt to kernel code does not need to be context serializing
>> as long as kernel serializes before returning to user-space.
>> 
>> However, return from interrupt to user-space needs to be context serializing.
> 
> Hmm, I'm not sure it's enough even with the sync in the exit_lazy_tlb
> in the right places.
> 
> A kernel thread does a use_mm, then it blocks and the user process with
> the same mm runs on that CPU, and then it calls into the kernel, blocks,
> the kernel thread runs again, another CPU issues a membarrier which does
> not IPI this one because it's running a kthread, and then the kthread
> switches back to the user process (still without having unused the mm),
> and then the user process returns from syscall without having done a 
> core synchronising instruction.
> 
> The cause of the problem is you want to avoid IPI'ing kthreads. Why?
> I'm guessing it really only matters as an optimisation in case of idle
> threads. Idle thread is easy (well, easier) because it won't use_mm, so 
> you could check for rq->curr == rq->idle in your loop (in a suitable 
> sched accessor function).
> 
> But... I'm not really liking this subtlety in the scheduler for all this 
> (the scheduler still needs the barriers when switching out of idle).
> 
> Can it be improved somehow? Let me forget x86 core sync problem for now
> (that _may_ be a bit harder), and step back and look at what we're doing.
> The memory barrier case would actually suffer from the same problem as
> core sync, because in the same situation it has no implicit mmdrop in
> the scheduler switch code either.
> 
> So what are we doing with membarrier? We want any activity caused by the 
> set of CPUs/threads specified that can be observed by this thread before 
> calling membarrier is appropriately fenced from activity that can be 
> observed to happen after the call returns.
> 
> CPU0                     CPU1
>                          1. user stuff
> a. membarrier()          2. enter kernel
> b. read rq->curr         3. rq->curr switched to kthread
> c. is kthread, skip IPI  4. switch_to kthread
> d. return to user        5. rq->curr switched to user thread
>                          6. switch_to user thread
>                          7. exit kernel
>                          8. more user stuff
> 
> As far as I can see, the problem is CPU1 might reorder step 5 and step
> 8, so you have mmdrop of lazy mm be a mb after step 6.
> 
> But why? The membarrier call only cares that there is a full barrier
> between 1 and 8, right? Which it will get from the previous context
> switch to the kthread.

I should be more complete here, especially since I was complaining
about unclear barrier comment :)


CPU0                     CPU1
a. user stuff            1. user stuff
b. membarrier()          2. enter kernel
c. smp_mb()              3. smp_mb__after_spinlock(); // in __schedule
d. read rq->curr         4. rq->curr switched to kthread
e. is kthread, skip IPI  5. switch_to kthread
f. return to user        6. rq->curr switched to user thread
g. user stuff            7. switch_to user thread
                         8. exit kernel
                         9. more user stuff

What you're really ordering is a, g vs 1, 9 right?

In other words, 9 must see a if it sees g, g must see 1 if it saw 9,
etc.

Userspace does not care where the barriers are exactly or what kernel 
memory accesses might be being ordered by them, so long as there is a
mb somewhere between a and g, and 1 and 9. Right?
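
For readers following the thread: the userspace-visible contract being
reasoned about is the sync-core membarrier command. A minimal usage sketch
(standard Linux API since v4.16; error handling kept short):

#include <linux/membarrier.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static long membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/* register once per process... */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0))
		perror("membarrier register");

	/* ...then each call guarantees every thread of this process has
	 * executed a core-serializing instruction before it returns --
	 * the property whose kernel-side placement is debated above. */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0))
		perror("membarrier sync-core");
	return 0;
}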


Re: [RFC PATCH 4/7] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode

2020-07-15 Thread Nicholas Piggin
Excerpts from Mathieu Desnoyers's message of July 14, 2020 12:13 am:
> - On Jul 13, 2020, at 9:47 AM, Nicholas Piggin npig...@gmail.com wrote:
> 
>> Excerpts from Nicholas Piggin's message of July 13, 2020 2:45 pm:
>>> Excerpts from Andy Lutomirski's message of July 11, 2020 3:04 am:
 Also, as it stands, I can easily see in_irq() ceasing to promise to
 serialize.  There are older kernels for which it does not promise to
 serialize.  And I have plans to make it stop serializing in the
 nearish future.
>>> 
>>> You mean x86's return from interrupt? Sounds fun... you'll know where to
>>> update the membarrier sync code, at least :)
>> 
>> Oh, I should actually say Mathieu recently clarified a return from
>> interrupt doesn't fundamentally need to serialize in order to support
>> membarrier sync core.
> 
> Clarification to your statement:
> 
> Return from interrupt to kernel code does not need to be context serializing
> as long as kernel serializes before returning to user-space.
> 
> However, return from interrupt to user-space needs to be context serializing.

Hmm, I'm not sure it's enough even with the sync in the exit_lazy_tlb
in the right places.

A kernel thread does a use_mm, then it blocks and the user process with
the same mm runs on that CPU, and then it calls into the kernel, blocks,
the kernel thread runs again, another CPU issues a membarrier which does
not IPI this one because it's running a kthread, and then the kthread
switches back to the user process (still without having unused the mm),
and then the user process returns from syscall without having done a 
core synchronising instruction.

The cause of the problem is you want to avoid IPI'ing kthreads. Why?
I'm guessing it really only matters as an optimisation in case of idle
threads. Idle thread is easy (well, easier) because it won't use_mm, so 
you could check for rq->curr == rq->idle in your loop (in a suitable 
sched accessor function).

But... I'm not really liking this subtlety in the scheduler for all this 
(the scheduler still needs the barriers when switching out of idle).

Can it be improved somehow? Let me forget x86 core sync problem for now
(that _may_ be a bit harder), and step back and look at what we're doing.
The memory barrier case would actually suffer from the same problem as
core sync, because in the same situation it has no implicit mmdrop in
the scheduler switch code either.

So what are we doing with membarrier? We want any activity caused by the 
set of CPUs/threads specified that can be observed by this thread before 
calling membarrier is appropriately fenced from activity that can be 
observed to happen after the call returns.

CPU0                     CPU1
                         1. user stuff
a. membarrier()          2. enter kernel
b. read rq->curr         3. rq->curr switched to kthread
c. is kthread, skip IPI  4. switch_to kthread
d. return to user        5. rq->curr switched to user thread
                         6. switch_to user thread
                         7. exit kernel
                         8. more user stuff

As far as I can see, the problem is CPU1 might reorder step 5 and step
8, so you have mmdrop of lazy mm be a mb after step 6.

But why? The membarrier call only cares that there is a full barrier
between 1 and 8, right? Which it will get from the previous context
switch to the kthread.

I must say the memory barrier comments in membarrier could be improved
a bit (unless I'm missing where the main comment is). It's fine to know
what barriers pair with one another, but we need to know which exact
memory accesses it is ordering

   /*
 * Matches memory barriers around rq->curr modification in
 * scheduler.
 */

Sure, but it doesn't say what else is being ordered. I think it's just
the user memory accesses, but would be nice to make that a bit more
explicit. If we had such comments then we might know this case is safe.

I think the funny powerpc barrier is a similar case of this. If we
ever see remote_rq->curr->flags & PF_KTHREAD, then we _know_ that
CPU has or will have issued a memory barrier between running user
code.

So AFAIKS all this membarrier stuff in kernel/sched/core.c could
just go away. Except x86 because thread switch doesn't imply core
sync, so CPU1 between 1 and 8 may never issue a core sync instruction
the same way a context switch must be a full mb.

Before getting to x86 -- Am I right, or way off track here? 

Thanks,
Nick


Re: [PATCH v8 2/8] powerpc/vdso: Remove __kernel_datapage_offset and simplify __get_datapage()

2020-07-15 Thread Michael Ellerman
Christophe Leroy  writes:
> The VDSO datapage and the text pages are always located immediately
> next to each other, so it can be hardcoded without an indirection
> through __kernel_datapage_offset
>
> In order to ease things, move the data page in front like other
> arches, that way there is no need to know the size of the library
> to locate the data page.
>
> Before:
> clock-getres-realtime-coarse:vdso: 714 nsec/call
> clock-gettime-realtime-coarse:vdso: 792 nsec/call
> clock-gettime-realtime:vdso: 1243 nsec/call
>
> After:
> clock-getres-realtime-coarse:vdso: 699 nsec/call
> clock-gettime-realtime-coarse:vdso: 784 nsec/call
> clock-gettime-realtime:vdso: 1231 nsec/call
>
> Signed-off-by: Christophe Leroy 
> ---
> v7:
> - Moved the removal of the tmp param of __get_datapage()
> in a subsequent patch
> - Included the addition of the offset param to __get_datapage()
> in the further preparatory patch
> ---
>  arch/powerpc/include/asm/vdso_datapage.h |  8 ++--
>  arch/powerpc/kernel/vdso.c   | 53 
>  arch/powerpc/kernel/vdso32/datapage.S|  3 --
>  arch/powerpc/kernel/vdso32/vdso32.lds.S  |  7 +---
>  arch/powerpc/kernel/vdso64/datapage.S|  3 --
>  arch/powerpc/kernel/vdso64/vdso64.lds.S  |  7 +---
>  6 files changed, 16 insertions(+), 65 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
> index b9ef6cf50ea5..11886467dfdf 100644
> --- a/arch/powerpc/include/asm/vdso_datapage.h
> +++ b/arch/powerpc/include/asm/vdso_datapage.h
> @@ -118,10 +118,12 @@ extern struct vdso_data *vdso_data;
>  
>  .macro get_datapage ptr, tmp
>   bcl 20, 31, .+4
> +999:
>   mflr\ptr
> - addi\ptr, \ptr, (__kernel_datapage_offset - (.-4))@l
> - lwz \tmp, 0(\ptr)
> - add \ptr, \tmp, \ptr
> +#if CONFIG_PPC_PAGE_SHIFT > 14
> + addis   \ptr, \ptr, (_vdso_datapage - 999b)@ha
> +#endif
> + addi\ptr, \ptr, (_vdso_datapage - 999b)@l
>  .endm
>  
>  #endif /* __ASSEMBLY__ */
> diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> index f38f26e844b6..d33fa22ddbed 100644
> --- a/arch/powerpc/kernel/vdso.c
> +++ b/arch/powerpc/kernel/vdso.c
> @@ -190,7 +190,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
>* install_special_mapping or the perf counter mmap tracking code
>* will fail to recognise it as a vDSO (since arch_vma_name fails).
>*/
> - current->mm->context.vdso_base = vdso_base;
> + current->mm->context.vdso_base = vdso_base + PAGE_SIZE;

I merged this but then realised it breaks the display of the vdso in 
/proc/self/maps.

ie. the vdso vma gets no name:

  # cat /proc/self/maps
  110f9-110fa r-xp  08:03 17021844 
/usr/bin/cat
  110fa-110fb r--p  08:03 17021844 
/usr/bin/cat
  110fb-110fc rw-p 0001 08:03 17021844 
/usr/bin/cat
  12600-12603 rw-p  00:00 0
[heap]
  7fffa879-7fffa87d rw-p  00:00 0 
  7fffa87d-7fffa883 r--p  08:03 17521786   
/usr/lib/locale/en_AU.utf8/LC_CTYPE
  7fffa883-7fffa884 r--p  08:03 16958337   
/usr/lib/locale/en_AU.utf8/LC_NUMERIC
  7fffa884-7fffa885 r--p  08:03 8501358
/usr/lib/locale/en_AU.utf8/LC_TIME
  7fffa885-7fffa8ad r--p  08:03 16870886   
/usr/lib/locale/en_AU.utf8/LC_COLLATE
  7fffa8ad-7fffa8ae r--p  08:03 8509433
/usr/lib/locale/en_AU.utf8/LC_MONETARY
  7fffa8ae-7fffa8af r--p  08:03 25383753   
/usr/lib/locale/en_AU.utf8/LC_MESSAGES/SYS_LC_MESSAGES
  7fffa8af-7fffa8b0 r--p  08:03 17521790   
/usr/lib/locale/en_AU.utf8/LC_PAPER
  7fffa8b0-7fffa8b1 r--p  08:03 8501354
/usr/lib/locale/en_AU.utf8/LC_NAME
  7fffa8b1-7fffa8b2 r--p  08:03 8509431
/usr/lib/locale/en_AU.utf8/LC_ADDRESS
  7fffa8b2-7fffa8b3 r--p  08:03 8509434
/usr/lib/locale/en_AU.utf8/LC_TELEPHONE
  7fffa8b3-7fffa8b4 r--p  08:03 17521787   
/usr/lib/locale/en_AU.utf8/LC_MEASUREMENT
  7fffa8b4-7fffa8b5 r--s  08:03 25623315   
/usr/lib64/gconv/gconv-modules.cache
  7fffa8b5-7fffa8d4 r-xp  08:03 25383789   
/usr/lib64/libc-2.30.so
  7fffa8d4-7fffa8d5 r--p 001e 08:03 25383789   
/usr/lib64/libc-2.30.so
  7fffa8d5-7fffa8d6 rw-p 001f 08:03 25383789   
/usr/lib64/libc-2.30.so
  7fffa8d6-7fffa8d7 r--p  08:03 8509432
/usr/lib/locale/en_AU.utf8/LC_IDENTIFICATION
  7fffa8d7-7fffa8d9 r
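
For context, tools that cannot rely on the vma name can still find the vDSO
through the aux vector; a minimal sketch (standard glibc API, unrelated to
the patch itself) -- though perf and friends do parse /proc/self/maps, which
is why the missing name matters:

#include <elf.h>
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	/* the kernel always advertises the vDSO ELF header here,
	 * independent of whether the vma is named in /proc/self/maps */
	unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

	printf("vDSO at 0x%lx\n", vdso);
	return 0;
}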

[powerpc:fixes-test] BUILD SUCCESS f0479c4bcbd92d1a457d4a43bcab79f29d11334a

2020-07-15 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: f0479c4bcbd92d1a457d4a43bcab79f29d11334a  selftests/powerpc: Use proper error code to check fault address

elapsed time: 737m

configs tested: 100
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

arm          defconfig
arm          allyesconfig
arm          allmodconfig
arm          allnoconfig
arm64        allyesconfig
arm64        defconfig
arm64        allmodconfig
arm64        allnoconfig
arc          axs101_defconfig
c6x          evmc6457_defconfig
sh           kfr2r09-romimage_defconfig
powerpc      gamecube_defconfig
arm          lpd270_defconfig
arm          clps711x_defconfig
arm          corgi_defconfig
riscv        allyesconfig
arm          orion5x_defconfig
i386         allnoconfig
i386         allyesconfig
i386         defconfig
i386         debian-10.3
ia64         allmodconfig
ia64         defconfig
ia64         allnoconfig
ia64         allyesconfig
m68k         allmodconfig
m68k         allnoconfig
m68k         sun3_defconfig
m68k         defconfig
m68k         allyesconfig
nios2        defconfig
nios2        allyesconfig
openrisc     defconfig
c6x          allyesconfig
c6x          allnoconfig
openrisc     allyesconfig
nds32        defconfig
nds32        allnoconfig
csky         allyesconfig
csky         defconfig
alpha        defconfig
alpha        allyesconfig
xtensa       allyesconfig
h8300        allyesconfig
h8300        allmodconfig
xtensa       defconfig
arc          defconfig
arc          allyesconfig
sh           allmodconfig
sh           allnoconfig
microblaze   allnoconfig
mips         allyesconfig
mips         allnoconfig
mips         allmodconfig
parisc       allnoconfig
parisc       defconfig
parisc       allyesconfig
parisc       allmodconfig
powerpc      allyesconfig
powerpc      rhel-kconfig
powerpc      allmodconfig
powerpc      allnoconfig
powerpc      defconfig
i386         randconfig-a001-20200715
i386         randconfig-a005-20200715
i386         randconfig-a002-20200715
i386         randconfig-a006-20200715
i386         randconfig-a003-20200715
i386         randconfig-a004-20200715
x86_64       randconfig-a005-20200715
x86_64       randconfig-a006-20200715
x86_64       randconfig-a002-20200715
x86_64       randconfig-a001-20200715
x86_64       randconfig-a003-20200715
x86_64       randconfig-a004-20200715
i386         randconfig-a016-20200715
i386         randconfig-a011-20200715
i386         randconfig-a015-20200715
i386         randconfig-a012-20200715
i386         randconfig-a013-20200715
i386         randconfig-a014-20200715
riscv        allnoconfig
riscv        defconfig
riscv        allmodconfig
s390         allyesconfig
s390         allnoconfig
s390         allmodconfig
s390         defconfig
sparc        allyesconfig
sparc        defconfig
sparc64      defconfig
sparc64      allnoconfig
sparc64      allyesconfig
sparc64      allmodconfig
x86_64       rhel-7.6-kselftests
x86_64       rhel-8.3
x86_64       kexec
x86_64       rhel
x86_64       lkp
x86_64       fedora-25

---
0-DAY CI Kernel Test

[powerpc:next] BUILD SUCCESS f88223979044bfae32cb16f814c43739e5ba1301

2020-07-15 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
branch HEAD: f88223979044bfae32cb16f814c43739e5ba1301  powerpc: re-initialise lazy FPU/VEC counters on every fault

elapsed time: 724m

configs tested: 100
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

arm          defconfig
arm          allyesconfig
arm          allmodconfig
arm          allnoconfig
arm64        allyesconfig
arm64        defconfig
arm64        allmodconfig
arm64        allnoconfig
arc          axs101_defconfig
c6x          evmc6457_defconfig
sh           kfr2r09-romimage_defconfig
powerpc      gamecube_defconfig
arm          lpd270_defconfig
arm          clps711x_defconfig
arm          corgi_defconfig
riscv        allyesconfig
arm          orion5x_defconfig
i386         allnoconfig
i386         allyesconfig
i386         defconfig
i386         debian-10.3
ia64         allmodconfig
ia64         defconfig
ia64         allnoconfig
ia64         allyesconfig
m68k         allmodconfig
m68k         allnoconfig
m68k         sun3_defconfig
m68k         defconfig
m68k         allyesconfig
nios2        defconfig
nios2        allyesconfig
openrisc     defconfig
c6x          allyesconfig
c6x          allnoconfig
openrisc     allyesconfig
nds32        defconfig
nds32        allnoconfig
csky         allyesconfig
csky         defconfig
alpha        defconfig
alpha        allyesconfig
xtensa       allyesconfig
h8300        allyesconfig
h8300        allmodconfig
xtensa       defconfig
arc          defconfig
arc          allyesconfig
sh           allmodconfig
sh           allnoconfig
microblaze   allnoconfig
mips         allyesconfig
mips         allnoconfig
mips         allmodconfig
parisc       allnoconfig
parisc       defconfig
parisc       allyesconfig
parisc       allmodconfig
powerpc      defconfig
powerpc      allyesconfig
powerpc      rhel-kconfig
powerpc      allmodconfig
powerpc      allnoconfig
i386         randconfig-a001-20200715
i386         randconfig-a005-20200715
i386         randconfig-a002-20200715
i386         randconfig-a006-20200715
i386         randconfig-a003-20200715
i386         randconfig-a004-20200715
x86_64       randconfig-a005-20200715
x86_64       randconfig-a006-20200715
x86_64       randconfig-a002-20200715
x86_64       randconfig-a001-20200715
x86_64       randconfig-a003-20200715
x86_64       randconfig-a004-20200715
i386         randconfig-a016-20200715
i386         randconfig-a011-20200715
i386         randconfig-a015-20200715
i386         randconfig-a012-20200715
i386         randconfig-a013-20200715
i386         randconfig-a014-20200715
riscv        allnoconfig
riscv        defconfig
riscv        allmodconfig
s390         allyesconfig
s390         allnoconfig
s390         allmodconfig
s390         defconfig
sparc        allyesconfig
sparc        defconfig
sparc64      defconfig
sparc64      allnoconfig
sparc64      allyesconfig
sparc64      allmodconfig
x86_64       rhel-7.6-kselftests
x86_64       rhel-8.3
x86_64       kexec
x86_64       rhel
x86_64       lkp
x86_64       fedora-25

---
0-DAY CI Kernel Test Service

[powerpc:next-test] BUILD SUCCESS 085563a9059b42c635151e970fd3af941f46a0fd

2020-07-15 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next-test
branch HEAD: 085563a9059b42c635151e970fd3af941f46a0fd  powerpc/perf: Add kernel support for new MSR[HV PR] bits in trace-imc

elapsed time: 735m

configs tested: 88
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

arm          defconfig
arm          allyesconfig
arm          allmodconfig
arm          allnoconfig
arm64        allyesconfig
arm64        defconfig
arm64        allmodconfig
arm64        allnoconfig
arc          axs101_defconfig
c6x          evmc6457_defconfig
sh           kfr2r09-romimage_defconfig
powerpc      gamecube_defconfig
arm          lpd270_defconfig
arm          clps711x_defconfig
arm          corgi_defconfig
riscv        allyesconfig
arm          orion5x_defconfig
i386         allnoconfig
i386         allyesconfig
i386         defconfig
i386         debian-10.3
ia64         allmodconfig
ia64         defconfig
ia64         allnoconfig
ia64         allyesconfig
m68k         allmodconfig
m68k         allnoconfig
m68k         sun3_defconfig
m68k         defconfig
m68k         allyesconfig
nios2        defconfig
nios2        allyesconfig
openrisc     defconfig
c6x          allyesconfig
c6x          allnoconfig
openrisc     allyesconfig
nds32        defconfig
nds32        allnoconfig
csky         allyesconfig
csky         defconfig
alpha        defconfig
alpha        allyesconfig
xtensa       allyesconfig
h8300        allyesconfig
h8300        allmodconfig
xtensa       defconfig
arc          defconfig
arc          allyesconfig
sh           allmodconfig
sh           allnoconfig
microblaze   allnoconfig
mips         allyesconfig
mips         allnoconfig
mips         allmodconfig
parisc       allnoconfig
parisc       defconfig
parisc       allyesconfig
parisc       allmodconfig
powerpc      allyesconfig
powerpc      rhel-kconfig
powerpc      allmodconfig
powerpc      allnoconfig
powerpc      defconfig
i386         randconfig-a016-20200715
i386         randconfig-a011-20200715
i386         randconfig-a015-20200715
i386         randconfig-a012-20200715
i386         randconfig-a013-20200715
i386         randconfig-a014-20200715
riscv        allnoconfig
riscv        defconfig
riscv        allmodconfig
s390         allyesconfig
s390         allnoconfig
s390         allmodconfig
s390         defconfig
sparc        allyesconfig
sparc        defconfig
sparc64      defconfig
sparc64      allnoconfig
sparc64      allyesconfig
sparc64      allmodconfig
x86_64       rhel-7.6-kselftests
x86_64       rhel-8.3
x86_64       kexec
x86_64       rhel
x86_64       lkp
x86_64       fedora-25

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [RFC PATCH 7/7] lazy tlb: shoot lazies, a non-refcounting lazy tlb option

2020-07-15 Thread Nicholas Piggin
Excerpts from Andy Lutomirski's message of July 14, 2020 10:46 pm:
> 
> 
>> On Jul 13, 2020, at 11:31 PM, Nicholas Piggin  wrote:
>> 
>> Excerpts from Nicholas Piggin's message of July 14, 2020 3:04 pm:
>>> Excerpts from Andy Lutomirski's message of July 14, 2020 4:18 am:
 
> On Jul 13, 2020, at 9:48 AM, Nicholas Piggin  wrote:
> 
> Excerpts from Andy Lutomirski's message of July 14, 2020 1:59 am:
>>> On Thu, Jul 9, 2020 at 6:57 PM Nicholas Piggin  
>>> wrote:
>>> 
>>> On big systems, the mm refcount can become highly contented when doing
>>> a lot of context switching with threaded applications (particularly
>>> switching between the idle thread and an application thread).
>>> 
>>> Abandoning lazy tlb slows switching down quite a bit in the important
>>> user->idle->user cases, so so instead implement a non-refcounted scheme
>>> that causes __mmdrop() to IPI all CPUs in the mm_cpumask and shoot down
>>> any remaining lazy ones.
>>> 
>>> On a 16-socket 192-core POWER8 system, a context switching benchmark
>>> with as many software threads as CPUs (so each switch will go in and
>>> out of idle), upstream can achieve a rate of about 1 million context
>>> switches per second. After this patch it goes up to 118 million.
>>> 
>> 
>> I read the patch a couple of times, and I have a suggestion that could
>> be nonsense.  You are, effectively, using mm_cpumask() as a sort of
>> refcount.  You're saying "hey, this mm has no more references, but it
>> still has nonempty mm_cpumask(), so let's send an IPI and shoot down
>> those references too."  I'm wondering whether you actually need the
>> IPI.  What if, instead, you actually treated mm_cpumask as a refcount
>> for real?  Roughly, in __mmdrop(), you would only free the page tables
>> if mm_cpumask() is empty.  And, in the code that removes a CPU from
>> mm_cpumask(), you would check if mm_users == 0 and, if so, check if
>> you just removed the last bit from mm_cpumask and potentially free the
>> mm.
>> 
>> Getting the locking right here could be a bit tricky -- you need to
>> avoid two CPUs simultaneously exiting lazy TLB and thinking they
>> should free the mm, and you also need to avoid an mm with mm_users
>> hitting zero concurrently with the last remote CPU using it lazily
>> exiting lazy TLB.  Perhaps this could be resolved by having mm_count
>> == 1 mean "mm_cpumask() is might contain bits and, if so, it owns the
>> mm" and mm_count == 0 meaning "now it's dead" and using some careful
>> cmpxchg or dec_return to make sure that only one CPU frees it.
>> 
>> Or maybe you'd need a lock or RCU for this, but the idea would be to
>> only ever take the lock after mm_users goes to zero.
> 
> I don't think it's nonsense, it could be a good way to avoid IPIs.
> 
> I haven't seen much problem here that made me too concerned about IPIs 
> yet, so I think the simple patch may be good enough to start with
> for powerpc. I'm looking at avoiding/reducing the IPIs by combining the
> unlazying with the exit TLB flush without doing anything fancy with
> ref counting, but we'll see.
 
 I would be cautious with benchmarking here. I would expect that the
 nasty cases may affect power consumption more than performance — the 
 specific issue is IPIs hitting idle cores, and the main effects are to 
 slow down exit() a bit but also to kick the idle core out of idle. 
 Although, if the idle core is in a deep sleep, that IPI could be 
 *very* slow.
>>> 
>>> It will tend to be self-limiting to some degree (deeper idle cores
>>> would tend to have less chance of IPI) but we have bigger issues on
>>> powerpc with that, like broadcast IPIs to the mm cpumask for THP
>>> management. Power hasn't really shown up as an issue but powerpc
>>> CPUs may have their own requirements and issues there, shall we say.
>>> 
 So I think it’s worth at least giving this a try.
>>> 
>>> To be clear it's not a complete solution itself. The problem is of 
>>> course that mm cpumask gives you false negatives, so the bits
>>> won't always clean up after themselves as CPUs switch away from their
>>> lazy tlb mms.
>> 
>> ^^
>> 
>> False positives: CPU is in the mm_cpumask, but is not using the mm
>> as a lazy tlb. So there can be bits left and never freed.
>> 
>> If you closed the false positives, you're back to a shared mm cache
>> line on lazy mm context switches.
> 
> x86 has this exact problem. At least no more than 64*8 CPUs share the cache 
> line :)
> 
> Can your share your benchmark?

Just testing the IPI rates (on a smaller 176 CPU system), on a
kernel compile, it causes about 300 shootdown interrupts (not
300 broadcasts but total interrupts).

And very short lived fork;exec;exit things like typical scripting
commands doesn't typically generate any.

So yeah the re
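
To make the refcount-style variant discussed above concrete, a toy userspace
model under stated assumptions: treat each lazily-using CPU as holding one
reference and let __mmdrop() hold the final one, so whoever drops last frees
the mm and no IPI is needed. (All names are hypothetical; and note it
sidesteps rather than solves the objection above -- a single shared count
brings back the contended cache line that motivated the patch.)

#include <stdatomic.h>
#include <stdlib.h>

struct mm_model {
	atomic_int refs;	/* stand-in for mm_count/mm_cpumask */
};

static struct mm_model *mm_model_alloc(void)
{
	struct mm_model *mm = malloc(sizeof(*mm));

	atomic_init(&mm->refs, 1);	/* reference owned by the mm_users side */
	return mm;
}

/* a CPU starts using the mm lazily: take a reference */
static void mm_model_lazy_get(struct mm_model *mm)
{
	atomic_fetch_add(&mm->refs, 1);
}

/* dropped by each CPU leaving lazy use, and once by __mmdrop() */
static void mm_model_put(struct mm_model *mm)
{
	if (atomic_fetch_sub(&mm->refs, 1) == 1)
		free(mm);	/* last holder frees; no shootdown IPI */
}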

Re: [PATCH v3 11/12] ppc64/kexec_file: add appropriate regions for memory reserve map

2020-07-15 Thread Thiago Jung Bauermann


Hari Bathini  writes:

> While initrd, elfcorehdr and backup regions are already added to the
> reserve map, there are a few missing regions that need to be added to
> the memory reserve map. Add them here. And now that all the changes
> to load panic kernel are in place, claim likewise.
>
> Signed-off-by: Hari Bathini 
> Tested-by: Pingfan Liu 

Reviewed-by: Thiago Jung Bauermann 

Just one minor nit below.

> ---
>
> v2 -> v3:
> * Unchanged. Added Tested-by tag from Pingfan.
>
> v1 -> v2:
> * Updated add_rtas_mem_range() & add_opal_mem_range() callsites based on
>   the new prototype for these functions.
>
>
>  arch/powerpc/kexec/file_load_64.c |   58 ++---
>  1 file changed, 53 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kexec/file_load_64.c 
> b/arch/powerpc/kexec/file_load_64.c
> index 2531bb5..29e5d11 100644
> --- a/arch/powerpc/kexec/file_load_64.c
> +++ b/arch/powerpc/kexec/file_load_64.c
> @@ -193,6 +193,34 @@ static int get_crash_memory_ranges(struct crash_mem 
> **mem_ranges)
>  }
>  
>  /**
> + * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
> + *  memory regions that should be added to the
> + *  memory reserve map to ensure the region is
> + *  protected from any mischeif.

s/mischeif/mischief/

> + * @mem_ranges: Range list to add the memory ranges to.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
> +{
> + int ret;
> +
> + ret = add_rtas_mem_range(mem_ranges);
> + if (ret)
> + goto out;
> +
> + ret = add_tce_mem_ranges(mem_ranges);
> + if (ret)
> + goto out;
> +
> + ret = add_reserved_ranges(mem_ranges);
> +out:
> + if (ret)
> + pr_err("Failed to setup reserved memory ranges\n");
> + return ret;
> +}

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [RFC PATCH 7/7] lazy tlb: shoot lazies, a non-refcounting lazy tlb option

2020-07-15 Thread Nicholas Piggin
Excerpts from Andy Lutomirski's message of July 14, 2020 10:46 pm:
> 
> 
>> On Jul 13, 2020, at 11:31 PM, Nicholas Piggin  wrote:
>> 
>> Excerpts from Nicholas Piggin's message of July 14, 2020 3:04 pm:
>>> Excerpts from Andy Lutomirski's message of July 14, 2020 4:18 am:
 
> On Jul 13, 2020, at 9:48 AM, Nicholas Piggin  wrote:
> 
> Excerpts from Andy Lutomirski's message of July 14, 2020 1:59 am:
>>> On Thu, Jul 9, 2020 at 6:57 PM Nicholas Piggin  
>>> wrote:
>>> 
>>> On big systems, the mm refcount can become highly contented when doing
>>> a lot of context switching with threaded applications (particularly
>>> switching between the idle thread and an application thread).
>>> 
>>> Abandoning lazy tlb slows switching down quite a bit in the important
>>> user->idle->user cases, so instead implement a non-refcounted scheme
>>> that causes __mmdrop() to IPI all CPUs in the mm_cpumask and shoot down
>>> any remaining lazy ones.
>>> 
>>> On a 16-socket 192-core POWER8 system, a context switching benchmark
>>> with as many software threads as CPUs (so each switch will go in and
>>> out of idle), upstream can achieve a rate of about 1 million context
>>> switches per second. After this patch it goes up to 118 million.
>>> 
>> 
>> I read the patch a couple of times, and I have a suggestion that could
>> be nonsense.  You are, effectively, using mm_cpumask() as a sort of
>> refcount.  You're saying "hey, this mm has no more references, but it
>> still has nonempty mm_cpumask(), so let's send an IPI and shoot down
>> those references too."  I'm wondering whether you actually need the
>> IPI.  What if, instead, you actually treated mm_cpumask as a refcount
>> for real?  Roughly, in __mmdrop(), you would only free the page tables
>> if mm_cpumask() is empty.  And, in the code that removes a CPU from
>> mm_cpumask(), you would check if mm_users == 0 and, if so, check if
>> you just removed the last bit from mm_cpumask and potentially free the
>> mm.
>> 
>> Getting the locking right here could be a bit tricky -- you need to
>> avoid two CPUs simultaneously exiting lazy TLB and thinking they
>> should free the mm, and you also need to avoid an mm with mm_users
>> hitting zero concurrently with the last remote CPU using it lazily
>> exiting lazy TLB.  Perhaps this could be resolved by having mm_count
>> == 1 mean "mm_cpumask() might contain bits and, if so, it owns the
>> mm" and mm_count == 0 meaning "now it's dead" and using some careful
>> cmpxchg or dec_return to make sure that only one CPU frees it.
>> 
>> Or maybe you'd need a lock or RCU for this, but the idea would be to
>> only ever take the lock after mm_users goes to zero.
> 
> I don't think it's nonsense, it could be a good way to avoid IPIs.
> 
> I haven't seen much of a problem here that made me too concerned about IPIs
> yet, so I think the simple patch may be good enough to start with
> for powerpc. I'm looking at avoiding/reducing the IPIs by combining the
> unlazying with the exit TLB flush without doing anything fancy with
> ref counting, but we'll see.
 
 I would be cautious with benchmarking here. I would expect that the
 nasty cases may affect power consumption more than performance — the 
 specific issue is IPIs hitting idle cores, and the main effects are to 
 slow down exit() a bit but also to kick the idle core out of idle. 
 Although, if the idle core is in a deep sleep, that IPI could be 
 *very* slow.
>>> 
>>> It will tend to be self-limiting to some degree (deeper idle cores
>>> would tend to have less chance of IPI) but we have bigger issues on
>>> powerpc with that, like broadcast IPIs to the mm cpumask for THP
>>> management. Power hasn't really shown up as an issue but powerpc
>>> CPUs may have their own requirements and issues there, shall we say.
>>> 
 So I think it’s worth at least giving this a try.
>>> 
>>> To be clear it's not a complete solution itself. The problem is of 
>>> course that mm cpumask gives you false negatives, so the bits
>>> won't always clean up after themselves as CPUs switch away from their
>>> lazy tlb mms.
>> 
>> ^^
>> 
>> False positives: CPU is in the mm_cpumask, but is not using the mm
>> as a lazy tlb. So there can be bits left and never freed.
>> 
>> If you closed the false positives, you're back to a shared mm cache
>> line on lazy mm context switches.
> 
> x86 has this exact problem. At least no more than 64*8 CPUs share the cache 
> line :)
> 
> Can you share your benchmark?

It's just context_switch1_threads from will-it-scale, running with 1/2
the number of CPUs, and core affinity with an SMT processor (so each
thread from the switching pairs gets spread to their own CPU and so you
get the task->idle->task switching).

It's really just about the worst case
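
As an illustration of the scheme Andy describes above (treating the lazy
references as a real count and letting the last CPU out free the mm), here is
a minimal userspace C model. All names are hypothetical stand-ins: mock_mm
for the mm, exit_lazy_tlb() for the real switch-away path. The point is only
that an atomic decrement gives the "only one CPU frees it" guarantee:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct mock_mm {
	atomic_int lazy_refs;	/* one reference per CPU still holding the mm lazily */
	int freed;
};

/* A CPU leaving lazy-TLB mode drops its reference; only the CPU that
 * observes the 1 -> 0 transition frees the mm. */
static void exit_lazy_tlb(struct mock_mm *mm)
{
	if (atomic_fetch_sub(&mm->lazy_refs, 1) == 1) {
		mm->freed = 1;	/* stand-in for __mmdrop() */
		printf("last lazy user: freeing mm\n");
	}
}

int main(void)
{
	struct mock_mm *mm = calloc(1, sizeof(*mm));

	atomic_init(&mm->lazy_refs, 3);	/* pretend three CPUs hold it lazily */
	exit_lazy_tlb(mm);
	exit_lazy_tlb(mm);
	exit_lazy_tlb(mm);		/* only this call frees */
	if (mm->freed)
		free(mm);
	return 0;
}

The cmpxchg/dec_return subtlety shows up here as atomic_fetch_sub(): exactly
one caller observes the 1 -> 0 transition, so a double free is excluded by
construction.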

Re: [PATCH v3 10/12] ppc64/kexec_file: prepare elfcore header for crashing kernel

2020-07-15 Thread Thiago Jung Bauermann


Hari Bathini  writes:

>  /**
> + * get_crash_memory_ranges - Get crash memory ranges. This list includes
> + *   first/crashing kernel's memory regions that
> + *   would be exported via an elfcore.
> + * @mem_ranges:  Range list to add the memory ranges to.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
> +{
> + struct memblock_region *reg;
> + struct crash_mem *tmem;
> + int ret;
> +
> + for_each_memblock(memory, reg) {
> + u64 base, size;
> +
> + base = (u64)reg->base;
> + size = (u64)reg->size;
> +
> + /* Skip backup memory region, which needs a separate entry */
> + if (base == BACKUP_SRC_START) {
> + if (size > BACKUP_SRC_SIZE) {
> + base = BACKUP_SRC_END + 1;
> + size -= BACKUP_SRC_SIZE;
> + } else
> + continue;
> + }
> +
> + ret = add_mem_range(mem_ranges, base, size);
> + if (ret)
> + goto out;
> +
> + /* Try merging adjacent ranges before reallocation attempt */
> + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
> + sort_memory_ranges(*mem_ranges, true);
> + }
> +
> + /* Reallocate memory ranges if there is no space to split ranges */
> + tmem = *mem_ranges;
> + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
> + tmem = realloc_mem_ranges(mem_ranges);
> + if (!tmem)
> + goto out;
> + }
> +
> + /* Exclude crashkernel region */
> + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
> + if (ret)
> + goto out;
> +
> + ret = add_rtas_mem_range(mem_ranges);
> + if (ret)
> + goto out;
> +
> + ret = add_opal_mem_range(mem_ranges);
> + if (ret)
> + goto out;

Maybe I'm confused, but don't you add the RTAS and OPAL regions as
usable memory for the crashkernel? In that case they shouldn't show up
in the core file.

> +
> + /* create a separate program header for the backup region */
> + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
> + if (ret)
> + goto out;
> +
> + sort_memory_ranges(*mem_ranges, false);
> +out:
> + if (ret)
> + pr_err("Failed to setup crash memory ranges\n");
> + return ret;
> +}



> +/**
> + * prepare_elf_headers - Prepare headers for the elfcore to be exported as
> + *   /proc/vmcore by the kdump kernel.
> + * @image:   Kexec image.
> + * @cmem:Crash memory ranges to be exported via elfcore.
> + * @addr:Vmalloc'd memory allocated by 
> crash_prepare_elf64_headers
> + *   to prepare the elf headers.
> + * @sz:  Size of the vmalloc'd memory allocated.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int prepare_elf_headers(struct kimage *image, struct crash_mem *cmem,
> +void **addr, unsigned long *sz)
> +{
> + int ret;
> +
> + ret = crash_prepare_elf64_headers(cmem, false, addr, sz);
> +
> + /* Fix the offset for backup region in the ELF header */
> + if (!ret)
> + update_backup_region_phdr(image, *addr);
> +
> + return ret;
> +}

The code above can be inlined into its caller, I don't see a need to
have a separate function.

> +
> +/**
> + * load_elfcorehdr_segment - Setup crash memory ranges and initialize 
> elfcorehdr
> + *   segment needed to load kdump kernel.
> + * @image:   Kexec image.
> + * @kbuf:Buffer contents and memory parameters.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf 
> *kbuf)
> +{
> + struct crash_mem *cmem = NULL;
> + unsigned long headers_sz;
> + void *headers = NULL;
> + int ret;
> +
> + ret = get_crash_memory_ranges(&cmem);
> + if (ret)
> + goto out;
> +
> + /* Setup elfcorehdr segment */
> + ret = prepare_elf_headers(image, cmem, &headers, &headers_sz);
> + if (ret) {
> + pr_err("Failed to prepare elf headers for the core\n");
> + goto out;
> + }
> +
> + kbuf->buffer = headers;
> + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
> + kbuf->bufsz = kbuf->memsz = headers_sz;
> + kbuf->top_down = false;
> +
> + ret = kexec_add_buffer(kbuf);
> + if (ret) {
> + vfree(headers);
> + goto out;
> + }
> +
> + image->arch.elfcorehdr_addr = kbuf->mem;
> + image->arch.elf_headers_sz = headers_sz;
> + image->arch.elf_headers = header
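
The add_mem_range()/realloc_mem_ranges() calls above follow a common
grow-on-demand shape: append until the array is full, then krealloc() by a
fixed chunk. A self-contained userspace sketch of that pattern (the names and
the chunk size of 16 are illustrative, not the kernel's):

#include <stdio.h>
#include <stdlib.h>

struct range { unsigned long long base, size; };

struct ranges {
	struct range *r;
	int nr, max;
};

/* Append one range, growing the array by a fixed chunk when it fills up,
 * the same shape as add_mem_range() plus realloc_mem_ranges() above. */
static int add_range(struct ranges *rs, unsigned long long base,
		     unsigned long long size)
{
	if (rs->nr == rs->max) {
		int max = rs->max + 16;	/* stand-in for MEM_RANGE_CHUNK_SZ */
		struct range *tmp = realloc(rs->r, max * sizeof(*tmp));

		if (!tmp)
			return -1;
		rs->r = tmp;
		rs->max = max;
	}
	rs->r[rs->nr].base = base;
	rs->r[rs->nr].size = size;
	rs->nr++;
	return 0;
}

int main(void)
{
	struct ranges rs = { 0 };

	for (int i = 0; i < 40; i++)
		if (add_range(&rs, i * 0x1000ULL, 0x1000ULL))
			return 1;
	printf("%d ranges, capacity %d\n", rs.nr, rs.max);
	free(rs.r);
	return 0;
}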

Re: [PATCH v3 02/12] powerpc/kexec_file: mark PPC64 specific code

2020-07-15 Thread Thiago Jung Bauermann


I didn't forget about this patch. I just wanted to see more of the
changes before commenting on it.

Hari Bathini  writes:

> Some of the kexec_file_load code isn't PPC64 specific. Move PPC64
> specific code from kexec/file_load.c to kexec/file_load_64.c. Also,
> rename purgatory/trampoline.S to purgatory/trampoline_64.S in the
> same spirit.

There's only a 64 bit implementation of kexec_file_load() so this is a
somewhat theoretical exercise, but there's no harm in getting the code
organized, so:

Reviewed-by: Thiago Jung Bauermann 

I have just one question below.

> Signed-off-by: Hari Bathini 
> Tested-by: Pingfan Liu 
> ---
>
> v2 -> v3:
> * Unchanged. Added Tested-by tag from Pingfan.
>
> v1 -> v2:
> * No changes.
>
>
>  arch/powerpc/include/asm/kexec.h   |   11 +++
>  arch/powerpc/kexec/Makefile|2 -
>  arch/powerpc/kexec/elf_64.c|7 +-
>  arch/powerpc/kexec/file_load.c |   37 ++
>  arch/powerpc/kexec/file_load_64.c  |  108 ++
>  arch/powerpc/purgatory/Makefile|4 +
>  arch/powerpc/purgatory/trampoline.S    |  117 
>  arch/powerpc/purgatory/trampoline_64.S |  117 
>  8 files changed, 248 insertions(+), 155 deletions(-)
>  create mode 100644 arch/powerpc/kexec/file_load_64.c
>  delete mode 100644 arch/powerpc/purgatory/trampoline.S
>  create mode 100644 arch/powerpc/purgatory/trampoline_64.S



> diff --git a/arch/powerpc/kexec/file_load_64.c 
> b/arch/powerpc/kexec/file_load_64.c
> new file mode 100644
> index 000..e6bff960
> --- /dev/null
> +++ b/arch/powerpc/kexec/file_load_64.c
> @@ -0,0 +1,108 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * ppc64 code to implement the kexec_file_load syscall
> + *
> + * Copyright (C) 2004  Adam Litke (a...@us.ibm.com)
> + * Copyright (C) 2004  IBM Corp.
> + * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
> + * Copyright (C) 2005  R Sharada (shar...@in.ibm.com)
> + * Copyright (C) 2006  Mohan Kumar M (mo...@in.ibm.com)
> + * Copyright (C) 2020  IBM Corporation
> + *
> + * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
> + * Heavily modified for the kernel by
> + * Hari Bathini .
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +const struct kexec_file_ops * const kexec_file_loaders[] = {
> + &kexec_elf64_ops,
> + NULL
> +};
> +
> +/**
> + * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
> + * variables and call setup_purgatory() to initialize
> + * common global variable.
> + * @image: kexec image.
> + * @slave_code:Slave code for the purgatory.
> + * @fdt:   Flattened device tree for the next kernel.
> + * @kernel_load_addr:  Address where the kernel is loaded.
> + * @fdt_load_addr: Address where the flattened device tree is loaded.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
> +   const void *fdt, unsigned long kernel_load_addr,
> +   unsigned long fdt_load_addr)
> +{
> + int ret;
> +
> + ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
> +   fdt_load_addr);
> + if (ret)
> + pr_err("Failed to setup purgatory symbols");
> + return ret;
> +}
> +
> +/**
> + * setup_new_fdt_ppc64 - Update the flattened device-tree of the kernel
> + *   being loaded.
> + * @image:   kexec image being loaded.
> + * @fdt: Flattened device tree for the next kernel.
> + * @initrd_load_addr:Address where the next initrd will be loaded.
> + * @initrd_len:  Size of the next initrd, or 0 if there will be none.
> + * @cmdline: Command line for the next kernel, or NULL if there 
> will
> + *   be none.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
> + unsigned long initrd_load_addr,
> + unsigned long initrd_len, const char *cmdline)
> +{
> + int chosen_node, ret;
> +
> + /* Remove memory reservation for the current device tree. */
> + ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
> +  fdt_totalsize(initial_boot_params));
> + if (ret == 0)
> + pr_debug("Removed old device tree reservation.\n");
> + else if (ret != -ENOENT) {
> + pr_err("Failed to remove old device-tree reservation.\n");
> + return ret;
> + }
> +
> + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len,
> + cmdline, &chosen_node);
> + if (ret)
> + return ret;
> +
> + ret = fdt_setprop(fdt, chosen_node, "linux,boo

Re: [PATCH v3 08/12] ppc64/kexec_file: setup the stack for purgatory

2020-07-15 Thread Thiago Jung Bauermann


Sorry, forgot to send one comment for this patch:

Hari Bathini  writes:

> @@ -898,10 +900,37 @@ int setup_purgatory_ppc64(struct kimage *image, const 
> void *slave_code,
>   goto out;
>   }
>
> + /* Setup the stack top */
> + stack_buf = kexec_purgatory_get_symbol_addr(image, "stack_buf");
> + if (!stack_buf)
> + goto out;
> +
> + val = (u64)stack_buf + KEXEC_PURGATORY_STACK_SIZE;
> + ret = kexec_purgatory_get_set_symbol(image, "stack", &val, sizeof(val),
> +  false);
> + if (ret)
> + goto out;
> +
>   /* Setup the TOC pointer */
>   val = get_toc_ptr(&(image->purgatory_info));
>   ret = kexec_purgatory_get_set_symbol(image, "my_toc", &val, sizeof(val),
>false);
> + if (ret)
> + goto out;
> +
> + /* Setup OPAL base & entry values */
> + dn = of_find_node_by_path("/ibm,opal");
> + if (dn) {
> + of_property_read_u64(dn, "opal-base-address", &val);
> + ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
> +  sizeof(val), false);
> + if (ret)
> + goto out;
> +
> + of_property_read_u64(dn, "opal-entry-address", &val);
> + ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
> +  sizeof(val), false);

You need to call of_node_put(dn) here and in the if (ret) case above.

> + }
>  out:
>   if (ret)
>   pr_err("Failed to setup purgatory symbols");

--
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [PATCH v3 09/12] ppc64/kexec_file: setup backup region for kdump kernel

2020-07-15 Thread Thiago Jung Bauermann


Hari Bathini  writes:

> @@ -968,7 +1040,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
> *fdt,
>
>   /*
>* Restrict memory usage for kdump kernel by setting up
> -  * usable memory ranges.
> +  * usable memory ranges and memory reserve map.
>*/
>   if (image->type == KEXEC_TYPE_CRASH) {
>   ret = get_usable_memory_ranges(&umem);
> @@ -980,6 +1052,24 @@ int setup_new_fdt_ppc64(const struct kimage *image, 
> void *fdt,
>   pr_err("Error setting up usable-memory property for 
> kdump kernel\n");
>   goto out;
>   }
> +
> + ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_START + BACKUP_SRC_SIZE,
> +   crashk_res.start - BACKUP_SRC_SIZE);

I believe this answers my question from the other email about how the
crashkernel is prevented from stomping in the crashed kernel's memory,
right? I needed to think for a bit to understand what the above
reservation was protecting. I think it's worth adding a comment.

> + if (ret) {
> + pr_err("Error reserving crash memory: %s\n",
> +fdt_strerror(ret));
> + goto out;
> + }
> + }
> +
> + if (image->arch.backup_start) {
> + ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
> +   BACKUP_SRC_SIZE);
> + if (ret) {
> + pr_err("Error reserving memory for backup: %s\n",
> +fdt_strerror(ret));
> + goto out;
> + }
>   }

This is only true for KEXEC_TYPE_CRASH, if I'm following the code
correctly. I think it would be clearer to move this if inside the
KEXEC_TYPE_CRASH block above.

>
>   ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len,



> diff --git a/arch/powerpc/purgatory/purgatory_64.c 
> b/arch/powerpc/purgatory/purgatory_64.c
> new file mode 100644
> index 000..1eca74c
> --- /dev/null
> +++ b/arch/powerpc/purgatory/purgatory_64.c
> @@ -0,0 +1,36 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * purgatory: Runs between two kernels
> + *
> + * Copyright 2020, Hari Bathini, IBM Corporation.
> + */
> +
> +#include 
> +#include 
> +
> +extern unsigned long backup_start;
> +
> +static void *__memcpy(void *dest, const void *src, unsigned long n)
> +{
> + unsigned long i;
> + unsigned char *d;
> + const unsigned char *s;
> +
> + d = dest;
> + s = src;
> + for (i = 0; i < n; i++)
> + d[i] = s[i];
> +
> + return dest;
> +}
> +
> +void purgatory(void)
> +{
> + void *dest, *src;
> +
> + src = (void *)BACKUP_SRC_START;
> + if (backup_start) {
> + dest = (void *)backup_start;
> + __memcpy(dest, src, BACKUP_SRC_SIZE);
> + }
> +}

In general I'm in favor of using C code over assembly, but having to
bring in that relocation support just for the above makes me wonder if
it's worth it in this case.

--
Thiago Jung Bauermann
IBM Linux Technology Center


[PATCH v3] powerpc: select ARCH_HAS_MEMBARRIER_SYNC_CORE

2020-07-15 Thread Nicholas Piggin
powerpc return from interrupt and return from system call sequences are
context synchronising.

Signed-off-by: Nicholas Piggin 
---

v3: more comment fixes

 .../features/sched/membarrier-sync-core/arch-support.txt  | 4 ++--
 arch/powerpc/Kconfig  | 1 +
 arch/powerpc/include/asm/exception-64e.h  | 6 +-
 arch/powerpc/include/asm/exception-64s.h  | 8 
 arch/powerpc/kernel/entry_32.S| 6 ++
 5 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt 
b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
index 8a521a622966..52ad74a25f54 100644
--- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -5,7 +5,7 @@
 #
 # Architecture requirements
 #
-# * arm/arm64
+# * arm/arm64/powerpc
 #
 # Rely on implicit context synchronization as a result of exception return
 # when returning from IPI handler, and when returning to user-space.
@@ -45,7 +45,7 @@
 |   nios2: | TODO |
 |openrisc: | TODO |
 |  parisc: | TODO |
-| powerpc: | TODO |
+| powerpc: |  ok  |
 |   riscv: | TODO |
 |s390: | TODO |
 |  sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff..920c4e3ca4ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -131,6 +131,7 @@ config PPC
select ARCH_HAS_PTE_DEVMAP  if PPC_BOOK3S_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
+   select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_SCALED_CPUTIME  if VIRT_CPU_ACCOUNTING_NATIVE 
&& PPC_BOOK3S_64
select ARCH_HAS_STRICT_KERNEL_RWX   if (PPC32 && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
diff --git a/arch/powerpc/include/asm/exception-64e.h 
b/arch/powerpc/include/asm/exception-64e.h
index 54a98ef7d7fe..72b6657acd2d 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -204,7 +204,11 @@ exc_##label##_book3e:
LOAD_REG_ADDR(r3,interrupt_base_book3e);\
ori r3,r3,vector_offset@l;  \
mtspr   SPRN_IVOR##vector_number,r3;
-
+/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
+ */
 #define RFI_TO_KERNEL  \
rfi
 
diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 47bd4ea0837d..d7a1a427a690 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -68,6 +68,14 @@
  *
  * The nop instructions allow us to insert one or more instructions to flush 
the
  * L1-D cache when returning to userspace or a guest.
+ *
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which hrfid, rfid, and rfscv are) to support ARCH_HAS_MEMBARRIER_SYNC_CORE
+ * without additional synchronisation instructions.
+ *
+ * soft-masked interrupt replay does not include a context-synchronising rfid,
+ * but those always return to kernel, the sync is only required when returning
+ * to user.
  */
 #define RFI_FLUSH_SLOT \
RFI_FLUSH_FIXUP_SECTION;\
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 217ebdf5b00b..f4d0af8e1136 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -35,6 +35,12 @@
 
 #include "head_32.h"
 
+/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
+ */
+
 /*
  * Align to 4k in order to ensure that all functions modyfing srr0/srr1
  * fit into one page in order to not encounter a TLB miss between the
-- 
2.23.0
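
For context, what selecting ARCH_HAS_MEMBARRIER_SYNC_CORE buys userspace is
the SYNC_CORE membarrier commands, which JITs use to core-serialize all
threads after rewriting code. A minimal usage sketch, assuming a kernel built
with this support and with error handling abbreviated:

#define _GNU_SOURCE
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, unsigned int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/* A JIT registers once up front... */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0))
		perror("register");

	/* ...then issues this after rewriting code and before jumping to it,
	 * so every thread of the process executes a core-serializing
	 * instruction (on powerpc, the context-synchronising rfi on
	 * interrupt return that this patch documents). */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0))
		perror("sync-core");
	return 0;
}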



Re: [PATCH net-next] ibmvnic: Increase driver logging

2020-07-15 Thread David Miller
From: Jakub Kicinski 
Date: Wed, 15 Jul 2020 17:06:32 -0700

> On Wed, 15 Jul 2020 18:51:55 -0500 Thomas Falcon wrote:
>>  free_netdev(netdev);
>>  dev_set_drvdata(&dev->dev, NULL);
>> +netdev_info(netdev, "VNIC client device has been successfully 
>> removed.\n");
> 
> A step too far, perhaps.
> 
> In general this patch looks a little questionable IMHO, this amount of
> logging output is not commonly seen in drivers. All the info
> messages are just static text, not even carrying any extra information.
> In an era of ftrace, and bpftrace, do we really need this?

Agreed, this is too much.  This is debugging, and thus suitable for tracing
facilities, at best.


Re: [PATCH v2] powerpc: select ARCH_HAS_MEMBARRIER_SYNC_CORE

2020-07-15 Thread Nicholas Piggin
Excerpts from Mathieu Desnoyers's message of July 15, 2020 10:27 pm:
> - On Jul 15, 2020, at 5:48 AM, Nicholas Piggin npig...@gmail.com wrote:
> [...]
>> index 47bd4ea0837d..a4704f405e8d 100644
>> --- a/arch/powerpc/include/asm/exception-64s.h
>> +++ b/arch/powerpc/include/asm/exception-64s.h
>> @@ -68,6 +68,13 @@
>>  *
>>  * The nop instructions allow us to insert one or more instructions to flush 
>> the
>>  * L1-D cache when returning to userspace or a guest.
>> + *
>> + * powerpc relies on return from interrupt/syscall being context 
>> synchronising
>> + * (which hrfid, rfid, and rfscv are) to support 
>> ARCH_HAS_MEMBARRIER_SYNC_CORE
>> + * without additional additional synchronisation instructions. soft-masked
>> + * interrupt replay does not include a context-synchronising rfid, but those
>> + * always return to kernel, the context sync is only required for IPIs which
>> + * return to user.
>>  */
>> #define RFI_FLUSH_SLOT   
>> \
>>  RFI_FLUSH_FIXUP_SECTION;\
> 
> I suspect the statement "the context sync is only required for IPIs which 
> return to
> user." is misleading.
> 
> As I recall, we need more than just context sync after IPI. We need
> context sync
> in the return path of any trap/interrupt/system call which returns to
> user-space, else
> we'd need to add the proper core serializing barriers in the scheduler, as we 
> had
> to do for lazy tlb on x86.
> 
> Or am I missing something ?

Maybe ambiguous wording. For IPIs, the context synch is only required 
for those which return to user. Other things also require context sync.

I will try to improve it.

Thanks,
Nick


Re: [PATCH v2] powerpc: select ARCH_HAS_MEMBARRIER_SYNC_CORE

2020-07-15 Thread Nicholas Piggin
Excerpts from Andreas Schwab's message of July 15, 2020 8:08 pm:
> On Jul 15 2020, Nicholas Piggin wrote:
> 
>> diff --git a/arch/powerpc/include/asm/exception-64e.h 
>> b/arch/powerpc/include/asm/exception-64e.h
>> index 54a98ef7d7fe..071d7ccb830f 100644
>> --- a/arch/powerpc/include/asm/exception-64e.h
>> +++ b/arch/powerpc/include/asm/exception-64e.h
>> @@ -204,7 +204,11 @@ exc_##label##_book3e:
>>  LOAD_REG_ADDR(r3,interrupt_base_book3e);\
>>  ori r3,r3,vector_offset@l;  \
>>  mtspr   SPRN_IVOR##vector_number,r3;
>> -
>> +/*
>> + * powerpc relies on return from interrupt/syscall being context 
>> synchronising
>> + * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without 
>> additional
>> + * additional synchronisation instructions.
> 
> s/additional//

Will fix in a respin.

Thanks,
Nick


Re: [PATCH v3] powerpc/pseries: detect secure and trusted boot state of the system.

2020-07-15 Thread Daniel Axtens
Hi Nayna,

Looks good to me.

Sorry for not noticing this before, but I think
> +#include 
is now superfluous (I think it's leftover from the machine_is
version?). Maybe mpe will take pity on you and remove it when he picks
up your patch.

Kind regards,
Daniel

>  
>  static struct device_node *get_ppc_fw_sb_node(void)
>  {
> @@ -23,12 +24,19 @@ bool is_ppc_secureboot_enabled(void)
>  {
>   struct device_node *node;
>   bool enabled = false;
> + u32 secureboot;
>  
>   node = get_ppc_fw_sb_node();
>   enabled = of_property_read_bool(node, "os-secureboot-enforcing");
> -
>   of_node_put(node);
>  
> + if (enabled)
> + goto out;
> +
> + if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot))
> + enabled = (secureboot > 1);
> +
> +out:
>   pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled");
>  
>   return enabled;
> @@ -38,12 +46,19 @@ bool is_ppc_trustedboot_enabled(void)
>  {
>   struct device_node *node;
>   bool enabled = false;
> + u32 trustedboot;
>  
>   node = get_ppc_fw_sb_node();
>   enabled = of_property_read_bool(node, "trusted-enabled");
> -
>   of_node_put(node);
>  
> + if (enabled)
> + goto out;
> +
> + if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot))
> + enabled = (trustedboot > 0);
> +
> +out:
>   pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled");
>  
>   return enabled;
> -- 
> 2.26.2


Re: [PATCH v3 08/12] ppc64/kexec_file: setup the stack for purgatory

2020-07-15 Thread Thiago Jung Bauermann


Hari Bathini  writes:

> To avoid any weird errors, the purgatory should run with its own
> stack. Set one up by adding the stack buffer to .data section of
> the purgatory. Also, setup opal base & entry values in r8 & r9
> registers to help early OPAL debugging.
>
> Signed-off-by: Hari Bathini 
> Tested-by: Pingfan Liu 

Reviewed-by: Thiago Jung Bauermann 

> ---
>
> v2 -> v3:
> * Unchanged. Added Tested-by tag from Pingfan.
>
> v1 -> v2:
> * Setting up opal base & entry values in r8 & r9 for early OPAL debug.
>
>
>  arch/powerpc/include/asm/kexec.h   |4 
>  arch/powerpc/kexec/file_load_64.c  |   29 +
>  arch/powerpc/purgatory/trampoline_64.S |   32 
>  3 files changed, 65 insertions(+)
>

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [PATCH 1/1] powerpc: Fix incorrect stw{, ux, u, x} instructions in __set_pte_at

2020-07-15 Thread Sasha Levin
Hi

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag
fixing commit: 9bf2b5cdc5fe ("powerpc: Fixes for CONFIG_PTE_64BIT for SMP 
support").

The bot has tested the following trees: v5.7.8, v5.4.51, v4.19.132, v4.14.188, 
v4.9.230, v4.4.230.

v5.7.8: Build OK!
v5.4.51: Build OK!
v4.19.132: Build OK!
v4.14.188: Failed to apply! Possible dependencies:
45201c8794693 ("powerpc/nohash: Remove hash related code from nohash 
headers.")
4e003747043d5 ("powerpc/64s: Replace CONFIG_PPC_STD_MMU_64 with 
CONFIG_PPC_BOOK3S_64")
d5808ffaec817 ("powerpc/nohash: Use IS_ENABLED() to simplify 
__set_pte_at()")

v4.9.230: Failed to apply! Possible dependencies:
45201c8794693 ("powerpc/nohash: Remove hash related code from nohash 
headers.")
4546561551106 ("powerpc/asm: Use OFFSET macro in asm-offsets.c")
4e003747043d5 ("powerpc/64s: Replace CONFIG_PPC_STD_MMU_64 with 
CONFIG_PPC_BOOK3S_64")
5d451a87e5ebb ("powerpc/64: Retrieve number of L1 cache sets from 
device-tree")
7c5b06cadf274 ("KVM: PPC: Book3S HV: Adapt TLB invalidations to work on 
POWER9")
83677f551e0a6 ("KVM: PPC: Book3S HV: Adjust host/guest context switch for 
POWER9")
902e06eb86cd6 ("powerpc/32: Change the stack protector canary value per 
task")
a3d96f70c1477 ("powerpc/64s: Fix system reset vs general interrupt 
reentrancy")
bd067f83b0840 ("powerpc/64: Fix naming of cache block vs. cache line")
d5808ffaec817 ("powerpc/nohash: Use IS_ENABLED() to simplify 
__set_pte_at()")
e2827fe5c1566 ("powerpc/64: Clean up ppc64_caches using a struct per cache")
e9cf1e085647b ("KVM: PPC: Book3S HV: Add new POWER9 guest-accessible SPRs")
f4c51f841d2ac ("KVM: PPC: Book3S HV: Modify guest entry/exit paths to 
handle radix guests")

v4.4.230: Failed to apply! Possible dependencies:
1ca7212932862 ("powerpc/mm: Move PTE bits from generic functions to hash64 
functions.")
26b6a3d9bb48f ("powerpc/mm: move pte headers to book3s directory")
371352ca0e7f3 ("powerpc/mm: Move hash64 PTE bits from book3s/64/pgtable.h 
to hash.h")
3dfcb315d81e6 ("powerpc/mm: make a separate copy for book3s")
ab537dca2f330 ("powerpc/mm: Move hash specific pte width and other defines 
to book3s")
b0412ea94bcbd ("powerpc/mm: Drop pte-common.h from BOOK3S 64")
cbbb8683fb632 ("powerpc/mm: Delete booke bits from book3s")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?

-- 
Thanks
Sasha


Re: [PATCH v3 07/12] ppc64/kexec_file: add support to relocate purgatory

2020-07-15 Thread Thiago Jung Bauermann


Hari Bathini  writes:

> Right now purgatory implementation is only minimal. But if purgatory
> code is to be enhanced to copy memory to the backup region and verify

Can't the memcpy be done in asm? We have arch/powerpc/lib/memcpy_64.S
for example, perhaps it could be linked in with the purgatory?

> sha256 digest, relocations may have to be applied to the purgatory.

Do we want to do the sha256 verification? My original patch series for
kexec_file_load() had a purgatory in C from kexec-tools which did the
sha256 verification but Michael Ellerman thought it was unnecessary and
decided to use the simpler purgatory in asm from kexec-lite.

As a result, this relocation processing became unnecessary.

> So, add support to relocate purgatory in kexec_file_load system call
> by setting up TOC pointer and applying RELA relocations as needed.

If we do want to use a C purgatory, Michael Ellerman had suggested
building it as a Position Independent Executable, which greatly reduces
the number and types of relocations that are needed. See patches 4 and 9
here:

https://lore.kernel.org/linuxppc-dev/1478748449-3894-1-git-send-email-bauer...@linux.vnet.ibm.com/

In the series above I hadn't converted x86 to PIE. If I had done that,
possibly Dave Young's opinion would have been different. :-)

If that's still not desirable, he suggested in that discussion lifting
some code from x86 to generic code, which I implemented and would
simplify this patch as well:

https://lore.kernel.org/linuxppc-dev/5009580.5GxAkTrMYA@morokweng/

> Reported-by: kernel test robot 
> [lkp: In v1, 'struct mem_sym' was declared in parameter list]
> Signed-off-by: Hari Bathini 
> ---
>
> v2 -> v3:
> * Fixed get_toc_section() to return the section info that had relocations
>   applied, to calculate the correct toc pointer.
> * Fixed how relocation value is converted to relative while applying
>   R_PPC64_REL64 & R_PPC64_REL32 relocations.
>
> v1 -> v2:
> * Fixed wrong use of 'struct mem_sym' in local_entry_offset() as
>   reported by lkp. lkp report for reference:
> - https://lore.kernel.org/patchwork/patch/1264421/
>
>
>  arch/powerpc/kexec/file_load_64.c  |  337 
>  arch/powerpc/purgatory/trampoline_64.S |8 +
>  2 files changed, 345 insertions(+)

--
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [PATCH net-next] ibmvnic: Increase driver logging

2020-07-15 Thread Jakub Kicinski
On Wed, 15 Jul 2020 18:51:55 -0500 Thomas Falcon wrote:
>   free_netdev(netdev);
>   dev_set_drvdata(&dev->dev, NULL);
> + netdev_info(netdev, "VNIC client device has been successfully 
> removed.\n");

A step too far, perhaps.

In general this patch looks a little questionable IMHO, this amount of
logging output is not commonly seen in drivers. All the info
messages are just static text, not even carrying any extra information.
In an era of ftrace, and bpftrace, do we really need this?
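
The conventional alternative the reviewers are pointing at is netdev_dbg(),
which is compiled out or gated by dynamic debug instead of always landing in
the log. A userspace model of the difference; the macro below is a stand-in
for the kernel's, which is additionally runtime-switchable via dynamic debug:

#include <stdio.h>

#ifdef DEBUG
#define netdev_dbg(fmt, ...) fprintf(stderr, "dbg: " fmt, ##__VA_ARGS__)
#else
#define netdev_dbg(fmt, ...) do { } while (0)	/* disappears by default */
#endif

int main(void)
{
	/* Silent unless built with -DDEBUG, so routine operation does not
	 * spam the log. */
	netdev_dbg("RX queue buffer pools allocated successfully\n");

	printf("ibmvnic probe complete\n");	/* the one line users see */
	return 0;
}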


[PATCH net-next] ibmvnic: Increase driver logging

2020-07-15 Thread Thomas Falcon
Improve the ibmvnic driver's logging capabilities by providing
more informational messages to track driver operations, facilitating
first-pass debugging.

Signed-off-by: Thomas Falcon 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 76 --
 1 file changed, 62 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 0fd7eae25fe9..7382f11872fc 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -561,6 +561,7 @@ static int init_rx_pools(struct net_device *netdev)
u64 *size_array;
int i, j;
 
+   netdev_info(netdev, "Initializing RX queue buffer pools.\n");
rxadd_subcrqs =
be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
@@ -618,6 +619,7 @@ static int init_rx_pools(struct net_device *netdev)
rx_pool->next_alloc = 0;
rx_pool->next_free = 0;
}
+   netdev_info(netdev, "RX queue buffer pools allocated successfully.\n");
 
return 0;
 }
@@ -738,6 +740,7 @@ static int init_tx_pools(struct net_device *netdev)
int tx_subcrqs;
int i, rc;
 
+   netdev_info(netdev, "Initializing TX queue buffer pools.\n");
tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
adapter->tx_pool = kcalloc(tx_subcrqs,
   sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
@@ -768,7 +771,7 @@ static int init_tx_pools(struct net_device *netdev)
return rc;
}
}
-
+   netdev_info(netdev, "TX queue buffer pools allocated successfully.\n");
return 0;
 }
 
@@ -792,6 +795,7 @@ static void ibmvnic_napi_disable(struct ibmvnic_adapter 
*adapter)
if (!adapter->napi_enabled)
return;
 
+   netdev_info(adapter->netdev, "Disable all NAPI threads.\n");
for (i = 0; i < adapter->req_rx_queues; i++) {
netdev_dbg(adapter->netdev, "Disabling napi[%d]\n", i);
napi_disable(&adapter->napi[i]);
@@ -910,12 +914,14 @@ static int ibmvnic_login(struct net_device *netdev)
return -1;
}
} else if (adapter->init_done_rc) {
-   netdev_warn(netdev, "Adapter login failed\n");
+   netdev_warn(netdev, "Adapter login failed, rc = %d\n",
+   adapter->init_done_rc);
return -1;
}
} while (retry);
 
__ibmvnic_set_mac(netdev, adapter->mac_addr);
+   netdev_info(netdev, "Adapter login success!\n");
 
return 0;
 }
@@ -934,6 +940,7 @@ static void release_login_rsp_buffer(struct ibmvnic_adapter 
*adapter)
 
 static void release_resources(struct ibmvnic_adapter *adapter)
 {
+   netdev_info(adapter->netdev, "Releasing VNIC client device memory 
structures.\n");
release_vpd_data(adapter);
 
release_tx_pools(adapter);
@@ -941,6 +948,7 @@ static void release_resources(struct ibmvnic_adapter 
*adapter)
 
release_napi(adapter);
release_login_rsp_buffer(adapter);
+   netdev_info(adapter->netdev, "VNIC client device memory released.\n");
 }
 
 static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
@@ -964,7 +972,7 @@ static int set_link_state(struct ibmvnic_adapter *adapter, 
u8 link_state)
reinit_completion(&adapter->init_done);
rc = ibmvnic_send_crq(adapter, &crq);
if (rc) {
-   netdev_err(netdev, "Failed to set link state\n");
+   netdev_err(netdev, "Failed to set link state, rc = 
%d\n", rc);
return rc;
}
 
@@ -1098,6 +1106,8 @@ static int init_resources(struct ibmvnic_adapter *adapter)
struct net_device *netdev = adapter->netdev;
int rc;
 
+   netdev_info(netdev, "Allocate device resources.\n");
+
rc = set_real_num_queues(netdev);
if (rc)
return rc;
@@ -1126,7 +1136,11 @@ static int init_resources(struct ibmvnic_adapter 
*adapter)
return rc;
 
rc = init_tx_pools(netdev);
-   return rc;
+   if (rc)
+   return rc;
+
+   netdev_info(netdev, "Device resources allocated.\n");
+   return 0;
 }
 
 static int __ibmvnic_open(struct net_device *netdev)
@@ -1136,9 +1150,10 @@ static int __ibmvnic_open(struct net_device *netdev)
int i, rc;
 
adapter->state = VNIC_OPENING;
+   netdev_info(netdev, "Allocating RX buffer pools and enable NAPI 
structures.\n");
replenish_pools(adapter);
ibmvnic_napi_enable(adapter);
-
+   netdev_info(netdev, "Activating RX and TX interrupt lines.\n");
/* We're ready to receive frames, enable the sub-crq interrupts and
 * set the logical link stat

[PATCH] powerpc/vdso: Fix vdso cpu truncation

2020-07-15 Thread Anton Blanchard
From: Milton Miller 

The code in vdso_cpu_init that exposes the cpu and numa node to
userspace via SPRG_VDSO incorrectly masks the cpu to 12 bits. This means
that any kernel running on a box with more than 4096 threads (NR_CPUS
advertises a limit of 8192 cpus) would expose userspace to two cpu
contexts running at the same time with the same cpu number.

Note: I'm not aware of any distro shipping a kernel with support for more
than 4096 threads today, nor of any system image that currently exceeds
4096 threads. Found via code browsing.

Fixes: 18ad51dd342a7eb09dbcd059d0b451b616d4dafc ("powerpc: Add VDSO version of 
getcpu")
Signed-off-by: Milton Miller 
Signed-off-by: Anton Blanchard 
---
 arch/powerpc/kernel/vdso.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index e0f4ba45b6cc..8dad44262e75 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -677,7 +677,7 @@ int vdso_getcpu_init(void)
node = cpu_to_node(cpu);
WARN_ON_ONCE(node > 0xffff);
 
-   val = (cpu & 0xfff) | ((node & 0xffff) << 16);
+   val = (cpu & 0xffff) | ((node & 0xffff) << 16);
mtspr(SPRN_SPRG_VDSO_WRITE, val);
get_paca()->sprg_vdso = val;
 
-- 
2.26.2
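
A quick demonstration of the truncation being fixed: with the old 12-bit
mask, cpu 0 and cpu 4096 produce the same SPRG value, while the 16-bit mask
keeps them distinct. Standalone C that mirrors only the masking arithmetic of
the patch:

#include <stdio.h>

int main(void)
{
	unsigned int node = 1, cpu_a = 0, cpu_b = 4096;

	unsigned int old_a = (cpu_a & 0xfff)  | ((node & 0xffff) << 16);
	unsigned int old_b = (cpu_b & 0xfff)  | ((node & 0xffff) << 16);
	unsigned int new_b = (cpu_b & 0xffff) | ((node & 0xffff) << 16);

	/* 4096 & 0xfff == 0, so both CPUs encode to the same value */
	printf("old mask: cpu0=%#x cpu4096=%#x\n", old_a, old_b);
	printf("new mask: cpu4096=%#x\n", new_b);
	return 0;
}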



Re: [PATCH v3 06/12] ppc64/kexec_file: restrict memory usage of kdump kernel

2020-07-15 Thread Thiago Jung Bauermann


Hari Bathini  writes:

>  /**
> + * get_usable_memory_ranges - Get usable memory ranges. This list includes
> + *regions like crashkernel, opal/rtas & 
> tce-table,
> + *that kdump kernel could use.
> + * @mem_ranges:   Range list to add the memory ranges to.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
> +{
> + int ret;
> +
> + /* First memory block & crashkernel region */
> + ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);

This is a bit surprising. I guess I don't have a complete big picture of
the patch series yet. What prevents the crashkernel from using memory at
the [0, _end] range and overwriting the crashed kernel's memory?

Shouldn't the above range start at crashk_res.start?

> + if (ret)
> + goto out;
> +
> + ret = add_rtas_mem_range(mem_ranges);
> + if (ret)
> + goto out;
> +
> + ret = add_opal_mem_range(mem_ranges);
> + if (ret)
> + goto out;
> +
> + ret = add_tce_mem_ranges(mem_ranges);
> +out:
> + if (ret)
> + pr_err("Failed to setup usable memory ranges\n");
> + return ret;
> +}
> +
> +/**
>   * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
>   *  in the memory regions between buf_min & 
> buf_max
>   *  for the buffer. If found, sets kbuf->mem.
> @@ -261,6 +305,322 @@ static int locate_mem_hole_bottom_up_ppc64(struct 
> kexec_buf *kbuf,
>  }
>
>  /**
> + * check_realloc_usable_mem - Reallocate buffer if it can't accommodate 
> entries
> + * @um_info:  Usable memory buffer and ranges info.
> + * @cnt:  No. of entries to accommodate.
> + *
> + * Returns 0 on success, negative errno on error.

It actually returns the buffer on success, and NULL on error.

> + */
> +static uint64_t *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
> +{
> + void *tbuf;
> +
> + if (um_info->size >=
> + ((um_info->idx + cnt) * sizeof(*(um_info->buf))))
> + return um_info->buf;
> +
> + um_info->size += MEM_RANGE_CHUNK_SZ;
> + tbuf = krealloc(um_info->buf, um_info->size, GFP_KERNEL);
> + if (!tbuf) {
> + um_info->size -= MEM_RANGE_CHUNK_SZ;
> + return NULL;
> + }
> +
> + memset(tbuf + um_info->idx, 0, MEM_RANGE_CHUNK_SZ);
> + return tbuf;
> +}



> +/**
> + * get_node_path - Get the full path of the given node.
> + * @dn:Node.
> + * @path:  Updated with the full path of the node.
> + *
> + * Returns nothing.
> + */
> +static void get_node_path(struct device_node *dn, char *path)
> +{
> + if (!dn)
> + return;
> +
> + get_node_path(dn->parent, path);

Is it ok to do recursion in the kernel? In this case I believe it's not
problematic since the maximum call depth will be the maximum depth of a
device tree node which shouldn't be too much. Also, there are no local
variables in this function. But I thought it was worth mentioning.

> + sprintf(path, "/%s", dn->full_name);
> +}
> +
> +/**
> + * get_node_pathlen - Get the full path length of the given node.
> + * @dn:   Node.
> + *
> + * Returns the length of the full path of the node.
> + */
> +static int get_node_pathlen(struct device_node *dn)
> +{
> + int len = 0;
> +
> + while (dn) {
> + len += strlen(dn->full_name) + 1;
> + dn = dn->parent;
> + }
> + len++;
> +
> + return len;
> +}
> +
> +/**
> + * add_usable_mem_property - Add usable memory property for the given
> + *   memory node.
> + * @fdt: Flattened device tree for the kdump kernel.
> + * @dn:  Memory node.
> + * @um_info: Usable memory buffer and ranges info.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int add_usable_mem_property(void *fdt, struct device_node *dn,
> +struct umem_info *um_info)
> +{
> + int n_mem_addr_cells, n_mem_size_cells, node;
> + int i, len, ranges, cnt, ret;
> + uint64_t base, end, *buf;
> + const __be32 *prop;
> + char *pathname;
> +
> + /* Allocate memory for node path */
> + pathname = kzalloc(ALIGN(get_node_pathlen(dn), 8), GFP_KERNEL);
> + if (!pathname)
> + return -ENOMEM;
> +
> + /* Get the full path of the memory node */
> + get_node_path(dn, pathname);
> + pr_debug("Memory node path: %s\n", pathname);
> +
> + /* Now that we know the path, find its offset in kdump kernel's fdt */
> + node = fdt_path_offset(fdt, pathname);
> + if (node < 0) {
> + pr_err("Malformed device tree: error reading %s\n",
> +pathname);
> + ret = -EINVAL;
> + goto out;
> + }
> 
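
On the recursion question above: the same path construction can be done
iteratively with a small explicit stack, bounded by an assumed maximum
device-tree depth. A userspace sketch; struct node and the depth limit of 32
are illustrative stand-ins, not kernel API:

#include <stdio.h>

struct node {
	const char *full_name;
	struct node *parent;
};

/* Collect ancestors on an explicit stack, then emit root-first:
 * same output as the recursive version, with no recursion. */
static void node_path(const struct node *dn, char *buf, size_t len)
{
	const struct node *stack[32];	/* assumed maximum tree depth */
	int depth = 0;
	size_t off = 0;

	for (; dn && depth < 32; dn = dn->parent)
		stack[depth++] = dn;

	buf[0] = '\0';
	while (depth-- && off < len)
		off += snprintf(buf + off, len - off, "/%s",
				stack[depth]->full_name);
}

int main(void)
{
	struct node soc = { "soc", NULL };
	struct node mem = { "memory@0", &soc };
	char path[64];

	node_path(&mem, path, sizeof(path));
	printf("%s\n", path);	/* prints "/soc/memory@0" */
	return 0;
}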

Re: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86

2020-07-15 Thread Benjamin Herrenschmidt
On Wed, 2020-07-15 at 17:12 -0500, Bjorn Helgaas wrote:
> > I've 'played' with PCIe error handling - without much success.
> > What might be useful is for a driver that has just read ~0u to
> > be able to ask 'has there been an error signalled for this device?'.
> 
> In many cases a driver will know that ~0 is not a valid value for the
> register it's reading.  But if ~0 *could* be valid, an interface like
> you suggest could be useful.  I don't think we have anything like that
> today, but maybe we could.  It would certainly be nice if the PCI core
> noticed, logged, and cleared errors.  We have some of that for AER,
> but that's an optional feature, and support for the error bits in the
> garden-variety PCI_STATUS register is pretty haphazard.  As you note
> below, this sort of SERR/PERR reporting is frequently hard-wired in
> ways that take it out of our purview.

We do have pci_channel_state (via pci_channel_offline()) which covers
the cases where the underlying error handling (such as EEH or unplug)
results in the device being offlined though this tend to be
asynchronous so it might take a few ~0's before you get it.

It's typically used to break potentially infinite loops in some
drivers.

There is no interface to check whether *an* error happened though for
the most cases it will be captured in the status register, which is
harvested (and cleared ?) by some EDAC drivers iirc... 

All this lacks coordination, I agree.

Cheers,
Ben.
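
The loop-breaking pattern Ben mentions looks roughly like this from a
driver's point of view: read, see all-ones, then ask whether the channel is
offline before retrying. A userspace model with the PCI side stubbed out;
pci_channel_offline() and the EEH freeze are simulated, and the all-ones read
value is the part that matches real hardware:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool channel_offline;	/* stand-in for pci_channel_offline(pdev) */

/* Simulated MMIO read: the "device" dies on the third access and, as dead
 * PCI devices do, reads as all-ones from then on. */
static uint32_t mmio_read(int i)
{
	if (i == 2)
		channel_offline = true;
	return channel_offline ? 0xffffffff : 0x1234;
}

int main(void)
{
	for (int i = 0; ; i++) {
		uint32_t v = mmio_read(i);

		if (v == 0xffffffff && channel_offline) {
			/* without this check the loop would spin forever */
			printf("device gone after %d reads, bailing\n", i + 1);
			break;
		}
	}
	return 0;
}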




Re: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86

2020-07-15 Thread Benjamin Herrenschmidt
On Wed, 2020-07-15 at 08:47 +0200, Arnd Bergmann wrote:
> drivers/misc/cardreader/rts5261.c:  retval =
> rtsx_pci_write_config_dword(pcr, PCR_SETTING_REG2, lval);
> 
> That last one is interesting because I think this is a case in which
> we
> actually want to check for errors, as the driver seems to use it
> to ensure that accessing extended config space at offset 0x814
> works before relying on the value. Unfortunately the implementation
> seems buggy as it a) keeps using the possibly uninitialized value
> after
> printing a warning and b) returns the PCIBIOS_* value in place of a
> negative errno and then ignores it in the caller.

In cases like this, usually checking against ~0 is sufficient

Cheers,
Ben.




Re: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86

2020-07-15 Thread Bjorn Helgaas
On Wed, Jul 15, 2020 at 02:38:29PM +, David Laight wrote:
> From: Oliver O'Halloran
> > Sent: 15 July 2020 05:19
> > 
> > On Wed, Jul 15, 2020 at 8:03 AM Arnd Bergmann  wrote:
> ...
> > > - config space accesses are very rare compared to memory
> > >   space access and on the hardware side the error handling
> > >   would be similar, but readl/writel don't return errors, they just
> > >   access wrong registers or return 0xffffffff.
> > >   arch/powerpc/kernel/eeh.c has a ton extra code written to
> > >   deal with it, but no other architectures do.
> > 
> > TBH the EEH MMIO hooks were probably a mistake to begin with. Errors
> > detected via MMIO are almost always asynchronous to the error itself
> > so you usually just wind up with a misleading stack trace rather than
> > any kind of useful synchronous error reporting. It seems like most
> > drivers don't bother checking for 0xFFs either and rely on the
> > asynchronous reporting via .error_detected() instead, so I have to
> > wonder what the point is. I've been thinking of removing the MMIO
> > hooks and using a background poller to check for errors on each PHB
> > periodically (assuming we don't have an EEH interrupt) instead. That
> > would remove the requirement for eeh_dev_check_failure() to be
> > interrupt safe too, so it might even let us fix all the godawful races
> > in EEH.
> 
> I've 'played' with PCIe error handling - without much success.
> What might be useful is for a driver that has just read ~0u to
> be able to ask 'has there been an error signalled for this device?'.

In many cases a driver will know that ~0 is not a valid value for the
register it's reading.  But if ~0 *could* be valid, an interface like
you suggest could be useful.  I don't think we have anything like that
today, but maybe we could.  It would certainly be nice if the PCI core
noticed, logged, and cleared errors.  We have some of that for AER,
but that's an optional feature, and support for the error bits in the
garden-variety PCI_STATUS register is pretty haphazard.  As you note
below, this sort of SERR/PERR reporting is frequently hard-wired in
ways that take it out of our purview.

Bjorn


Re: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86

2020-07-15 Thread Bjorn Helgaas
On Wed, Jul 15, 2020 at 02:24:21PM +, David Laight wrote:
> From: Arnd Bergmann
> > Sent: 15 July 2020 07:47
> > On Wed, Jul 15, 2020 at 1:46 AM Bjorn Helgaas  wrote:
> > 
> >  So something like:
> > >
> > >   void pci_read_config_word(struct pci_dev *dev, int where, u16 *val)
> > >
> > > and where we used to return anything non-zero, we just set *val = ~0
> > > instead?  I think we do that already in most, maybe all, cases.
> > 
> > Right, this is what I had in mind. If we start by removing the handling
> > of the return code in all files that clearly don't need it, looking at
> > whatever remains will give a much better idea of what a good interface
> > should be.
> 
> It would be best to get rid of that nasty 'u16 *' parameter.

Do you mean nasty because it's basically a return value, but not
returned as the *function's* return value?  I agree that if we were
starting from scratch it would nicer to have:

  u16 pci_read_config_word(struct pci_dev *dev, int where)

but I don't think it's worth changing the thousands of callers just
for that.

> Make the return int and return the read value or -1 on error.
> (Or maybe 0xffffffff on error??)
> 
> For a 32bit read (there must be one for the BARs) returning
> a 64bit signed integer would work even for 32bit systems.
> 
> If code cares about the error, and it can be detected, then
> it can check. Otherwise it all 'just works'.

There are u8 (byte), u16 (word), and u32 (dword) config reads &
writes.  But I don't think it really helps to return something wider
than the access.  For programmatic errors like invalid alignment, we
could indeed use the extra bits to return an unambiguous error.  But
we still have the "device was unplugged" sort of errors where the
*hardware* typically returns ~0 and the config accessor doesn't know
whether that's valid data or an error.

Bjorn
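
David's wide-return idea from earlier in the thread can be made concrete: a
32-bit config read returning int64_t can carry either the full register value
or an in-band negative error. A hedged sketch with a hypothetical API, not
the real PCI core; it resolves the ambiguity for software-detected errors,
while a device that genuinely reads as all-ones stays ambiguous, as noted
above:

#include <stdint.h>
#include <stdio.h>

/* A 32-bit config read with the error folded into a wider signed return:
 * 0..0xffffffff is data, negative is an error code. */
static int64_t cfg_read32(int fail)
{
	if (fail)
		return -5;		/* e.g. -EIO */
	return 0xfffffffful;		/* a legitimate all-ones register */
}

int main(void)
{
	int64_t ret = cfg_read32(0);

	if (ret < 0)
		printf("error %lld\n", (long long)ret);
	else	/* all-ones *data* is now distinguishable from an error code */
		printf("val = 0x%x\n", (uint32_t)ret);
	return 0;
}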


Re: [PATCH] pseries: Fix 64 bit logical memory block panic

2020-07-15 Thread Anton Blanchard
Hi Aneesh,

> > Booting with a 4GB LMB size causes us to panic:
> >
> >   qemu-system-ppc64: OS terminated: OS panic:
> >   Memory block size not suitable: 0x0
> >
> > Fix pseries_memory_block_size() to handle 64 bit LMBs.

> We need similar changes at more places?

I agree. I wanted to get a minimal and tested fix (using QEMU) that
could make it into stable, so that the distros will at least boot.

Thanks,
Anton


Re: [PATCH v2 1/1] ASoC: fsl: fsl-asoc-card: Trivial: Fix misspelling of 'exists'

2020-07-15 Thread Nicolin Chen
On Wed, Jul 15, 2020 at 04:00:09PM +0100, Lee Jones wrote:
> Cc: Timur Tabi 
> Cc: Nicolin Chen 

Acked-by: Nicolin Chen 

> Cc: Xiubo Li 
> Cc: Fabio Estevam 
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones 
> ---
>  sound/soc/fsl/fsl-asoc-card.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
> index faac6ce9a82cb..dbacdd25dfe76 100644
> --- a/sound/soc/fsl/fsl-asoc-card.c
> +++ b/sound/soc/fsl/fsl-asoc-card.c
> @@ -92,7 +92,7 @@ struct fsl_asoc_card_priv {
>  };
>  
>  /*
> - * This dapm route map exits for DPCM link only.
> + * This dapm route map exists for DPCM link only.
>   * The other routes shall go through Device Tree.
>   *
>   * Note: keep all ASRC routes in the second half
> -- 
> 2.25.1
> 


Re: [PATCH v8 5/8] powerpc/vdso: Prepare for switching VDSO to generic C implementation.

2020-07-15 Thread Christophe Leroy

Michael Ellerman  a écrit :


Christophe Leroy  writes:

Prepare for switching VDSO to generic C implementation in following
patch. Here, we:
- Modify __get_datapage() to take an offset
- Prepare the helpers to call the C VDSO functions
- Prepare the required callbacks for the C VDSO functions
- Prepare the clocksource.h files to define VDSO_ARCH_CLOCKMODES
- Add the C trampolines to the generic C VDSO functions

powerpc is a bit special for VDSO as well as system calls in the
way that it requires setting the CR SO bit, which cannot be done in C.
Therefore, entry/exit needs to be performed in ASM.

Implementing __arch_get_vdso_data() would clobber the link register,
requiring the caller to save it. As the ASM calling function already
has to set up a stack frame and save the link register before calling
the C vdso function, retrieving the vdso data pointer there is lighter.

...

diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h  
b/arch/powerpc/include/asm/vdso/gettimeofday.h

new file mode 100644
index ..4452897f9bd8
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#include 
+
+#ifdef __ASSEMBLY__
+
+.macro cvdso_call funct
+  .cfi_startproc
+   PPC_STLU r1, -STACK_FRAME_OVERHEAD(r1)
+   mflr r0
+  .cfi_register lr, r0
+   PPC_STL r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)


This doesn't work for me on ppc64(le) with glibc.

glibc doesn't create a stack frame before making the VDSO call, so the
store of r0 (LR) goes into the caller's frame, corrupting the saved LR,
leading to an infinite loop.


Where should it be saved if it can't be saved in the standard location ?



This is an example from a statically built program that calls
clock_gettime():

10030cb0 <__clock_gettime>:
10030cb0:   0e 10 40 3c     lis     r2,4110
10030cb4:   00 7a 42 38     addi    r2,r2,31232
10030cb8:   a6 02 08 7c     mflr    r0
10030cbc:   ff ff 22 3d     addis   r9,r2,-1
10030cc0:   58 6d 29 39     addi    r9,r9,27992
10030cc4:   f0 ff c1 fb     std     r30,-16(r1)     <-- redzone store
10030cc8:   78 23 9e 7c     mr      r30,r4
10030ccc:   f8 ff e1 fb     std     r31,-8(r1)      <-- redzone store
10030cd0:   78 1b 7f 7c     mr      r31,r3
10030cd4:   10 00 01 f8     std     r0,16(r1)       <-- save LR to caller's frame

10030cd8:   00 00 09 e8     ld      r0,0(r9)
10030cdc:   00 00 20 2c     cmpdi   r0,0
10030ce0:   50 00 82 41     beq     10030d30 <__clock_gettime+0x80>
10030ce4:   a6 03 09 7c     mtctr   r0
10030ce8:   21 04 80 4e     bctrl                   <-- vdso call
10030cec:   26 00 00 7c     mfcr    r0
10030cf0:   00 10 09 74     andis.  r9,r0,4096
10030cf4:   78 1b 69 7c     mr      r9,r3
10030cf8:   28 00 82 40     bne     10030d20 <__clock_gettime+0x70>
10030cfc:   b4 07 23 7d     extsw   r3,r9
10030d00:   10 00 01 e8     ld      r0,16(r1)       <-- load saved LR, since clobbered by the VDSO

10030d04:   f0 ff c1 eb     ld      r30,-16(r1)
10030d08:   f8 ff e1 eb     ld      r31,-8(r1)
10030d0c:   a6 03 08 7c     mtlr    r0              <-- restore LR
10030d10:   20 00 80 4e     blr                     <-- jumps to 10030cec


I'm kind of confused how it worked for you on 32-bit.


So am I then. I'm away for 3 weeks, summer break. I'll check when I'm back.



There's also no code to load/restore the TOC pointer on BE, which I
think we'll need to handle.


What does it mean exactly? Just saving r2 all the time? Is there a
dedicated location in the stack frame for it? Is that only for 64-bit BE?


Christophe




Re: [PATCH v3] powerpc/pseries: detect secure and trusted boot state of the system.

2020-07-15 Thread Mimi Zohar
On Wed, 2020-07-15 at 07:52 -0400, Nayna Jain wrote:
> The device-tree property to check secure and trusted boot state is
> different for guests(pseries) compared to baremetal(powernv).
> 
> This patch updates the existing is_ppc_secureboot_enabled() and
> is_ppc_trustedboot_enabled() functions to add support for pseries.
> 
> The secureboot and trustedboot state are exposed via device-tree property:
> /proc/device-tree/ibm,secure-boot and /proc/device-tree/ibm,trusted-boot
> 
> The values of ibm,secure-boot under pseries are interpreted as:
> 
> 0 - Disabled
> 1 - Enabled in Log-only mode. This patch interprets this value as
> disabled, since audit mode is currently not supported for Linux.
> 2 - Enabled and enforced.
> 3-9 - Enabled and enforcing; requirements are at the discretion of the
> operating system.
> 
> The values of ibm,trusted-boot under pseries are interpreted as:
> 0 - Disabled
> 1 - Enabled
> 
> Signed-off-by: Nayna Jain 
> Reviewed-by: Daniel Axtens 

Thanks for updating the patch description.

Reviewed-by: Mimi Zohar 


Re: [PATCH -next] cpuidle/pseries: Make symbol 'pseries_idle_driver' static

2020-07-15 Thread Rafael J. Wysocki
On Tue, Jul 14, 2020 at 4:14 PM Wei Yongjun  wrote:
>
> The sparse tool complains as follows:
>
> drivers/cpuidle/cpuidle-pseries.c:25:23: warning:
>  symbol 'pseries_idle_driver' was not declared. Should it be static?
>
> 'pseries_idle_driver' is not used outside of this file, so mark
> it static.
>
> Reported-by: Hulk Robot 
> Signed-off-by: Wei Yongjun 
> ---
>  drivers/cpuidle/cpuidle-pseries.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/cpuidle/cpuidle-pseries.c 
> b/drivers/cpuidle/cpuidle-pseries.c
> index 6513ef2af66a..3e058ad2bb51 100644
> --- a/drivers/cpuidle/cpuidle-pseries.c
> +++ b/drivers/cpuidle/cpuidle-pseries.c
> @@ -22,7 +22,7 @@
>  #include 
>  #include 
>
> -struct cpuidle_driver pseries_idle_driver = {
> +static struct cpuidle_driver pseries_idle_driver = {
> .name = "pseries_idle",
> .owner = THIS_MODULE,
>  };

Applied as 5.9 material, thanks!


Re: [PATCH -next] cpufreq: powernv: Make some symbols static

2020-07-15 Thread Rafael J. Wysocki
On Tue, Jul 14, 2020 at 4:14 PM Wei Yongjun  wrote:
>
> The sparse tool complains as follows:
>
> drivers/cpufreq/powernv-cpufreq.c:88:1: warning:
>  symbol 'pstate_revmap' was not declared. Should it be static?
> drivers/cpufreq/powernv-cpufreq.c:383:18: warning:
>  symbol 'cpufreq_freq_attr_cpuinfo_nominal_freq' was not declared. Should it 
> be static?
> drivers/cpufreq/powernv-cpufreq.c:669:6: warning:
>  symbol 'gpstate_timer_handler' was not declared. Should it be static?
> drivers/cpufreq/powernv-cpufreq.c:902:6: warning:
>  symbol 'powernv_cpufreq_work_fn' was not declared. Should it be static?
>
> Those symbols are not used outside of this file, so mark
> them static.
>
> Reported-by: Hulk Robot 
> Signed-off-by: Wei Yongjun 
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/cpufreq/powernv-cpufreq.c 
> b/drivers/cpufreq/powernv-cpufreq.c
> index 8646eb197cd9..cf118263ec65 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -85,7 +85,7 @@ struct global_pstate_info {
>
>  static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
>
> -DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER);
> +static DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER);
>  /**
>   * struct pstate_idx_revmap_data: Entry in the hashmap pstate_revmap
>   *   indexed by a function of pstate id.
> @@ -380,7 +380,7 @@ static ssize_t cpuinfo_nominal_freq_show(struct 
> cpufreq_policy *policy,
> powernv_freqs[powernv_pstate_info.nominal].frequency);
>  }
>
> -struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq =
> +static struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq =
> __ATTR_RO(cpuinfo_nominal_freq);
>
>  #define SCALING_BOOST_FREQS_ATTR_INDEX 2
> @@ -666,7 +666,7 @@ static inline void  queue_gpstate_timer(struct 
> global_pstate_info *gpstates)
>   * according quadratic equation. Queues a new timer if it is still not equal
>   * to local pstate
>   */
> -void gpstate_timer_handler(struct timer_list *t)
> +static void gpstate_timer_handler(struct timer_list *t)
>  {
> struct global_pstate_info *gpstates = from_timer(gpstates, t, timer);
> struct cpufreq_policy *policy = gpstates->policy;
> @@ -899,7 +899,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
> .notifier_call = powernv_cpufreq_reboot_notifier,
>  };
>
> -void powernv_cpufreq_work_fn(struct work_struct *work)
> +static void powernv_cpufreq_work_fn(struct work_struct *work)
>  {
> struct chip *chip = container_of(work, struct chip, throttle);
> struct cpufreq_policy *policy;
>

Applied as 5.9 material, thanks!


[PATCH v2 1/1] ASoC: fsl: fsl-asoc-card: Trivial: Fix misspelling of 'exists'

2020-07-15 Thread Lee Jones
Cc: Timur Tabi 
Cc: Nicolin Chen 
Cc: Xiubo Li 
Cc: Fabio Estevam 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Lee Jones 
---
 sound/soc/fsl/fsl-asoc-card.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
index faac6ce9a82cb..dbacdd25dfe76 100644
--- a/sound/soc/fsl/fsl-asoc-card.c
+++ b/sound/soc/fsl/fsl-asoc-card.c
@@ -92,7 +92,7 @@ struct fsl_asoc_card_priv {
 };
 
 /*
- * This dapm route map exits for DPCM link only.
+ * This dapm route map exists for DPCM link only.
  * The other routes shall go through Device Tree.
  *
  * Note: keep all ASRC routes in the second half
-- 
2.25.1



Re: [PATCH 1/1] ASoC: fsl: fsl-asoc-card: Trivial: Fix misspelling of 'exists'

2020-07-15 Thread Lee Jones
On Wed, 15 Jul 2020, Lee Jones wrote:

> On Wed, 15 Jul 2020, Mark Brown wrote:
> 
> > On Wed, Jul 15, 2020 at 10:44:47AM +0100, Lee Jones wrote:
> > 
> > >  /*
> > > - * This dapm route map exits for DPCM link only.
> > > + * This dapm route map exists for DPCM link only.
> > >   * The other routes shall go through Device Tree.
> > 
> > This doesn't apply against current code, please check and resend.
> 
> What is 'current code'?  It's applied against today's -next.

Hmm... something went wrong.  Stand by ...

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog


Re: [PATCH 1/1] ASoC: fsl: fsl-asoc-card: Trivial: Fix misspelling of 'exists'

2020-07-15 Thread Lee Jones
On Wed, 15 Jul 2020, Mark Brown wrote:

> On Wed, Jul 15, 2020 at 10:44:47AM +0100, Lee Jones wrote:
> 
> >  /*
> > - * This dapm route map exits for DPCM link only.
> > + * This dapm route map exists for DPCM link only.
> >   * The other routes shall go through Device Tree.
> 
> This doesn't apply against current code, please check and resend.

What is 'current code'?  It's applied against today's -next.

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog


RE: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86

2020-07-15 Thread David Laight
From: Oliver O'Halloran
> Sent: 15 July 2020 05:19
> 
> On Wed, Jul 15, 2020 at 8:03 AM Arnd Bergmann  wrote:
...
> > - config space accesses are very rare compared to memory
> >   space access and on the hardware side the error handling
> >   would be similar, but readl/writel don't return errors, they just
> >   access wrong registers or return 0xffffffff.
> >   arch/powerpc/kernel/eeh.c has a ton of extra code written to
> >   deal with it, but no other architectures do.
> 
> TBH the EEH MMIO hooks were probably a mistake to begin with. Errors
> detected via MMIO are almost always asynchronous to the error itself
> so you usually just wind up with a misleading stack trace rather than
> any kind of useful synchronous error reporting. It seems like most
> drivers don't bother checking for 0xFFs either and rely on the
> asynchronous reporting via .error_detected() instead, so I have to
> wonder what the point is. I've been thinking of removing the MMIO
> hooks and using a background poller to check for errors on each PHB
> periodically (assuming we don't have an EEH interrupt) instead. That
> would remove the requirement for eeh_dev_check_failure() to be
> interrupt safe too, so it might even let us fix all the godawful races
> in EEH.

I've 'played' with PCIe error handling - without much success.
What might be useful is for a driver that has just read ~0u to
be able to ask 'has there been an error signalled for this device?'.
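
(For what it's worth, part of that exists today. A sketch of the idiom,
where the device struct and register offset are hypothetical but readl()
and pci_channel_offline() are real kernel APIs:)

#include <linux/io.h>
#include <linux/pci.h>

#define REG_STATUS 0x10			/* hypothetical register offset */

struct foo_dev {			/* hypothetical driver state */
	struct pci_dev *pdev;
	void __iomem *mmio;
};

static u32 foo_read_status(struct foo_dev *fd)
{
	u32 val = readl(fd->mmio + REG_STATUS);

	/* All-ones may mean a dead link rather than register contents. */
	if (val == 0xffffffff && pci_channel_offline(fd->pdev))
		dev_err(&fd->pdev->dev, "device offline, awaiting recovery\n");

	return val;
}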

I got an error generated by doing an MMIO access that was inside
the address range forwarded to the slave, but outside any of its BARs.
(Two BARs of different sizes leave a nice gap.)
This got reported up to the bridge nearest the slave (which supported
error handling), but not to the root bridge (which I don't think does).
ISTR a message about EEH being handled by the hardware (the machine
is up but dmesg is full of messages from a bouncing USB mouse).

With such partial error reporting useful info can still be extracted.

Of course, what actually happens on a PCIe error is that the signal
gets routed to some 'board support logic' and then passed back into
the kernel as an NMI - which then crashes the kernel!
This even happens when the PCIe link goes down after we've done a
soft-remove of the device itself!
Rather makes updating the board's FPGA without a reboot tricky.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)


Re: [PATCH 1/1] ASoC: fsl: fsl-asoc-card: Trivial: Fix misspelling of 'exists'

2020-07-15 Thread Mark Brown
On Wed, Jul 15, 2020 at 10:44:47AM +0100, Lee Jones wrote:

>  /*
> - * This dapm route map exits for DPCM link only.
> + * This dapm route map exists for DPCM link only.
>   * The other routes shall go through Device Tree.

This doesn't apply against current code, please check and resend.




RE: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86

2020-07-15 Thread David Laight
From: Arnd Bergmann
> Sent: 15 July 2020 07:47
> On Wed, Jul 15, 2020 at 1:46 AM Bjorn Helgaas  wrote:
> 
> > So something like:
> >
> >   void pci_read_config_word(struct pci_dev *dev, int where, u16 *val)
> >
> > and where we used to return anything non-zero, we just set *val = ~0
> > instead?  I think we do that already in most, maybe all, cases.
> 
> Right, this is what I had in mind. If we start by removing the handling
> of the return code in all files that clearly don't need it, looking at
> whatever remains will give a much better idea of what a good interface
> should be.

It would be best to get rid of that nasty 'u16 *' parameter.
Make the return int and return the read value or -1 on error.
(Or maybe 0xffff on error??)

For a 32bit read (there must be one for the BARs) returning
a 64bit signed integer would work even for 32bit systems.

If code cares about the error, and it can be detected, then
it can check. Otherwise it all 'just works'.
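
A sketch of that shape (hypothetical name, wrapping the existing
accessor; illustrative only):

#include <linux/pci.h>

/* Return the value read, folding errors into a negative; callers that
 * don't care about errors can just use the result.
 */
static inline s64 pci_config_read32(struct pci_dev *dev, int where)
{
	u32 val;

	if (pci_read_config_dword(dev, where, &val))
		return -1;	/* only callers that care need check < 0 */
	return val;		/* 0..0xffffffff fits in a 64-bit signed int */
}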

David



[PATCH v2 3/3] ASoC: fsl-asoc-card: Support Headphone and Microphone Jack detection

2020-07-15 Thread Shengjiu Wang
Use asoc_simple_init_jack function from simple card to implement
the Headphone and Microphone detection.
Register notifier to disable Speaker when Headphone is plugged in
and enable Speaker when Headphone is unplugged.
Register notifier to disable Digital Microphone when Analog Microphone
is plugged in and enable DMIC when Analog Microphone is unplugged.

Signed-off-by: Shengjiu Wang 
Acked-by: Nicolin Chen 
---
 sound/soc/fsl/Kconfig |  1 +
 sound/soc/fsl/fsl-asoc-card.c | 77 ++-
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index ea7b4787a8af..1c4ca5ec8caf 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -315,6 +315,7 @@ config SND_SOC_FSL_ASOC_CARD
depends on OF && I2C
# enforce SND_SOC_FSL_ASOC_CARD=m if SND_AC97_CODEC=m:
depends on SND_AC97_CODEC || SND_AC97_CODEC=n
+   select SND_SIMPLE_CARD_UTILS
select SND_SOC_IMX_AUDMUX
select SND_SOC_IMX_PCM_DMA
select SND_SOC_FSL_ESAI
diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
index faac6ce9a82c..f0cde3ecb5b7 100644
--- a/sound/soc/fsl/fsl-asoc-card.c
+++ b/sound/soc/fsl/fsl-asoc-card.c
@@ -15,6 +15,8 @@
 #endif
 #include 
 #include 
+#include 
+#include 
 
 #include "fsl_esai.h"
 #include "fsl_sai.h"
@@ -65,6 +67,8 @@ struct cpu_priv {
 /**
  * struct fsl_asoc_card_priv - Freescale Generic ASOC card private data
  * @dai_link: DAI link structure including normal one and DPCM link
+ * @hp_jack: Headphone Jack structure
+ * @mic_jack: Microphone Jack structure
  * @pdev: platform device pointer
  * @codec_priv: CODEC private data
  * @cpu_priv: CPU private data
@@ -79,6 +83,8 @@ struct cpu_priv {
 
 struct fsl_asoc_card_priv {
struct snd_soc_dai_link dai_link[3];
+   struct asoc_simple_jack hp_jack;
+   struct asoc_simple_jack mic_jack;
struct platform_device *pdev;
struct codec_priv codec_priv;
struct cpu_priv cpu_priv;
@@ -445,6 +451,44 @@ static int fsl_asoc_card_audmux_init(struct device_node 
*np,
return 0;
 }
 
+static int hp_jack_event(struct notifier_block *nb, unsigned long event,
+void *data)
+{
+   struct snd_soc_jack *jack = (struct snd_soc_jack *)data;
+   struct snd_soc_dapm_context *dapm = &jack->card->dapm;
+
+   if (event & SND_JACK_HEADPHONE)
+   /* Disable speaker if headphone is plugged in */
+   snd_soc_dapm_disable_pin(dapm, "Ext Spk");
+   else
+   snd_soc_dapm_enable_pin(dapm, "Ext Spk");
+
+   return 0;
+}
+
+static struct notifier_block hp_jack_nb = {
+   .notifier_call = hp_jack_event,
+};
+
+static int mic_jack_event(struct notifier_block *nb, unsigned long event,
+ void *data)
+{
+   struct snd_soc_jack *jack = (struct snd_soc_jack *)data;
+   struct snd_soc_dapm_context *dapm = &jack->card->dapm;
+
+   if (event & SND_JACK_MICROPHONE)
+   /* Disable dmic if microphone is plugged in */
+   snd_soc_dapm_disable_pin(dapm, "DMIC");
+   else
+   snd_soc_dapm_enable_pin(dapm, "DMIC");
+
+   return 0;
+}
+
+static struct notifier_block mic_jack_nb = {
+   .notifier_call = mic_jack_event,
+};
+
 static int fsl_asoc_card_late_probe(struct snd_soc_card *card)
 {
struct fsl_asoc_card_priv *priv = snd_soc_card_get_drvdata(card);
@@ -745,8 +789,37 @@ static int fsl_asoc_card_probe(struct platform_device 
*pdev)
snd_soc_card_set_drvdata(&priv->card, priv);
 
ret = devm_snd_soc_register_card(&pdev->dev, &priv->card);
-   if (ret && ret != -EPROBE_DEFER)
-   dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret);
+   if (ret) {
+   if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret);
+   goto asrc_fail;
+   }
+
+   /*
+* Properties "hp-det-gpio" and "mic-det-gpio" are optional, and
+* asoc_simple_init_jack uses these properties for creating
+* Headphone Jack and Microphone Jack.
+*
+* The notifier is initialized in snd_soc_card_jack_new(), then
+* snd_soc_jack_notifier_register can be called.
+*/
+   if (of_property_read_bool(np, "hp-det-gpio")) {
+   ret = asoc_simple_init_jack(&priv->card, &priv->hp_jack,
+   1, NULL, "Headphone Jack");
+   if (ret)
+   goto asrc_fail;
+
+		snd_soc_jack_notifier_register(&priv->hp_jack.jack, &hp_jack_nb);
+   }
+
+   if (of_property_read_bool(np, "mic-det-gpio")) {
+   ret = asoc_simple_init_jack(&priv->card, &priv->mic_jack,
+   0, NULL, "Mic Jack");
+   if (ret)
+   goto asrc_fail;
+
+		snd_soc_jack_notifier_register(&priv->mic_jack.jack, &mic_jack_nb);
+	}

[PATCH v2 2/3] ASoC: bindings: fsl-asoc-card: Support hp-det-gpio and mic-det-gpio

2020-07-15 Thread Shengjiu Wang
Add headphone and microphone detection GPIO support.
These properties are optional.

Signed-off-by: Shengjiu Wang 
Acked-by: Nicolin Chen 
---
 Documentation/devicetree/bindings/sound/fsl-asoc-card.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt 
b/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
index 133d7e14a4d0..8a6a3d0fda5e 100644
--- a/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
+++ b/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
@@ -69,6 +69,9 @@ Optional properties:
coexisting in order to support the old bindings
of wm8962 and sgtl5000.
 
+  - hp-det-gpio	: The GPIO that detects whether headphones are plugged in
+  - mic-det-gpio	: The GPIO that detects whether a microphone is plugged in
+
 Optional unless SSI is selected as a CPU DAI:
 
   - mux-int-port   : The internal port of the i.MX audio muxer (AUDMUX)
-- 
2.27.0



[PATCH v2 1/3] ASoC: simple-card-utils: Support configure pin_name for asoc_simple_init_jack

2020-07-15 Thread Shengjiu Wang
Currently the pin_name is fixed in asoc_simple_init_jack, but some
drivers may use a different pin_name. So add a new parameter to
asoc_simple_init_jack for configuring the pin_name.

If this parameter is NULL, then the default pin_name is used.
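
A usage sketch (hypothetical call sites; priv is driver-private data):

	/* NULL keeps the built-in pin name ("Headphones" / "Mic Jack"): */
	ret = asoc_simple_init_jack(card, &priv->hp_jack, 1, NULL, NULL);

	/* Callers can now override it, as fsl-asoc-card does in patch 3/3: */
	ret = asoc_simple_init_jack(card, &priv->hp_jack, 1, NULL,
				    "Headphone Jack");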

Signed-off-by: Shengjiu Wang 
Acked-by: Nicolin Chen 
---
 include/sound/simple_card_utils.h | 6 +++---
 sound/soc/generic/simple-card-utils.c | 7 ---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/sound/simple_card_utils.h 
b/include/sound/simple_card_utils.h
index bbdd1542d6f1..86a1e956991e 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -12,9 +12,9 @@
 #include 
 
 #define asoc_simple_init_hp(card, sjack, prefix) \
-   asoc_simple_init_jack(card, sjack, 1, prefix)
+   asoc_simple_init_jack(card, sjack, 1, prefix, NULL)
 #define asoc_simple_init_mic(card, sjack, prefix) \
-   asoc_simple_init_jack(card, sjack, 0, prefix)
+   asoc_simple_init_jack(card, sjack, 0, prefix, NULL)
 
 struct asoc_simple_dai {
const char *name;
@@ -131,7 +131,7 @@ int asoc_simple_parse_pin_switches(struct snd_soc_card 
*card,
 
 int asoc_simple_init_jack(struct snd_soc_card *card,
   struct asoc_simple_jack *sjack,
-  int is_hp, char *prefix);
+  int is_hp, char *prefix, char *pin);
 int asoc_simple_init_priv(struct asoc_simple_priv *priv,
   struct link_info *li);
 
diff --git a/sound/soc/generic/simple-card-utils.c 
b/sound/soc/generic/simple-card-utils.c
index 8c54dc6710fe..b408cb5ed644 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -540,7 +540,8 @@ EXPORT_SYMBOL_GPL(asoc_simple_parse_pin_switches);
 
 int asoc_simple_init_jack(struct snd_soc_card *card,
  struct asoc_simple_jack *sjack,
- int is_hp, char *prefix)
+ int is_hp, char *prefix,
+ char *pin)
 {
struct device *dev = card->dev;
enum of_gpio_flags flags;
@@ -557,12 +558,12 @@ int asoc_simple_init_jack(struct snd_soc_card *card,
 
if (is_hp) {
snprintf(prop, sizeof(prop), "%shp-det-gpio", prefix);
-		pin_name = "Headphones";
+		pin_name = pin ? pin : "Headphones";
gpio_name   = "Headphone detection";
mask= SND_JACK_HEADPHONE;
} else {
snprintf(prop, sizeof(prop), "%smic-det-gpio", prefix);
-		pin_name = "Mic Jack";
+		pin_name = pin ? pin : "Mic Jack";
gpio_name   = "Mic detection";
mask= SND_JACK_MICROPHONE;
}
-- 
2.27.0



[PATCH v2 0/3] ASoC: fsl-asoc-card: Support hp and mic detection

2020-07-15 Thread Shengjiu Wang
Support hp and mic detection.
Add a parameter for asoc_simple_init_jack.

Shengjiu Wang (3):
  ASoC: simple-card-utils: Support configure pin_name for
asoc_simple_init_jack
  ASoC: bindings: fsl-asoc-card: Support hp-det-gpio and mic-det-gpio
  ASoC: fsl-asoc-card: Support Headphone and Microphone Jack detection

changes in v2:
- Add more comments in third commit
- Add Acked-by Nicolin.

 .../bindings/sound/fsl-asoc-card.txt  |  3 +
 include/sound/simple_card_utils.h |  6 +-
 sound/soc/fsl/Kconfig |  1 +
 sound/soc/fsl/fsl-asoc-card.c | 77 ++-
 sound/soc/generic/simple-card-utils.c |  7 +-
 5 files changed, 86 insertions(+), 8 deletions(-)

-- 
2.27.0



Re: [PATCH V2 1/2] powerpc/vas: Report proper error code for address translation failure

2020-07-15 Thread Michael Ellerman
Haren Myneni  writes:
> P9 DD2 NX workbook (Table 4-36) says DMA controller uses CC=5
> internally for translation fault handling. NX reserves CC=250 for
> OS to notify user space when NX encounters address translation
> failure on the request buffer. Not an issue in earlier releases
> as NX does not get faults on kernel addresses.
>
> This patch defines CSB_CC_FAULT_ADDRESS(250) and updates CSB.CC with
> this proper error code for user space.

I added:

Fixes: c96c4436aba4 ("powerpc/vas: Update CSB and notify process for fault CRBs")

> Signed-off-by: Haren Myneni 
>
> Changes in V2:
> - Use CSB_CC_FAULT_ADDRESS instead of CSB_CC_ADDRESS_TRANSLATION
>   to distinguish from other error codes.
> - Add NX workbook reference in the comment.

The change log goes after the --- line.

> ---
>  Documentation/powerpc/vas-api.rst  | 2 +-
>  arch/powerpc/include/asm/icswx.h   | 4 
>  arch/powerpc/platforms/powernv/vas-fault.c | 2 +-
>  3 files changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/powerpc/vas-api.rst 
> b/Documentation/powerpc/vas-api.rst
> index 1217c2f..788dc83 100644
> --- a/Documentation/powerpc/vas-api.rst
> +++ b/Documentation/powerpc/vas-api.rst
> @@ -213,7 +213,7 @@ request buffers are not in memory. The operating system 
> handles the fault by
>  updating CSB with the following data:
>  
>   csb.flags = CSB_V;
> - csb.cc = CSB_CC_TRANSLATION;
> + csb.cc = CSB_CC_FAULT_ADDRESS;
>   csb.ce = CSB_CE_TERMINATION;
>   csb.address = fault_address;
>  
> diff --git a/arch/powerpc/include/asm/icswx.h 
> b/arch/powerpc/include/asm/icswx.h
> index 965b1f3..9bc7c58 100644
> --- a/arch/powerpc/include/asm/icswx.h
> +++ b/arch/powerpc/include/asm/icswx.h
> @@ -77,6 +77,10 @@ struct coprocessor_completion_block {
>  #define CSB_CC_CHAIN (37)
>  #define CSB_CC_SEQUENCE  (38)
>  #define CSB_CC_HW(39)
> +/*
> + * P9 DD NX Workbook 3.2 (Table 4-36): Address translation fault

I changed that to "P9 DD2 NX", which I assume is what you meant; it
matches the change log.

> + */
> +#define CSB_CC_FAULT_ADDRESS (250)
>  
>  #define CSB_SIZE (0x10)
>  #define CSB_ALIGN CSB_SIZE
> diff --git a/arch/powerpc/platforms/powernv/vas-fault.c 
> b/arch/powerpc/platforms/powernv/vas-fault.c
> index 266a6ca..3d21fce 100644
> --- a/arch/powerpc/platforms/powernv/vas-fault.c
> +++ b/arch/powerpc/platforms/powernv/vas-fault.c
> @@ -79,7 +79,7 @@ static void update_csb(struct vas_window *window,
>   csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
>  
>   memset(&csb, 0, sizeof(csb));
> - csb.cc = CSB_CC_TRANSLATION;
> + csb.cc = CSB_CC_FAULT_ADDRESS;
>   csb.ce = CSB_CE_TERMINATION;
>   csb.cs = 0;
>   csb.count = 0;
> -- 
> 1.8.3.1


cheers
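
(For readers following along, a rough user-space sketch of consuming this
code, based on the vas-api.rst text quoted above. The struct layout is
abridged and the busy-wait is illustrative; the constants mirror icswx.h.)

#define CSB_V			0x80	/* csb.flags: CSB is valid */
#define CSB_CC_FAULT_ADDRESS	250	/* the new code from this patch */

struct csb {
	unsigned char flags, cs, cc, ce;
	unsigned int count;
	unsigned long long error, address;
};

/* Poll the CSB after submitting a request to the NX window. */
static int check_csb(volatile struct csb *csb)
{
	while (!(csb->flags & CSB_V))
		;			/* illustrative busy-wait */
	if (csb->cc == CSB_CC_FAULT_ADDRESS)
		return 1;		/* touch csb->address, resubmit */
	return csb->cc ? -1 : 0;
}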


Re: [PATCH] pseries: Fix 64 bit logical memory block panic

2020-07-15 Thread Aneesh Kumar K.V
Anton Blanchard  writes:

> Booting with a 4GB LMB size causes us to panic:
>
>   qemu-system-ppc64: OS terminated: OS panic:
>   Memory block size not suitable: 0x0
>
> Fix pseries_memory_block_size() to handle 64 bit LMBs.
>
> Cc: sta...@vger.kernel.org
> Signed-off-by: Anton Blanchard 
> ---
>  arch/powerpc/platforms/pseries/hotplug-memory.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 5ace2f9a277e..6574ac33e887 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -27,7 +27,7 @@ static bool rtas_hp_event;
>  unsigned long pseries_memory_block_size(void)
>  {
>   struct device_node *np;
> - unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE;
> + uint64_t memblock_size = MIN_MEMORY_BLOCK_SIZE;
>   struct resource r;
>  
>   np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");

We need similar changes in more places?

modified   arch/powerpc/include/asm/book3s/64/mmu.h
@@ -85,7 +85,7 @@ extern unsigned int mmu_base_pid;
 /*
  * memory block size used with radix translation.
  */
-extern unsigned int __ro_after_init radix_mem_block_size;
+extern unsigned long __ro_after_init radix_mem_block_size;
 
 #define PRTB_SIZE_SHIFT	(mmu_pid_bits + 4)
 #define PRTB_ENTRIES   (1ul << mmu_pid_bits)
modified   arch/powerpc/include/asm/drmem.h
@@ -21,7 +21,7 @@ struct drmem_lmb {
 struct drmem_lmb_info {
struct drmem_lmb*lmbs;
int n_lmbs;
-   u32 lmb_size;
+   u64 lmb_size;
 };
 
 extern struct drmem_lmb_info *drmem_info;
modified   arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -34,7 +34,7 @@
 
 unsigned int mmu_pid_bits;
 unsigned int mmu_base_pid;
-unsigned int radix_mem_block_size __ro_after_init;
+unsigned long radix_mem_block_size __ro_after_init;
 
 static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
modified   arch/powerpc/mm/drmem.c
@@ -268,14 +268,15 @@ static void __init __walk_drmem_v2_lmbs(const __be32 
*prop, const __be32 *usm,
 void __init walk_drmem_lmbs_early(unsigned long node,
void (*func)(struct drmem_lmb *, const __be32 **))
 {
+   const __be64 *lmb_prop;
const __be32 *prop, *usm;
int len;
 
-   prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
-   if (!prop || len < dt_root_size_cells * sizeof(__be32))
+   lmb_prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+   if (!lmb_prop || len < sizeof(__be64))
return;
 
-   drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+   drmem_info->lmb_size = be64_to_cpup(lmb_prop);
 
usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
 
@@ -296,19 +297,19 @@ void __init walk_drmem_lmbs_early(unsigned long node,
 
 static int __init init_drmem_lmb_size(struct device_node *dn)
 {
-   const __be32 *prop;
+   const __be64 *prop;
int len;
 
if (drmem_info->lmb_size)
return 0;
 
prop = of_get_property(dn, "ibm,lmb-size", &len);
-   if (!prop || len < dt_root_size_cells * sizeof(__be32)) {
+   if (!prop || len < sizeof(__be64)) {
pr_info("Could not determine LMB size\n");
return -1;
}
 
-   drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+   drmem_info->lmb_size = be64_to_cpup(prop);
return 0;
 }
 
modified   arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -27,7 +27,7 @@ static bool rtas_hp_event;
 unsigned long pseries_memory_block_size(void)
 {
struct device_node *np;
-   unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE;
+   unsigned long memblock_size = MIN_MEMORY_BLOCK_SIZE;
struct resource r;
 
np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");


Re: [PATCH v2] powerpc: select ARCH_HAS_MEMBARRIER_SYNC_CORE

2020-07-15 Thread Mathieu Desnoyers
- On Jul 15, 2020, at 5:48 AM, Nicholas Piggin npig...@gmail.com wrote:
[...]
> index 47bd4ea0837d..a4704f405e8d 100644
> --- a/arch/powerpc/include/asm/exception-64s.h
> +++ b/arch/powerpc/include/asm/exception-64s.h
> @@ -68,6 +68,13 @@
>  *
>  * The nop instructions allow us to insert one or more instructions to flush 
> the
>  * L1-D cache when returning to userspace or a guest.
> + *
> + * powerpc relies on return from interrupt/syscall being context 
> synchronising
> + * (which hrfid, rfid, and rfscv are) to support 
> ARCH_HAS_MEMBARRIER_SYNC_CORE
> + * without additional additional synchronisation instructions. soft-masked
> + * interrupt replay does not include a context-synchronising rfid, but those
> + * always return to kernel, the context sync is only required for IPIs which
> + * return to user.
>  */
> #define RFI_FLUSH_SLOT
> \
>   RFI_FLUSH_FIXUP_SECTION;\

I suspect the statement "the context sync is only required for IPIs
which return to user." is misleading.

As I recall, we need more than just a context sync after an IPI. We need
a context sync in the return path of any trap/interrupt/system call which
returns to user-space, else we'd need to add the proper core-serializing
barriers in the scheduler, as we had to do for lazy TLB on x86.

Or am I missing something?
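
(For reference, the user-space contract in question, as a minimal sketch
using the two-argument membarrier() of 2020-era kernels:)

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

static int membarrier(int cmd, unsigned int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0);
	/* ... one thread rewrites code here ... */
	membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0);
	/* every thread has now core-serialized before running the new code */
	return 0;
}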

Thanks,

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


RE: [PATCH v2 1/3] module: Rename module_alloc() to text_alloc() and move to kernel proper

2020-07-15 Thread David Laight
From: Jarkko Sakkinen
> Sent: 14 July 2020 17:31
..
> There is one arch (nios2), which uses a regular kzalloc(). This means
> that both text_alloc() and text_memfree() need to be weak symbols and
> nios2 needs to have an overriding text.c to do its thing.

That could be handled by an arch-specific inline wrapper in a .h file.

Although I actually suspect the nios2 code is just being lazy.
The processor is a very simple in-order 32bit soft cpu for Intel (Altera)
FPGA.

We have four nios2 cpus on one of our FPGA images - all running small
(almost single function) code loops.
I can't imagine actually trying to run Linux on one to do anything
useful - clock speed is unlikely to be much above 100MHz.
We can't meet timing at 125MHz (I've tried quite hard) so have
to run at 62.5MHz because of the PCIe clock.

David




Re: [PATCH] pseries: Fix 64 bit logical memory block panic

2020-07-15 Thread Michael Ellerman
Anton Blanchard  writes:
> Booting with a 4GB LMB size causes us to panic:
>
>   qemu-system-ppc64: OS terminated: OS panic:
>   Memory block size not suitable: 0x0
>
> Fix pseries_memory_block_size() to handle 64 bit LMBs.
>
> Cc: sta...@vger.kernel.org
> Signed-off-by: Anton Blanchard 
> ---
>  arch/powerpc/platforms/pseries/hotplug-memory.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 5ace2f9a277e..6574ac33e887 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -27,7 +27,7 @@ static bool rtas_hp_event;
>  unsigned long pseries_memory_block_size(void)
>  {
>   struct device_node *np;
> - unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE;
> + uint64_t memblock_size = MIN_MEMORY_BLOCK_SIZE;

I changed it to u64.
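
(A standalone demo of the truncation behind the panic - illustrative,
not kernel code:)

#include <stdio.h>

int main(void)
{
	unsigned long long lmb = 0x100000000ULL;	/* 4 GiB LMB size */
	unsigned int as_u32 = (unsigned int)lmb;	/* truncates to 0 */

	printf("%#llx -> %#x\n", lmb, as_u32);		/* 0x100000000 -> 0 */
	return 0;
}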

cheers


[PATCH 2/2] powerpc/numa: Remove a redundant variable

2020-07-15 Thread Srikar Dronamraju
In of_drconf_to_nid_single() default_nid always refers to NUMA_NO_NODE.
Hence replace it with NUMA_NO_NODE.

No functional changes.

Cc: linuxppc-dev 
Cc: Michael Ellerman 
Cc: Nathan Lynch 
Cc: Tyrel Datwyler 
Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/mm/numa.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a2c5fe0d0cad..b066d89c2975 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -432,16 +432,15 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
 static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
 {
struct assoc_arrays aa = { .arrays = NULL };
-   int default_nid = NUMA_NO_NODE;
-   int nid = default_nid;
+   int nid = NUMA_NO_NODE;
int rc, index;
 
if ((min_common_depth < 0) || !numa_enabled)
-   return default_nid;
+   return NUMA_NO_NODE;
 
rc = of_get_assoc_arrays(&aa);
if (rc)
-   return default_nid;
+   return NUMA_NO_NODE;
 
if (min_common_depth <= aa.array_sz &&
    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
@@ -449,7 +448,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
nid = of_read_number(&aa.arrays[index], 1);
 
		if (nid == 0xffff || nid >= num_possible_nodes())
-   nid = default_nid;
+   nid = NUMA_NO_NODE;
 
if (nid > 0) {
index = lmb->aa_index * aa.array_sz;
-- 
2.18.2



[PATCH 1/2] powerpc/numa: Limit possible nodes to within num_possible_nodes

2020-07-15 Thread Srikar Dronamraju
MAX_NUMNODES is the theoretical maximum number of nodes that is
supported by the kernel. Device-tree properties expose the number of
possible nodes on the current platform. The kernel detects this and
uses it for most of its resource allocations. If the platform now
increases the nodes beyond what was already exposed, it may lead
to inconsistencies. Hence limit it to the already exposed nodes.

Suggested-by: Nathan Lynch 
Cc: linuxppc-dev 
Cc: Michael Ellerman 
Cc: Nathan Lynch 
Cc: Tyrel Datwyler 
Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/mm/numa.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 8ec7ff05ae47..a2c5fe0d0cad 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -221,7 +221,7 @@ static void initialize_distance_lookup_table(int nid,
}
 }
 
-/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+/* Returns nid in the range [0..num_possible_nodes()-1], or -1 if no useful numa
  * info is found.
  */
 static int associativity_to_nid(const __be32 *associativity)
@@ -235,7 +235,7 @@ static int associativity_to_nid(const __be32 *associativity)
nid = of_read_number(&associativity[min_common_depth], 1);
 
	/* POWER4 LPAR uses 0xffff as invalid node */
-	if (nid == 0xffff || nid >= MAX_NUMNODES)
+	if (nid == 0xffff || nid >= num_possible_nodes())
nid = NUMA_NO_NODE;
 
if (nid > 0 &&
@@ -448,7 +448,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
nid = of_read_number(&aa.arrays[index], 1);
 
-	if (nid == 0xffff || nid >= MAX_NUMNODES)
+	if (nid == 0xffff || nid >= num_possible_nodes())
nid = default_nid;
 
if (nid > 0) {
-- 
2.18.2



[PATCH v3] powerpc/pseries: detect secure and trusted boot state of the system.

2020-07-15 Thread Nayna Jain
The device-tree property to check secure and trusted boot state is
different for guests(pseries) compared to baremetal(powernv).

This patch updates the existing is_ppc_secureboot_enabled() and
is_ppc_trustedboot_enabled() functions to add support for pseries.

The secureboot and trustedboot state are exposed via device-tree property:
/proc/device-tree/ibm,secure-boot and /proc/device-tree/ibm,trusted-boot

The values of ibm,secure-boot under pseries are interpreted as:

0 - Disabled
1 - Enabled in Log-only mode. This patch interprets this value as
disabled, since audit mode is currently not supported for Linux.
2 - Enabled and enforced.
3-9 - Enabled and enforcing; requirements are at the discretion of the
operating system.

The values of ibm,trusted-boot under pseries are interpreted as:
0 - Disabled
1 - Enabled

Signed-off-by: Nayna Jain 
Reviewed-by: Daniel Axtens 
---
v3:
* fixed double check. Thanks Daniel for noticing it.
* updated patch description.

v2:
* included Michael Ellerman's feedback.
* added Daniel Axtens's Reviewed-by.

 arch/powerpc/kernel/secure_boot.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/secure_boot.c 
b/arch/powerpc/kernel/secure_boot.c
index 4b982324d368..118bcb5f79c4 100644
--- a/arch/powerpc/kernel/secure_boot.c
+++ b/arch/powerpc/kernel/secure_boot.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static struct device_node *get_ppc_fw_sb_node(void)
 {
@@ -23,12 +24,19 @@ bool is_ppc_secureboot_enabled(void)
 {
struct device_node *node;
bool enabled = false;
+   u32 secureboot;
 
node = get_ppc_fw_sb_node();
enabled = of_property_read_bool(node, "os-secureboot-enforcing");
-
of_node_put(node);
 
+   if (enabled)
+   goto out;
+
+   if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot))
+   enabled = (secureboot > 1);
+
+out:
pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled");
 
return enabled;
@@ -38,12 +46,19 @@ bool is_ppc_trustedboot_enabled(void)
 {
struct device_node *node;
bool enabled = false;
+   u32 trustedboot;
 
node = get_ppc_fw_sb_node();
enabled = of_property_read_bool(node, "trusted-enabled");
-
of_node_put(node);
 
+   if (enabled)
+   goto out;
+
+   if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot))
+   enabled = (trustedboot > 0);
+
+out:
pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled");
 
return enabled;
-- 
2.26.2



Re: [PATCH v2] powerpc: select ARCH_HAS_MEMBARRIER_SYNC_CORE

2020-07-15 Thread Andreas Schwab
On Jul 15 2020, Nicholas Piggin wrote:

> diff --git a/arch/powerpc/include/asm/exception-64e.h 
> b/arch/powerpc/include/asm/exception-64e.h
> index 54a98ef7d7fe..071d7ccb830f 100644
> --- a/arch/powerpc/include/asm/exception-64e.h
> +++ b/arch/powerpc/include/asm/exception-64e.h
> @@ -204,7 +204,11 @@ exc_##label##_book3e:
>   LOAD_REG_ADDR(r3,interrupt_base_book3e);\
>   ori r3,r3,vector_offset@l;  \
>   mtspr   SPRN_IVOR##vector_number,r3;
> -
> +/*
> + * powerpc relies on return from interrupt/syscall being context 
> synchronising
> + * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
> + * additional synchronisation instructions.

s/additonal//

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


Re: [PATCH v2 06/13] cpufreq: powernv-cpufreq: Functions only used in call-backs should be static

2020-07-15 Thread Lee Jones
On Wed, 15 Jul 2020, Viresh Kumar wrote:

> On 15-07-20, 09:26, Lee Jones wrote:
> > Fixes the following W=1 kernel build warning(s):
> > 
> >  drivers/cpufreq/powernv-cpufreq.c:669:6: warning: no previous prototype 
> > for ‘gpstate_timer_handler’ [-Wmissing-prototypes]
> >  drivers/cpufreq/powernv-cpufreq.c:902:6: warning: no previous prototype 
> > for ‘powernv_cpufreq_work_fn’ [-Wmissing-prototypes]
> > 
> > Cc: Michael Ellerman 
> > Cc: Benjamin Herrenschmidt 
> > Cc: Paul Mackerras 
> > Cc: linuxppc-dev@lists.ozlabs.org
> > Signed-off-by: Lee Jones 
> > Acked-by: Viresh Kumar 
> > ---
> >  drivers/cpufreq/powernv-cpufreq.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/cpufreq/powernv-cpufreq.c 
> > b/drivers/cpufreq/powernv-cpufreq.c
> > index 8646eb197cd96..068cc53abe320 100644
> > --- a/drivers/cpufreq/powernv-cpufreq.c
> > +++ b/drivers/cpufreq/powernv-cpufreq.c
> > @@ -666,7 +666,7 @@ static inline void  queue_gpstate_timer(struct 
> > global_pstate_info *gpstates)
> >   * according quadratic equation. Queues a new timer if it is still not 
> > equal
> >   * to local pstate
> >   */
> > -void gpstate_timer_handler(struct timer_list *t)
> > +static void gpstate_timer_handler(struct timer_list *t)
> >  {
> > struct global_pstate_info *gpstates = from_timer(gpstates, t, timer);
> > struct cpufreq_policy *policy = gpstates->policy;
> > @@ -899,7 +899,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb 
> > = {
> > .notifier_call = powernv_cpufreq_reboot_notifier,
> >  };
> >  
> > -void powernv_cpufreq_work_fn(struct work_struct *work)
> > +static void powernv_cpufreq_work_fn(struct work_struct *work)
> >  {
> > struct chip *chip = container_of(work, struct chip, throttle);
> > struct cpufreq_policy *policy;
> 
> Don't you want to drop this patch now ? As you already reviewed the
> other one on the list ?

Yes, please drop/ignore.

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog


[PATCH v2] powerpc: select ARCH_HAS_MEMBARRIER_SYNC_CORE

2020-07-15 Thread Nicholas Piggin
powerpc return from interrupt and return from system call sequences are
context synchronising.

Signed-off-by: Nicholas Piggin 
---
v2: add more comments

 .../features/sched/membarrier-sync-core/arch-support.txt   | 4 ++--
 arch/powerpc/Kconfig   | 1 +
 arch/powerpc/include/asm/exception-64e.h   | 6 +-
 arch/powerpc/include/asm/exception-64s.h   | 7 +++
 arch/powerpc/kernel/entry_32.S | 6 ++
 5 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt 
b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
index 8a521a622966..52ad74a25f54 100644
--- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -5,7 +5,7 @@
 #
 # Architecture requirements
 #
-# * arm/arm64
+# * arm/arm64/powerpc
 #
 # Rely on implicit context synchronization as a result of exception return
 # when returning from IPI handler, and when returning to user-space.
@@ -45,7 +45,7 @@
 |   nios2: | TODO |
 |openrisc: | TODO |
 |  parisc: | TODO |
-| powerpc: | TODO |
+| powerpc: |  ok  |
 |   riscv: | TODO |
 |s390: | TODO |
 |  sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff..920c4e3ca4ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -131,6 +131,7 @@ config PPC
select ARCH_HAS_PTE_DEVMAP  if PPC_BOOK3S_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
+   select ARCH_HAS_MEMBARRIER_SYNC_CORE
	select ARCH_HAS_SCALED_CPUTIME	if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
select ARCH_HAS_STRICT_KERNEL_RWX   if (PPC32 && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
diff --git a/arch/powerpc/include/asm/exception-64e.h 
b/arch/powerpc/include/asm/exception-64e.h
index 54a98ef7d7fe..071d7ccb830f 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -204,7 +204,11 @@ exc_##label##_book3e:
LOAD_REG_ADDR(r3,interrupt_base_book3e);\
ori r3,r3,vector_offset@l;  \
mtspr   SPRN_IVOR##vector_number,r3;
-
+/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * additional synchronisation instructions.
+ */
 #define RFI_TO_KERNEL  \
rfi
 
diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 47bd4ea0837d..a4704f405e8d 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -68,6 +68,13 @@
  *
  * The nop instructions allow us to insert one or more instructions to flush the
  * L1-D cache when returning to userspace or a guest.
+ *
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which hrfid, rfid, and rfscv are) to support ARCH_HAS_MEMBARRIER_SYNC_CORE
+ * without additional additional synchronisation instructions. soft-masked
+ * interrupt replay does not include a context-synchronising rfid, but those
+ * always return to kernel, the context sync is only required for IPIs which
+ * return to user.
  */
 #define RFI_FLUSH_SLOT \
RFI_FLUSH_FIXUP_SECTION;\
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 217ebdf5b00b..23bb7352e7c3 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -35,6 +35,12 @@
 
 #include "head_32.h"
 
+/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * additional synchronisation instructions.
+ */
+
 /*
  * Align to 4k in order to ensure that all functions modyfing srr0/srr1
  * fit into one page in order to not encounter a TLB miss between the
-- 
2.23.0



Re: [PATCH v2 06/13] cpufreq: powernv-cpufreq: Functions only used in call-backs should be static

2020-07-15 Thread Viresh Kumar
On 15-07-20, 09:26, Lee Jones wrote:
> Fixes the following W=1 kernel build warning(s):
> 
>  drivers/cpufreq/powernv-cpufreq.c:669:6: warning: no previous prototype for 
> ‘gpstate_timer_handler’ [-Wmissing-prototypes]
>  drivers/cpufreq/powernv-cpufreq.c:902:6: warning: no previous prototype for 
> ‘powernv_cpufreq_work_fn’ [-Wmissing-prototypes]
> 
> Cc: Michael Ellerman 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones 
> Acked-by: Viresh Kumar 
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/powernv-cpufreq.c 
> b/drivers/cpufreq/powernv-cpufreq.c
> index 8646eb197cd96..068cc53abe320 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -666,7 +666,7 @@ static inline void  queue_gpstate_timer(struct 
> global_pstate_info *gpstates)
>   * according quadratic equation. Queues a new timer if it is still not equal
>   * to local pstate
>   */
> -void gpstate_timer_handler(struct timer_list *t)
> +static void gpstate_timer_handler(struct timer_list *t)
>  {
>   struct global_pstate_info *gpstates = from_timer(gpstates, t, timer);
>   struct cpufreq_policy *policy = gpstates->policy;
> @@ -899,7 +899,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
>   .notifier_call = powernv_cpufreq_reboot_notifier,
>  };
>  
> -void powernv_cpufreq_work_fn(struct work_struct *work)
> +static void powernv_cpufreq_work_fn(struct work_struct *work)
>  {
>   struct chip *chip = container_of(work, struct chip, throttle);
>   struct cpufreq_policy *policy;

Don't you want to drop this patch now ? As you already reviewed the
other one on the list ?

-- 
viresh


[PATCH 1/1] ASoC: fsl: fsl-asoc-card: Trivial: Fix misspelling of 'exists'

2020-07-15 Thread Lee Jones
Cc: Timur Tabi 
Cc: Nicolin Chen 
Cc: Xiubo Li 
Cc: Fabio Estevam 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Lee Jones 
---
 sound/soc/fsl/fsl-asoc-card.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
index faac6ce9a82cb..399c9aad48b1d 100644
--- a/sound/soc/fsl/fsl-asoc-card.c
+++ b/sound/soc/fsl/fsl-asoc-card.c
@@ -92,8 +92,7 @@ struct fsl_asoc_card_priv {
 };
 
 /*
- * This dapm route map exits for DPCM link only.
+ * This dapm route map exists for DPCM link only.
  * The other routes shall go through Device Tree.
  *
  * Note: keep all ASRC routes in the second half
-- 
2.25.1



Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state

2020-07-15 Thread Oliver O'Halloran
On Wed, Jul 15, 2020 at 5:05 PM Cédric Le Goater  wrote:
>
> I could but can we fix the issue below before I reboot ?  I don't have a
> console anymore on these boxes.
>
> Firmware is :
> *snip*

Do you know when that started happening? I don't think anything
console related has changed in a very long time, but we probably
haven't tested it on p7 in even longer.


[PATCH v2 07/13] cpufreq: powernv-cpufreq: Fix a bunch of kerneldoc related issues

2020-07-15 Thread Lee Jones
Repair problems with formatting and missing attributes/parameters, and
demote header comments which do not meet the required standards
applicable to kerneldoc.

Fixes the following W=1 kernel build warning(s):

 drivers/cpufreq/powernv-cpufreq.c:84: warning: Function parameter or member 
'last_lpstate_idx' not described in 'global_pstate_info'
 drivers/cpufreq/powernv-cpufreq.c:84: warning: Function parameter or member 
'last_gpstate_idx' not described in 'global_pstate_info'
 drivers/cpufreq/powernv-cpufreq.c:84: warning: Function parameter or member 
'policy' not described in 'global_pstate_info'
 drivers/cpufreq/powernv-cpufreq.c:182: warning: Function parameter or member 
'i' not described in 'idx_to_pstate'
 drivers/cpufreq/powernv-cpufreq.c:201: warning: Function parameter or member 
'pstate' not described in 'pstate_to_idx'
 drivers/cpufreq/powernv-cpufreq.c:670: warning: Function parameter or member 
't' not described in 'gpstate_timer_handler'
 drivers/cpufreq/powernv-cpufreq.c:670: warning: Excess function parameter 
'data' description in 'gpstate_timer_handler'
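
For reference, the form kerneldoc expects, sketched against the hunks
below (field list abridged):

/**
 * struct global_pstate_info - per-policy global-pstate bookkeeping
 * @last_lpstate_idx:	last set local pstate, as a cpufreq table index
 * @last_gpstate_idx:	last set global pstate, as a cpufreq table index
 * @policy:		associated cpufreq policy
 */
struct global_pstate_info;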

Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Lee Jones 
Acked-by: Viresh Kumar 
---
 drivers/cpufreq/powernv-cpufreq.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c 
b/drivers/cpufreq/powernv-cpufreq.c
index 068cc53abe320..2e5a8b8a4abaa 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -64,13 +64,14 @@
  * highest_lpstate_idx
  * @last_sampled_time: Time from boot in ms when global pstates were
  * last set
- * @last_lpstate_idx,  Last set value of local pstate and global
- * last_gpstate_idx    pstate in terms of cpufreq table index
+ * @last_lpstate_idx:  Last set value of local pstate and global
+ * @last_gpstate_idx:  pstate in terms of cpufreq table index
  * @timer: Is used for ramping down if cpu goes idle for
  * a long time with global pstate held high
  * @gpstate_lock:  A spinlock to maintain synchronization between
  * routines called by the timer handler and
  * governer's target_index calls
+ * @policy:Associated CPUFreq policy
  */
 struct global_pstate_info {
int highest_lpstate_idx;
@@ -170,7 +171,7 @@ static inline u8 extract_pstate(u64 pmsr_val, unsigned int 
shift)
 
 /* Use following functions for conversions between pstate_id and index */
 
-/**
+/*
  * idx_to_pstate : Returns the pstate id corresponding to the
  *frequency in the cpufreq frequency table
  *powernv_freqs indexed by @i.
@@ -188,7 +189,7 @@ static inline u8 idx_to_pstate(unsigned int i)
return powernv_freqs[i].driver_data;
 }
 
-/**
+/*
  * pstate_to_idx : Returns the index in the cpufreq frequencytable
  *powernv_freqs for the frequency whose corresponding
  *pstate id is @pstate.
@@ -660,7 +661,7 @@ static inline void  queue_gpstate_timer(struct 
global_pstate_info *gpstates)
 /**
  * gpstate_timer_handler
  *
- * @data: pointer to cpufreq_policy on which timer was queued
+ * @t: Timer context used to fetch global pstate info struct
  *
  * This handler brings down the global pstate closer to the local pstate
  * according quadratic equation. Queues a new timer if it is still not equal
-- 
2.25.1



[PATCH v2 05/13] cpufreq: pasemi: Include header file for {check, restore}_astate prototypes

2020-07-15 Thread Lee Jones
If function callers and providers do not share the same prototypes the
compiler complains of missing prototypes.  Fix this by including the
correct platforms header file.

Fixes the following W=1 kernel build warning(s):

 drivers/cpufreq/pasemi-cpufreq.c:109:5: warning: no previous prototype for 
‘check_astate’ [-Wmissing-prototypes]
 109 | int check_astate(void)
 | ^~~~
 drivers/cpufreq/pasemi-cpufreq.c:114:6: warning: no previous prototype for 
‘restore_astate’ [-Wmissing-prototypes]
 114 | void restore_astate(int cpu)
 | ^~

Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
Suggested-by: Olof Johansson 
Signed-off-by: Lee Jones 
---
 drivers/cpufreq/pasemi-cpufreq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index c66f566a854cb..815645170c4de 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -22,6 +22,8 @@
 #include 
 #include 
 
+#include 
+
 #define SDCASR_REG 0x0100
 #define SDCASR_REG_STRIDE  0x1000
 #define SDCPWR_CFGA0_REG   0x0100
-- 
2.25.1



[PATCH v2 06/13] cpufreq: powernv-cpufreq: Functions only used in call-backs should be static

2020-07-15 Thread Lee Jones
Fixes the following W=1 kernel build warning(s):

 drivers/cpufreq/powernv-cpufreq.c:669:6: warning: no previous prototype for 
‘gpstate_timer_handler’ [-Wmissing-prototypes]
 drivers/cpufreq/powernv-cpufreq.c:902:6: warning: no previous prototype for 
‘powernv_cpufreq_work_fn’ [-Wmissing-prototypes]

Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Lee Jones 
Acked-by: Viresh Kumar 
---
 drivers/cpufreq/powernv-cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c 
b/drivers/cpufreq/powernv-cpufreq.c
index 8646eb197cd96..068cc53abe320 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -666,7 +666,7 @@ static inline void  queue_gpstate_timer(struct 
global_pstate_info *gpstates)
  * according quadratic equation. Queues a new timer if it is still not equal
  * to local pstate
  */
-void gpstate_timer_handler(struct timer_list *t)
+static void gpstate_timer_handler(struct timer_list *t)
 {
struct global_pstate_info *gpstates = from_timer(gpstates, t, timer);
struct cpufreq_policy *policy = gpstates->policy;
@@ -899,7 +899,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
.notifier_call = powernv_cpufreq_reboot_notifier,
 };
 
-void powernv_cpufreq_work_fn(struct work_struct *work)
+static void powernv_cpufreq_work_fn(struct work_struct *work)
 {
struct chip *chip = container_of(work, struct chip, throttle);
struct cpufreq_policy *policy;
-- 
2.25.1



Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state

2020-07-15 Thread Cédric Le Goater
On 7/15/20 5:33 AM, Alexey Kardashevskiy wrote:
> 
> 
> On 15/07/2020 11:38, Oliver O'Halloran wrote:
>> On Tue, Jul 14, 2020 at 5:21 PM Alexey Kardashevskiy  wrote:
>>>
>>> On 14/07/2020 15:58, Oliver O'Halloran wrote:
 On Tue, Jul 14, 2020 at 3:37 PM Alexey Kardashevskiy  
 wrote:
>
> On 10/07/2020 15:23, Oliver O'Halloran wrote:
>> This also means the only remaining user of the old "DMA Weight" code is
>> the IODA1 DMA setup code that it was originally added for, which is good.
>
>
> Is ditching IODA1 in the plan? :)

 That or separating out the pci_controller_ops for IODA1 and IODA2 so
 we can stop any IODA2 specific changes from breaking it.
>>>
>>> Is IODA1 tested at all these days? Or, is anyone running upstream
>>> kernels anywhere and keeps shouting when it does not work on IODA1? Thanks,
>>
>> Cedric has a P7 with OPAL. That's probably the only one left though.
> 
> Has he tried these patches on that box? Or we hope for the best here? :)

I could, but can we fix the issue below before I reboot? I don't have a
console anymore on these boxes.

Firmware is:

root@amure:~# dtc -I fs /proc/device-tree/ibm,opal/firmware/ -f
: ERROR (name_properties): /: "name" property is incorrect ("firmware" 
instead of base node name)
Warning: Input tree has errors, output forced
/dts-v1/;

/ {
git-id = "34b3400";
ml-version = [4d 4c 20 46 57 37 37 30 2e 32 30 20 46 57 37 37 30 2e 32 
30 20 46 57 37 37 30 2e 32 30];
compatible = "ibm,opal-firmware";
phandle = <0x4d>;
mi-version = <0x4d49205a 0x4c373730 0x5f303735 0x205a4c37 0x37305f30 
0x3735205a 0x4c373730 0x5f303735>;
linux,phandle = <0x4d>;
name = "firmware";
};

I'd rather not change it if possible.


C.

[1.979581] [ cut here ]
[1.979582] opal: OPAL_CONSOLE_FLUSH missing.
[1.979583] WARNING: CPU: 0 PID: 253 at 
arch/powerpc/platforms/powernv/opal.c:446 .__opal_flush_console+0xfc/0x110
[1.979584] Modules linked in: ipr(E+) ptp(E) usb_common(E) pps_core(E)
[1.979587] CPU: 0 PID: 253 Comm: udevadm Tainted: GE 
5.4.0-4-powerpc64 #1 Debian 5.4.19-1
[1.979588] NIP:  c00d10ec LR: c00d10e8 CTR: c0b13510
[1.979589] REGS: c381f130 TRAP: 0700   Tainted: GE  
(5.4.0-4-powerpc64 Debian 5.4.19-1)
[1.979590] MSR:  90021032   CR: 28002282  XER: 
2000
[1.979594] CFAR: c0157d2c IRQMASK: 3 
[1.979595] GPR00: c00d10e8 c381f3c0 c1618700 
0022 
[1.979598] GPR04: c0c95df2 0002 414c5f434f4e534f 
4c455f464c555348 
[1.979601] GPR08: 0003 0003 0001 
90001032 
[1.979604] GPR12: c00d0818 c182  
c14342a8 
[1.979607] GPR16: c173b850 c148b218 00011a2d5db8 
 
[1.979609] GPR20:  c4b50e00  
c173e208 
[1.979612] GPR24: c173bde8  c148b1d8 
c16620e0 
[1.979615] GPR28: c17f7c40   
 
[1.979618] NIP [c00d10ec] .__opal_flush_console+0xfc/0x110
[1.979618] LR [c00d10e8] .__opal_flush_console+0xf8/0x110
[1.979619] Call Trace:
[1.979620] [c381f3c0] [c00d10e8] 
.__opal_flush_console+0xf8/0x110 (unreliable)
[1.979621] [c381f450] [c00d1428] .opal_flush_chars+0x38/0xc0
[1.979623] [c381f4d0] [c07680a8] 
.hvc_console_print+0x188/0x2d0
[1.979624] [c381f5b0] [c01eff08] .console_unlock+0x348/0x720
[1.979625] [c381f6c0] [c01f268c] .vprintk_emit+0x27c/0x3a0
[1.979626] [c381f780] [c07af2f4] 
.dev_vprintk_emit+0x208/0x258
[1.979628] [c381f8e0] [c07af38c] .dev_printk_emit+0x48/0x58
[1.979629] [c381f950] [c07af748] ._dev_err+0x6c/0x9c
[1.979630] [c381fa00] [c07aaff8] .uevent_store+0x78/0x80
[1.979631] [c381fa90] [c07a8ce4] .dev_attr_store+0x64/0x90
[1.979633] [c381fb20] [c054becc] .sysfs_kf_write+0x7c/0xa0
[1.979634] [c381fbb0] [c054b294] 
.kernfs_fop_write+0x114/0x270
[1.979635] [c381fc50] [c0456b58] .__vfs_write+0x68/0xe0
[1.979636] [c381fce0] [c0457e44] .vfs_write+0xc4/0x270
[1.979638] [c381fd80] [c045adc4] .ksys_write+0x84/0x140
[1.979639] [c381fe20] [c000c050] system_call+0x5c/0x68
[1.979640] Instruction dump:
[1.979641] 3be0fffe 4bffb581 6000 4b90 6000 3c62ff68 3921 
3d42ffea 
[1.979644] 3863d6d0 992a9d98 48086be1 6000 <0fe0> 4b50 480867ad 
6000 
[1.979648] ---[ end trace 34198c4c2c15e0e2 ]---


Re: [v3 3/5] KVM: PPC: Book3S HV: migrate remaining normal-GFNs to secure-GFNs in H_SVM_INIT_DONE

2020-07-15 Thread Bharata B Rao
On Tue, Jul 14, 2020 at 10:05:41PM -0700, Ram Pai wrote:
> On Mon, Jul 13, 2020 at 03:15:06PM +0530, Bharata B Rao wrote:
> > On Sat, Jul 11, 2020 at 02:13:45AM -0700, Ram Pai wrote:
> > > The Ultravisor is expected to explicitly call H_SVM_PAGE_IN for all the 
> > > pages
> > >  
> > >   if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
> > > - ret = -1;
> > > + ret = -2;
> > 
> > migrate_vma_setup() has marked that this pfn can't be migrated. What
> > transient errors are you observing which will disappear within 10
> > retries?
> > 
> > Also till now when UV used to pull in all the pages, we never seemed to
> > have hit these transient errors. But now when HV is pushing the same
> > pages, we see these errors which are disappearing after 10 retries.
> > Can you explain this more please? What sort of pages are these?
> 
> We did see them even before this patch. The retry alleviates the
> problem, but does not entirely eliminate it. If the chance of seeing
> the issue without the patch is 1%,  the chance of seeing this issue
> with this patch becomes 0.25%.

Okay, but maybe we should investigate the problem a bit more to
understand why the page migrations are failing before taking this
route?
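
(As a sanity check on those numbers: if each failure were an independent ~1% transient, ten retries would push the residual failure rate to roughly 10^-20, not 0.25%. A drop from 1% to only 0.25% looks more like correlated or persistent failures than transient ones.)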

> 
> > 
> > >   goto out_finalize;
> > >   }
> > > + bool retry = 0;
> ...snip...
> > > +
> > > + *ret = 0;
> > > + while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) {
> > > +
> > > + down_write(&kvm->mm->mmap_sem);
> > 
> > Acquiring and releasing mmap_sem in a loop? Any reason?
> > 
> > Now that you have moved ksm_madvise() calls to init time, any specific
> > reason to take write mmap_sem here?
> 
> The semaphore protects the vma, right?

We took the write lock just for ksm_madvise() and then downgraded to
read. Now that you are moving that to init time, the read lock is sufficient here.
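
Something along these lines, as a minimal sketch, if holding the lock
across the whole scan is acceptable. kvmppc_next_nontransitioned_gfn()
is the helper from the patch; kvmppc_svm_migrate_gfn() is a hypothetical
stand-in for the patch's migration body:

	int ret = 0;

	/*
	 * Take the read side once around the whole scan; the vma is
	 * only read during migration, so the write lock is not needed
	 * once ksm_madvise() has moved to init time.
	 */
	down_read(&kvm->mm->mmap_sem);
	while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) {
		/* hypothetical helper wrapping the migration body */
		ret = kvmppc_svm_migrate_gfn(kvm, memslot, gfn);
		if (ret)
			break;
	}
	up_read(&kvm->mm->mmap_sem);

That shape also avoids the repeated acquire/release per gfn.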

Regards,
Bharata.


Re: [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting

2020-07-15 Thread Alexey Kardashevskiy



On 15/07/2020 16:16, Oliver O'Halloran wrote:
> On Wed, Jul 15, 2020 at 3:24 PM Alexey Kardashevskiy  wrote:
>>
>>
>>> @@ -158,9 +157,9 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>   goto disable_iov;
>>>   pdev->dev.archdata.iov_data = iov;
>>>
>>> + /* FIXME: totalvfs > phb->ioda.total_pe_num is going to be a problem */
>>
>>
>> WARN_ON_ONCE() then?
> 
> can't hurt
> 
>>> @@ -173,50 +172,51 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>   goto disable_iov;
>>>   }
>>>
>>> - total_vf_bar_sz += pci_iov_resource_size(pdev,
>>> - i + PCI_IOV_RESOURCES);
>>> + vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>
>>>   /*
>>> -  * If bigger than quarter of M64 segment size, just round up
>>> -  * power of two.
>>> +  * Generally, one segmented M64 BAR maps one IOV BAR. However,
>>> +  * if a VF BAR is too large we end up wasting a lot of space.
>>> +  * If we've got a BAR that's bigger than greater than 1/4 of the
>>
>>
>> bigger, greater, huger? :)
>>
>> Also, a nit: s/got a BAR/got a VF BAR/
> 
> whatever, it's just words

You are talking about these BARs and those BARs, and since we want "to
help out the next sucker^Wperson who needs to tinker with it", using a
precise term is kinda essential here.


> 
>>> +  * default window's segment size then switch to using single PE
>>> +  * windows. This limits the total number of VFs we can support.
>>
>> Just to get an idea of the absolute numbers here.
>>
>> On my P9:
>>
>> ./pciex@600c3c030/ibm,opal-m64-window
>>  00060200  00060200  0040 
>>
>> so the default window's segment size is 0x40../512 = 512MB?
> 
> Yeah. It'll vary a bit since PHB3 and some PHB4s have 256.
> 
>>>*
>>> -  * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
>>> -  * with other devices, IOV BAR size is expanded to be
>>> -  * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
>>> -  * segment size , the expanded size would equal to half of the
>>> -  * whole M64 space size, which will exhaust the M64 Space and
>>> -  * limit the system flexibility.  This is a design decision to
>>> -  * set the boundary to quarter of the M64 segment size.
>>> +  * The 1/4 limit is arbitrary and can be tweaked.
>>>*/
>>> - if (total_vf_bar_sz > gate) {
>>> - mul = roundup_pow_of_two(total_vfs);
>>> - dev_info(&pdev->dev,
>>> - "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
>>> - total_vf_bar_sz, gate, mul);
>>> - iov->m64_single_mode = true;
>>> - break;
>>> - }
>>> - }
>>> + if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
>>> + /*
>>> +  * On PHB3, the minimum size alignment of M64 BAR in
>>> +  * single mode is 32MB. If this VF BAR is smaller than
>>> +  * 32MB, but still too large for a segmented window
>>> +  * then we can't map it and need to disable SR-IOV for
>>> +  * this device.
>>
>>
>> Why not use single PE mode for such BAR? Better than nothing.
> 
> Suppose you could, but I figured VFs were mainly interesting since you
> could give each VF to a separate guest. If there are multiple VFs under
> the same single PE BAR then they'd have to be assigned to the same

True. But with one PE per VF we can still have 15 (or 14?) isolated VFs,
which is not hundreds but is better than 0.


> guest in order to retain the freeze/unfreeze behaviour that PAPR
> requires. I guess that's how it used to work, but it seems better just
> to disable them rather than having VFs which sort of work.

Well, realistically the segment size would have to be 8MB (or the whole
window 2GB) for this to matter, which does not seem to happen, so it
does not matter.
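
(To put numbers on the gate from the hunk above: with the 512MB segment
size quoted earlier, m64_segsize >> 2 works out to 128MB, so any VF BAR
above 128MB flips that IOV BAR into single PE mode. The PHB3 disable
case, a VF BAR over the gate yet under the 32MB single-mode alignment,
can only occur when the segment size is below 128MB.)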


-- 
Alexey


Re: [v3 1/5] KVM: PPC: Book3S HV: Disable page merging in H_SVM_INIT_START

2020-07-15 Thread Bharata B Rao
On Tue, Jul 14, 2020 at 10:16:14PM -0700, Ram Pai wrote:
> On Mon, Jul 13, 2020 at 10:59:41AM +0530, Bharata B Rao wrote:
> > On Sat, Jul 11, 2020 at 02:13:43AM -0700, Ram Pai wrote:
> > > Merging of pages associated with each memslot of an SVM is
> > > disabled when the page is migrated in the H_SVM_PAGE_IN handler.
> > > 
> > > This operation should have been done much earlier: the moment the VM
> > > is initiated for secure-transition. Delaying this operation increases
> > > the probability of those pages acquiring new references, making it
> > > impossible to migrate those pages in the H_SVM_PAGE_IN handler.
> > > 
> > > Disable page-merging in H_SVM_INIT_START handling.
> > 
> > While it is a good idea to disable KSM merging for all VMAs during
> > H_SVM_INIT_START, I am curious whether you did observe an actual case of
> > ksm_madvise() failing, which resulted in subsequent H_SVM_PAGE_IN
> > failing to migrate?
> 
> No. I did not find any ksm_madvise() failing. But it did not make sense
> to call ksm_madvise() every time a page_in was requested. Hence I proposed
> this patch. H_SVM_INIT_START is the right place for ksm_madvise().

Indeed yes. Then you may want to update the description, which currently
seems to imply that this change is being done to avoid issues arising
out of delayed KSM unmerging advice.

> 
> > 
> > > 
> > > Signed-off-by: Ram Pai 
> > > ---
> > >  arch/powerpc/kvm/book3s_hv_uvmem.c | 96 +-
> > >  1 file changed, 74 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> > > index 3d987b1..bfc3841 100644
> > > --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> > > +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> > > @@ -211,6 +211,65 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
> > >   return false;
> > >  }
> > >  
> > > +static int kvmppc_memslot_page_merge(struct kvm *kvm,
> > > + struct kvm_memory_slot *memslot, bool merge)
> > > +{
> > > + unsigned long gfn = memslot->base_gfn;
> > > + unsigned long end, start = gfn_to_hva(kvm, gfn);
> > > + int ret = 0;
> > > + struct vm_area_struct *vma;
> > > + int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
> > > +
> > > + if (kvm_is_error_hva(start))
> > > + return H_STATE;
> > 
> > This and other cases below seem to be a new return value from
> > H_SVM_INIT_START. May be update the documentation too along with
> > this patch?
> 
> ok.
> 
> > 
> > > +
> > > + end = start + (memslot->npages << PAGE_SHIFT);
> > > +
> > > + down_write(&kvm->mm->mmap_sem);
> > 
> > When you rebase the patches against latest upstream you may want to
> > replace the above and other instances by mmap_write/read_lock().
> 
> ok.
> 
> > 
> > > + do {
> > > + vma = find_vma_intersection(kvm->mm, start, end);
> > > + if (!vma) {
> > > + ret = H_STATE;
> > > + break;
> > > + }
> > > + ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> > > +   merge_flag, &vma->vm_flags);
> > > + if (ret) {
> > > + ret = H_STATE;
> > > + break;
> > > + }
> > > + start = vma->vm_end + 1;
> > > + } while (end > vma->vm_end);
> > > +
> > > + up_write(&kvm->mm->mmap_sem);
> > > + return ret;
> > > +}
> > > +
> > > +static int __kvmppc_page_merge(struct kvm *kvm, bool merge)
> > > +{
> > > + struct kvm_memslots *slots;
> > > + struct kvm_memory_slot *memslot;
> > > + int ret = 0;
> > > +
> > > + slots = kvm_memslots(kvm);
> > > + kvm_for_each_memslot(memslot, slots) {
> > > + ret = kvmppc_memslot_page_merge(kvm, memslot, merge);
> > > + if (ret)
> > > + break;
> > > + }
> > > + return ret;
> > > +}
> > > +
> > > +static inline int kvmppc_disable_page_merge(struct kvm *kvm)
> > > +{
> > > + return __kvmppc_page_merge(kvm, false);
> > > +}
> > > +
> > > +static inline int kvmppc_enable_page_merge(struct kvm *kvm)
> > > +{
> > > + return __kvmppc_page_merge(kvm, true);
> > > +}
> > > +
> > >  unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> > >  {
> > >   struct kvm_memslots *slots;
> > > @@ -232,11 +291,18 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
> > >   return H_AUTHORITY;
> > >  
> > >   srcu_idx = srcu_read_lock(&kvm->srcu);
> > > +
> > > + /* disable page-merging for all memslot */
> > > + ret = kvmppc_disable_page_merge(kvm);
> > > + if (ret)
> > > + goto out;
> > > +
> > > + /* register the memslot */
> > >   slots = kvm_memslots(kvm);
> > >   kvm_for_each_memslot(memslot, slots) {
> > >   if (kvmppc_uvmem_slot_init(kvm, memslot)) {
> > >   ret = H_PARAMETER;
> > > - goto out;
> > > + break;
> > >   }
> > >   ret = uv_register_mem_slot(kvm->arch.lpid,
> > >  memslot->base_gfn << PAGE_SHIFT,
> > > @@ -245,9 +31

Re: [PATCH 05/13] cpufreq/arch: powerpc: pasemi: Move prototypes to shared header

2020-07-15 Thread Lee Jones
On Tue, 14 Jul 2020, Olof Johansson wrote:

> On Tue, Jul 14, 2020 at 11:33 PM Lee Jones  wrote:
> >
> > On Tue, 14 Jul 2020, Olof Johansson wrote:
> >
> > > On Tue, Jul 14, 2020 at 7:50 AM Lee Jones  wrote:
> > > >
> > > > If function callers and providers do not share the same prototypes, the
> > > > compiler complains of missing prototypes.  Fix this by moving the
> > > > already existing prototypes out to a mutually convenient location.
> > > >
> > > > Fixes the following W=1 kernel build warning(s):
> > > >
> > > >  drivers/cpufreq/pasemi-cpufreq.c:109:5: warning: no previous prototype for ‘check_astate’ [-Wmissing-prototypes]
> > > >  109 | int check_astate(void)
> > > >  | ^~~~
> > > >  drivers/cpufreq/pasemi-cpufreq.c:114:6: warning: no previous prototype for ‘restore_astate’ [-Wmissing-prototypes]
> > > >  114 | void restore_astate(int cpu)
> > > >  | ^~
> > > >
> > > > Cc: Olof Johansson 
> > > > Cc: Michael Ellerman 
> > > > Cc: Benjamin Herrenschmidt 
> > > > Cc: Paul Mackerras 
> > > > Cc: linuxppc-dev@lists.ozlabs.org
> > > > Signed-off-by: Lee Jones 
> > > > ---
> > > >  arch/powerpc/platforms/pasemi/pasemi.h| 15 
> > > >  arch/powerpc/platforms/pasemi/powersave.S |  2 ++
> > > >  drivers/cpufreq/pasemi-cpufreq.c  |  1 +
> > > >  include/linux/platform_data/pasemi.h  | 28 +++
> > > >  4 files changed, 31 insertions(+), 15 deletions(-)
> > > >  create mode 100644 include/linux/platform_data/pasemi.h
> > > >
> > > > diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
> > > > index 70b56048ed1be..528d81ef748ad 100644
> > > > --- a/arch/powerpc/platforms/pasemi/pasemi.h
> > > > +++ b/arch/powerpc/platforms/pasemi/pasemi.h
> > > > @@ -15,21 +15,6 @@ extern void __init pasemi_map_registers(void);
> > > >  extern void idle_spin(void);
> > > >  extern void idle_doze(void);
> > > >
> > > > -/* Restore astate to last set */
> > > > -#ifdef CONFIG_PPC_PASEMI_CPUFREQ
> > > > -extern int check_astate(void);
> > > > -extern void restore_astate(int cpu);
> > > > -#else
> > > > -static inline int check_astate(void)
> > > > -{
> > > > -   /* Always return >0 so we never power save */
> > > > -   return 1;
> > > > -}
> > > > -static inline void restore_astate(int cpu)
> > > > -{
> > > > -}
> > > > -#endif
> > > > -
> > > >  extern struct pci_controller_ops pasemi_pci_controller_ops;
> > > >
> > > >  #endif /* _PASEMI_PASEMI_H */
> > > > diff --git a/arch/powerpc/platforms/pasemi/powersave.S b/arch/powerpc/platforms/pasemi/powersave.S
> > > > index d0215d5329ca7..7747b48963286 100644
> > > > --- a/arch/powerpc/platforms/pasemi/powersave.S
> > > > +++ b/arch/powerpc/platforms/pasemi/powersave.S
> > > > @@ -5,6 +5,8 @@
> > > >   * Maintained by: Olof Johansson 
> > > >   */
> > > >
> > > > +#include 
> > > > +
> > > >  #include 
> > > >  #include 
> > > >  #include 
> > > > diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
> > > > index c66f566a854cb..c6bb3ecc90ef3 100644
> > > > --- a/drivers/cpufreq/pasemi-cpufreq.c
> > > > +++ b/drivers/cpufreq/pasemi-cpufreq.c
> > > > @@ -15,6 +15,7 @@
> > > >  #include 
> > > >  #include 
> > > >  #include 
> > > > +#include 
> > > >
> > > >  #include 
> > > >  #include 
> > > > diff --git a/include/linux/platform_data/pasemi.h b/include/linux/platform_data/pasemi.h
> > > > new file mode 100644
> > > > index 0..3fed0687fcc9a
> > > > --- /dev/null
> > > > +++ b/include/linux/platform_data/pasemi.h
> > > > @@ -0,0 +1,28 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > > +/*
> > > > + * Copyright (C) 2020 Linaro Ltd.
> > > > + *
> > > > + * Author: Lee Jones 
> > > > + */
> > >
> > > Absolutely not. It's neither your copyright, nor your authorship.
> >
> > The file was new.  Anyway, the point is now moot.
> 
> The contents were copied and pasted from other material, not originally
> produced by you.
> 
> I suggest you consult with Linaro lawyers on how to handle this if you
> have to do something like it in the future.

Very well.  Thanks for the heads-up.
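
For reference, the portion of the new include/linux/platform_data/pasemi.h
trimmed from the quote above presumably mirrors the block removed from
pasemi.h earlier in the thread, roughly as follows (the include guard
name is a guess):

	#ifndef _LINUX_PLATFORM_DATA_PASEMI_H
	#define _LINUX_PLATFORM_DATA_PASEMI_H

	/* Restore astate to last set */
	#ifdef CONFIG_PPC_PASEMI_CPUFREQ
	int check_astate(void);
	void restore_astate(int cpu);
	#else
	static inline int check_astate(void)
	{
		/* Always return >0 so we never power save */
		return 1;
	}
	static inline void restore_astate(int cpu)
	{
	}
	#endif

	#endif /* _LINUX_PLATFORM_DATA_PASEMI_H */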

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog